1 /*
2 * Copyright (c) 2018 Intel Corporation.
3 * Copyright (c) 2018-present Facebook.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include <boost/algorithm/string/join.hpp>
19 #include <boost/container/flat_map.hpp>
20 #include <ipmid/api.hpp>
21 #include <nlohmann/json.hpp>
22 #include <phosphor-logging/log.hpp>
23 #include <sdbusplus/message/types.hpp>
24 #include <sdbusplus/timer.hpp>
25 #include <storagecommands.hpp>
26
27 #include <fstream>
28 #include <iostream>
29 #include <sstream>
30 #include <thread>
31
/**
 * Memory error type codes that are referred to by name.
 *
 * NOTE(review): the values coincide with the positions of
 * "Memory training failure" (0) and "Memory PMIC Error" (7) in the dimmErr
 * table built by parseOemUnifiedSel - presumably they index that table;
 * confirm against the users of this enum.
 */
enum class MemErrType
{
    memTrainErr = 0x00,
    memPmicErr = 0x07,
};
37
/**
 * POST event codes that are referred to by name.
 *
 * NOTE(review): the values coincide with the positions of the matching
 * strings in the postEvent table built by parseOemUnifiedSel (0 = PXE boot
 * fail, 6 = HTTP boot fail, 7 = certificate fetch fail, 10 = AMD ABL
 * failure) - presumably they index that table; confirm against the users.
 */
enum class PostEvtType
{
    pxeBootFail = 0x00,
    httpBootFail = 0x06,
    getCertFail = 0x07,
    amdAblFail = 0x0A,
};
45
/**
 * PCIe event codes that are referred to by name.
 *
 * NOTE(review): value 0 coincides with the position of "PCIe DPC Event" in
 * the pcieEvent table built by parseOemUnifiedSel - presumably it indexes
 * that table; confirm against the users of this enum.
 */
enum class PcieEvtType
{
    dpc = 0x00,
};
50
/**
 * Memory event codes that are referred to by name.
 *
 * NOTE(review): the values coincide with the positions of
 * "Memory PPR event" (0), "Memory ADDDC" (5) and "No DIMM in System" (7) in
 * the memEvent table built by parseOemUnifiedSel - presumably they index
 * that table; confirm against the users of this enum.
 */
enum class MemEvtType
{
    ppr = 0x00,
    adddc = 0x05,
    noDimm = 0x07,
};
57
58 //----------------------------------------------------------------------
59 // Platform specific functions for storing app data
60 //----------------------------------------------------------------------
61
/**
 * Render one byte as exactly two upper-case hexadecimal digits.
 *
 * @param[in] byte Value to render.
 * @return Two-character string, e.g. 0x0A -> "0A".
 */
static std::string byteToStr(uint8_t byte)
{
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string text(2, '0');
    text[0] = hexDigits[byte >> 4];   // high nibble
    text[1] = hexDigits[byte & 0x0F]; // low nibble
    return text;
}
71
/**
 * Encode a byte sequence as a contiguous upper-case hexadecimal string.
 *
 * @param[in]  bytes  Bytes to encode, two output characters per byte.
 * @param[out] hexStr Overwritten with the encoded text (empty when bytes is
 *                    empty).
 */
static void toHexStr(std::vector<uint8_t>& bytes, std::string& hexStr)
{
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(bytes.size() * 2);
    for (const uint8_t value : bytes)
    {
        encoded.push_back(hexDigits[value >> 4]);
        encoded.push_back(hexDigits[value & 0x0F]);
    }
    hexStr = encoded;
}
82
fromHexStr(const std::string hexStr,std::vector<uint8_t> & data)83 static int fromHexStr(const std::string hexStr, std::vector<uint8_t>& data)
84 {
85 for (unsigned int i = 0; i < hexStr.size(); i += 2)
86 {
87 try
88 {
89 data.push_back(static_cast<uint8_t>(
90 std::stoul(hexStr.substr(i, 2), nullptr, 16)));
91 }
92 catch (const std::invalid_argument& e)
93 {
94 phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
95 return -1;
96 }
97 catch (const std::out_of_range& e)
98 {
99 phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
100 return -1;
101 }
102 }
103 return 0;
104 }
105
106 namespace fb_oem::ipmi::sel
107 {
108
109 class SELData
110 {
111 private:
112 nlohmann::json selDataObj;
113
flush()114 void flush()
115 {
116 std::ofstream file(SEL_JSON_DATA_FILE);
117 file << selDataObj;
118 file.close();
119 }
120
init()121 void init()
122 {
123 selDataObj[KEY_SEL_VER] = 0x51;
124 selDataObj[KEY_SEL_COUNT] = 0;
125 selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF;
126 selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF;
127 selDataObj[KEY_OPER_SUPP] = 0x02;
128 /* Spec indicates that more than 64kB is free */
129 selDataObj[KEY_FREE_SPACE] = 0xFFFF;
130 }
131
writeEmptyJson()132 void writeEmptyJson()
133 {
134 selDataObj = nlohmann::json::object(); // Create an empty JSON object
135 std::ofstream outFile(SEL_JSON_DATA_FILE);
136 if (outFile)
137 {
138 // Write empty JSON object to the file
139 outFile << selDataObj.dump(4);
140 outFile.close();
141 }
142 else
143 {
144 lg2::info("Failed to create SEL JSON file with empty JSON.");
145 }
146 }
147
148 public:
SELData()149 SELData()
150 {
151 /* Get App data stored in json file */
152 std::ifstream file(SEL_JSON_DATA_FILE);
153 if (file)
154 {
155 try
156 {
157 file >> selDataObj;
158 }
159 catch (const nlohmann::json::parse_error& e)
160 {
161 lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e);
162 writeEmptyJson();
163 init(); // Initialize to default values
164 }
165 file.close();
166 }
167 else
168 {
169 lg2::info("Failed to open SEL JSON file.");
170 writeEmptyJson();
171 init();
172 }
173
174 /* Initialize SelData object if no entries. */
175 if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end())
176 {
177 init();
178 }
179 }
180
clear()181 int clear()
182 {
183 /* Clear the complete Sel Json object */
184 selDataObj.clear();
185 /* Reinitialize it with basic data */
186 init();
187 /* Save the erase time */
188 struct timespec selTime = {};
189 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
190 {
191 return -1;
192 }
193 selDataObj[KEY_ERASE_TIME] = selTime.tv_sec;
194 flush();
195 return 0;
196 }
197
getCount()198 uint32_t getCount()
199 {
200 return selDataObj[KEY_SEL_COUNT];
201 }
202
getInfo(GetSELInfoData & info)203 void getInfo(GetSELInfoData& info)
204 {
205 info.selVersion = selDataObj[KEY_SEL_VER];
206 info.entries = selDataObj[KEY_SEL_COUNT];
207 info.freeSpace = selDataObj[KEY_FREE_SPACE];
208 info.addTimeStamp = selDataObj[KEY_ADD_TIME];
209 info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME];
210 info.operationSupport = selDataObj[KEY_OPER_SUPP];
211 }
212
getEntry(uint32_t index,std::string & rawStr)213 int getEntry(uint32_t index, std::string& rawStr)
214 {
215 std::stringstream ss;
216 ss << std::hex;
217 ss << std::setw(2) << std::setfill('0') << index;
218
219 /* Check or the requested SEL Entry, if record is available */
220 if (selDataObj.find(ss.str()) == selDataObj.end())
221 {
222 return -1;
223 }
224
225 rawStr = selDataObj[ss.str()][KEY_SEL_ENTRY_RAW];
226 return 0;
227 }
228
addEntry(std::string keyStr)229 int addEntry(std::string keyStr)
230 {
231 struct timespec selTime = {};
232
233 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
234 {
235 return -1;
236 }
237
238 selDataObj[KEY_ADD_TIME] = selTime.tv_sec;
239
240 int selCount = selDataObj[KEY_SEL_COUNT];
241 selDataObj[KEY_SEL_COUNT] = ++selCount;
242
243 std::stringstream ss;
244 ss << std::hex;
245 ss << std::setw(2) << std::setfill('0') << selCount;
246
247 selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr;
248 flush();
249 return selCount;
250 }
251 };
252
253 /*
254 * A Function to parse common SEL message, a helper function
255 * for parseStdSel.
256 *
257 * Note that this function __CANNOT__ be overridden.
258 * To add board specific routine, please override parseStdSel.
259 */
260
/*
 * Domain names used when decoding ME events. logNmHealth and logNmThreshold
 * look a name up by the domain index taken from the event data.
 */
std::vector<std::string> nmDomName = {
    "Entire Platform",
    "CPU Subsystem",
    "Memory Subsystem",
    "HW Protection",
    "High Power I/O subsystem",
    "Unknown",
};
266
267 /* Default log message for unknown type */
/**
 * Fallback decoder for sensors without a dedicated parser.
 *
 * @param[in]  (unnamed) Event data; ignored.
 * @param[out] errLog    Overwritten with "Unknown".
 */
static void logDefault(uint8_t*, std::string& errLog)
{
    errLog.assign("Unknown");
}
272
/**
 * Decode a SYSTEM_EVENT record into a "cause of time change" message.
 *
 * @param[in]  data   Three event data bytes: data[0] selects the event,
 *                    data[1] carries the first/second-time marker and
 *                    data[2] the cause code.
 * @param[out] errLog Overwritten with the decoded text, or "Unknown" when
 *                    data[0] is not 0xE5.
 */
static void logSysEvent(uint8_t* data, std::string& errLog)
{
    /* Only event 0xE5 is understood. */
    if (data[0] != 0xE5)
    {
        errLog = "Unknown";
        return;
    }

    const char* cause = "Unknown";
    switch (data[2])
    {
        case 0x00:
            cause = "NTP";
            break;
        case 0x01:
            cause = "Host RTL";
            break;
        case 0x02:
            cause = "Set SEL time cmd";
            break;
        case 0x03:
            cause = "Set SEL time UTC offset cmd";
            break;
        default:
            break;
    }

    errLog = "Cause of Time change - ";
    errLog += cause;

    /* Any other marker value adds nothing. */
    switch (data[1])
    {
        case 0x00:
            errLog += " - First Time";
            break;
        case 0x80:
            errLog += " - Second Time";
            break;
        default:
            break;
    }
}
306
/**
 * Decode a THERM_THRESH_EVT record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "Limit Exceeded" when data[0] is 0x1, else "Unknown".
 */
static void logThermalEvent(uint8_t* data, std::string& errLog)
{
    errLog = (data[0] == 0x1) ? "Limit Exceeded" : "Unknown";
}
318
/**
 * Decode a CRITICAL_IRQ record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog Overwritten with the interrupt description, or
 *                    "Unknown" for unrecognised values.
 */
static void logCritIrq(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "NMI / Diagnostic Interrupt";
            break;
        case 0x03:
            errLog = "Software NMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }

    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
}
336
logPostErr(uint8_t * data,std::string & errLog)337 static void logPostErr(uint8_t* data, std::string& errLog)
338 {
339 if ((data[0] & 0x0F) == 0x0)
340 {
341 errLog = "System Firmware Error";
342 }
343 else
344 {
345 errLog = "Unknown";
346 }
347
348 if (((data[0] >> 6) & 0x03) == 0x3)
349 {
350 // TODO: Need to implement IPMI spec based Post Code
351 errLog += ", IPMI Post Code";
352 }
353 else if (((data[0] >> 6) & 0x03) == 0x2)
354 {
355 errLog += ", OEM Post Code 0x" + byteToStr(data[2]) +
356 byteToStr(data[1]);
357
358 switch ((data[2] << 8) | data[1])
359 {
360 case 0xA105:
361 errLog += ", BMC Failed (No Response)";
362 break;
363 case 0xA106:
364 errLog += ", BMC Failed (Self Test Fail)";
365 break;
366 case 0xA10A:
367 errLog += ", System Firmware Corruption Detected";
368 break;
369 case 0xA10B:
370 errLog += ", TPM Self-Test FAIL Detected";
371 }
372 }
373 }
374
/**
 * Decode a MACHINE_CHK_ERR record.
 *
 * The low nibble of data[0] selects the error class (0x0B or 0x0C) and bits
 * [6:5] of data[1] select the description within that class. The message is
 * followed by data[1] printed in decimal as the bank number, and by the CPU
 * (data[2] bits [7:5]) and core (data[2] bits [4:0]) numbers.
 *
 * @param[in]  data   Three event data bytes.
 * @param[out] errLog Overwritten with the decoded text.
 */
static void logMchChkErr(uint8_t* data, std::string& errLog)
{
    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
    static const char* const classB[] = {
        "Uncorrected Recoverable Error", "Uncorrected Thread Fatal Error",
        "Uncorrected System Fatal Error", "Unknown"};
    static const char* const classC[] = {"Correctable Error", "Deferred Error",
                                         "Unknown", "Unknown"};

    const uint8_t errClass = data[0] & 0x0F;
    const uint8_t severity = (data[1] >> 5) & 0x03;

    if (errClass == 0x0B)
    {
        errLog = classB[severity];
    }
    else if (errClass == 0x0C)
    {
        errLog = classC[severity];
    }
    else
    {
        errLog = "Unknown";
    }

    errLog += ", Machine Check bank Number " + std::to_string(data[1]) +
              ", CPU " + std::to_string(data[2] >> 5) + ", Core " +
              std::to_string(data[2] & 0x1F);
}
417
/**
 * Decode a PCIE_ERR record.
 *
 * The low nibble of data[0] selects the event. Error events are reported
 * with the bus (data[2]), device (data[1] bits [7:3]) and function (data[1]
 * bits [2:0]); codes 0xD/0xE/0xF instead report data[1] and data[2] combined
 * as a 16-bit vendor ID, device ID or downstream error ID. All numbers are
 * printed as zero-padded upper-case hex.
 *
 * @param[in]  data   Three event data bytes.
 * @param[out] errLog Overwritten with the decoded text, or "Unknown".
 */
static void logPcieErr(uint8_t* data, std::string& errLog)
{
    /* Format a value as upper-case hex, zero padded to the given width. */
    const auto hex = [](unsigned int value, int width) {
        std::stringstream out;
        out << std::hex << std::uppercase << std::setfill('0')
            << std::setw(width) << value;
        return out.str();
    };

    const std::string location =
        " (Bus " + hex(data[2], 2) + " / Dev " + hex(data[1] >> 3, 2) +
        " / Fun " + hex(data[1] & 0x7, 2) + ")";
    const unsigned int wordId = (static_cast<unsigned int>(data[1]) << 8) |
                                static_cast<unsigned int>(data[2]);

    switch (data[0] & 0xF)
    {
        case 0x4:
            errLog = "PCI PERR" + location;
            break;
        case 0x5:
            errLog = "PCI SERR" + location;
            break;
        case 0x7:
            errLog = "Correctable" + location;
            break;
        case 0x8:
            errLog = "Uncorrectable" + location;
            break;
        case 0xA:
            errLog = "Bus Fatal" + location;
            break;
        case 0xD:
            errLog = "Vendor ID: 0x" + hex(wordId, 4);
            break;
        case 0xE:
            errLog = "Device ID: 0x" + hex(wordId, 4);
            break;
        case 0xF:
            errLog = "Error ID from downstream: 0x" + hex(data[1], 2) +
                     hex(data[2], 2);
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
467
logIioErr(uint8_t * data,std::string & errLog)468 static void logIioErr(uint8_t* data, std::string& errLog)
469 {
470 std::vector<std::string> tmpStr = {
471 "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data",
472 "Misc", " DMA", "ITC", "OTC", "CI"};
473
474 if ((data[0] & 0xF) == 0)
475 {
476 errLog += "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" +
477 byteToStr(data[1]) + " - ";
478
479 if ((data[2] & 0xF) <= 0x9)
480 {
481 errLog += tmpStr[(data[2] & 0xF)];
482 }
483 else
484 {
485 errLog += "Reserved";
486 }
487 }
488 else
489 {
490 errLog = "Unknown";
491 }
492 }
493
logMemErr(uint8_t * dataPtr,std::string & errLog)494 [[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog)
495 {
496 uint8_t snrType = dataPtr[0];
497 uint8_t snrNum = dataPtr[1];
498 uint8_t* data = &(dataPtr[3]);
499
500 /* TODO: add pal_add_cri_sel */
501
502 if (snrNum == memoryEccError)
503 {
504 /* SEL from MEMORY_ECC_ERR Sensor */
505 switch (data[0] & 0x0F)
506 {
507 case 0x0:
508 if (snrType == 0x0C)
509 {
510 errLog = "Correctable";
511 }
512 else if (snrType == 0x10)
513 {
514 errLog = "Correctable ECC error Logging Disabled";
515 }
516 break;
517 case 0x1:
518 errLog = "Uncorrectable";
519 break;
520 case 0x5:
521 errLog = "Correctable ECC error Logging Limit Disabled";
522 break;
523 default:
524 errLog = "Unknown";
525 }
526 }
527 else if (snrNum == memoryErrLogDIS)
528 {
529 // SEL from MEMORY_ERR_LOG_DIS Sensor
530 if ((data[0] & 0x0F) == 0x0)
531 {
532 errLog = "Correctable Memory Error Logging Disabled";
533 }
534 else
535 {
536 errLog = "Unknown";
537 }
538 }
539 else
540 {
541 errLog = "Unknown";
542 return;
543 }
544
545 /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */
546
547 errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " +
548 std::to_string(data[1] & 0x03);
549
550 /* DIMM number (data[2]):
551 * Bit[7:5]: Socket number (Range: 0-7)
552 * Bit[4:3]: Channel number (Range: 0-3)
553 * Bit[2:0]: DIMM number (Range: 0-7)
554 */
555
556 /* TODO: Verify these bits */
557 std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5);
558 std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3);
559 std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7);
560
561 switch ((data[1] & 0xC) >> 2)
562 {
563 case 0x0:
564 {
565 /* All Info Valid */
566 [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2;
567 [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3;
568
569 /* TODO: If critical SEL logging is available, do it */
570 if (snrType == 0x0C)
571 {
572 if ((data[0] & 0x0F) == 0x0)
573 {
574 /* TODO: add_cri_sel */
575 /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1"
576 */
577 }
578 else if ((data[0] & 0x0F) == 0x1)
579 {
580 /* TODO: add_cri_sel */
581 /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1"
582 */
583 }
584 }
585 /* Continue to parse the error into a string. All Info Valid
586 */
587 errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")";
588 }
589
590 break;
591 case 0x1:
592
593 /* DIMM info not valid */
594 errLog += " (" + cpuStr + ", " + chStr + ")";
595 break;
596 case 0x2:
597
598 /* CHN info not valid */
599 errLog += " (" + cpuStr + ", " + dimmStr + ")";
600 break;
601 case 0x3:
602
603 /* CPU info not valid */
604 errLog += " (" + chStr + ", " + dimmStr + ")";
605 break;
606 }
607 }
608
/**
 * Decode a PWR_ERR record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog Overwritten with the failing power-OK signal name, or
 *                    "Unknown".
 */
static void logPwrErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x1:
            /* Also try logging to Critical log file, if available */
            /* "SYS_PWROK failure,FRU:1" */
            errLog = "SYS_PWROK failure";
            break;
        case 0x2:
            /* Also try logging to Critical log file, if available */
            /* "PCH_PWROK failure,FRU:1" */
            errLog = "PCH_PWROK failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
628
/**
 * Decode a CATERR_A / CATERR_B record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "IERR/CATERR" for 0x0, "MCERR/CATERR" for 0xB,
 *                    otherwise "Unknown".
 */
static void logCatErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x0:
            /* Also try logging to Critical log file, if available */
            /* "IERR,FRU:1 */
            errLog = "IERR/CATERR";
            break;
        case 0xB:
            /* Also try logging to Critical log file, if available */
            /* "MCERR,FRU:1 */
            errLog = "MCERR/CATERR";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
648
/**
 * Decode a CPU_DIMM_HOT record.
 *
 * @param[in]  data   Three event data bytes.
 * @param[out] errLog "SOC MEMHOT" when the bytes are exactly 01 FF FF,
 *                    otherwise "Unknown".
 */
static void logDimmHot(uint8_t* data, std::string& errLog)
{
    const bool memHot = (data[0] == 0x01) && (data[1] == 0xFF) &&
                        (data[2] == 0xFF);

    /* Also try logging to Critical log file, if available */
    /* ""CPU_DIMM_HOT %s,FRU:1" */
    errLog = memHot ? "SOC MEMHOT" : "Unknown";
}
662
/**
 * Decode a SOFTWARE_NMI record.
 *
 * @param[in]  data   Three event data bytes.
 * @param[out] errLog "Software NMI" when the bytes are exactly 03 FF FF,
 *                    otherwise "Unknown SW NMI".
 */
static void logSwNMI(uint8_t* data, std::string& errLog)
{
    const bool swNmi = (data[0] == 0x03) && (data[1] == 0xFF) &&
                       (data[2] == 0xFF);

    errLog = swNmi ? "Software NMI" : "Unknown SW NMI";
}
674
/**
 * Decode a CPU0/CPU1_THERM_STATUS record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog Overwritten with the status name for values 0-2, or
 *                    "Unknown" for anything else.
 */
static void logCPUThermalSts(uint8_t* data, std::string& errLog)
{
    static const char* const statusNames[] = {
        "CPU Critical Temperature", "PROCHOT#", "TCC Activation"};
    constexpr uint8_t knownCount = sizeof(statusNames) / sizeof(statusNames[0]);

    errLog = (data[0] < knownCount) ? statusNames[data[0]] : "Unknown";
}
692
/**
 * Decode an ME_POWER_STATE record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "RUNNING" for 0, "POWER_OFF" for 2, otherwise
 *                    "Unknown[<value in decimal>]".
 */
static void logMEPwrState(uint8_t* data, std::string& errLog)
{
    const uint8_t state = data[0];

    if (state == 0)
    {
        errLog = "RUNNING";
    }
    else if (state == 2)
    {
        errLog = "POWER_OFF";
    }
    else
    {
        errLog = "Unknown[" + std::to_string(state) + "]";
    }
}
708
/**
 * Decode an SPS_FW_HEALTH record.
 *
 * The low nibble of data[0] selects the event class: 0x0 reports the health
 * event named by data[1] (values 0x00-0x13), 0x1 reports an SMBus link
 * failure, anything else is "Unknown".
 *
 * @param[in]  data   Event data; data[0] and data[1] are examined.
 * @param[out] errLog Overwritten with the decoded text.
 */
static void logSPSFwHealth(uint8_t* data, std::string& errLog)
{
    /* Health event names, indexed by data[1]. */
    static const char* const healthEvents[] = {
        "Recovery GPIO forced",
        "Image execution failed",
        "Flash erase error",
        "Flash state information",
        "Internal error",
        "BMC did not respond",
        "Direct Flash update",
        "Manufacturing error",
        "Automatic Restore to Factory Presets",
        "Firmware Exception",
        "Flash Wear-Out Protection Warning",
        "Unknown",
        "Unknown",
        "DMI interface error",
        "MCTP interface error",
        "Auto-configuration finished",
        "Unsupported Segment Defined Feature",
        "Unknown",
        "CPU Debug Capability Disabled",
        "UMA operation error"};

    switch (data[0] & 0x0F)
    {
        case 0x00:
            errLog = (data[1] < 0x14) ? healthEvents[data[1]] : "Unknown";
            break;
        case 0x01:
            errLog = "SMBus link failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
753
/**
 * Decode an NM_EXCEPTION_A record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "Policy Correction Time Exceeded" when data[0] is 0xA8,
 *                    otherwise "Unknown".
 */
static void logNmExcA(uint8_t* data, std::string& errLog)
{
    /*NM4.0 #550710, Revision 1.95, and turn to p.155*/
    errLog = (data[0] == 0xA8) ? "Policy Correction Time Exceeded"
                               : "Unknown";
}
766
/**
 * Decode a PCH_THERM_THRESHOLD record.
 *
 * The low nibble of data[0] names the threshold event (values 0-11). The
 * message is followed by data[1] as the current value and data[2] as the
 * threshold value, both printed in decimal with a " C" suffix.
 *
 * @param[in]  data   Three event data bytes.
 * @param[out] errLog Overwritten with the decoded text.
 */
static void logPCHThermal(uint8_t* data, std::string& errLog)
{
    /* Threshold event names, indexed by the low nibble of data[0]. */
    static const char* const thresholdEvents[] = {
        "Lower Non-critical",
        "Unknown",
        "Lower Critical",
        "Unknown",
        "Lower Non-recoverable",
        "Unknown",
        "Unknown",
        "Upper Non-critical",
        "Unknown",
        "Upper Critical",
        "Unknown",
        "Upper Non-recoverable"};

    const uint8_t eventIdx = data[0] & 0x0f;

    errLog = (eventIdx < 12) ? thresholdEvents[eventIdx] : "Unknown";
    errLog += ", curr_val: " + std::to_string(data[1]) +
              " C, thresh_val: " + std::to_string(data[2]) + " C";
}
795
logNmHealth(uint8_t * data,std::string & errLog)796 static void logNmHealth(uint8_t* data, std::string& errLog)
797 {
798 std::vector<std::string> nmErrType = {
799 "Unknown",
800 "Unknown",
801 "Unknown",
802 "Unknown",
803 "Unknown",
804 "Unknown",
805 "Unknown",
806 "Extended Telemetry Device Reading Failure",
807 "Outlet Temperature Reading Failure",
808 "Volumetric Airflow Reading Failure",
809 "Policy Misconfiguration",
810 "Power Sensor Reading Failure",
811 "Inlet Temperature Reading Failure",
812 "Host Communication Error",
813 "Real-time Clock Synchronization Failure",
814 "Platform Shutdown Initiated by Intel NM Policy",
815 "Unknown"};
816 uint8_t nmTypeIdx = (data[0] & 0xf);
817 uint8_t domIdx = (data[1] & 0xf);
818 uint8_t errIdx = ((data[1] >> 4) & 0xf);
819
820 if (nmTypeIdx == 2)
821 {
822 errLog = "SensorIntelNM";
823 }
824 else
825 {
826 errLog = "Unknown";
827 }
828
829 errLog += ", Domain:" + nmDomName[domIdx] + ", ErrType:" +
830 nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]);
831 }
832
logNmCap(uint8_t * data,std::string & errLog)833 static void logNmCap(uint8_t* data, std::string& errLog)
834 {
835 const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"};
836 if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr
837 // limit and the others are reserved
838 {
839 errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] +
840 ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] +
841 ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)];
842 }
843 else
844 {
845 errLog = "Unknown";
846 }
847 }
848
logNmThreshold(uint8_t * data,std::string & errLog)849 static void logNmThreshold(uint8_t* data, std::string& errLog)
850 {
851 uint8_t thresNum = (data[0] & 0x3);
852 uint8_t domIdx = (data[1] & 0xf);
853 uint8_t polId = data[2];
854 uint8_t polEvtIdx = BIT(data[0], 3);
855 const std::vector<std::string> polEvtStr = {
856 "Threshold Exceeded", "Policy Correction Time Exceeded"};
857
858 errLog = "Threshold Number:" + std::to_string(thresNum) + "-" +
859 polEvtStr[polEvtIdx] + ", Domain:" + nmDomName[domIdx] +
860 ", PolicyID:0x" + byteToStr(polId);
861 }
862
/**
 * Decode a PWR_THRESH_EVT record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "Limit Not Exceeded" for 0x00, "Limit Exceeded" for
 *                    0x01, otherwise "Unknown".
 */
static void logPwrThreshold(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "Limit Not Exceeded";
            break;
        case 0x01:
            errLog = "Limit Exceeded";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
878
/**
 * Decode an MSMI record.
 *
 * @param[in]  data   Event data; only data[0] is examined.
 * @param[out] errLog "IERR/MSMI" for 0x0, "MCERR/MSMI" for 0x0B, otherwise
 *                    "Unknown".
 */
static void logMSMI(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "IERR/MSMI";
            break;
        case 0x0B:
            errLog = "MCERR/MSMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
894
/**
 * Decode an HPR_WARNING record.
 *
 * Only records with data[2] == 0x01 are understood. For those, data[1] is a
 * duration in minutes, with 0xFF reported as "Infinite Time".
 *
 * @param[in]  data   Three event data bytes; data[0] is not examined.
 * @param[out] errLog Overwritten with the decoded text, or "Unknown".
 */
static void logHprWarn(uint8_t* data, std::string& errLog)
{
    if (data[2] != 0x01)
    {
        errLog = "Unknown";
        return;
    }

    if (data[1] == 0xFF)
    {
        errLog = "Infinite Time";
        return;
    }

    errLog = std::to_string(data[1]) + " minutes";
}
913
914 static const boost::container::flat_map<
915 uint8_t,
916 std::pair<std::string, std::function<void(uint8_t*, std::string&)>>>
917 sensorNameTable = {
918 {0xE9, {"SYSTEM_EVENT", logSysEvent}},
919 {0x7D, {"THERM_THRESH_EVT", logThermalEvent}},
920 {0xAA, {"BUTTON", logDefault}},
921 {0xAB, {"POWER_STATE", logDefault}},
922 {0xEA, {"CRITICAL_IRQ", logCritIrq}},
923 {0x2B, {"POST_ERROR", logPostErr}},
924 {0x40, {"MACHINE_CHK_ERR", logMchChkErr}},
925 {0x41, {"PCIE_ERR", logPcieErr}},
926 {0x43, {"IIO_ERR", logIioErr}},
927 {0X63, {"MEMORY_ECC_ERR", logDefault}},
928 {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}},
929 {0X51, {"PROCHOT_EXT", logDefault}},
930 {0X56, {"PWR_ERR", logPwrErr}},
931 {0xE6, {"CATERR_A", logCatErr}},
932 {0xEB, {"CATERR_B", logCatErr}},
933 {0xB3, {"CPU_DIMM_HOT", logDimmHot}},
934 {0x90, {"SOFTWARE_NMI", logSwNMI}},
935 {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}},
936 {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}},
937 {0x16, {"ME_POWER_STATE", logMEPwrState}},
938 {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}},
939 {0x18, {"NM_EXCEPTION_A", logNmExcA}},
940 {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}},
941 {0x19, {"NM_HEALTH", logNmHealth}},
942 {0x1A, {"NM_CAPABILITIES", logNmCap}},
943 {0x1B, {"NM_THRESHOLD", logNmThreshold}},
944 {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}},
945 {0xE7, {"MSMI", logMSMI}},
946 {0xC5, {"HPR_WARNING", logHprWarn}}};
947
parseSelHelper(StdSELEntry * data,std::string & errStr)948 static void parseSelHelper(StdSELEntry* data, std::string& errStr)
949 {
950 /* Check if sensor type is OS_BOOT (0x1f) */
951 if (data->sensorType == 0x1F)
952 {
953 /* OS_BOOT used by OS */
954 switch (data->eventData1 & 0xF)
955 {
956 case 0x07:
957 errStr = "Base OS/Hypervisor Installation started";
958 break;
959 case 0x08:
960 errStr = "Base OS/Hypervisor Installation completed";
961 break;
962 case 0x09:
963 errStr = "Base OS/Hypervisor Installation aborted";
964 break;
965 case 0x0A:
966 errStr = "Base OS/Hypervisor Installation failed";
967 break;
968 default:
969 errStr = "Unknown";
970 }
971 return;
972 }
973
974 auto findSensorName = sensorNameTable.find(data->sensorNum);
975 if (findSensorName == sensorNameTable.end())
976 {
977 errStr = "Unknown";
978 return;
979 }
980 else
981 {
982 switch (data->sensorNum)
983 {
984 /* logMemErr function needs data from sensor type */
985 case memoryEccError:
986 case memoryErrLogDIS:
987 findSensorName->second.second(&(data->sensorType), errStr);
988 break;
989 /* Other sensor function needs only event data for parsing */
990 default:
991 findSensorName->second.second(&(data->eventData1), errStr);
992 }
993 }
994
995 if (((data->eventData3 & 0x80) >> 7) == 0)
996 {
997 errStr += " Assertion";
998 }
999 else
1000 {
1001 errStr += " Deassertion";
1002 }
1003 }
1004
parseDimmPhyloc(StdSELEntry * data,std::string & errStr)1005 static void parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
1006 {
1007 // Log when " All info available"
1008 uint8_t chNum = (data->eventData3 & 0x18) >> 3;
1009 uint8_t dimmNum = data->eventData3 & 0x7;
1010 uint8_t rankNum = data->eventData2 & 0x03;
1011 uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;
1012
1013 if (chNum == 3 && dimmNum == 0)
1014 {
1015 errStr += " Node: " + std::to_string(nodeNum) + "," +
1016 " Card: " + std::to_string(chNum) + "," +
1017 " Module: " + std::to_string(dimmNum) + "," +
1018 " Rank Number: " + std::to_string(rankNum) + "," +
1019 " Location: DIMM A0";
1020 }
1021 else if (chNum == 2 && dimmNum == 0)
1022 {
1023 errStr += " Node: " + std::to_string(nodeNum) + "," +
1024 " Card: " + std::to_string(chNum) + "," +
1025 " Module: " + std::to_string(dimmNum) + "," +
1026 " Rank Number: " + std::to_string(rankNum) + "," +
1027 " Location: DIMM B0";
1028 }
1029 else if (chNum == 4 && dimmNum == 0)
1030 {
1031 errStr += " Node: " + std::to_string(nodeNum) + "," +
1032 " Card: " + std::to_string(chNum) + "," +
1033 " Module: " + std::to_string(dimmNum) + "," +
1034 " Rank Number: " + std::to_string(rankNum) + "," +
1035 " Location: DIMM C0 ";
1036 }
1037 else if (chNum == 5 && dimmNum == 0)
1038 {
1039 errStr += " Node: " + std::to_string(nodeNum) + "," +
1040 " Card: " + std::to_string(chNum) + "," +
1041 " Module: " + std::to_string(dimmNum) + "," +
1042 " Rank Number: " + std::to_string(rankNum) + "," +
1043 " Location: DIMM D0";
1044 }
1045 else
1046 {
1047 errStr += " Node: " + std::to_string(nodeNum) + "," +
1048 " Card: " + std::to_string(chNum) + "," +
1049 " Module: " + std::to_string(dimmNum) + "," +
1050 " Rank Number: " + std::to_string(rankNum) + "," +
1051 " Location: DIMM Unknown";
1052 }
1053 }
1054
parseStdSel(StdSELEntry * data,std::string & errStr)1055 static void parseStdSel(StdSELEntry* data, std::string& errStr)
1056 {
1057 std::stringstream tmpStream;
1058 tmpStream << std::hex << std::uppercase;
1059
1060 /* TODO: add pal_add_cri_sel */
1061 switch (data->sensorNum)
1062 {
1063 case memoryEccError:
1064 switch (data->eventData1 & 0x0F)
1065 {
1066 case 0x00:
1067 errStr = "Correctable";
1068 tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1069 << data->eventData3 << " ECC err";
1070 parseDimmPhyloc(data, errStr);
1071 break;
1072 case 0x01:
1073 errStr = "Uncorrectable";
1074 tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1075 << data->eventData3 << " UECC err";
1076 parseDimmPhyloc(data, errStr);
1077 break;
1078 case 0x02:
1079 errStr = "Parity";
1080 break;
1081 case 0x05:
1082 errStr = "Correctable ECC error Logging Limit Reached";
1083 break;
1084 default:
1085 errStr = "Unknown";
1086 }
1087 break;
1088 case memoryErrLogDIS:
1089 if ((data->eventData1 & 0x0F) == 0)
1090 {
1091 errStr = "Correctable Memory Error Logging Disabled";
1092 }
1093 else
1094 {
1095 errStr = "Unknown";
1096 }
1097 break;
1098 default:
1099 parseSelHelper(data, errStr);
1100 return;
1101 }
1102
1103 errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
1104 errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);
1105
1106 switch ((data->eventData2 & 0x0C) >> 2)
1107 {
1108 case 0x00:
1109 // Ignore when " All info available"
1110 break;
1111 case 0x01:
1112 errStr += " DIMM info not valid";
1113 break;
1114 case 0x02:
1115 errStr += " CHN info not valid";
1116 break;
1117 case 0x03:
1118 errStr += " CPU info not valid";
1119 break;
1120 default:
1121 errStr += " Unknown";
1122 }
1123
1124 if (((data->eventType & 0x80) >> 7) == 0)
1125 {
1126 errStr += " Assertion";
1127 }
1128 else
1129 {
1130 errStr += " Deassertion";
1131 }
1132
1133 return;
1134 }
1135
parseOemSel(TsOemSELEntry * data,std::string & errStr)1136 static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
1137 {
1138 std::stringstream tmpStream;
1139 tmpStream << std::hex << std::uppercase << std::setfill('0');
1140
1141 switch (data->recordType)
1142 {
1143 case 0xC0:
1144 tmpStream << "VID:0x" << std::setw(2) << (int)data->oemData[1]
1145 << std::setw(2) << (int)data->oemData[0] << " DID:0x"
1146 << std::setw(2) << (int)data->oemData[3] << std::setw(2)
1147 << (int)data->oemData[2] << " Slot:0x" << std::setw(2)
1148 << (int)data->oemData[4] << " Error ID:0x" << std::setw(2)
1149 << (int)data->oemData[5];
1150 break;
1151 case 0xC2:
1152 tmpStream << "Extra info:0x" << std::setw(2)
1153 << (int)data->oemData[1] << " MSCOD:0x" << std::setw(2)
1154 << (int)data->oemData[3] << std::setw(2)
1155 << (int)data->oemData[2] << " MCACOD:0x" << std::setw(2)
1156 << (int)data->oemData[5] << std::setw(2)
1157 << (int)data->oemData[4];
1158 break;
1159 case 0xC3:
1160 int bank = (data->oemData[1] & 0xf0) >> 4;
1161 int col = ((data->oemData[1] & 0x0f) << 8) | data->oemData[2];
1162
1163 tmpStream << "Fail Device:0x" << std::setw(2)
1164 << (int)data->oemData[0] << " Bank:0x" << std::setw(2)
1165 << bank << " Column:0x" << std::setw(2) << col
1166 << " Failed Row:0x" << std::setw(2)
1167 << (int)data->oemData[3] << std::setw(2)
1168 << (int)data->oemData[4] << std::setw(2)
1169 << (int)data->oemData[5];
1170 }
1171
1172 errStr = tmpStream.str();
1173
1174 return;
1175 }
1176
/**
 * Build the "DIMM Slot Location" text for a socket/channel/slot triple.
 *
 * Bits [5:4] of socket are the sled number and bits [3:0] the socket number.
 * When both channel and slot are 0xFF the location is reported as unknown.
 * Otherwise only the low nibble of each is used, and the DIMM name is a
 * letter picked by (socket * 2 + slot) followed by the channel number, or
 * "NA" when that index has no letter.
 *
 * @param[in] socket  Sled and socket number packed into one byte.
 * @param[in] channel Channel number, or 0xFF.
 * @param[in] slot    Slot number, or 0xFF.
 * @return The formatted location string.
 */
static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
                                   uint8_t slot)
{
    const uint8_t sled = (socket >> 4) & 0x3;
    socket &= 0xf;

    const bool positionUnknown = (channel == 0xFF) && (slot == 0xFF);
    if (positionUnknown)
    {
        return std::format(
            "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
            ", Slot unknown, DIMM unknown",
            sled, socket);
    }

    channel &= 0xf;
    slot &= 0xf;

    static constexpr char label[] = {'A', 'C', 'B', 'D'};
    const uint8_t idx = socket * 2 + slot;

    std::string dimmName = "NA";
    if (idx < sizeof(label))
    {
        dimmName = label[idx] + std::to_string(channel);
    }

    return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
                       ", Channel {:02}, Slot {:02} DIMM {}",
                       sled, socket, channel, slot, dimmName);
}
1204
/* Decode a Facebook unified SEL record into a readable description.
 *
 * data   - record whose oemData bytes are decoded; byte 0 is the "general
 *          info" byte and its low nibble selects the error type.
 * errStr - receives the decoded text (always overwritten).
 *
 * Every description table ends with a catch-all entry that is used for any
 * code beyond the known ones. Unrecognised error types are reported together
 * with a hex dump of the 13 OEM data bytes.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    // The tables are immutable, so keep them as constant data instead of
    // rebuilding heap-allocated vectors of strings on every call.
    static constexpr const char* dimmErr[] = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static constexpr const char* postEvent[] = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static constexpr const char* certErr[] = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static constexpr const char* pcieEvent[] = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static constexpr const char* memEvent[] = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static constexpr const char* memPprTime[] = {"Boot time", "Autonomous",
                                                 "Run time", "Reserved"};
    static constexpr const char* memPpr[] = {"PPR success", "PPR fail",
                                             "PPR request", "Reserved"};
    static constexpr const char* memAdddc[] = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static constexpr const char* pprEvent[] = {"PPR disable", "Soft PPR",
                                               "Hard PPR", "Reserved"};

    // Look up a description, falling back to the table's last entry for any
    // code past the end of the table.
    const auto describe = [](const auto& table, size_t code) {
        return table[std::min(code, std::size(table) - 1)];
    };

    const uint8_t* ptr = data->oemData;
    const int genInfo = ptr[0];
    const int errType = genInfo & 0x0f;
    // Event code used by the POST, PCIe event and PPR records; the memory
    // records carry their event code in byte 9 instead.
    const uint8_t eventType = ptr[5] & 0xf;

    std::string msg;

    switch (errType)
    {
        case unifiedPcieErr:
            msg = std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;

        case unifiedMemErr:
        {
            const uint8_t memErrCode = ptr[9] & 0xf;
            msg = std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                describe(dimmErr, memErrCode));

            const auto memErr = static_cast<MemErrType>(memErrCode);
            if (memErr == MemErrType::memTrainErr ||
                memErr == MemErrType::memPmicErr)
            {
                // Bit 7 of byte 9 selects the wider, two byte minor code.
                const bool amd = ptr[9] & 0x80;
                msg += std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        }

        case unifiedIioErr:
            msg = std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;

        case unifiedPostEvt:
            msg = std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                describe(postEvent, eventType));

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    // Low nibble 4 or 6 names the failing IP version; any
                    // other value is shown as the raw byte.
                    const uint8_t failType = ptr[10] & 0xf;
                    msg += std::format(
                        ", Fail Type: {}, Error Code: 0x{:02X}",
                        (failType == 4 || failType == 6)
                            ? std::format("IPv{} fail", failType)
                            : std::format("0x{:02X}", ptr[10]),
                        ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    msg += std::format(", Failure Detail: {}",
                                       describe(certErr, ptr[9]));
                    break;
                case PostEvtType::amdAblFail:
                    msg += std::format(", ABL Error Code: 0x{:04X}",
                                       (ptr[12] << 8) | ptr[11]);
                    break;
                default:
                    // Remaining POST events carry no extra detail.
                    break;
            }
            break;

        case unifiedPcieEvt:
            msg = std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo, describe(pcieEvent, eventType));

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                msg += std::format(
                    ", Status: 0x{:04X}, Source ID: 0x{:04X}",
                    (ptr[8] << 8) | ptr[7], (ptr[10] << 8) | ptr[9]);
            }
            break;

        case unifiedMemEvt:
        {
            const uint8_t memEvtCode = ptr[9] & 0xf;
            const auto memEvt = static_cast<MemEvtType>(memEvtCode);

            msg = std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo);
            // "No DIMM in System" has no location to report.
            if (memEvt != MemEvtType::noDimm)
            {
                msg += std::format(", {}",
                                   dimmLocationStr(ptr[5], ptr[6], ptr[7]));
            }
            msg += ", DIMM Failure Event: ";

            switch (memEvt)
            {
                case MemEvtType::ppr:
                    msg += std::format("{} {}",
                                       memPprTime[(ptr[10] >> 2) & 0x3],
                                       memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    msg += std::format("{} {}",
                                       describe(memEvent, memEvtCode),
                                       describe(memAdddc, ptr[11] & 0xf));
                    break;
                default:
                    msg += std::format("{}", describe(memEvent, memEvtCode));
                    break;
            }
            break;
        }

        case unifiedBootGuard:
            msg = std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;

        case unifiedPprEvt:
            msg = std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo, describe(pprEvent, eventType), ptr[6], ptr[7],
                ptr[8], ptr[9], ptr[10], ptr[11], ptr[12]);
            break;

        default:
        {
            // Unknown error type: dump all 13 OEM data bytes.
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            msg = std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                              errType, oemDataStr);
            break;
        }
    }

    errStr = std::move(msg);
}
1411
/* Turn a raw SEL record into the log message filed for it.
 *
 * fruId   - FRU number placed at the start of the message.
 * reqData - raw record bytes; byte 2 holds the record type, which selects
 *           how the remaining bytes are interpreted.
 * msgLog  - receives the finished message (always overwritten).
 *
 * NOTE(review): the record is viewed through the SEL entry structs without a
 * length check here - presumably callers pass a full-size record; confirm.
 */
static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
                         std::string& msgLog)
{
    const int recType = reqData[2];
    // Record type as two upper-case hex digits, shared by every branch.
    const std::string recTypeHex = std::format("{:02X}", recType);

    std::string category;
    std::string details;

    msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: ";

    if (recType == stdErrType)
    {
        /* Standard SEL record */
        auto* entry = reinterpret_cast<StdSELEntry*>(reqData.data());
        category = stdErr;

        std::string sensorName;
        if (entry->sensorType == 0x1F)
        {
            sensorName = "OS";
        }
        else
        {
            const auto match = sensorNameTable.find(entry->sensorNum);
            if (match != sensorNameTable.end())
            {
                sensorName = match->second.first;
            }
            else
            {
                sensorName = "Unknown";
            }
        }

        parseStdSel(entry, details);

        // The three event data bytes are laid out back to back starting at
        // eventData1.
        uint8_t* evtBegin = &(entry->eventData1);
        std::vector<uint8_t> evtBytes(evtBegin, evtBegin + 3);
        std::string evtHex;
        toHexStr(evtBytes, evtHex);

        const std::string sensorNumHex =
            std::format("{:02X}", static_cast<int>(entry->sensorNum));

        msgLog += category + " (0x" + recTypeHex +
                  "), Sensor: " + sensorName + " (0x" + sensorNumHex +
                  "), Event Data: (" + evtHex + ") " + details;
        return;
    }

    if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax))
    {
        /* timestamped OEM SEL records */
        auto* entry = reinterpret_cast<TsOemSELEntry*>(reqData.data());

        uint8_t* mfrBegin = entry->mfrId;
        std::vector<uint8_t> mfrBytes(mfrBegin, mfrBegin + 3);
        std::string mfrHex;
        toHexStr(mfrBytes, mfrHex);

        uint8_t* oemBegin = entry->oemData;
        std::vector<uint8_t> oemBytes(oemBegin, oemBegin + 6);
        std::string oemHex;
        toHexStr(oemBytes, oemHex);

        category = oemTSErr;
        parseOemSel(entry, details);

        msgLog += category + " (0x" + recTypeHex + "), MFG ID: " + mfrHex +
                  ", OEM Data: (" + oemHex + ") " + details;
        return;
    }

    if (recType == fbUniErrType)
    {
        /* Facebook unified SEL record */
        auto* entry = reinterpret_cast<NtsOemSELEntry*>(reqData.data());
        category = fbUniSELErr;
        parseOemUnifiedSel(entry, details);
        msgLog += category + " (0x" + recTypeHex + "), " + details;
        return;
    }

    if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax))
    {
        /* Non timestamped OEM SEL records */
        auto* entry = reinterpret_cast<NtsOemSELEntry*>(reqData.data());
        category = oemNTSErr;

        uint8_t* oemBegin = entry->oemData;
        std::vector<uint8_t> oemBytes(oemBegin, oemBegin + 13);
        std::string oemHex;
        toHexStr(oemBytes, oemHex);

        // The OEM parser takes the timestamped entry type, so the record is
        // handed over through a pointer cast.
        parseOemSel(reinterpret_cast<TsOemSELEntry*>(entry), details);
        msgLog += category + " (0x" + recTypeHex + "), OEM Data: (" + oemHex +
                  ") " + details;
        return;
    }

    /* Anything else is reported as a raw dump of the whole record */
    category = unknownErr;
    toHexStr(reqData, details);
    msgLog += category + " (0x" + recTypeHex + ") RawData: " + details;
}
1514
1515 } // namespace fb_oem::ipmi::sel
1516
1517 namespace ipmi
1518 {
1519
1520 namespace storage
1521 {
1522
// Registers the SEL command handlers defined in this namespace. The
// constructor attribute asks the toolchain to call it automatically at load
// time, before main().
static void registerSELFunctions() __attribute__((constructor));
// SEL store shared by every handler in this namespace. The explicit
// init_priority requests early construction relative to other prioritised
// globals - presumably so it is ready before the handlers are registered;
// confirm against the toolchain's init_priority rules.
static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101)));
1525
1526 ipmi::RspType<uint8_t, // SEL version
1527 uint16_t, // SEL entry count
1528 uint16_t, // free space
1529 uint32_t, // last add timestamp
1530 uint32_t, // last erase timestamp
1531 uint8_t> // operation support
ipmiStorageGetSELInfo()1532 ipmiStorageGetSELInfo()
1533 {
1534 fb_oem::ipmi::sel::GetSELInfoData info;
1535
1536 selObj.getInfo(info);
1537 return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace,
1538 info.addTimeStamp, info.eraseTimeStamp,
1539 info.operationSupport);
1540 }
1541
1542 ipmi::RspType<uint16_t, std::vector<uint8_t>>
ipmiStorageGetSELEntry(std::vector<uint8_t> data)1543 ipmiStorageGetSELEntry(std::vector<uint8_t> data)
1544 {
1545 if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest))
1546 {
1547 return ipmi::responseReqDataLenInvalid();
1548 }
1549
1550 fb_oem::ipmi::sel::GetSELEntryRequest* reqData =
1551 reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]);
1552
1553 if (reqData->reservID != 0)
1554 {
1555 if (!checkSELReservation(reqData->reservID))
1556 {
1557 return ipmi::responseInvalidReservationId();
1558 }
1559 }
1560
1561 uint16_t selCnt = selObj.getCount();
1562 if (selCnt == 0)
1563 {
1564 return ipmi::responseSensorInvalid();
1565 }
1566
1567 /* If it is asked for first entry */
1568 if (reqData->recordID == fb_oem::ipmi::sel::firstEntry)
1569 {
1570 /* First Entry (0x0000) as per Spec */
1571 reqData->recordID = 1;
1572 }
1573 else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry)
1574 {
1575 /* Last entry (0xFFFF) as per Spec */
1576 reqData->recordID = selCnt;
1577 }
1578
1579 std::string ipmiRaw;
1580
1581 if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0)
1582 {
1583 return ipmi::responseSensorInvalid();
1584 }
1585
1586 std::vector<uint8_t> recDataBytes;
1587 if (fromHexStr(ipmiRaw, recDataBytes) < 0)
1588 {
1589 return ipmi::responseUnspecifiedError();
1590 }
1591
1592 /* Identify the next SEL record ID. If recordID is same as
1593 * total SeL count then next id should be last entry else
1594 * it should be incremented by 1 to current RecordID
1595 */
1596 uint16_t nextRecord;
1597 if (reqData->recordID == selCnt)
1598 {
1599 nextRecord = fb_oem::ipmi::sel::lastEntry;
1600 }
1601 else
1602 {
1603 nextRecord = reqData->recordID + 1;
1604 }
1605
1606 if (reqData->readLen == fb_oem::ipmi::sel::entireRecord)
1607 {
1608 return ipmi::responseSuccess(nextRecord, recDataBytes);
1609 }
1610 else
1611 {
1612 if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize ||
1613 reqData->readLen > fb_oem::ipmi::sel::selRecordSize)
1614 {
1615 return ipmi::responseUnspecifiedError();
1616 }
1617 std::vector<uint8_t> recPartData;
1618
1619 auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset;
1620 auto readLength = std::min(diff, static_cast<int>(reqData->readLen));
1621
1622 for (int i = 0; i < readLength; i++)
1623 {
1624 recPartData.push_back(recDataBytes[i + reqData->offset]);
1625 }
1626 return ipmi::responseSuccess(nextRecord, recPartData);
1627 }
1628 }
1629
1630 // Retry function to log the SEL entry message and make D-Bus call
logWithRetry(const std::string & journalMsg,const std::string & messageID,const std::string & logErr,const std::string & severity,const std::map<std::string,std::string> & ad,int maxRetries=10,std::chrono::milliseconds waitTimeMs=std::chrono::milliseconds (100))1631 bool logWithRetry(
1632 const std::string& journalMsg, const std::string& messageID,
1633 const std::string& logErr, const std::string& severity,
1634 const std::map<std::string, std::string>& ad, int maxRetries = 10,
1635 std::chrono::milliseconds waitTimeMs = std::chrono::milliseconds(100))
1636 {
1637 // Attempt to log the SEL entry message
1638 lg2::info(
1639 "SEL Entry Added: {IPMI_RAW}, IPMISEL_MESSAGE_ID={MESSAGE_ID}, IPMISEL_MESSAGE_ARGS={LOG_ERR}",
1640 "IPMI_RAW", journalMsg, "MESSAGE_ID", messageID, "LOG_ERR", logErr);
1641
1642 int attempts = 0;
1643 while (attempts < maxRetries)
1644 {
1645 // Create D-Bus call
1646 auto bus = sdbusplus::bus::new_default();
1647 auto reqMsg = bus.new_method_call(
1648 "xyz.openbmc_project.Logging", "/xyz/openbmc_project/logging",
1649 "xyz.openbmc_project.Logging.Create", "Create");
1650 reqMsg.append(logErr, severity, ad);
1651
1652 try
1653 {
1654 // Attempt to make the D-Bus call
1655 bus.call(reqMsg);
1656 return true; // D-Bus call successful, exit the loop
1657 }
1658 catch (sdbusplus::exception_t& e)
1659 {
1660 lg2::error("D-Bus call failed: {ERROR}", "ERROR", e);
1661 }
1662
1663 // Wait before retrying
1664 std::this_thread::sleep_for(std::chrono::milliseconds(waitTimeMs));
1665 attempts++;
1666 }
1667
1668 return false; // Failed after max retries
1669 }
1670
1671 // Main function to add SEL entry
1672 ipmi::RspType<uint16_t>
ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,std::vector<uint8_t> data)1673 ipmiStorageAddSELEntry(ipmi::Context::ptr ctx, std::vector<uint8_t> data)
1674 {
1675 /* Per the IPMI spec, need to cancel any reservation when a
1676 * SEL entry is added
1677 */
1678 cancelSELReservation();
1679
1680 if (data.size() != fb_oem::ipmi::sel::selRecordSize)
1681 {
1682 return ipmi::responseReqDataLenInvalid();
1683 }
1684
1685 std::string ipmiRaw, logErr;
1686 toHexStr(data, ipmiRaw);
1687
1688 /* Parse sel data and get an error log to be filed */
1689 fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr);
1690
1691 static const std::string openBMCMessageRegistryVersion("0.1");
1692 std::string messageID =
1693 "OpenBMC." + openBMCMessageRegistryVersion + ".SELEntryAdded";
1694
1695 /* Log the Raw SEL message to the journal */
1696 std::string journalMsg = "SEL Entry Added: " + ipmiRaw;
1697
1698 std::map<std::string, std::string> ad;
1699 std::string severity = "xyz.openbmc_project.Logging.Entry.Level.Critical";
1700 ad.emplace("IPMI_RAW", ipmiRaw);
1701
1702 // Launch the logging thread
1703 std::thread([=]() {
1704 bool success =
1705 logWithRetry(journalMsg, messageID, logErr, severity, ad);
1706 if (!success)
1707 {
1708 lg2::error("Failed to log SEL entry added event after retries.");
1709 }
1710 }).detach();
1711
1712 int responseID = selObj.addEntry(ipmiRaw.c_str());
1713 if (responseID < 0)
1714 {
1715 return ipmi::responseUnspecifiedError();
1716 }
1717 return ipmi::responseSuccess(static_cast<uint16_t>(responseID));
1718 }
1719
ipmiStorageClearSEL(uint16_t reservationID,const std::array<uint8_t,3> & clr,uint8_t eraseOperation)1720 ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID,
1721 const std::array<uint8_t, 3>& clr,
1722 uint8_t eraseOperation)
1723 {
1724 if (!checkSELReservation(reservationID))
1725 {
1726 return ipmi::responseInvalidReservationId();
1727 }
1728
1729 static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'};
1730 if (clr != clrExpected)
1731 {
1732 return ipmi::responseInvalidFieldRequest();
1733 }
1734
1735 /* If there is no sel then return erase complete */
1736 if (selObj.getCount() == 0)
1737 {
1738 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1739 }
1740
1741 /* Erasure status cannot be fetched, so always return erasure
1742 * status as `erase completed`.
1743 */
1744 if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus)
1745 {
1746 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1747 }
1748
1749 /* Check that initiate erase is correct */
1750 if (eraseOperation != fb_oem::ipmi::sel::initiateErase)
1751 {
1752 return ipmi::responseInvalidFieldRequest();
1753 }
1754
1755 /* Per the IPMI spec, need to cancel any reservation when the
1756 * SEL is cleared
1757 */
1758 cancelSELReservation();
1759
1760 /* Clear the complete Sel Json object */
1761 if (selObj.clear() < 0)
1762 {
1763 return ipmi::responseUnspecifiedError();
1764 }
1765
1766 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1767 }
1768
ipmiStorageGetSELTime()1769 ipmi::RspType<uint32_t> ipmiStorageGetSELTime()
1770 {
1771 struct timespec selTime = {};
1772
1773 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
1774 {
1775 return ipmi::responseUnspecifiedError();
1776 }
1777
1778 return ipmi::responseSuccess(selTime.tv_sec);
1779 }
1780
ipmiStorageSetSELTime(uint32_t)1781 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t)
1782 {
1783 // Set SEL Time is not supported
1784 return ipmi::responseInvalidCommand();
1785 }
1786
ipmiStorageGetSELTimeUtcOffset()1787 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset()
1788 {
1789 /* TODO: For now, the SEL time stamp is based on UTC time,
1790 * so return 0x0000 as offset. Might need to change once
1791 * supporting zones in SEL time stamps
1792 */
1793
1794 uint16_t utcOffset = 0x0000;
1795 return ipmi::responseSuccess(utcOffset);
1796 }
1797
registerSELFunctions()1798 void registerSELFunctions()
1799 {
1800 // <Get SEL Info>
1801 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1802 ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User,
1803 ipmiStorageGetSELInfo);
1804
1805 // <Get SEL Entry>
1806 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1807 ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User,
1808 ipmiStorageGetSELEntry);
1809
1810 // <Add SEL Entry>
1811 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1812 ipmi::storage::cmdAddSelEntry,
1813 ipmi::Privilege::Operator, ipmiStorageAddSELEntry);
1814
1815 // <Clear SEL>
1816 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1817 ipmi::storage::cmdClearSel, ipmi::Privilege::Operator,
1818 ipmiStorageClearSEL);
1819
1820 // <Get SEL Time>
1821 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1822 ipmi::storage::cmdGetSelTime, ipmi::Privilege::User,
1823 ipmiStorageGetSELTime);
1824
1825 // <Set SEL Time>
1826 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1827 ipmi::storage::cmdSetSelTime,
1828 ipmi::Privilege::Operator, ipmiStorageSetSELTime);
1829
1830 // <Get SEL Time UTC Offset>
1831 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1832 ipmi::storage::cmdGetSelTimeUtcOffset,
1833 ipmi::Privilege::User,
1834 ipmiStorageGetSELTimeUtcOffset);
1835
1836 return;
1837 }
1838
1839 } // namespace storage
1840 } // namespace ipmi
1841