xref: /openbmc/fb-ipmi-oem/src/selcommands.cpp (revision 7f97f7557cc90413de9e1fa097391071136fb7dd)
1 /*
2  * Copyright (c)  2018 Intel Corporation.
3  * Copyright (c)  2018-present Facebook.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #include <boost/algorithm/string/join.hpp>
19 #include <boost/container/flat_map.hpp>
20 #include <ipmid/api.hpp>
21 #include <nlohmann/json.hpp>
22 #include <phosphor-logging/log.hpp>
23 #include <sdbusplus/message/types.hpp>
24 #include <sdbusplus/timer.hpp>
25 #include <storagecommands.hpp>
26 
27 #include <fstream>
28 #include <iostream>
29 #include <sstream>
30 #include <thread>
31 
// Numeric codes for memory error types; the enumerators carry the explicit
// values 0 and 7.
// NOTE(review): presumably compared against a byte of an OEM SEL record by
// code outside this view - confirm the consumer before renumbering.
enum class MemErrType
{
    memTrainErr = 0,
    memPmicErr = 7
};
37 
// Numeric codes for POST event types; values are explicit and non-contiguous
// (0, 6, 7, 10).
// NOTE(review): presumably compared against a byte of an OEM SEL record by
// code outside this view - confirm the consumer before renumbering.
enum class PostEvtType
{
    pxeBootFail = 0,
    httpBootFail = 6,
    getCertFail = 7,
    amdAblFail = 10
};
45 
// Numeric codes for PCIe event types; only one code (dpc = 0) is defined.
// NOTE(review): presumably compared against a byte of an OEM SEL record by
// code outside this view - confirm the consumer before renumbering.
enum class PcieEvtType
{
    dpc = 0
};
50 
// Numeric codes for memory event types; values are explicit and
// non-contiguous (0, 5, 7).
// NOTE(review): presumably compared against a byte of an OEM SEL record by
// code outside this view - confirm the consumer before renumbering.
enum class MemEvtType
{
    ppr = 0,
    adddc = 5,
    noDimm = 7
};
57 
58 //----------------------------------------------------------------------
59 // Platform specific functions for storing app data
60 //----------------------------------------------------------------------
61 
/**
 * Render one byte as exactly two uppercase hexadecimal digits.
 *
 * @param byte  value to format
 * @return two-character string, zero padded (0x0A -> "0A")
 */
static std::string byteToStr(uint8_t byte)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    std::string text(2, '0');
    text[0] = hexDigits[byte >> 4];
    text[1] = hexDigits[byte & 0x0F];
    return text;
}
71 
/**
 * Encode a byte sequence as uppercase hexadecimal text, two digits per byte
 * and no separators.
 *
 * @param bytes   bytes to encode (not modified)
 * @param hexStr  receives the encoded text; any previous content is replaced
 */
static void toHexStr(std::vector<uint8_t>& bytes, std::string& hexStr)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(bytes.size() * 2);
    for (const uint8_t value : bytes)
    {
        encoded.push_back(hexDigits[value >> 4]);
        encoded.push_back(hexDigits[value & 0x0F]);
    }
    hexStr = encoded;
}
82 
fromHexStr(const std::string hexStr,std::vector<uint8_t> & data)83 static int fromHexStr(const std::string hexStr, std::vector<uint8_t>& data)
84 {
85     for (unsigned int i = 0; i < hexStr.size(); i += 2)
86     {
87         try
88         {
89             data.push_back(static_cast<uint8_t>(
90                 std::stoul(hexStr.substr(i, 2), nullptr, 16)));
91         }
92         catch (const std::invalid_argument& e)
93         {
94             phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
95             return -1;
96         }
97         catch (const std::out_of_range& e)
98         {
99             phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
100             return -1;
101         }
102     }
103     return 0;
104 }
105 
106 namespace fb_oem::ipmi::sel
107 {
108 
109 class SELData
110 {
111   private:
112     nlohmann::json selDataObj;
113 
flush()114     void flush()
115     {
116         std::ofstream file(SEL_JSON_DATA_FILE);
117         file << selDataObj;
118         file.close();
119     }
120 
init()121     void init()
122     {
123         selDataObj[KEY_SEL_VER] = 0x51;
124         selDataObj[KEY_SEL_COUNT] = 0;
125         selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF;
126         selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF;
127         selDataObj[KEY_OPER_SUPP] = 0x02;
128         /* Spec indicates that more than 64kB is free */
129         selDataObj[KEY_FREE_SPACE] = 0xFFFF;
130     }
131 
writeEmptyJson()132     void writeEmptyJson()
133     {
134         selDataObj = nlohmann::json::object(); // Create an empty JSON object
135         std::ofstream outFile(SEL_JSON_DATA_FILE);
136         if (outFile)
137         {
138             // Write empty JSON object to the file
139             outFile << selDataObj.dump(4);
140             outFile.close();
141         }
142         else
143         {
144             lg2::info("Failed to create SEL JSON file with empty JSON.");
145         }
146     }
147 
148   public:
SELData()149     SELData()
150     {
151         /* Get App data stored in json file */
152         std::ifstream file(SEL_JSON_DATA_FILE);
153         if (file)
154         {
155             try
156             {
157                 file >> selDataObj;
158             }
159             catch (const nlohmann::json::parse_error& e)
160             {
161                 lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e);
162                 writeEmptyJson();
163                 init(); // Initialize to default values
164             }
165             file.close();
166         }
167         else
168         {
169             lg2::info("Failed to open SEL JSON file.");
170             writeEmptyJson();
171             init();
172         }
173 
174         /* Initialize SelData object if no entries. */
175         if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end())
176         {
177             init();
178         }
179     }
180 
clear()181     int clear()
182     {
183         /* Clear the complete Sel Json object */
184         selDataObj.clear();
185         /* Reinitialize it with basic data */
186         init();
187         /* Save the erase time */
188         struct timespec selTime = {};
189         if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
190         {
191             return -1;
192         }
193         selDataObj[KEY_ERASE_TIME] = selTime.tv_sec;
194         flush();
195         return 0;
196     }
197 
getCount()198     uint32_t getCount()
199     {
200         return selDataObj[KEY_SEL_COUNT];
201     }
202 
getInfo(GetSELInfoData & info)203     void getInfo(GetSELInfoData& info)
204     {
205         info.selVersion = selDataObj[KEY_SEL_VER];
206         info.entries = selDataObj[KEY_SEL_COUNT];
207         info.freeSpace = selDataObj[KEY_FREE_SPACE];
208         info.addTimeStamp = selDataObj[KEY_ADD_TIME];
209         info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME];
210         info.operationSupport = selDataObj[KEY_OPER_SUPP];
211     }
212 
getEntry(uint32_t index,std::string & rawStr)213     int getEntry(uint32_t index, std::string& rawStr)
214     {
215         std::stringstream ss;
216         ss << std::hex;
217         ss << std::setw(2) << std::setfill('0') << index;
218 
219         /* Check or the requested SEL Entry, if record is available */
220         if (selDataObj.find(ss.str()) == selDataObj.end())
221         {
222             return -1;
223         }
224 
225         rawStr = selDataObj[ss.str()][KEY_SEL_ENTRY_RAW];
226         return 0;
227     }
228 
addEntry(std::string keyStr)229     int addEntry(std::string keyStr)
230     {
231         struct timespec selTime = {};
232 
233         if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
234         {
235             return -1;
236         }
237 
238         selDataObj[KEY_ADD_TIME] = selTime.tv_sec;
239 
240         int selCount = selDataObj[KEY_SEL_COUNT];
241         selDataObj[KEY_SEL_COUNT] = ++selCount;
242 
243         std::stringstream ss;
244         ss << std::hex;
245         ss << std::setw(2) << std::setfill('0') << selCount;
246 
247         selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr;
248         flush();
249         return selCount;
250     }
251 };
252 
253 /*
254  * A Function to parse common SEL message, a helper function
255  * for parseStdSel.
256  *
257  * Note that this function __CANNOT__ be overridden.
258  * To add board specific routine, please override parseStdSel.
259  */
260 
/* Used by decoding ME event: domain names looked up by the NM decoders in
 * this file. The table has only six entries, while the decoders derive the
 * index from a 4-bit field (0-15), so any index must be range-checked
 * against size() before it is used. */
std::vector<std::string> nmDomName = {
    "Entire Platform",          "CPU Subsystem",
    "Memory Subsystem",         "HW Protection",
    "High Power I/O subsystem", "Unknown"};
266 
/* Fallback decoder: ignores the event bytes and always reports "Unknown". */
static void logDefault(uint8_t*, std::string& errLog)
{
    errLog.assign("Unknown");
}
272 
/*
 * Decode a SYSTEM_EVENT record. Only data[0] == 0xE5 is understood: data[2]
 * selects the cause of the time change and data[1] (0x00 / 0x80) tags the
 * record as the first or second of a pair. Anything else is "Unknown".
 */
static void logSysEvent(uint8_t* data, std::string& errLog)
{
    if (data[0] != 0xE5)
    {
        errLog = "Unknown";
        return;
    }

    static const char* const causes[] = {"NTP", "Host RTL", "Set SEL time cmd",
                                         "Set SEL time UTC offset cmd"};
    const uint8_t cause = data[2];

    errLog = "Cause of Time change - ";
    errLog += (cause < 4) ? causes[cause] : "Unknown";

    const uint8_t phase = data[1];
    if (phase == 0x00)
    {
        errLog += " - First Time";
    }
    else if (phase == 0x80)
    {
        errLog += " - Second Time";
    }
}
306 
/* Decode a THERM_THRESH_EVT record: data[0] == 1 means the limit was
 * exceeded, every other value is "Unknown". */
static void logThermalEvent(uint8_t* data, std::string& errLog)
{
    errLog = (data[0] == 0x1) ? "Limit Exceeded" : "Unknown";
}
318 
/* Decode a CRITICAL_IRQ record from data[0] (compared as a whole byte). */
static void logCritIrq(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "NMI / Diagnostic Interrupt";
            break;
        case 0x03:
            errLog = "Software NMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }

    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
}
336 
logPostErr(uint8_t * data,std::string & errLog)337 static void logPostErr(uint8_t* data, std::string& errLog)
338 {
339     if ((data[0] & 0x0F) == 0x0)
340     {
341         errLog = "System Firmware Error";
342     }
343     else
344     {
345         errLog = "Unknown";
346     }
347 
348     if (((data[0] >> 6) & 0x03) == 0x3)
349     {
350         // TODO: Need to implement IPMI spec based Post Code
351         errLog += ", IPMI Post Code";
352     }
353     else if (((data[0] >> 6) & 0x03) == 0x2)
354     {
355         errLog += ", OEM Post Code 0x" + byteToStr(data[2]) +
356                   byteToStr(data[1]);
357 
358         switch ((data[2] << 8) | data[1])
359         {
360             case 0xA105:
361                 errLog += ", BMC Failed (No Response)";
362                 break;
363             case 0xA106:
364                 errLog += ", BMC Failed (Self Test Fail)";
365                 break;
366             case 0xA10A:
367                 errLog += ", System Firmware Corruption Detected";
368                 break;
369             case 0xA10B:
370                 errLog += ", TPM Self-Test FAIL Detected";
371         }
372     }
373 }
374 
/*
 * Decode a MACHINE_CHK_ERR record.
 *
 * The low nibble of data[0] selects the error class (0x0B or 0x0C) and bits
 * 6:5 of data[1] the severity within that class. The suffix always reports
 * data[1] as the bank number, data[2] bits 7:5 as the CPU and data[2] bits
 * 4:0 as the core.
 */
static void logMchChkErr(uint8_t* data, std::string& errLog)
{
    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
    const uint8_t errClass = data[0] & 0x0F;
    const uint8_t severity = (data[1] >> 5) & 0x03;

    if (errClass == 0x0B)
    {
        static const char* const uncorrected[] = {
            "Uncorrected Recoverable Error", "Uncorrected Thread Fatal Error",
            "Uncorrected System Fatal Error", "Unknown"};
        errLog = uncorrected[severity];
    }
    else if (errClass == 0x0C)
    {
        if (severity == 0x00)
        {
            errLog = "Correctable Error";
        }
        else if (severity == 0x01)
        {
            errLog = "Deferred Error";
        }
        else
        {
            errLog = "Unknown";
        }
    }
    else
    {
        errLog = "Unknown";
    }

    errLog += ", Machine Check bank Number ";
    errLog += std::to_string(data[1]);
    errLog += ", CPU ";
    errLog += std::to_string(data[2] >> 5);
    errLog += ", Core ";
    errLog += std::to_string(data[2] & 0x1F);
}
417 
/*
 * Decode a PCIE_ERR record; the low nibble of data[0] selects the message.
 *
 * Bus-level errors carry a location suffix built from data[2] (bus),
 * data[1] bits 7:3 (device) and data[1] bits 2:0 (function). The ID style
 * events print data[1] followed by data[2] as one 16-bit hex value. All
 * numbers are uppercase hex, two digits per byte.
 */
static void logPcieErr(uint8_t* data, std::string& errLog)
{
    /* Two zero-padded uppercase hex digits for a value up to 0xFF. */
    const auto hex2 = [](unsigned int value) {
        static const char digits[] = "0123456789ABCDEF";
        std::string out;
        out += digits[(value >> 4) & 0xF];
        out += digits[value & 0xF];
        return out;
    };

    const std::string wordHex = hex2(data[1]) + hex2(data[2]);
    const char* busErr = nullptr;

    switch (data[0] & 0xF)
    {
        case 0x4:
            busErr = "PCI PERR";
            break;
        case 0x5:
            busErr = "PCI SERR";
            break;
        case 0x7:
            busErr = "Correctable";
            break;
        case 0x8:
            busErr = "Uncorrectable";
            break;
        case 0xA:
            busErr = "Bus Fatal";
            break;
        case 0xD:
            errLog = "Vendor ID: 0x" + wordHex;
            return;
        case 0xE:
            errLog = "Device ID: 0x" + wordHex;
            return;
        case 0xF:
            errLog = "Error ID from downstream: 0x" + wordHex;
            return;
        default:
            errLog = "Unknown";
            return;
    }

    errLog = busErr;
    errLog += " (Bus " + hex2(data[2]) + " / Dev " + hex2(data[1] >> 3) +
              " / Fun " + hex2(data[1] & 0x7) + ")";
}
467 
logIioErr(uint8_t * data,std::string & errLog)468 static void logIioErr(uint8_t* data, std::string& errLog)
469 {
470     std::vector<std::string> tmpStr = {
471         "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data",
472         "Misc", " DMA", "ITC",       "OTC",  "CI"};
473 
474     if ((data[0] & 0xF) == 0)
475     {
476         errLog += "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" +
477                   byteToStr(data[1]) + " - ";
478 
479         if ((data[2] & 0xF) <= 0x9)
480         {
481             errLog += tmpStr[(data[2] & 0xF)];
482         }
483         else
484         {
485             errLog += "Reserved";
486         }
487     }
488     else
489     {
490         errLog = "Unknown";
491     }
492 }
493 
logMemErr(uint8_t * dataPtr,std::string & errLog)494 [[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog)
495 {
496     uint8_t snrType = dataPtr[0];
497     uint8_t snrNum = dataPtr[1];
498     uint8_t* data = &(dataPtr[3]);
499 
500     /* TODO: add pal_add_cri_sel */
501 
502     if (snrNum == memoryEccError)
503     {
504         /* SEL from MEMORY_ECC_ERR Sensor */
505         switch (data[0] & 0x0F)
506         {
507             case 0x0:
508                 if (snrType == 0x0C)
509                 {
510                     errLog = "Correctable";
511                 }
512                 else if (snrType == 0x10)
513                 {
514                     errLog = "Correctable ECC error Logging Disabled";
515                 }
516                 break;
517             case 0x1:
518                 errLog = "Uncorrectable";
519                 break;
520             case 0x5:
521                 errLog = "Correctable ECC error Logging Limit Disabled";
522                 break;
523             default:
524                 errLog = "Unknown";
525         }
526     }
527     else if (snrNum == memoryErrLogDIS)
528     {
529         // SEL from MEMORY_ERR_LOG_DIS Sensor
530         if ((data[0] & 0x0F) == 0x0)
531         {
532             errLog = "Correctable Memory Error Logging Disabled";
533         }
534         else
535         {
536             errLog = "Unknown";
537         }
538     }
539     else
540     {
541         errLog = "Unknown";
542         return;
543     }
544 
545     /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */
546 
547     errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " +
548               std::to_string(data[1] & 0x03);
549 
550     /* DIMM number (data[2]):
551      * Bit[7:5]: Socket number  (Range: 0-7)
552      * Bit[4:3]: Channel number (Range: 0-3)
553      * Bit[2:0]: DIMM number    (Range: 0-7)
554      */
555 
556     /* TODO: Verify these bits */
557     std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5);
558     std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3);
559     std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7);
560 
561     switch ((data[1] & 0xC) >> 2)
562     {
563         case 0x0:
564         {
565             /* All Info Valid */
566             [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2;
567             [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3;
568 
569             /* TODO: If critical SEL logging is available, do it */
570             if (snrType == 0x0C)
571             {
572                 if ((data[0] & 0x0F) == 0x0)
573                 {
574                     /* TODO: add_cri_sel */
575                     /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1"
576                      */
577                 }
578                 else if ((data[0] & 0x0F) == 0x1)
579                 {
580                     /* TODO: add_cri_sel */
581                     /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1"
582                      */
583                 }
584             }
585             /* Continue to parse the error into a string. All Info Valid
586              */
587             errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")";
588         }
589 
590         break;
591         case 0x1:
592 
593             /* DIMM info not valid */
594             errLog += " (" + cpuStr + ", " + chStr + ")";
595             break;
596         case 0x2:
597 
598             /* CHN info not valid */
599             errLog += " (" + cpuStr + ", " + dimmStr + ")";
600             break;
601         case 0x3:
602 
603             /* CPU info not valid */
604             errLog += " (" + chStr + ", " + dimmStr + ")";
605             break;
606     }
607 }
608 
/* Decode a PWR_ERR record from data[0]: 1 = SYS_PWROK, 2 = PCH_PWROK. */
static void logPwrErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x1:
            /* Also try logging to Critical log file, if available */
            /* "SYS_PWROK failure,FRU:1" */
            errLog = "SYS_PWROK failure";
            break;
        case 0x2:
            /* Also try logging to Critical log file, if available */
            /* "PCH_PWROK failure,FRU:1" */
            errLog = "PCH_PWROK failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
628 
/* Decode a CATERR record from data[0]: 0x00 = IERR, 0x0B = MCERR. */
static void logCatErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x0:
            /* Also try logging to Critical log file, if available */
            /* "IERR,FRU:1 */
            errLog = "IERR/CATERR";
            break;
        case 0xB:
            /* Also try logging to Critical log file, if available */
            /* "MCERR,FRU:1 */
            errLog = "MCERR/CATERR";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
648 
/* Decode a CPU_DIMM_HOT record: only the exact byte triple 01 FF FF is
 * recognised. */
static void logDimmHot(uint8_t* data, std::string& errLog)
{
    const bool socMemHot =
        (data[0] == 0x01) && (data[1] == 0xFF) && (data[2] == 0xFF);

    if (!socMemHot)
    {
        /* Also try logging to Critical log file, if available */
        /* ""CPU_DIMM_HOT %s,FRU:1" */
        errLog = "Unknown";
        return;
    }

    errLog = "SOC MEMHOT";
}
662 
/* Decode a SOFTWARE_NMI record: only the exact byte triple 03 FF FF is
 * recognised. */
static void logSwNMI(uint8_t* data, std::string& errLog)
{
    const bool softwareNmi =
        (data[0] == 0x03) && (data[1] == 0xFF) && (data[2] == 0xFF);

    errLog = softwareNmi ? "Software NMI" : "Unknown SW NMI";
}
674 
/* Decode a CPUx_THERM_STATUS record from data[0] (values 0-2 are known). */
static void logCPUThermalSts(uint8_t* data, std::string& errLog)
{
    static const char* const states[] = {"CPU Critical Temperature",
                                         "PROCHOT#", "TCC Activation"};
    const uint8_t state = data[0];

    errLog = (state <= 0x2) ? states[state] : "Unknown";
}
692 
/* Decode an ME_POWER_STATE record from data[0]; unrecognised values are
 * echoed back in decimal inside "Unknown[...]". */
static void logMEPwrState(uint8_t* data, std::string& errLog)
{
    const uint8_t state = data[0];

    if (state == 0)
    {
        errLog = "RUNNING";
    }
    else if (state == 2)
    {
        errLog = "POWER_OFF";
    }
    else
    {
        errLog = "Unknown[";
        errLog += std::to_string(state);
        errLog += "]";
    }
}
708 
/*
 * Decode an SPS_FW_HEALTH record.
 *
 * The low nibble of data[0] selects the event family: 0 is a firmware health
 * event whose kind is given by data[1] (0x00-0x13), 1 is an SMBus link
 * failure, anything else is "Unknown".
 */
static void logSPSFwHealth(uint8_t* data, std::string& errLog)
{
    const uint8_t family = data[0] & 0x0F;

    if (family == 0x01)
    {
        errLog = "SMBus link failure";
        return;
    }
    if (family != 0x00)
    {
        errLog = "Unknown";
        return;
    }

    /* Indexed by data[1]. */
    static const char* const healthEvents[] = {
        "Recovery GPIO forced",
        "Image execution failed",
        "Flash erase error",
        "Flash state information",
        "Internal error",
        "BMC did not respond",
        "Direct Flash update",
        "Manufacturing error",
        "Automatic Restore to Factory Presets",
        "Firmware Exception",
        "Flash Wear-Out Protection Warning",
        "Unknown",
        "Unknown",
        "DMI interface error",
        "MCTP interface error",
        "Auto-configuration finished",
        "Unsupported Segment Defined Feature",
        "Unknown",
        "CPU Debug Capability Disabled",
        "UMA operation error"};
    constexpr unsigned int eventCount =
        sizeof(healthEvents) / sizeof(healthEvents[0]);

    const uint8_t kind = data[1];
    errLog = (kind < eventCount) ? healthEvents[kind] : "Unknown";
}
753 
/* Decode an NM_EXCEPTION_A record; only data[0] == 0xA8 is recognised. */
static void logNmExcA(uint8_t* data, std::string& errLog)
{
    /*NM4.0 #550710, Revision 1.95, and turn to p.155*/
    const bool correctionTimeExceeded = (data[0] == 0xA8);

    errLog = correctionTimeExceeded ? "Policy Correction Time Exceeded"
                                    : "Unknown";
}
766 
/*
 * Decode a PCH_THERM_THRESHOLD record.
 *
 * The low nibble of data[0] names the threshold that fired; data[1] and
 * data[2] are printed in decimal as the current and threshold values.
 */
static void logPCHThermal(uint8_t* data, std::string& errLog)
{
    /* Indexed by the low nibble of data[0]; nibbles 12-15 have no entry. */
    static const char* const thresholdNames[] = {
        "Lower Non-critical",
        "Unknown",
        "Lower Critical",
        "Unknown",
        "Lower Non-recoverable",
        "Unknown",
        "Unknown",
        "Upper Non-critical",
        "Unknown",
        "Upper Critical",
        "Unknown",
        "Upper Non-recoverable"};
    constexpr unsigned int nameCount =
        sizeof(thresholdNames) / sizeof(thresholdNames[0]);

    const uint8_t threshold = data[0] & 0x0f;

    errLog = (threshold < nameCount) ? thresholdNames[threshold] : "Unknown";
    errLog += ", curr_val: ";
    errLog += std::to_string(data[1]);
    errLog += " C, thresh_val: ";
    errLog += std::to_string(data[2]);
    errLog += " C";
}
795 
logNmHealth(uint8_t * data,std::string & errLog)796 static void logNmHealth(uint8_t* data, std::string& errLog)
797 {
798     std::vector<std::string> nmErrType = {
799         "Unknown",
800         "Unknown",
801         "Unknown",
802         "Unknown",
803         "Unknown",
804         "Unknown",
805         "Unknown",
806         "Extended Telemetry Device Reading Failure",
807         "Outlet Temperature Reading Failure",
808         "Volumetric Airflow Reading Failure",
809         "Policy Misconfiguration",
810         "Power Sensor Reading Failure",
811         "Inlet Temperature Reading Failure",
812         "Host Communication Error",
813         "Real-time Clock Synchronization Failure",
814         "Platform Shutdown Initiated by Intel NM Policy",
815         "Unknown"};
816     uint8_t nmTypeIdx = (data[0] & 0xf);
817     uint8_t domIdx = (data[1] & 0xf);
818     uint8_t errIdx = ((data[1] >> 4) & 0xf);
819 
820     if (nmTypeIdx == 2)
821     {
822         errLog = "SensorIntelNM";
823     }
824     else
825     {
826         errLog = "Unknown";
827     }
828 
829     errLog += ", Domain:" + nmDomName[domIdx] + ", ErrType:" +
830               nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]);
831 }
832 
logNmCap(uint8_t * data,std::string & errLog)833 static void logNmCap(uint8_t* data, std::string& errLog)
834 {
835     const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"};
836     if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr
837                        // limit and the others are reserved
838     {
839         errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] +
840                  ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] +
841                  ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)];
842     }
843     else
844     {
845         errLog = "Unknown";
846     }
847 }
848 
logNmThreshold(uint8_t * data,std::string & errLog)849 static void logNmThreshold(uint8_t* data, std::string& errLog)
850 {
851     uint8_t thresNum = (data[0] & 0x3);
852     uint8_t domIdx = (data[1] & 0xf);
853     uint8_t polId = data[2];
854     uint8_t polEvtIdx = BIT(data[0], 3);
855     const std::vector<std::string> polEvtStr = {
856         "Threshold Exceeded", "Policy Correction Time Exceeded"};
857 
858     errLog = "Threshold Number:" + std::to_string(thresNum) + "-" +
859              polEvtStr[polEvtIdx] + ", Domain:" + nmDomName[domIdx] +
860              ", PolicyID:0x" + byteToStr(polId);
861 }
862 
/* Decode a PWR_THRESH_EVT record from data[0]: 0 = limit not exceeded,
 * 1 = limit exceeded. */
static void logPwrThreshold(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "Limit Not Exceeded";
            break;
        case 0x01:
            errLog = "Limit Exceeded";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
878 
/* Decode an MSMI record from data[0]: 0x00 = IERR, 0x0B = MCERR. */
static void logMSMI(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "IERR/MSMI";
            break;
        case 0x0B:
            errLog = "MCERR/MSMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
894 
/*
 * Decode an HPR_WARNING record.
 *
 * Only records with data[2] == 0x01 are understood; data[1] is then a
 * duration in minutes, with 0xFF meaning "Infinite Time".
 */
static void logHprWarn(uint8_t* data, std::string& errLog)
{
    if (data[2] != 0x01)
    {
        errLog = "Unknown";
        return;
    }

    const uint8_t minutes = data[1];
    if (minutes == 0xFF)
    {
        errLog = "Infinite Time";
        return;
    }

    errLog = std::to_string(minutes);
    errLog += " minutes";
}
913 
/*
 * Lookup table keyed by sensor number. Each entry pairs a sensor name with
 * the decoder function that turns the record's bytes into text;
 * parseSelHelper looks entries up by data->sensorNum and calls the decoder.
 *
 * MEMORY_ECC_ERR, MEMORY_ERR_LOG_DIS, BUTTON, POWER_STATE and PROCHOT_EXT
 * are routed to logDefault, so they always decode as "Unknown" here.
 */
static const boost::container::flat_map<
    uint8_t,
    std::pair<std::string, std::function<void(uint8_t*, std::string&)>>>
    sensorNameTable = {
        {0xE9, {"SYSTEM_EVENT", logSysEvent}},
        {0x7D, {"THERM_THRESH_EVT", logThermalEvent}},
        {0xAA, {"BUTTON", logDefault}},
        {0xAB, {"POWER_STATE", logDefault}},
        {0xEA, {"CRITICAL_IRQ", logCritIrq}},
        {0x2B, {"POST_ERROR", logPostErr}},
        {0x40, {"MACHINE_CHK_ERR", logMchChkErr}},
        {0x41, {"PCIE_ERR", logPcieErr}},
        {0x43, {"IIO_ERR", logIioErr}},
        {0X63, {"MEMORY_ECC_ERR", logDefault}},
        {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}},
        {0X51, {"PROCHOT_EXT", logDefault}},
        {0X56, {"PWR_ERR", logPwrErr}},
        {0xE6, {"CATERR_A", logCatErr}},
        {0xEB, {"CATERR_B", logCatErr}},
        {0xB3, {"CPU_DIMM_HOT", logDimmHot}},
        {0x90, {"SOFTWARE_NMI", logSwNMI}},
        {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}},
        {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}},
        {0x16, {"ME_POWER_STATE", logMEPwrState}},
        {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}},
        {0x18, {"NM_EXCEPTION_A", logNmExcA}},
        {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}},
        {0x19, {"NM_HEALTH", logNmHealth}},
        {0x1A, {"NM_CAPABILITIES", logNmCap}},
        {0x1B, {"NM_THRESHOLD", logNmThreshold}},
        {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}},
        {0xE7, {"MSMI", logMSMI}},
        {0xC5, {"HPR_WARNING", logHprWarn}}};
947 
parseSelHelper(StdSELEntry * data,std::string & errStr)948 static void parseSelHelper(StdSELEntry* data, std::string& errStr)
949 {
950     /* Check if sensor type is OS_BOOT (0x1f) */
951     if (data->sensorType == 0x1F)
952     {
953         /* OS_BOOT used by OS */
954         switch (data->eventData1 & 0xF)
955         {
956             case 0x07:
957                 errStr = "Base OS/Hypervisor Installation started";
958                 break;
959             case 0x08:
960                 errStr = "Base OS/Hypervisor Installation completed";
961                 break;
962             case 0x09:
963                 errStr = "Base OS/Hypervisor Installation aborted";
964                 break;
965             case 0x0A:
966                 errStr = "Base OS/Hypervisor Installation failed";
967                 break;
968             default:
969                 errStr = "Unknown";
970         }
971         return;
972     }
973 
974     auto findSensorName = sensorNameTable.find(data->sensorNum);
975     if (findSensorName == sensorNameTable.end())
976     {
977         errStr = "Unknown";
978         return;
979     }
980     else
981     {
982         switch (data->sensorNum)
983         {
984             /* logMemErr function needs data from sensor type */
985             case memoryEccError:
986             case memoryErrLogDIS:
987                 findSensorName->second.second(&(data->sensorType), errStr);
988                 break;
989             /* Other sensor function needs only event data for parsing */
990             default:
991                 findSensorName->second.second(&(data->eventData1), errStr);
992         }
993     }
994 
995     if (((data->eventData3 & 0x80) >> 7) == 0)
996     {
997         errStr += " Assertion";
998     }
999     else
1000     {
1001         errStr += " Deassertion";
1002     }
1003 }
1004 
parseDimmPhyloc(StdSELEntry * data,std::string & errStr)1005 static void parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
1006 {
1007     // Log when " All info available"
1008     uint8_t chNum = (data->eventData3 & 0x18) >> 3;
1009     uint8_t dimmNum = data->eventData3 & 0x7;
1010     uint8_t rankNum = data->eventData2 & 0x03;
1011     uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;
1012 
1013     if (chNum == 3 && dimmNum == 0)
1014     {
1015         errStr += " Node: " + std::to_string(nodeNum) + "," +
1016                   " Card: " + std::to_string(chNum) + "," +
1017                   " Module: " + std::to_string(dimmNum) + "," +
1018                   " Rank Number: " + std::to_string(rankNum) + "," +
1019                   "  Location: DIMM A0";
1020     }
1021     else if (chNum == 2 && dimmNum == 0)
1022     {
1023         errStr += " Node: " + std::to_string(nodeNum) + "," +
1024                   " Card: " + std::to_string(chNum) + "," +
1025                   " Module: " + std::to_string(dimmNum) + "," +
1026                   " Rank Number: " + std::to_string(rankNum) + "," +
1027                   " Location: DIMM B0";
1028     }
1029     else if (chNum == 4 && dimmNum == 0)
1030     {
1031         errStr += " Node: " + std::to_string(nodeNum) + "," +
1032                   " Card: " + std::to_string(chNum) + "," +
1033                   " Module: " + std::to_string(dimmNum) + "," +
1034                   " Rank Number: " + std::to_string(rankNum) + "," +
1035                   " Location: DIMM C0 ";
1036     }
1037     else if (chNum == 5 && dimmNum == 0)
1038     {
1039         errStr += " Node: " + std::to_string(nodeNum) + "," +
1040                   " Card: " + std::to_string(chNum) + "," +
1041                   " Module: " + std::to_string(dimmNum) + "," +
1042                   " Rank Number: " + std::to_string(rankNum) + "," +
1043                   " Location: DIMM D0";
1044     }
1045     else
1046     {
1047         errStr += " Node: " + std::to_string(nodeNum) + "," +
1048                   " Card: " + std::to_string(chNum) + "," +
1049                   " Module: " + std::to_string(dimmNum) + "," +
1050                   " Rank Number: " + std::to_string(rankNum) + "," +
1051                   " Location: DIMM Unknown";
1052     }
1053 }
1054 
parseStdSel(StdSELEntry * data,std::string & errStr)1055 static void parseStdSel(StdSELEntry* data, std::string& errStr)
1056 {
1057     std::stringstream tmpStream;
1058     tmpStream << std::hex << std::uppercase;
1059 
1060     /* TODO: add pal_add_cri_sel */
1061     switch (data->sensorNum)
1062     {
1063         case memoryEccError:
1064             switch (data->eventData1 & 0x0F)
1065             {
1066                 case 0x00:
1067                     errStr = "Correctable";
1068                     tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1069                               << data->eventData3 << " ECC err";
1070                     parseDimmPhyloc(data, errStr);
1071                     break;
1072                 case 0x01:
1073                     errStr = "Uncorrectable";
1074                     tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1075                               << data->eventData3 << " UECC err";
1076                     parseDimmPhyloc(data, errStr);
1077                     break;
1078                 case 0x02:
1079                     errStr = "Parity";
1080                     break;
1081                 case 0x05:
1082                     errStr = "Correctable ECC error Logging Limit Reached";
1083                     break;
1084                 default:
1085                     errStr = "Unknown";
1086             }
1087             break;
1088         case memoryErrLogDIS:
1089             if ((data->eventData1 & 0x0F) == 0)
1090             {
1091                 errStr = "Correctable Memory Error Logging Disabled";
1092             }
1093             else
1094             {
1095                 errStr = "Unknown";
1096             }
1097             break;
1098         default:
1099             parseSelHelper(data, errStr);
1100             return;
1101     }
1102 
1103     errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
1104     errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);
1105 
1106     switch ((data->eventData2 & 0x0C) >> 2)
1107     {
1108         case 0x00:
1109             // Ignore when " All info available"
1110             break;
1111         case 0x01:
1112             errStr += " DIMM info not valid";
1113             break;
1114         case 0x02:
1115             errStr += " CHN info not valid";
1116             break;
1117         case 0x03:
1118             errStr += " CPU info not valid";
1119             break;
1120         default:
1121             errStr += " Unknown";
1122     }
1123 
1124     if (((data->eventType & 0x80) >> 7) == 0)
1125     {
1126         errStr += " Assertion";
1127     }
1128     else
1129     {
1130         errStr += " Deassertion";
1131     }
1132 
1133     return;
1134 }
1135 
parseOemSel(TsOemSELEntry * data,std::string & errStr)1136 static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
1137 {
1138     std::stringstream tmpStream;
1139     tmpStream << std::hex << std::uppercase << std::setfill('0');
1140 
1141     switch (data->recordType)
1142     {
1143         case 0xC0:
1144             tmpStream << "VID:0x" << std::setw(2) << (int)data->oemData[1]
1145                       << std::setw(2) << (int)data->oemData[0] << " DID:0x"
1146                       << std::setw(2) << (int)data->oemData[3] << std::setw(2)
1147                       << (int)data->oemData[2] << " Slot:0x" << std::setw(2)
1148                       << (int)data->oemData[4] << " Error ID:0x" << std::setw(2)
1149                       << (int)data->oemData[5];
1150             break;
1151         case 0xC2:
1152             tmpStream << "Extra info:0x" << std::setw(2)
1153                       << (int)data->oemData[1] << " MSCOD:0x" << std::setw(2)
1154                       << (int)data->oemData[3] << std::setw(2)
1155                       << (int)data->oemData[2] << " MCACOD:0x" << std::setw(2)
1156                       << (int)data->oemData[5] << std::setw(2)
1157                       << (int)data->oemData[4];
1158             break;
1159         case 0xC3:
1160             int bank = (data->oemData[1] & 0xf0) >> 4;
1161             int col = ((data->oemData[1] & 0x0f) << 8) | data->oemData[2];
1162 
1163             tmpStream << "Fail Device:0x" << std::setw(2)
1164                       << (int)data->oemData[0] << " Bank:0x" << std::setw(2)
1165                       << bank << " Column:0x" << std::setw(2) << col
1166                       << " Failed Row:0x" << std::setw(2)
1167                       << (int)data->oemData[3] << std::setw(2)
1168                       << (int)data->oemData[4] << std::setw(2)
1169                       << (int)data->oemData[5];
1170     }
1171 
1172     errStr = tmpStream.str();
1173 
1174     return;
1175 }
1176 
/**
 * @brief Build the "DIMM Slot Location: ..." text for a unified SEL record.
 *
 * @param[in] socket   Bits [5:4] hold the sled number, bits [3:0] the socket.
 * @param[in] channel  Channel number; only the low nibble is used.
 * @param[in] slot     Slot number; only the low nibble is used.
 *
 * @return Location text. When both channel and slot are 0xFF they are
 *         reported as unknown. Otherwise the DIMM label is a letter picked
 *         by (socket * 2 + slot) followed by the channel number, or "NA"
 *         when that index is outside the letter table.
 */
static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
                                   uint8_t slot)
{
    const uint8_t sledId = (socket >> 4) & 0x3;
    const uint8_t socketId = socket & 0xf;

    if (channel == 0xFF && slot == 0xFF)
    {
        return std::format(
            "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
            ", Slot unknown, DIMM unknown",
            sledId, socketId);
    }

    const uint8_t channelId = channel & 0xf;
    const uint8_t slotId = slot & 0xf;

    static constexpr char labels[] = {'A', 'C', 'B', 'D'};
    const uint8_t labelIdx = socketId * 2 + slotId;

    std::string dimmName = "NA";
    if (labelIdx < sizeof(labels))
    {
        dimmName = labels[labelIdx] + std::to_string(channelId);
    }

    return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
                       ", Channel {:02}, Slot {:02} DIMM {}",
                       sledId, socketId, channelId, slotId, dimmName);
}
1204 
/**
 * @brief Decode a Facebook unified SEL record into human-readable text.
 *
 * The low nibble of OEM data byte 0 selects the error class; each class
 * reads its own fields from the remaining OEM data bytes. Unrecognised
 * classes are reported together with a hex dump of all 13 OEM data bytes.
 *
 * @param[in]  data    Non-timestamped OEM SEL record; only oemData is read.
 * @param[out] errStr  Receives the decoded text.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    // Name tables are immutable, so build them once rather than per call.
    // The last entry of each table is the catch-all for out-of-range codes.
    static const std::vector<std::string> dimmErr = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static const std::vector<std::string> postEvent = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static const std::vector<std::string> certErr = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static const std::vector<std::string> pcieEvent = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static const std::vector<std::string> memEvent = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static const std::vector<std::string> memPprTime = {
        "Boot time", "Autonomous", "Run time", "Reserved"};
    static const std::vector<std::string> memPpr = {
        "PPR success", "PPR fail", "PPR request", "Reserved"};
    static const std::vector<std::string> memAdddc = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static const std::vector<std::string> pprEvent = {
        "PPR disable", "Soft PPR", "Hard PPR", "Reserved"};

    // Look up a name, mapping any out-of-range code to the last entry.
    const auto nameOf = [](const std::vector<std::string>& table,
                           size_t code) -> const std::string& {
        return table[std::min(code, table.size() - 1)];
    };

    uint8_t* ptr = data->oemData;
    uint8_t eventType = ptr[5] & 0xf;
    const int genInfo = ptr[0];
    const int errType = genInfo & 0x0f;

    std::string msg;

    switch (errType)
    {
        case unifiedPcieErr:
            msg = std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;
        case unifiedMemErr:
            // Memory records carry their event code in byte 9
            eventType = ptr[9] & 0xf;
            msg = std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                nameOf(dimmErr, eventType));

            if (static_cast<MemErrType>(eventType) == MemErrType::memTrainErr ||
                static_cast<MemErrType>(eventType) == MemErrType::memPmicErr)
            {
                // Bit 7 of byte 9 selects a 16-bit minor code (bytes 12:11)
                // printed with 4 digits instead of the 8-bit one in byte 11.
                const bool amd = ptr[9] & 0x80;
                msg += std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        case unifiedIioErr:
            msg = std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;
        case unifiedPostEvt:
            msg = std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                nameOf(postEvent, eventType));

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    const uint8_t failType = ptr[10] & 0xf;
                    msg += std::format(
                        ", Fail Type: {}, Error Code: 0x{:02X}",
                        (failType == 4 || failType == 6)
                            ? std::format("IPv{} fail", failType)
                            : std::format("0x{:02X}", ptr[10]),
                        ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    msg += std::format(", Failure Detail: {}",
                                       nameOf(certErr, ptr[9]));
                    break;
                case PostEvtType::amdAblFail:
                    msg += std::format(", ABL Error Code: 0x{:04X}",
                                       (ptr[12] << 8) | ptr[11]);
                    break;
                default:
                    // Other POST events carry no extra detail
                    break;
            }
            break;
        case unifiedPcieEvt:
            msg = std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo, nameOf(pcieEvent, eventType));

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                msg += std::format(", Status: 0x{:04X}, Source ID: 0x{:04X}",
                                   (ptr[8] << 8) | ptr[7],
                                   (ptr[10] << 8) | ptr[9]);
            }
            break;
        case unifiedMemEvt:
            // Memory records carry their event code in byte 9
            eventType = ptr[9] & 0xf;
            msg = std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo);
            if (static_cast<MemEvtType>(eventType) != MemEvtType::noDimm)
            {
                msg += std::format(", {}",
                                   dimmLocationStr(ptr[5], ptr[6], ptr[7]));
            }
            msg += ", DIMM Failure Event: ";

            switch (static_cast<MemEvtType>(eventType))
            {
                case MemEvtType::ppr:
                    msg += std::format("{} {}",
                                       memPprTime[(ptr[10] >> 2) & 0x3],
                                       memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    msg += std::format("{} {}", nameOf(memEvent, eventType),
                                       nameOf(memAdddc, ptr[11] & 0xf));
                    break;
                default:
                    msg += std::format("{}", nameOf(memEvent, eventType));
                    break;
            }
            break;
        case unifiedBootGuard:
            msg = std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;
        case unifiedPprEvt:
            msg = std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo, nameOf(pprEvent, eventType), ptr[6], ptr[7], ptr[8],
                ptr[9], ptr[10], ptr[11], ptr[12]);
            break;
        default:
        {
            // Unknown class: dump all 13 OEM data bytes for offline decoding
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            msg = std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                              errType, oemDataStr);
            break;
        }
    }

    errStr = msg;
}
1411 
parseSelData(uint8_t fruId,std::vector<uint8_t> & reqData,std::string & msgLog)1412 static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
1413                          std::string& msgLog)
1414 {
1415     /* Get record type */
1416     int recType = reqData[2];
1417     std::string errType, errLog;
1418 
1419     uint8_t* ptr = NULL;
1420 
1421     std::stringstream recTypeStream;
1422     recTypeStream << std::hex << std::uppercase << std::setfill('0')
1423                   << std::setw(2) << recType;
1424 
1425     msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: ";
1426 
1427     if (recType == stdErrType)
1428     {
1429         StdSELEntry* data = reinterpret_cast<StdSELEntry*>(&reqData[0]);
1430         std::string sensorName;
1431 
1432         errType = stdErr;
1433         if (data->sensorType == 0x1F)
1434         {
1435             sensorName = "OS";
1436         }
1437         else
1438         {
1439             auto findSensorName = sensorNameTable.find(data->sensorNum);
1440             if (findSensorName == sensorNameTable.end())
1441             {
1442                 sensorName = "Unknown";
1443             }
1444             else
1445             {
1446                 sensorName = findSensorName->second.first;
1447             }
1448         }
1449 
1450         parseStdSel(data, errLog);
1451         ptr = &(data->eventData1);
1452         std::vector<uint8_t> evtData(ptr, ptr + 3);
1453         std::string eventData;
1454         toHexStr(evtData, eventData);
1455 
1456         std::stringstream senNumStream;
1457         senNumStream << std::hex << std::uppercase << std::setfill('0')
1458                      << std::setw(2) << (int)(data->sensorNum);
1459 
1460         msgLog += errType + " (0x" + recTypeStream.str() +
1461                   "), Sensor: " + sensorName + " (0x" + senNumStream.str() +
1462                   "), Event Data: (" + eventData + ") " + errLog;
1463     }
1464     else if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax))
1465     {
1466         /* timestamped OEM SEL records */
1467         TsOemSELEntry* data = reinterpret_cast<TsOemSELEntry*>(&reqData[0]);
1468         ptr = data->mfrId;
1469         std::vector<uint8_t> mfrIdData(ptr, ptr + 3);
1470         std::string mfrIdStr;
1471         toHexStr(mfrIdData, mfrIdStr);
1472 
1473         ptr = data->oemData;
1474         std::vector<uint8_t> oemData(ptr, ptr + 6);
1475         std::string oemDataStr;
1476         toHexStr(oemData, oemDataStr);
1477 
1478         errType = oemTSErr;
1479         parseOemSel(data, errLog);
1480 
1481         msgLog += errType + " (0x" + recTypeStream.str() + "), MFG ID: " +
1482                   mfrIdStr + ", OEM Data: (" + oemDataStr + ") " + errLog;
1483     }
1484     else if (recType == fbUniErrType)
1485     {
1486         NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]);
1487         errType = fbUniSELErr;
1488         parseOemUnifiedSel(data, errLog);
1489         msgLog += errType + " (0x" + recTypeStream.str() + "), " + errLog;
1490     }
1491     else if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax))
1492     {
1493         /* Non timestamped OEM SEL records */
1494         NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]);
1495         errType = oemNTSErr;
1496 
1497         ptr = data->oemData;
1498         std::vector<uint8_t> oemData(ptr, ptr + 13);
1499         std::string oemDataStr;
1500         toHexStr(oemData, oemDataStr);
1501 
1502         parseOemSel((TsOemSELEntry*)data, errLog);
1503         msgLog += errType + " (0x" + recTypeStream.str() + "), OEM Data: (" +
1504                   oemDataStr + ") " + errLog;
1505     }
1506     else
1507     {
1508         errType = unknownErr;
1509         toHexStr(reqData, errLog);
1510         msgLog += errType + " (0x" + recTypeStream.str() +
1511                   ") RawData: " + errLog;
1512     }
1513 }
1514 
1515 } // namespace fb_oem::ipmi::sel
1516 
1517 namespace ipmi
1518 {
1519 
1520 namespace storage
1521 {
1522 
1523 static void registerSELFunctions() __attribute__((constructor));
1524 static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101)));
1525 
1526 ipmi::RspType<uint8_t,  // SEL version
1527               uint16_t, // SEL entry count
1528               uint16_t, // free space
1529               uint32_t, // last add timestamp
1530               uint32_t, // last erase timestamp
1531               uint8_t>  // operation support
ipmiStorageGetSELInfo()1532     ipmiStorageGetSELInfo()
1533 {
1534     fb_oem::ipmi::sel::GetSELInfoData info;
1535 
1536     selObj.getInfo(info);
1537     return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace,
1538                                  info.addTimeStamp, info.eraseTimeStamp,
1539                                  info.operationSupport);
1540 }
1541 
1542 ipmi::RspType<uint16_t, std::vector<uint8_t>>
ipmiStorageGetSELEntry(std::vector<uint8_t> data)1543     ipmiStorageGetSELEntry(std::vector<uint8_t> data)
1544 {
1545     if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest))
1546     {
1547         return ipmi::responseReqDataLenInvalid();
1548     }
1549 
1550     fb_oem::ipmi::sel::GetSELEntryRequest* reqData =
1551         reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]);
1552 
1553     if (reqData->reservID != 0)
1554     {
1555         if (!checkSELReservation(reqData->reservID))
1556         {
1557             return ipmi::responseInvalidReservationId();
1558         }
1559     }
1560 
1561     uint16_t selCnt = selObj.getCount();
1562     if (selCnt == 0)
1563     {
1564         return ipmi::responseSensorInvalid();
1565     }
1566 
1567     /* If it is asked for first entry */
1568     if (reqData->recordID == fb_oem::ipmi::sel::firstEntry)
1569     {
1570         /* First Entry (0x0000) as per Spec */
1571         reqData->recordID = 1;
1572     }
1573     else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry)
1574     {
1575         /* Last entry (0xFFFF) as per Spec */
1576         reqData->recordID = selCnt;
1577     }
1578 
1579     std::string ipmiRaw;
1580 
1581     if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0)
1582     {
1583         return ipmi::responseSensorInvalid();
1584     }
1585 
1586     std::vector<uint8_t> recDataBytes;
1587     if (fromHexStr(ipmiRaw, recDataBytes) < 0)
1588     {
1589         return ipmi::responseUnspecifiedError();
1590     }
1591 
1592     /* Identify the next SEL record ID. If recordID is same as
1593      * total SeL count then next id should be last entry else
1594      * it should be incremented by 1 to current RecordID
1595      */
1596     uint16_t nextRecord;
1597     if (reqData->recordID == selCnt)
1598     {
1599         nextRecord = fb_oem::ipmi::sel::lastEntry;
1600     }
1601     else
1602     {
1603         nextRecord = reqData->recordID + 1;
1604     }
1605 
1606     if (reqData->readLen == fb_oem::ipmi::sel::entireRecord)
1607     {
1608         return ipmi::responseSuccess(nextRecord, recDataBytes);
1609     }
1610     else
1611     {
1612         if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize ||
1613             reqData->readLen > fb_oem::ipmi::sel::selRecordSize)
1614         {
1615             return ipmi::responseUnspecifiedError();
1616         }
1617         std::vector<uint8_t> recPartData;
1618 
1619         auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset;
1620         auto readLength = std::min(diff, static_cast<int>(reqData->readLen));
1621 
1622         for (int i = 0; i < readLength; i++)
1623         {
1624             recPartData.push_back(recDataBytes[i + reqData->offset]);
1625         }
1626         return ipmi::responseSuccess(nextRecord, recPartData);
1627     }
1628 }
1629 
1630 // Retry function to log the SEL entry message and make D-Bus call
logWithRetry(const std::string & journalMsg,const std::string & messageID,const std::string & logErr,const std::string & severity,const std::map<std::string,std::string> & ad,int maxRetries=10,std::chrono::milliseconds waitTimeMs=std::chrono::milliseconds (100))1631 bool logWithRetry(
1632     const std::string& journalMsg, const std::string& messageID,
1633     const std::string& logErr, const std::string& severity,
1634     const std::map<std::string, std::string>& ad, int maxRetries = 10,
1635     std::chrono::milliseconds waitTimeMs = std::chrono::milliseconds(100))
1636 {
1637     // Attempt to log the SEL entry message
1638     lg2::info(
1639         "SEL Entry Added: {IPMI_RAW}, IPMISEL_MESSAGE_ID={MESSAGE_ID}, IPMISEL_MESSAGE_ARGS={LOG_ERR}",
1640         "IPMI_RAW", journalMsg, "MESSAGE_ID", messageID, "LOG_ERR", logErr);
1641 
1642     int attempts = 0;
1643     while (attempts < maxRetries)
1644     {
1645         // Create D-Bus call
1646         auto bus = sdbusplus::bus::new_default();
1647         auto reqMsg = bus.new_method_call(
1648             "xyz.openbmc_project.Logging", "/xyz/openbmc_project/logging",
1649             "xyz.openbmc_project.Logging.Create", "Create");
1650         reqMsg.append(logErr, severity, ad);
1651 
1652         try
1653         {
1654             // Attempt to make the D-Bus call
1655             bus.call(reqMsg);
1656             return true; // D-Bus call successful, exit the loop
1657         }
1658         catch (sdbusplus::exception_t& e)
1659         {
1660             lg2::error("D-Bus call failed: {ERROR}", "ERROR", e);
1661         }
1662 
1663         // Wait before retrying
1664         std::this_thread::sleep_for(std::chrono::milliseconds(waitTimeMs));
1665         attempts++;
1666     }
1667 
1668     return false; // Failed after max retries
1669 }
1670 
1671 // Main function to add SEL entry
1672 ipmi::RspType<uint16_t>
ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,std::vector<uint8_t> data)1673     ipmiStorageAddSELEntry(ipmi::Context::ptr ctx, std::vector<uint8_t> data)
1674 {
1675     /* Per the IPMI spec, need to cancel any reservation when a
1676      * SEL entry is added
1677      */
1678     cancelSELReservation();
1679 
1680     if (data.size() != fb_oem::ipmi::sel::selRecordSize)
1681     {
1682         return ipmi::responseReqDataLenInvalid();
1683     }
1684 
1685     std::string ipmiRaw, logErr;
1686     toHexStr(data, ipmiRaw);
1687 
1688     /* Parse sel data and get an error log to be filed */
1689     fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr);
1690 
1691     static const std::string openBMCMessageRegistryVersion("0.1");
1692     std::string messageID =
1693         "OpenBMC." + openBMCMessageRegistryVersion + ".SELEntryAdded";
1694 
1695     /* Log the Raw SEL message to the journal */
1696     std::string journalMsg = "SEL Entry Added: " + ipmiRaw;
1697 
1698     std::map<std::string, std::string> ad;
1699     std::string severity = "xyz.openbmc_project.Logging.Entry.Level.Critical";
1700     ad.emplace("IPMI_RAW", ipmiRaw);
1701 
1702     // Launch the logging thread
1703     std::thread([=]() {
1704         bool success =
1705             logWithRetry(journalMsg, messageID, logErr, severity, ad);
1706         if (!success)
1707         {
1708             lg2::error("Failed to log SEL entry added event after retries.");
1709         }
1710     }).detach();
1711 
1712     int responseID = selObj.addEntry(ipmiRaw.c_str());
1713     if (responseID < 0)
1714     {
1715         return ipmi::responseUnspecifiedError();
1716     }
1717     return ipmi::responseSuccess(static_cast<uint16_t>(responseID));
1718 }
1719 
ipmiStorageClearSEL(uint16_t reservationID,const std::array<uint8_t,3> & clr,uint8_t eraseOperation)1720 ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID,
1721                                            const std::array<uint8_t, 3>& clr,
1722                                            uint8_t eraseOperation)
1723 {
1724     if (!checkSELReservation(reservationID))
1725     {
1726         return ipmi::responseInvalidReservationId();
1727     }
1728 
1729     static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'};
1730     if (clr != clrExpected)
1731     {
1732         return ipmi::responseInvalidFieldRequest();
1733     }
1734 
1735     /* If there is no sel then return erase complete */
1736     if (selObj.getCount() == 0)
1737     {
1738         return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1739     }
1740 
1741     /* Erasure status cannot be fetched, so always return erasure
1742      * status as `erase completed`.
1743      */
1744     if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus)
1745     {
1746         return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1747     }
1748 
1749     /* Check that initiate erase is correct */
1750     if (eraseOperation != fb_oem::ipmi::sel::initiateErase)
1751     {
1752         return ipmi::responseInvalidFieldRequest();
1753     }
1754 
1755     /* Per the IPMI spec, need to cancel any reservation when the
1756      * SEL is cleared
1757      */
1758     cancelSELReservation();
1759 
1760     /* Clear the complete Sel Json object */
1761     if (selObj.clear() < 0)
1762     {
1763         return ipmi::responseUnspecifiedError();
1764     }
1765 
1766     return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1767 }
1768 
ipmiStorageGetSELTime()1769 ipmi::RspType<uint32_t> ipmiStorageGetSELTime()
1770 {
1771     struct timespec selTime = {};
1772 
1773     if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
1774     {
1775         return ipmi::responseUnspecifiedError();
1776     }
1777 
1778     return ipmi::responseSuccess(selTime.tv_sec);
1779 }
1780 
ipmiStorageSetSELTime(uint32_t)1781 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t)
1782 {
1783     // Set SEL Time is not supported
1784     return ipmi::responseInvalidCommand();
1785 }
1786 
ipmiStorageGetSELTimeUtcOffset()1787 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset()
1788 {
1789     /* TODO: For now, the SEL time stamp is based on UTC time,
1790      * so return 0x0000 as offset. Might need to change once
1791      * supporting zones in SEL time stamps
1792      */
1793 
1794     uint16_t utcOffset = 0x0000;
1795     return ipmi::responseSuccess(utcOffset);
1796 }
1797 
registerSELFunctions()1798 void registerSELFunctions()
1799 {
1800     // <Get SEL Info>
1801     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1802                           ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User,
1803                           ipmiStorageGetSELInfo);
1804 
1805     // <Get SEL Entry>
1806     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1807                           ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User,
1808                           ipmiStorageGetSELEntry);
1809 
1810     // <Add SEL Entry>
1811     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1812                           ipmi::storage::cmdAddSelEntry,
1813                           ipmi::Privilege::Operator, ipmiStorageAddSELEntry);
1814 
1815     // <Clear SEL>
1816     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1817                           ipmi::storage::cmdClearSel, ipmi::Privilege::Operator,
1818                           ipmiStorageClearSEL);
1819 
1820     // <Get SEL Time>
1821     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1822                           ipmi::storage::cmdGetSelTime, ipmi::Privilege::User,
1823                           ipmiStorageGetSELTime);
1824 
1825     // <Set SEL Time>
1826     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1827                           ipmi::storage::cmdSetSelTime,
1828                           ipmi::Privilege::Operator, ipmiStorageSetSELTime);
1829 
1830     // <Get SEL Time UTC Offset>
1831     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1832                           ipmi::storage::cmdGetSelTimeUtcOffset,
1833                           ipmi::Privilege::User,
1834                           ipmiStorageGetSELTimeUtcOffset);
1835 
1836     return;
1837 }
1838 
1839 } // namespace storage
1840 } // namespace ipmi
1841