xref: /openbmc/fb-ipmi-oem/src/selcommands.cpp (revision 27010c102747d5c0fb6b527189fbe287129d486b)
1 /*
2  * Copyright (c)  2018 Intel Corporation.
3  * Copyright (c)  2018-present Facebook.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #include <boost/algorithm/string/join.hpp>
19 #include <boost/container/flat_map.hpp>
20 #include <com/meta/IPMI/UnifiedSEL/event.hpp>
21 #include <ipmid/api.hpp>
22 #include <nlohmann/json.hpp>
23 #include <phosphor-logging/commit.hpp>
24 #include <phosphor-logging/log.hpp>
25 #include <sdbusplus/message/types.hpp>
26 #include <sdbusplus/timer.hpp>
27 #include <storagecommands.hpp>
28 
29 #include <fstream>
30 #include <iostream>
31 #include <sstream>
32 #include <thread>
33 
/* Memory error sub-type codes.
 * NOTE(review): the code that consumes these values is outside this view;
 * the meaning of each value is suggested by its name only - confirm against
 * the decoder that uses them. */
enum class MemErrType
{
    memTrainErr = 0,
    memPmicErr = 7
};
39 
/* POST event sub-type codes.
 * NOTE(review): consumers are outside this view; value meanings are taken
 * from the enumerator names only - confirm against the decoder. */
enum class PostEvtType
{
    pxeBootFail = 0,
    httpBootFail = 6,
    getCertFail = 7,
    amdAblFail = 10
};
47 
/* PCIe event sub-type codes.
 * NOTE(review): consumers are outside this view; presumably "dpc" refers to
 * Downstream Port Containment - confirm against the decoder. */
enum class PcieEvtType
{
    dpc = 0
};
52 
/* Memory event sub-type codes.
 * NOTE(review): consumers are outside this view; value meanings are taken
 * from the enumerator names only - confirm against the decoder. */
enum class MemEvtType
{
    ppr = 0,
    adddc = 5,
    noDimm = 7
};
59 
60 //----------------------------------------------------------------------
61 // Platform specific functions for storing app data
62 //----------------------------------------------------------------------
63 
/**
 * @brief Render one byte as exactly two upper-case hexadecimal digits.
 *
 * @param[in] byte  value to format
 * @return zero-padded two character string, e.g. 0x0A -> "0A"
 */
static std::string byteToStr(uint8_t byte)
{
    std::stringstream out;
    out << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
        << static_cast<int>(byte);
    return out.str();
}
73 
/**
 * @brief Convert a byte sequence into an upper-case hexadecimal string.
 *
 * Each byte becomes exactly two digits (zero padded), with no separators.
 * The input is only read, so it is taken by const reference; callers that
 * pass a non-const vector are unaffected.
 *
 * @param[in]  bytes   bytes to encode (may be empty)
 * @param[out] hexStr  overwritten with the encoded text ("" for empty input)
 */
static void toHexStr(const std::vector<uint8_t>& bytes, std::string& hexStr)
{
    std::stringstream stream;
    stream << std::hex << std::uppercase << std::setfill('0');
    for (const uint8_t byte : bytes)
    {
        /* setw applies to the next insertion only, so repeat it per byte */
        stream << std::setw(2) << static_cast<int>(byte);
    }
    hexStr = stream.str();
}
84 
fromHexStr(const std::string hexStr,std::vector<uint8_t> & data)85 static int fromHexStr(const std::string hexStr, std::vector<uint8_t>& data)
86 {
87     for (unsigned int i = 0; i < hexStr.size(); i += 2)
88     {
89         try
90         {
91             data.push_back(static_cast<uint8_t>(
92                 std::stoul(hexStr.substr(i, 2), nullptr, 16)));
93         }
94         catch (const std::invalid_argument& e)
95         {
96             phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
97             return -1;
98         }
99         catch (const std::out_of_range& e)
100         {
101             phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
102             return -1;
103         }
104     }
105     return 0;
106 }
107 
108 namespace fb_oem::ipmi::sel
109 {
110 
111 class SELData
112 {
113   private:
114     nlohmann::json selDataObj;
115 
flush()116     void flush()
117     {
118         std::ofstream file(SEL_JSON_DATA_FILE);
119         file << selDataObj;
120         file.close();
121     }
122 
init()123     void init()
124     {
125         selDataObj[KEY_SEL_VER] = 0x51;
126         selDataObj[KEY_SEL_COUNT] = 0;
127         selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF;
128         selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF;
129         selDataObj[KEY_OPER_SUPP] = 0x02;
130         /* Spec indicates that more than 64kB is free */
131         selDataObj[KEY_FREE_SPACE] = 0xFFFF;
132     }
133 
writeEmptyJson()134     void writeEmptyJson()
135     {
136         selDataObj = nlohmann::json::object(); // Create an empty JSON object
137         std::ofstream outFile(SEL_JSON_DATA_FILE);
138         if (outFile)
139         {
140             // Write empty JSON object to the file
141             outFile << selDataObj.dump(4);
142             outFile.close();
143         }
144         else
145         {
146             lg2::info("Failed to create SEL JSON file with empty JSON.");
147         }
148     }
149 
150   public:
SELData()151     SELData()
152     {
153         /* Get App data stored in json file */
154         std::ifstream file(SEL_JSON_DATA_FILE);
155         if (file)
156         {
157             try
158             {
159                 file >> selDataObj;
160             }
161             catch (const nlohmann::json::parse_error& e)
162             {
163                 lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e);
164                 writeEmptyJson();
165                 init(); // Initialize to default values
166             }
167             file.close();
168         }
169         else
170         {
171             lg2::info("Failed to open SEL JSON file.");
172             writeEmptyJson();
173             init();
174         }
175 
176         /* Initialize SelData object if no entries. */
177         if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end())
178         {
179             init();
180         }
181     }
182 
clear()183     int clear()
184     {
185         /* Clear the complete Sel Json object */
186         selDataObj.clear();
187         /* Reinitialize it with basic data */
188         init();
189         /* Save the erase time */
190         struct timespec selTime = {};
191         if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
192         {
193             return -1;
194         }
195         selDataObj[KEY_ERASE_TIME] = selTime.tv_sec;
196         flush();
197         return 0;
198     }
199 
getCount()200     uint32_t getCount()
201     {
202         return selDataObj[KEY_SEL_COUNT];
203     }
204 
getInfo(GetSELInfoData & info)205     void getInfo(GetSELInfoData& info)
206     {
207         info.selVersion = selDataObj[KEY_SEL_VER];
208         info.entries = selDataObj[KEY_SEL_COUNT];
209         info.freeSpace = selDataObj[KEY_FREE_SPACE];
210         info.addTimeStamp = selDataObj[KEY_ADD_TIME];
211         info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME];
212         info.operationSupport = selDataObj[KEY_OPER_SUPP];
213     }
214 
getEntry(uint32_t index,std::string & rawStr)215     int getEntry(uint32_t index, std::string& rawStr)
216     {
217         std::stringstream ss;
218         ss << std::hex;
219         ss << std::setw(2) << std::setfill('0') << index;
220 
221         /* Check or the requested SEL Entry, if record is available */
222         if (selDataObj.find(ss.str()) == selDataObj.end())
223         {
224             return -1;
225         }
226 
227         rawStr = selDataObj[ss.str()][KEY_SEL_ENTRY_RAW];
228         return 0;
229     }
230 
addEntry(std::string keyStr)231     int addEntry(std::string keyStr)
232     {
233         struct timespec selTime = {};
234 
235         if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
236         {
237             return -1;
238         }
239 
240         selDataObj[KEY_ADD_TIME] = selTime.tv_sec;
241 
242         int selCount = selDataObj[KEY_SEL_COUNT];
243         selDataObj[KEY_SEL_COUNT] = ++selCount;
244 
245         std::stringstream ss;
246         ss << std::hex;
247         ss << std::setw(2) << std::setfill('0') << selCount;
248 
249         selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr;
250         flush();
251         return selCount;
252     }
253 };
254 
/*
 * parseSelHelper (defined further below) parses a common SEL message; it is
 * a helper function for parseStdSel.
 *
 * Note that this function __CANNOT__ be overridden.
 * To add a board specific routine, please override parseStdSel.
 */
262 
/* Domain names used when decoding ME (Node Manager) events.
 * The list has six entries, so only indices 0..5 are valid; the decoders in
 * this file index it with a 4-bit field taken from the event data. */
std::vector<std::string> nmDomName = {
    "Entire Platform",          "CPU Subsystem",
    "Memory Subsystem",         "HW Protection",
    "High Power I/O subsystem", "Unknown"};
268 
269 /* Default log message for unknown type */
/**
 * @brief Fallback decoder: ignores the event data and reports "Unknown".
 *
 * @param[out] errLog  overwritten with "Unknown"
 */
static void logDefault(uint8_t*, std::string& errLog)
{
    errLog.assign("Unknown");
}
274 
/**
 * @brief Decode a SYSTEM_EVENT record into text.
 *
 * Only data[0] == 0xE5 is understood: data[2] selects the cause of the time
 * change and data[1] (0x00 / 0x80) adds a "First Time" / "Second Time"
 * suffix. Anything else yields "Unknown".
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  overwritten with the decoded message
 */
static void logSysEvent(uint8_t* data, std::string& errLog)
{
    if (data[0] != 0xE5)
    {
        errLog = "Unknown";
        return;
    }

    static const char* const causes[] = {"NTP", "Host RTL", "Set SEL time cmd",
                                         "Set SEL time UTC offset cmd"};
    const char* cause = (data[2] <= 0x03) ? causes[data[2]] : "Unknown";

    errLog = "Cause of Time change - ";
    errLog += cause;

    switch (data[1])
    {
        case 0x00:
            errLog += " - First Time";
            break;
        case 0x80:
            errLog += " - Second Time";
            break;
        default:
            break;
    }
}
308 
/**
 * @brief Decode a THERM_THRESH_EVT record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  "Limit Exceeded" when data[0] is 0x1, else "Unknown"
 */
static void logThermalEvent(uint8_t* data, std::string& errLog)
{
    errLog = (data[0] == 0x1) ? "Limit Exceeded" : "Unknown";
}
320 
/**
 * @brief Decode a CRITICAL_IRQ record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the interrupt description
 */
static void logCritIrq(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "NMI / Diagnostic Interrupt";
            break;
        case 0x03:
            errLog = "Software NMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }

    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
}
338 
logPostErr(uint8_t * data,std::string & errLog)339 static void logPostErr(uint8_t* data, std::string& errLog)
340 {
341     if ((data[0] & 0x0F) == 0x0)
342     {
343         errLog = "System Firmware Error";
344     }
345     else
346     {
347         errLog = "Unknown";
348     }
349 
350     if (((data[0] >> 6) & 0x03) == 0x3)
351     {
352         // TODO: Need to implement IPMI spec based Post Code
353         errLog += ", IPMI Post Code";
354     }
355     else if (((data[0] >> 6) & 0x03) == 0x2)
356     {
357         errLog += ", OEM Post Code 0x" + byteToStr(data[2]) +
358                   byteToStr(data[1]);
359 
360         switch ((data[2] << 8) | data[1])
361         {
362             case 0xA105:
363                 errLog += ", BMC Failed (No Response)";
364                 break;
365             case 0xA106:
366                 errLog += ", BMC Failed (Self Test Fail)";
367                 break;
368             case 0xA10A:
369                 errLog += ", System Firmware Corruption Detected";
370                 break;
371             case 0xA10B:
372                 errLog += ", TPM Self-Test FAIL Detected";
373         }
374     }
375 }
376 
/**
 * @brief Decode a MACHINE_CHK_ERR record.
 *
 * The low nibble of data[0] selects the error class (0x0B or 0x0C) and bits
 * [6:5] of data[1] select the severity text within that class. The message
 * then lists data[1] as the bank number, data[2] >> 5 as the CPU and
 * data[2] & 0x1F as the core.
 *
 * NOTE(review): the bank number is printed from the whole of data[1], i.e.
 * including the severity bits decoded above - confirm whether it should be
 * masked.
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  overwritten with the decoded message
 */
static void logMchChkErr(uint8_t* data, std::string& errLog)
{
    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
    static const char* const uncorrected[] = {
        "Uncorrected Recoverable Error", "Uncorrected Thread Fatal Error",
        "Uncorrected System Fatal Error"};
    static const char* const corrected[] = {"Correctable Error",
                                            "Deferred Error"};

    const uint8_t errClass = data[0] & 0x0F;
    const uint8_t severity = (data[1] >> 5) & 0x03;

    const char* description = "Unknown";
    if (errClass == 0x0B && severity < 3)
    {
        description = uncorrected[severity];
    }
    else if (errClass == 0x0C && severity < 2)
    {
        description = corrected[severity];
    }

    errLog = description;
    errLog += ", Machine Check bank Number ";
    errLog += std::to_string(data[1]);
    errLog += ", CPU ";
    errLog += std::to_string(data[2] >> 5);
    errLog += ", Core ";
    errLog += std::to_string(data[2] & 0x1F);
}
419 
/**
 * @brief Decode a PCIE_ERR record.
 *
 * The low nibble of data[0] selects the message. Error kinds 0x4, 0x5, 0x7,
 * 0x8 and 0xA append a bus/device/function location built from data[2] (bus)
 * and data[1] (device in bits [7:3], function in bits [2:0]). Kinds 0xD, 0xE
 * and 0xF instead print data[1] and data[2] as a 16-bit identifier.
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  overwritten with the decoded message
 */
static void logPcieErr(uint8_t* data, std::string& errLog)
{
    /* Upper-case hex of value, zero padded to at least `width` digits. */
    const auto hex = [](unsigned int value, int width) {
        std::stringstream fmt;
        fmt << std::hex << std::uppercase << std::setfill('0')
            << std::setw(width) << value;
        return fmt.str();
    };

    const std::string location =
        " (Bus " + hex(data[2], 2) + " / Dev " + hex(data[1] >> 3, 2) +
        " / Fun " + hex(data[1] & 0x7, 2) + ")";
    const unsigned int id16 =
        (static_cast<unsigned int>(data[1]) << 8) | data[2];

    const uint8_t kind = data[0] & 0xF;
    if (kind == 0x4)
    {
        errLog = "PCI PERR" + location;
    }
    else if (kind == 0x5)
    {
        errLog = "PCI SERR" + location;
    }
    else if (kind == 0x7)
    {
        errLog = "Correctable" + location;
    }
    else if (kind == 0x8)
    {
        errLog = "Uncorrectable" + location;
    }
    else if (kind == 0xA)
    {
        errLog = "Bus Fatal" + location;
    }
    else if (kind == 0xD)
    {
        errLog = "Vendor ID: 0x" + hex(id16, 4);
    }
    else if (kind == 0xE)
    {
        errLog = "Device ID: 0x" + hex(id16, 4);
    }
    else if (kind == 0xF)
    {
        errLog = "Error ID from downstream: 0x" + hex(data[1], 2) +
                 hex(data[2], 2);
    }
    else
    {
        errLog = "Unknown";
    }
}
469 
logIioErr(uint8_t * data,std::string & errLog)470 static void logIioErr(uint8_t* data, std::string& errLog)
471 {
472     std::vector<std::string> tmpStr = {
473         "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data",
474         "Misc", " DMA", "ITC",       "OTC",  "CI"};
475 
476     if ((data[0] & 0xF) == 0)
477     {
478         errLog += "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" +
479                   byteToStr(data[1]) + " - ";
480 
481         if ((data[2] & 0xF) <= 0x9)
482         {
483             errLog += tmpStr[(data[2] & 0xF)];
484         }
485         else
486         {
487             errLog += "Reserved";
488         }
489     }
490     else
491     {
492         errLog = "Unknown";
493     }
494 }
495 
logMemErr(uint8_t * dataPtr,std::string & errLog)496 [[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog)
497 {
498     uint8_t snrType = dataPtr[0];
499     uint8_t snrNum = dataPtr[1];
500     uint8_t* data = &(dataPtr[3]);
501 
502     /* TODO: add pal_add_cri_sel */
503 
504     if (snrNum == memoryEccError)
505     {
506         /* SEL from MEMORY_ECC_ERR Sensor */
507         switch (data[0] & 0x0F)
508         {
509             case 0x0:
510                 if (snrType == 0x0C)
511                 {
512                     errLog = "Correctable";
513                 }
514                 else if (snrType == 0x10)
515                 {
516                     errLog = "Correctable ECC error Logging Disabled";
517                 }
518                 break;
519             case 0x1:
520                 errLog = "Uncorrectable";
521                 break;
522             case 0x5:
523                 errLog = "Correctable ECC error Logging Limit Disabled";
524                 break;
525             default:
526                 errLog = "Unknown";
527         }
528     }
529     else if (snrNum == memoryErrLogDIS)
530     {
531         // SEL from MEMORY_ERR_LOG_DIS Sensor
532         if ((data[0] & 0x0F) == 0x0)
533         {
534             errLog = "Correctable Memory Error Logging Disabled";
535         }
536         else
537         {
538             errLog = "Unknown";
539         }
540     }
541     else
542     {
543         errLog = "Unknown";
544         return;
545     }
546 
547     /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */
548 
549     errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " +
550               std::to_string(data[1] & 0x03);
551 
552     /* DIMM number (data[2]):
553      * Bit[7:5]: Socket number  (Range: 0-7)
554      * Bit[4:3]: Channel number (Range: 0-3)
555      * Bit[2:0]: DIMM number    (Range: 0-7)
556      */
557 
558     /* TODO: Verify these bits */
559     std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5);
560     std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3);
561     std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7);
562 
563     switch ((data[1] & 0xC) >> 2)
564     {
565         case 0x0:
566         {
567             /* All Info Valid */
568             [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2;
569             [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3;
570 
571             /* TODO: If critical SEL logging is available, do it */
572             if (snrType == 0x0C)
573             {
574                 if ((data[0] & 0x0F) == 0x0)
575                 {
576                     /* TODO: add_cri_sel */
577                     /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1"
578                      */
579                 }
580                 else if ((data[0] & 0x0F) == 0x1)
581                 {
582                     /* TODO: add_cri_sel */
583                     /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1"
584                      */
585                 }
586             }
587             /* Continue to parse the error into a string. All Info Valid
588              */
589             errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")";
590         }
591 
592         break;
593         case 0x1:
594 
595             /* DIMM info not valid */
596             errLog += " (" + cpuStr + ", " + chStr + ")";
597             break;
598         case 0x2:
599 
600             /* CHN info not valid */
601             errLog += " (" + cpuStr + ", " + dimmStr + ")";
602             break;
603         case 0x3:
604 
605             /* CPU info not valid */
606             errLog += " (" + chStr + ", " + dimmStr + ")";
607             break;
608     }
609 }
610 
/**
 * @brief Decode a PWR_ERR record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logPwrErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x1:
            /* Also try logging to Critical log file, if available */
            /* "SYS_PWROK failure,FRU:1" */
            errLog = "SYS_PWROK failure";
            break;
        case 0x2:
            /* Also try logging to Critical log file, if available */
            /* "PCH_PWROK failure,FRU:1" */
            errLog = "PCH_PWROK failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
630 
/**
 * @brief Decode a CATERR record (used for both CATERR_A and CATERR_B).
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logCatErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x0:
            /* Also try logging to Critical log file, if available */
            /* "IERR,FRU:1 */
            errLog = "IERR/CATERR";
            break;
        case 0xB:
            /* Also try logging to Critical log file, if available */
            /* "MCERR,FRU:1 */
            errLog = "MCERR/CATERR";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
650 
/**
 * @brief Decode a CPU_DIMM_HOT record.
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  "SOC MEMHOT" when the bytes are exactly 01 FF FF,
 *                     otherwise "Unknown"
 */
static void logDimmHot(uint8_t* data, std::string& errLog)
{
    const bool isMemHot =
        (data[0] == 0x01) && (data[1] == 0xFF) && (data[2] == 0xFF);

    if (!isMemHot)
    {
        /* Also try logging to Critical log file, if available */
        /* ""CPU_DIMM_HOT %s,FRU:1" */
        errLog = "Unknown";
        return;
    }
    errLog = "SOC MEMHOT";
}
664 
/**
 * @brief Decode a SOFTWARE_NMI record.
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  "Software NMI" when the bytes are exactly 03 FF FF,
 *                     otherwise "Unknown SW NMI"
 */
static void logSwNMI(uint8_t* data, std::string& errLog)
{
    const bool matches =
        (data[0] == 0x03) && (data[1] == 0xFF) && (data[2] == 0xFF);
    errLog = matches ? "Software NMI" : "Unknown SW NMI";
}
676 
/**
 * @brief Decode a CPU thermal status record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the status name, or "Unknown" for
 *                     values above 0x2
 */
static void logCPUThermalSts(uint8_t* data, std::string& errLog)
{
    static const char* const statusNames[] = {"CPU Critical Temperature",
                                              "PROCHOT#", "TCC Activation"};

    if (data[0] <= 0x2)
    {
        errLog = statusNames[data[0]];
    }
    else
    {
        errLog = "Unknown";
    }
}
694 
/**
 * @brief Decode an ME_POWER_STATE record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  "RUNNING" for 0, "POWER_OFF" for 2, otherwise
 *                     "Unknown[<decimal value>]"
 */
static void logMEPwrState(uint8_t* data, std::string& errLog)
{
    const uint8_t state = data[0];

    if (state == 0)
    {
        errLog = "RUNNING";
    }
    else if (state == 2)
    {
        errLog = "POWER_OFF";
    }
    else
    {
        errLog = "Unknown[";
        errLog += std::to_string(state);
        errLog += "]";
    }
}
710 
/**
 * @brief Decode an SPS_FW_HEALTH record.
 *
 * The low nibble of data[0] selects the category: 0x00 looks data[1] up in
 * the health table (values from 0x14 upwards give "Unknown"), 0x01 is an
 * SMBus link failure, anything else is "Unknown".
 *
 * @param[in]  data    event data; data[0] and data[1] are read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logSPSFwHealth(uint8_t* data, std::string& errLog)
{
    static const char* const healthText[] = {
        "Recovery GPIO forced",
        "Image execution failed",
        "Flash erase error",
        "Flash state information",
        "Internal error",
        "BMC did not respond",
        "Direct Flash update",
        "Manufacturing error",
        "Automatic Restore to Factory Presets",
        "Firmware Exception",
        "Flash Wear-Out Protection Warning",
        "Unknown",
        "Unknown",
        "DMI interface error",
        "MCTP interface error",
        "Auto-configuration finished",
        "Unsupported Segment Defined Feature",
        "Unknown",
        "CPU Debug Capability Disabled",
        "UMA operation error"};

    const uint8_t category = data[0] & 0x0F;

    switch (category)
    {
        case 0x00:
            errLog = (data[1] < 0x14) ? healthText[data[1]] : "Unknown";
            break;
        case 0x01:
            errLog = "SMBus link failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
755 
/**
 * @brief Decode an NM_EXCEPTION_A record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  "Policy Correction Time Exceeded" for 0xA8, otherwise
 *                     "Unknown"
 */
static void logNmExcA(uint8_t* data, std::string& errLog)
{
    /*NM4.0 #550710, Revision 1.95, and turn to p.155*/
    errLog = (data[0] == 0xA8) ? "Policy Correction Time Exceeded" : "Unknown";
}
768 
/**
 * @brief Decode a PCH_THERM_THRESHOLD record.
 *
 * The low nibble of data[0] selects the threshold event name; data[1] and
 * data[2] are printed in decimal as the current and threshold values.
 *
 * @param[in]  data    three event data bytes
 * @param[out] errLog  overwritten with the decoded message
 */
static void logPCHThermal(uint8_t* data, std::string& errLog)
{
    static const char* const thresholdEvents[] = {
        "Lower Non-critical",
        "Unknown",
        "Lower Critical",
        "Unknown",
        "Lower Non-recoverable",
        "Unknown",
        "Unknown",
        "Upper Non-critical",
        "Unknown",
        "Upper Critical",
        "Unknown",
        "Upper Non-recoverable"};

    const uint8_t eventIdx = data[0] & 0x0f;

    errLog = (eventIdx < 12) ? thresholdEvents[eventIdx] : "Unknown";
    errLog += ", curr_val: ";
    errLog += std::to_string(data[1]);
    errLog += " C, thresh_val: ";
    errLog += std::to_string(data[2]);
    errLog += " C";
}
797 
logNmHealth(uint8_t * data,std::string & errLog)798 static void logNmHealth(uint8_t* data, std::string& errLog)
799 {
800     std::vector<std::string> nmErrType = {
801         "Unknown",
802         "Unknown",
803         "Unknown",
804         "Unknown",
805         "Unknown",
806         "Unknown",
807         "Unknown",
808         "Extended Telemetry Device Reading Failure",
809         "Outlet Temperature Reading Failure",
810         "Volumetric Airflow Reading Failure",
811         "Policy Misconfiguration",
812         "Power Sensor Reading Failure",
813         "Inlet Temperature Reading Failure",
814         "Host Communication Error",
815         "Real-time Clock Synchronization Failure",
816         "Platform Shutdown Initiated by Intel NM Policy",
817         "Unknown"};
818     uint8_t nmTypeIdx = (data[0] & 0xf);
819     uint8_t domIdx = (data[1] & 0xf);
820     uint8_t errIdx = ((data[1] >> 4) & 0xf);
821 
822     if (nmTypeIdx == 2)
823     {
824         errLog = "SensorIntelNM";
825     }
826     else
827     {
828         errLog = "Unknown";
829     }
830 
831     errLog += ", Domain:" + nmDomName[domIdx] + ", ErrType:" +
832               nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]);
833 }
834 
logNmCap(uint8_t * data,std::string & errLog)835 static void logNmCap(uint8_t* data, std::string& errLog)
836 {
837     const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"};
838     if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr
839                        // limit and the others are reserved
840     {
841         errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] +
842                  ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] +
843                  ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)];
844     }
845     else
846     {
847         errLog = "Unknown";
848     }
849 }
850 
logNmThreshold(uint8_t * data,std::string & errLog)851 static void logNmThreshold(uint8_t* data, std::string& errLog)
852 {
853     uint8_t thresNum = (data[0] & 0x3);
854     uint8_t domIdx = (data[1] & 0xf);
855     uint8_t polId = data[2];
856     uint8_t polEvtIdx = BIT(data[0], 3);
857     const std::vector<std::string> polEvtStr = {
858         "Threshold Exceeded", "Policy Correction Time Exceeded"};
859 
860     errLog = "Threshold Number:" + std::to_string(thresNum) + "-" +
861              polEvtStr[polEvtIdx] + ", Domain:" + nmDomName[domIdx] +
862              ", PolicyID:0x" + byteToStr(polId);
863 }
864 
/**
 * @brief Decode a PWR_THRESH_EVT record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logPwrThreshold(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "Limit Not Exceeded";
            break;
        case 0x01:
            errLog = "Limit Exceeded";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
880 
/**
 * @brief Decode an MSMI record.
 *
 * @param[in]  data    event data; only data[0] is read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logMSMI(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "IERR/MSMI";
            break;
        case 0x0B:
            errLog = "MCERR/MSMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
896 
/**
 * @brief Decode an HPR_WARNING record.
 *
 * Only data[2] == 0x01 is understood; data[1] is then a number of minutes,
 * with 0xFF meaning an infinite time.
 *
 * @param[in]  data    event data; data[1] and data[2] are read
 * @param[out] errLog  overwritten with the decoded message
 */
static void logHprWarn(uint8_t* data, std::string& errLog)
{
    if (data[2] != 0x01)
    {
        errLog = "Unknown";
        return;
    }

    if (data[1] == 0xFF)
    {
        errLog = "Infinite Time";
        return;
    }

    errLog = std::to_string(data[1]);
    errLog += " minutes";
}
915 
/* Lookup table keyed by sensor number. Each entry pairs the sensor's display
 * name with the decoder that turns its event bytes into text; parseSelHelper
 * looks a record's sensor number up here and calls the decoder.
 * Entries mapped to logDefault always decode to "Unknown".
 * NOTE(review): the memory sensors (0x63, 0x87) map to logDefault although a
 * dedicated logMemErr decoder exists in this file - confirm this is
 * intended. */
static const boost::container::flat_map<
    uint8_t,
    std::pair<std::string, std::function<void(uint8_t*, std::string&)>>>
    sensorNameTable = {
        {0xE9, {"SYSTEM_EVENT", logSysEvent}},
        {0x7D, {"THERM_THRESH_EVT", logThermalEvent}},
        {0xAA, {"BUTTON", logDefault}},
        {0xAB, {"POWER_STATE", logDefault}},
        {0xEA, {"CRITICAL_IRQ", logCritIrq}},
        {0x2B, {"POST_ERROR", logPostErr}},
        {0x40, {"MACHINE_CHK_ERR", logMchChkErr}},
        {0x41, {"PCIE_ERR", logPcieErr}},
        {0x43, {"IIO_ERR", logIioErr}},
        {0X63, {"MEMORY_ECC_ERR", logDefault}},
        {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}},
        {0X51, {"PROCHOT_EXT", logDefault}},
        {0X56, {"PWR_ERR", logPwrErr}},
        {0xE6, {"CATERR_A", logCatErr}},
        {0xEB, {"CATERR_B", logCatErr}},
        {0xB3, {"CPU_DIMM_HOT", logDimmHot}},
        {0x90, {"SOFTWARE_NMI", logSwNMI}},
        {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}},
        {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}},
        {0x16, {"ME_POWER_STATE", logMEPwrState}},
        {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}},
        {0x18, {"NM_EXCEPTION_A", logNmExcA}},
        {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}},
        {0x19, {"NM_HEALTH", logNmHealth}},
        {0x1A, {"NM_CAPABILITIES", logNmCap}},
        {0x1B, {"NM_THRESHOLD", logNmThreshold}},
        {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}},
        {0xE7, {"MSMI", logMSMI}},
        {0xC5, {"HPR_WARNING", logHprWarn}}};
949 
parseSelHelper(StdSELEntry * data,std::string & errStr)950 static void parseSelHelper(StdSELEntry* data, std::string& errStr)
951 {
952     /* Check if sensor type is OS_BOOT (0x1f) */
953     if (data->sensorType == 0x1F)
954     {
955         /* OS_BOOT used by OS */
956         switch (data->eventData1 & 0xF)
957         {
958             case 0x07:
959                 errStr = "Base OS/Hypervisor Installation started";
960                 break;
961             case 0x08:
962                 errStr = "Base OS/Hypervisor Installation completed";
963                 break;
964             case 0x09:
965                 errStr = "Base OS/Hypervisor Installation aborted";
966                 break;
967             case 0x0A:
968                 errStr = "Base OS/Hypervisor Installation failed";
969                 break;
970             default:
971                 errStr = "Unknown";
972         }
973         return;
974     }
975 
976     auto findSensorName = sensorNameTable.find(data->sensorNum);
977     if (findSensorName == sensorNameTable.end())
978     {
979         errStr = "Unknown";
980         return;
981     }
982     else
983     {
984         switch (data->sensorNum)
985         {
986             /* logMemErr function needs data from sensor type */
987             case memoryEccError:
988             case memoryErrLogDIS:
989                 findSensorName->second.second(&(data->sensorType), errStr);
990                 break;
991             /* Other sensor function needs only event data for parsing */
992             default:
993                 findSensorName->second.second(&(data->eventData1), errStr);
994         }
995     }
996 
997     if (((data->eventData3 & 0x80) >> 7) == 0)
998     {
999         errStr += " Assertion";
1000     }
1001     else
1002     {
1003         errStr += " Deassertion";
1004     }
1005 }
1006 
parseDimmPhyloc(StdSELEntry * data,std::string & errStr)1007 static void parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
1008 {
1009     // Log when " All info available"
1010     uint8_t chNum = (data->eventData3 & 0x18) >> 3;
1011     uint8_t dimmNum = data->eventData3 & 0x7;
1012     uint8_t rankNum = data->eventData2 & 0x03;
1013     uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;
1014 
1015     if (chNum == 3 && dimmNum == 0)
1016     {
1017         errStr += " Node: " + std::to_string(nodeNum) + "," +
1018                   " Card: " + std::to_string(chNum) + "," +
1019                   " Module: " + std::to_string(dimmNum) + "," +
1020                   " Rank Number: " + std::to_string(rankNum) + "," +
1021                   "  Location: DIMM A0";
1022     }
1023     else if (chNum == 2 && dimmNum == 0)
1024     {
1025         errStr += " Node: " + std::to_string(nodeNum) + "," +
1026                   " Card: " + std::to_string(chNum) + "," +
1027                   " Module: " + std::to_string(dimmNum) + "," +
1028                   " Rank Number: " + std::to_string(rankNum) + "," +
1029                   " Location: DIMM B0";
1030     }
1031     else if (chNum == 4 && dimmNum == 0)
1032     {
1033         errStr += " Node: " + std::to_string(nodeNum) + "," +
1034                   " Card: " + std::to_string(chNum) + "," +
1035                   " Module: " + std::to_string(dimmNum) + "," +
1036                   " Rank Number: " + std::to_string(rankNum) + "," +
1037                   " Location: DIMM C0 ";
1038     }
1039     else if (chNum == 5 && dimmNum == 0)
1040     {
1041         errStr += " Node: " + std::to_string(nodeNum) + "," +
1042                   " Card: " + std::to_string(chNum) + "," +
1043                   " Module: " + std::to_string(dimmNum) + "," +
1044                   " Rank Number: " + std::to_string(rankNum) + "," +
1045                   " Location: DIMM D0";
1046     }
1047     else
1048     {
1049         errStr += " Node: " + std::to_string(nodeNum) + "," +
1050                   " Card: " + std::to_string(chNum) + "," +
1051                   " Module: " + std::to_string(dimmNum) + "," +
1052                   " Rank Number: " + std::to_string(rankNum) + "," +
1053                   " Location: DIMM Unknown";
1054     }
1055 }
1056 
parseStdSel(StdSELEntry * data,std::string & errStr)1057 static void parseStdSel(StdSELEntry* data, std::string& errStr)
1058 {
1059     std::stringstream tmpStream;
1060     tmpStream << std::hex << std::uppercase;
1061 
1062     /* TODO: add pal_add_cri_sel */
1063     switch (data->sensorNum)
1064     {
1065         case memoryEccError:
1066             switch (data->eventData1 & 0x0F)
1067             {
1068                 case 0x00:
1069                     errStr = "Correctable";
1070                     tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1071                               << data->eventData3 << " ECC err";
1072                     parseDimmPhyloc(data, errStr);
1073                     break;
1074                 case 0x01:
1075                     errStr = "Uncorrectable";
1076                     tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1077                               << data->eventData3 << " UECC err";
1078                     parseDimmPhyloc(data, errStr);
1079                     break;
1080                 case 0x02:
1081                     errStr = "Parity";
1082                     break;
1083                 case 0x05:
1084                     errStr = "Correctable ECC error Logging Limit Reached";
1085                     break;
1086                 default:
1087                     errStr = "Unknown";
1088             }
1089             break;
1090         case memoryErrLogDIS:
1091             if ((data->eventData1 & 0x0F) == 0)
1092             {
1093                 errStr = "Correctable Memory Error Logging Disabled";
1094             }
1095             else
1096             {
1097                 errStr = "Unknown";
1098             }
1099             break;
1100         default:
1101             parseSelHelper(data, errStr);
1102             return;
1103     }
1104 
1105     errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
1106     errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);
1107 
1108     switch ((data->eventData2 & 0x0C) >> 2)
1109     {
1110         case 0x00:
1111             // Ignore when " All info available"
1112             break;
1113         case 0x01:
1114             errStr += " DIMM info not valid";
1115             break;
1116         case 0x02:
1117             errStr += " CHN info not valid";
1118             break;
1119         case 0x03:
1120             errStr += " CPU info not valid";
1121             break;
1122         default:
1123             errStr += " Unknown";
1124     }
1125 
1126     if (((data->eventType & 0x80) >> 7) == 0)
1127     {
1128         errStr += " Assertion";
1129     }
1130     else
1131     {
1132         errStr += " Deassertion";
1133     }
1134 
1135     return;
1136 }
1137 
parseOemSel(TsOemSELEntry * data,std::string & errStr)1138 static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
1139 {
1140     std::stringstream tmpStream;
1141     tmpStream << std::hex << std::uppercase << std::setfill('0');
1142 
1143     switch (data->recordType)
1144     {
1145         case 0xC0:
1146             tmpStream << "VID:0x" << std::setw(2) << (int)data->oemData[1]
1147                       << std::setw(2) << (int)data->oemData[0] << " DID:0x"
1148                       << std::setw(2) << (int)data->oemData[3] << std::setw(2)
1149                       << (int)data->oemData[2] << " Slot:0x" << std::setw(2)
1150                       << (int)data->oemData[4] << " Error ID:0x" << std::setw(2)
1151                       << (int)data->oemData[5];
1152             break;
1153         case 0xC2:
1154             tmpStream << "Extra info:0x" << std::setw(2)
1155                       << (int)data->oemData[1] << " MSCOD:0x" << std::setw(2)
1156                       << (int)data->oemData[3] << std::setw(2)
1157                       << (int)data->oemData[2] << " MCACOD:0x" << std::setw(2)
1158                       << (int)data->oemData[5] << std::setw(2)
1159                       << (int)data->oemData[4];
1160             break;
1161         case 0xC3:
1162             int bank = (data->oemData[1] & 0xf0) >> 4;
1163             int col = ((data->oemData[1] & 0x0f) << 8) | data->oemData[2];
1164 
1165             tmpStream << "Fail Device:0x" << std::setw(2)
1166                       << (int)data->oemData[0] << " Bank:0x" << std::setw(2)
1167                       << bank << " Column:0x" << std::setw(2) << col
1168                       << " Failed Row:0x" << std::setw(2)
1169                       << (int)data->oemData[3] << std::setw(2)
1170                       << (int)data->oemData[4] << std::setw(2)
1171                       << (int)data->oemData[5];
1172     }
1173 
1174     errStr = tmpStream.str();
1175 
1176     return;
1177 }
1178 
/*
 * Build the "DIMM Slot Location: ..." text for a unified SEL record.
 *
 * socket  - bits 5:4 carry the sled, bits 3:0 the socket
 * channel - channel number (low nibble used), 0xFF when unknown
 * slot    - slot number (low nibble used), 0xFF when unknown
 *
 * Returns the formatted location. When both channel and slot are 0xFF the
 * channel, slot and DIMM are reported as unknown. Otherwise the DIMM name is
 * a letter chosen by (socket * 2 + slot) followed by the channel number, or
 * "NA" when that index is outside the four known letters.
 */
static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
                                   uint8_t slot)
{
    const uint8_t sled = (socket >> 4) & 0x3;
    socket &= 0xf;

    if (channel == 0xFF && slot == 0xFF)
    {
        return std::format(
            "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
            ", Slot unknown, DIMM unknown",
            sled, socket);
    }

    channel &= 0xf;
    slot &= 0xf;

    static constexpr char label[] = {'A', 'C', 'B', 'D'};
    const uint8_t idx = socket * 2 + slot;

    std::string dimmName = "NA";
    if (idx < sizeof(label))
    {
        dimmName = label[idx] + std::to_string(channel);
    }

    return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
                       ", Channel {:02}, Slot {:02} DIMM {}",
                       sled, socket, channel, slot, dimmName);
}
1206 
/*
 * Decode a Facebook unified SEL record into human readable text.
 *
 * data   - non-timestamped OEM SEL record carrying the unified SEL payload
 * errStr - receives the decoded text (any previous content is replaced)
 *
 * The low nibble of OEM data byte 0 (general info) selects the error type;
 * the remaining bytes are interpreted per type. Unrecognised error types are
 * reported together with a hex dump of all 13 OEM data bytes.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    /* The lookup tables never change, so they are built once rather than on
     * every call. Each one ends with a "Reserved" catch-all entry.
     */
    static const std::vector<std::string> dimmErr = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static const std::vector<std::string> postEvent = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static const std::vector<std::string> certErr = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static const std::vector<std::string> pcieEvent = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static const std::vector<std::string> memEvent = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static const std::vector<std::string> memPprTime = {
        "Boot time", "Autonomous", "Run time", "Reserved"};
    static const std::vector<std::string> memPpr = {
        "PPR success", "PPR fail", "PPR request", "Reserved"};
    static const std::vector<std::string> memAdddc = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static const std::vector<std::string> pprEvent = {
        "PPR disable", "Soft PPR", "Hard PPR", "Reserved"};

    /* Table lookup that maps any out-of-range code to the last entry */
    const auto pick = [](const std::vector<std::string>& table,
                         uint8_t idx) -> const std::string& {
        return table[std::min<std::size_t>(idx, table.size() - 1)];
    };

    uint8_t* ptr = data->oemData;
    uint8_t eventType = ptr[5] & 0xf;
    const int genInfo = ptr[0];
    const int errType = genInfo & 0x0f;

    std::string msg;

    switch (errType)
    {
        case unifiedPcieErr:
            msg += std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;
        case unifiedMemErr:
            /* Memory records keep the event type in byte 9 */
            eventType = ptr[9] & 0xf;
            msg += std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                pick(dimmErr, eventType));

            if (static_cast<MemErrType>(eventType) == MemErrType::memTrainErr ||
                static_cast<MemErrType>(eventType) == MemErrType::memPmicErr)
            {
                /* Bit 7 of byte 9 selects the AMD layout, which carries a
                 * 16-bit minor code (bytes 12:11) instead of a single byte.
                 */
                bool amd = ptr[9] & 0x80;
                msg += std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        case unifiedIioErr:
            msg += std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;
        case unifiedPostEvt:
            msg += std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                pick(postEvent, eventType));

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    uint8_t failType = ptr[10] & 0xf;
                    msg += std::format(
                        ", Fail Type: {}, Error Code: 0x{:02X}",
                        (failType == 4 || failType == 6)
                            ? std::format("IPv{} fail", failType)
                            : std::format("0x{:02X}", ptr[10]),
                        ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    msg += std::format(", Failure Detail: {}",
                                       pick(certErr, ptr[9]));
                    break;
                case PostEvtType::amdAblFail:
                    msg += std::format(", ABL Error Code: 0x{:04X}",
                                       (ptr[12] << 8) | ptr[11]);
                    break;
                default:
                    /* No extra detail for the remaining POST events */
                    break;
            }
            break;
        case unifiedPcieEvt:
            msg += std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo, pick(pcieEvent, eventType));

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                msg += std::format(", Status: 0x{:04X}, Source ID: 0x{:04X}",
                                   (ptr[8] << 8) | ptr[7],
                                   (ptr[10] << 8) | ptr[9]);
            }
            break;
        case unifiedMemEvt:
            /* Memory records keep the event type in byte 9 */
            eventType = ptr[9] & 0xf;
            msg += std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo);
            /* "No DIMM in System" has no DIMM location to report */
            if (static_cast<MemEvtType>(eventType) != MemEvtType::noDimm)
            {
                msg += std::format(", {}",
                                   dimmLocationStr(ptr[5], ptr[6], ptr[7]));
            }
            msg += ", DIMM Failure Event: ";

            switch (static_cast<MemEvtType>(eventType))
            {
                case MemEvtType::ppr:
                    msg += std::format("{} {}",
                                       memPprTime[(ptr[10] >> 2) & 0x3],
                                       memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    msg += std::format("{} {}", pick(memEvent, eventType),
                                       pick(memAdddc, ptr[11] & 0xf));
                    break;
                default:
                    msg += pick(memEvent, eventType);
                    break;
            }
            break;
        case unifiedBootGuard:
            msg += std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;
        case unifiedPprEvt:
            msg += std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo, pick(pprEvent, eventType), ptr[6], ptr[7], ptr[8],
                ptr[9], ptr[10], ptr[11], ptr[12]);
            break;
        default:
        {
            /* Unknown type: report it with the raw OEM payload */
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            msg += std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                               errType, oemDataStr);
            break;
        }
    }

    errStr = std::move(msg);
}
1413 
parseSelData(uint8_t fruId,std::vector<uint8_t> & reqData,std::string & msgLog)1414 static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
1415                          std::string& msgLog)
1416 {
1417     /* Get record type */
1418     int recType = reqData[2];
1419     std::string errType, errLog;
1420 
1421     uint8_t* ptr = NULL;
1422 
1423     std::stringstream recTypeStream;
1424     recTypeStream << std::hex << std::uppercase << std::setfill('0')
1425                   << std::setw(2) << recType;
1426 
1427     msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: ";
1428 
1429     if (recType == stdErrType)
1430     {
1431         StdSELEntry* data = reinterpret_cast<StdSELEntry*>(&reqData[0]);
1432         std::string sensorName;
1433 
1434         errType = stdErr;
1435         if (data->sensorType == 0x1F)
1436         {
1437             sensorName = "OS";
1438         }
1439         else
1440         {
1441             auto findSensorName = sensorNameTable.find(data->sensorNum);
1442             if (findSensorName == sensorNameTable.end())
1443             {
1444                 sensorName = "Unknown";
1445             }
1446             else
1447             {
1448                 sensorName = findSensorName->second.first;
1449             }
1450         }
1451 
1452         parseStdSel(data, errLog);
1453         ptr = &(data->eventData1);
1454         std::vector<uint8_t> evtData(ptr, ptr + 3);
1455         std::string eventData;
1456         toHexStr(evtData, eventData);
1457 
1458         std::stringstream senNumStream;
1459         senNumStream << std::hex << std::uppercase << std::setfill('0')
1460                      << std::setw(2) << (int)(data->sensorNum);
1461 
1462         msgLog += errType + " (0x" + recTypeStream.str() +
1463                   "), Sensor: " + sensorName + " (0x" + senNumStream.str() +
1464                   "), Event Data: (" + eventData + ") " + errLog;
1465     }
1466     else if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax))
1467     {
1468         /* timestamped OEM SEL records */
1469         TsOemSELEntry* data = reinterpret_cast<TsOemSELEntry*>(&reqData[0]);
1470         ptr = data->mfrId;
1471         std::vector<uint8_t> mfrIdData(ptr, ptr + 3);
1472         std::string mfrIdStr;
1473         toHexStr(mfrIdData, mfrIdStr);
1474 
1475         ptr = data->oemData;
1476         std::vector<uint8_t> oemData(ptr, ptr + 6);
1477         std::string oemDataStr;
1478         toHexStr(oemData, oemDataStr);
1479 
1480         errType = oemTSErr;
1481         parseOemSel(data, errLog);
1482 
1483         msgLog += errType + " (0x" + recTypeStream.str() + "), MFG ID: " +
1484                   mfrIdStr + ", OEM Data: (" + oemDataStr + ") " + errLog;
1485     }
1486     else if (recType == fbUniErrType)
1487     {
1488         NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]);
1489         errType = fbUniSELErr;
1490         parseOemUnifiedSel(data, errLog);
1491         msgLog += errType + " (0x" + recTypeStream.str() + "), " + errLog;
1492     }
1493     else if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax))
1494     {
1495         /* Non timestamped OEM SEL records */
1496         NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]);
1497         errType = oemNTSErr;
1498 
1499         ptr = data->oemData;
1500         std::vector<uint8_t> oemData(ptr, ptr + 13);
1501         std::string oemDataStr;
1502         toHexStr(oemData, oemDataStr);
1503 
1504         parseOemSel((TsOemSELEntry*)data, errLog);
1505         msgLog += errType + " (0x" + recTypeStream.str() + "), OEM Data: (" +
1506                   oemDataStr + ") " + errLog;
1507     }
1508     else
1509     {
1510         errType = unknownErr;
1511         toHexStr(reqData, errLog);
1512         msgLog += errType + " (0x" + recTypeStream.str() +
1513                   ") RawData: " + errLog;
1514     }
1515 }
1516 
1517 } // namespace fb_oem::ipmi::sel
1518 
1519 namespace ipmi
1520 {
1521 
1522 namespace storage
1523 {
1524 
/* Forward declaration of the handler registration routine. The
 * `constructor` attribute asks the toolchain to run it automatically during
 * start-up/library load instead of requiring an explicit call.
 */
1525 static void registerSELFunctions() __attribute__((constructor));
/* SEL store used by every storage command handler in this file.
 * init_priority(101) requests construction ahead of namespace-scope objects
 * that use a larger (or the default) priority.
 */
1526 static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101)));
1527 
1528 ipmi::RspType<uint8_t,  // SEL version
1529               uint16_t, // SEL entry count
1530               uint16_t, // free space
1531               uint32_t, // last add timestamp
1532               uint32_t, // last erase timestamp
1533               uint8_t>  // operation support
ipmiStorageGetSELInfo()1534     ipmiStorageGetSELInfo()
1535 {
1536     fb_oem::ipmi::sel::GetSELInfoData info;
1537 
1538     selObj.getInfo(info);
1539     return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace,
1540                                  info.addTimeStamp, info.eraseTimeStamp,
1541                                  info.operationSupport);
1542 }
1543 
ipmiStorageGetSELEntry(std::vector<uint8_t> data)1544 ipmi::RspType<uint16_t, std::vector<uint8_t>> ipmiStorageGetSELEntry(
1545     std::vector<uint8_t> data)
1546 {
1547     if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest))
1548     {
1549         return ipmi::responseReqDataLenInvalid();
1550     }
1551 
1552     fb_oem::ipmi::sel::GetSELEntryRequest* reqData =
1553         reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]);
1554 
1555     if (reqData->reservID != 0)
1556     {
1557         if (!checkSELReservation(reqData->reservID))
1558         {
1559             return ipmi::responseInvalidReservationId();
1560         }
1561     }
1562 
1563     uint16_t selCnt = selObj.getCount();
1564     if (selCnt == 0)
1565     {
1566         return ipmi::responseSensorInvalid();
1567     }
1568 
1569     /* If it is asked for first entry */
1570     if (reqData->recordID == fb_oem::ipmi::sel::firstEntry)
1571     {
1572         /* First Entry (0x0000) as per Spec */
1573         reqData->recordID = 1;
1574     }
1575     else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry)
1576     {
1577         /* Last entry (0xFFFF) as per Spec */
1578         reqData->recordID = selCnt;
1579     }
1580 
1581     std::string ipmiRaw;
1582 
1583     if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0)
1584     {
1585         return ipmi::responseSensorInvalid();
1586     }
1587 
1588     std::vector<uint8_t> recDataBytes;
1589     if (fromHexStr(ipmiRaw, recDataBytes) < 0)
1590     {
1591         return ipmi::responseUnspecifiedError();
1592     }
1593 
1594     /* Identify the next SEL record ID. If recordID is same as
1595      * total SeL count then next id should be last entry else
1596      * it should be incremented by 1 to current RecordID
1597      */
1598     uint16_t nextRecord;
1599     if (reqData->recordID == selCnt)
1600     {
1601         nextRecord = fb_oem::ipmi::sel::lastEntry;
1602     }
1603     else
1604     {
1605         nextRecord = reqData->recordID + 1;
1606     }
1607 
1608     if (reqData->readLen == fb_oem::ipmi::sel::entireRecord)
1609     {
1610         return ipmi::responseSuccess(nextRecord, recDataBytes);
1611     }
1612     else
1613     {
1614         if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize ||
1615             reqData->readLen > fb_oem::ipmi::sel::selRecordSize)
1616         {
1617             return ipmi::responseUnspecifiedError();
1618         }
1619         std::vector<uint8_t> recPartData;
1620 
1621         auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset;
1622         auto readLength = std::min(diff, static_cast<int>(reqData->readLen));
1623 
1624         for (int i = 0; i < readLength; i++)
1625         {
1626             recPartData.push_back(recDataBytes[i + reqData->offset]);
1627         }
1628         return ipmi::responseSuccess(nextRecord, recPartData);
1629     }
1630 }
1631 
1632 // Main function to add SEL entry
ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,std::vector<uint8_t> data)1633 ipmi::RspType<uint16_t> ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,
1634                                                std::vector<uint8_t> data)
1635 {
1636     /* Per the IPMI spec, need to cancel any reservation when a
1637      * SEL entry is added
1638      */
1639     cancelSELReservation();
1640 
1641     if (data.size() != fb_oem::ipmi::sel::selRecordSize)
1642     {
1643         return ipmi::responseReqDataLenInvalid();
1644     }
1645 
1646     std::string ipmiRaw, logErr;
1647     toHexStr(data, ipmiRaw);
1648 
1649     /* Parse sel data and get an error log to be filed */
1650     fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr);
1651 
1652     std::string source = "/xyz/openbmc_project/state/host0";
1653     // Launch the logging thread
1654     std::thread([=]() {
1655         namespace Errors = sdbusplus::error::com::meta::ipmi::UnifiedSEL;
1656         lg2::commit(Errors::UnifiedSELEvent("SOURCE", source, "EVENT", logErr,
1657                                             "RAW_EVENT", ipmiRaw));
1658     }).detach();
1659 
1660     int responseID = selObj.addEntry(ipmiRaw.c_str());
1661     if (responseID < 0)
1662     {
1663         return ipmi::responseUnspecifiedError();
1664     }
1665     return ipmi::responseSuccess(static_cast<uint16_t>(responseID));
1666 }
1667 
ipmiStorageClearSEL(uint16_t reservationID,const std::array<uint8_t,3> & clr,uint8_t eraseOperation)1668 ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID,
1669                                            const std::array<uint8_t, 3>& clr,
1670                                            uint8_t eraseOperation)
1671 {
1672     if (!checkSELReservation(reservationID))
1673     {
1674         return ipmi::responseInvalidReservationId();
1675     }
1676 
1677     static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'};
1678     if (clr != clrExpected)
1679     {
1680         return ipmi::responseInvalidFieldRequest();
1681     }
1682 
1683     /* If there is no sel then return erase complete */
1684     if (selObj.getCount() == 0)
1685     {
1686         return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1687     }
1688 
1689     /* Erasure status cannot be fetched, so always return erasure
1690      * status as `erase completed`.
1691      */
1692     if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus)
1693     {
1694         return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1695     }
1696 
1697     /* Check that initiate erase is correct */
1698     if (eraseOperation != fb_oem::ipmi::sel::initiateErase)
1699     {
1700         return ipmi::responseInvalidFieldRequest();
1701     }
1702 
1703     /* Per the IPMI spec, need to cancel any reservation when the
1704      * SEL is cleared
1705      */
1706     cancelSELReservation();
1707 
1708     /* Clear the complete Sel Json object */
1709     if (selObj.clear() < 0)
1710     {
1711         return ipmi::responseUnspecifiedError();
1712     }
1713 
1714     return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1715 }
1716 
ipmiStorageGetSELTime()1717 ipmi::RspType<uint32_t> ipmiStorageGetSELTime()
1718 {
1719     struct timespec selTime = {};
1720 
1721     if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
1722     {
1723         return ipmi::responseUnspecifiedError();
1724     }
1725 
1726     return ipmi::responseSuccess(selTime.tv_sec);
1727 }
1728 
ipmiStorageSetSELTime(uint32_t)1729 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t)
1730 {
1731     // Set SEL Time is not supported
1732     return ipmi::responseInvalidCommand();
1733 }
1734 
ipmiStorageGetSELTimeUtcOffset()1735 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset()
1736 {
1737     /* TODO: For now, the SEL time stamp is based on UTC time,
1738      * so return 0x0000 as offset. Might need to change once
1739      * supporting zones in SEL time stamps
1740      */
1741 
1742     uint16_t utcOffset = 0x0000;
1743     return ipmi::responseSuccess(utcOffset);
1744 }
1745 
registerSELFunctions()1746 void registerSELFunctions()
1747 {
1748     // <Get SEL Info>
1749     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1750                           ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User,
1751                           ipmiStorageGetSELInfo);
1752 
1753     // <Get SEL Entry>
1754     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1755                           ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User,
1756                           ipmiStorageGetSELEntry);
1757 
1758     // <Add SEL Entry>
1759     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1760                           ipmi::storage::cmdAddSelEntry,
1761                           ipmi::Privilege::Operator, ipmiStorageAddSELEntry);
1762 
1763     // <Clear SEL>
1764     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1765                           ipmi::storage::cmdClearSel, ipmi::Privilege::Operator,
1766                           ipmiStorageClearSEL);
1767 
1768     // <Get SEL Time>
1769     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1770                           ipmi::storage::cmdGetSelTime, ipmi::Privilege::User,
1771                           ipmiStorageGetSELTime);
1772 
1773     // <Set SEL Time>
1774     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1775                           ipmi::storage::cmdSetSelTime,
1776                           ipmi::Privilege::Operator, ipmiStorageSetSELTime);
1777 
1778     // <Get SEL Time UTC Offset>
1779     ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
1780                           ipmi::storage::cmdGetSelTimeUtcOffset,
1781                           ipmi::Privilege::User,
1782                           ipmiStorageGetSELTimeUtcOffset);
1783 
1784     return;
1785 }
1786 
1787 } // namespace storage
1788 } // namespace ipmi
1789