1 /*
2 * Copyright (c) 2018 Intel Corporation.
3 * Copyright (c) 2018-present Facebook.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include <boost/algorithm/string/join.hpp>
19 #include <boost/container/flat_map.hpp>
20 #include <com/meta/IPMI/UnifiedSEL/event.hpp>
21 #include <ipmid/api.hpp>
22 #include <nlohmann/json.hpp>
23 #include <phosphor-logging/commit.hpp>
24 #include <phosphor-logging/log.hpp>
25 #include <sdbusplus/message/types.hpp>
26 #include <sdbusplus/timer.hpp>
27 #include <storagecommands.hpp>
28
29 #include <fstream>
30 #include <iostream>
31 #include <sstream>
32 #include <thread>
33
/*
 * Memory error type codes.
 * The values coincide with positions in the dimmErr description list of
 * parseOemUnifiedSel (0: "Memory training failure", 7: "Memory PMIC Error").
 */
enum class MemErrType
{
    memTrainErr = 0x00,
    memPmicErr = 0x07
};
39
/*
 * POST event type codes.
 * The values coincide with positions in the postEvent description list of
 * parseOemUnifiedSel (0: PXE boot fail, 6: HTTP boot fail,
 * 7: certificate fetch failure, 10: AMD ABL failure).
 */
enum class PostEvtType
{
    pxeBootFail = 0x00,
    httpBootFail = 0x06,
    getCertFail = 0x07,
    amdAblFail = 0x0A
};
47
/*
 * PCIe event type codes.
 * The value coincides with the position of "PCIe DPC Event" in the pcieEvent
 * description list of parseOemUnifiedSel.
 */
enum class PcieEvtType
{
    dpc = 0x00
};
52
/*
 * Memory event type codes.
 * The values coincide with positions in the memEvent description list of
 * parseOemUnifiedSel (0: "Memory PPR event", 5: "Memory ADDDC",
 * 7: "No DIMM in System").
 */
enum class MemEvtType
{
    ppr = 0x00,
    adddc = 0x05,
    noDimm = 0x07
};
59
60 //----------------------------------------------------------------------
61 // Platform specific functions for storing app data
62 //----------------------------------------------------------------------
63
/*
 * Render one byte as exactly two uppercase hexadecimal digits
 * (e.g. 0x0A -> "0A").
 */
static std::string byteToStr(uint8_t byte)
{
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    /* High nibble first, then low nibble. */
    return std::string{hexDigits[byte >> 4], hexDigits[byte & 0x0F]};
}
73
/*
 * Encode a byte sequence as a string of uppercase hexadecimal digits, two
 * digits per byte and no separators.
 *
 * bytes  - input bytes (not modified)
 * hexStr - overwritten with the encoded text; empty when bytes is empty
 */
static void toHexStr(std::vector<uint8_t>& bytes, std::string& hexStr)
{
    static constexpr char hexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(bytes.size() * 2);
    for (const uint8_t value : bytes)
    {
        encoded.push_back(hexDigits[value >> 4]);
        encoded.push_back(hexDigits[value & 0x0F]);
    }
    hexStr = encoded;
}
84
fromHexStr(const std::string hexStr,std::vector<uint8_t> & data)85 static int fromHexStr(const std::string hexStr, std::vector<uint8_t>& data)
86 {
87 for (unsigned int i = 0; i < hexStr.size(); i += 2)
88 {
89 try
90 {
91 data.push_back(static_cast<uint8_t>(
92 std::stoul(hexStr.substr(i, 2), nullptr, 16)));
93 }
94 catch (const std::invalid_argument& e)
95 {
96 phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
97 return -1;
98 }
99 catch (const std::out_of_range& e)
100 {
101 phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
102 return -1;
103 }
104 }
105 return 0;
106 }
107
108 namespace fb_oem::ipmi::sel
109 {
110
111 class SELData
112 {
113 private:
114 nlohmann::json selDataObj;
115
flush()116 void flush()
117 {
118 std::ofstream file(SEL_JSON_DATA_FILE);
119 file << selDataObj;
120 file.close();
121 }
122
init()123 void init()
124 {
125 selDataObj[KEY_SEL_VER] = 0x51;
126 selDataObj[KEY_SEL_COUNT] = 0;
127 selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF;
128 selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF;
129 selDataObj[KEY_OPER_SUPP] = 0x02;
130 /* Spec indicates that more than 64kB is free */
131 selDataObj[KEY_FREE_SPACE] = 0xFFFF;
132 }
133
writeEmptyJson()134 void writeEmptyJson()
135 {
136 selDataObj = nlohmann::json::object(); // Create an empty JSON object
137 std::ofstream outFile(SEL_JSON_DATA_FILE);
138 if (outFile)
139 {
140 // Write empty JSON object to the file
141 outFile << selDataObj.dump(4);
142 outFile.close();
143 }
144 else
145 {
146 lg2::info("Failed to create SEL JSON file with empty JSON.");
147 }
148 }
149
150 public:
SELData()151 SELData()
152 {
153 /* Get App data stored in json file */
154 std::ifstream file(SEL_JSON_DATA_FILE);
155 if (file)
156 {
157 try
158 {
159 file >> selDataObj;
160 }
161 catch (const nlohmann::json::parse_error& e)
162 {
163 lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e);
164 writeEmptyJson();
165 init(); // Initialize to default values
166 }
167 file.close();
168 }
169 else
170 {
171 lg2::info("Failed to open SEL JSON file.");
172 writeEmptyJson();
173 init();
174 }
175
176 /* Initialize SelData object if no entries. */
177 if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end())
178 {
179 init();
180 }
181 }
182
clear()183 int clear()
184 {
185 /* Clear the complete Sel Json object */
186 selDataObj.clear();
187 /* Reinitialize it with basic data */
188 init();
189 /* Save the erase time */
190 struct timespec selTime = {};
191 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
192 {
193 return -1;
194 }
195 selDataObj[KEY_ERASE_TIME] = selTime.tv_sec;
196 flush();
197 return 0;
198 }
199
getCount()200 uint32_t getCount()
201 {
202 return selDataObj[KEY_SEL_COUNT];
203 }
204
getInfo(GetSELInfoData & info)205 void getInfo(GetSELInfoData& info)
206 {
207 info.selVersion = selDataObj[KEY_SEL_VER];
208 info.entries = selDataObj[KEY_SEL_COUNT];
209 info.freeSpace = selDataObj[KEY_FREE_SPACE];
210 info.addTimeStamp = selDataObj[KEY_ADD_TIME];
211 info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME];
212 info.operationSupport = selDataObj[KEY_OPER_SUPP];
213 }
214
getEntry(uint32_t index,std::string & rawStr)215 int getEntry(uint32_t index, std::string& rawStr)
216 {
217 std::stringstream ss;
218 ss << std::hex;
219 ss << std::setw(2) << std::setfill('0') << index;
220
221 /* Check or the requested SEL Entry, if record is available */
222 if (selDataObj.find(ss.str()) == selDataObj.end())
223 {
224 return -1;
225 }
226
227 rawStr = selDataObj[ss.str()][KEY_SEL_ENTRY_RAW];
228 return 0;
229 }
230
addEntry(std::string keyStr)231 int addEntry(std::string keyStr)
232 {
233 struct timespec selTime = {};
234
235 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
236 {
237 return -1;
238 }
239
240 selDataObj[KEY_ADD_TIME] = selTime.tv_sec;
241
242 int selCount = selDataObj[KEY_SEL_COUNT];
243 selDataObj[KEY_SEL_COUNT] = ++selCount;
244
245 std::stringstream ss;
246 ss << std::hex;
247 ss << std::setw(2) << std::setfill('0') << selCount;
248
249 selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr;
250 flush();
251 return selCount;
252 }
253 };
254
255 /*
256 * A Function to parse common SEL message, a helper function
257 * for parseStdSel.
258 *
259 * Note that this function __CANNOT__ be overridden.
260 * To add board specific routine, please override parseStdSel.
261 */
262
/*
 * NM domain names used when decoding ME events. The position in the list is
 * the domain index taken from the event data; the last entry is the
 * catch-all name.
 */
std::vector<std::string> nmDomName = {
    "Entire Platform",          /* 0 */
    "CPU Subsystem",            /* 1 */
    "Memory Subsystem",         /* 2 */
    "HW Protection",            /* 3 */
    "High Power I/O subsystem", /* 4 */
    "Unknown"                   /* 5 */
};
268
269 /* Default log message for unknown type */
/*
 * Fallback decoder for sensors without a dedicated handler: the event data
 * is ignored and the message is always "Unknown".
 */
static void logDefault(uint8_t*, std::string& errLog)
{
    errLog.assign("Unknown");
}
274
/*
 * Decode a SYSTEM_EVENT record.
 *
 * data   - three event data bytes
 * errLog - overwritten with the decoded message
 *
 * Only data[0] == 0xE5 (time change) is decoded: data[2] selects the cause
 * and data[1] tells whether this is the first (0x00) or second (0x80) record
 * of the pair. Anything else yields "Unknown".
 */
static void logSysEvent(uint8_t* data, std::string& errLog)
{
    if (data[0] != 0xE5)
    {
        errLog = "Unknown";
        return;
    }

    static const char* const causes[] = {"NTP", "Host RTL", "Set SEL time cmd",
                                         "Set SEL time UTC offset cmd"};
    const uint8_t causeIdx = data[2];

    errLog = "Cause of Time change - ";
    errLog += (causeIdx <= 0x03) ? causes[causeIdx] : "Unknown";

    switch (data[1])
    {
        case 0x00:
            errLog += " - First Time";
            break;
        case 0x80:
            errLog += " - Second Time";
            break;
        default:
            break;
    }
}
308
/*
 * Decode a THERM_THRESH_EVT record: data[0] == 0x1 means the limit was
 * exceeded, every other value is reported as "Unknown".
 */
static void logThermalEvent(uint8_t* data, std::string& errLog)
{
    errLog = (data[0] == 0x1) ? "Limit Exceeded" : "Unknown";
}
320
/*
 * Decode a CRITICAL_IRQ record from data[0]:
 * 0x00 -> NMI / diagnostic interrupt, 0x03 -> software NMI,
 * anything else -> "Unknown".
 */
static void logCritIrq(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "NMI / Diagnostic Interrupt";
            break;
        case 0x03:
            errLog = "Software NMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }

    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
}
338
logPostErr(uint8_t * data,std::string & errLog)339 static void logPostErr(uint8_t* data, std::string& errLog)
340 {
341 if ((data[0] & 0x0F) == 0x0)
342 {
343 errLog = "System Firmware Error";
344 }
345 else
346 {
347 errLog = "Unknown";
348 }
349
350 if (((data[0] >> 6) & 0x03) == 0x3)
351 {
352 // TODO: Need to implement IPMI spec based Post Code
353 errLog += ", IPMI Post Code";
354 }
355 else if (((data[0] >> 6) & 0x03) == 0x2)
356 {
357 errLog += ", OEM Post Code 0x" + byteToStr(data[2]) +
358 byteToStr(data[1]);
359
360 switch ((data[2] << 8) | data[1])
361 {
362 case 0xA105:
363 errLog += ", BMC Failed (No Response)";
364 break;
365 case 0xA106:
366 errLog += ", BMC Failed (Self Test Fail)";
367 break;
368 case 0xA10A:
369 errLog += ", System Firmware Corruption Detected";
370 break;
371 case 0xA10B:
372 errLog += ", TPM Self-Test FAIL Detected";
373 }
374 }
375 }
376
/*
 * Decode a MACHINE_CHK_ERR record.
 *
 * The low nibble of data[0] selects the error class (0x0B uncorrected,
 * 0x0C corrected) and bits 6:5 of data[1] the severity inside that class.
 * The message always ends with the bank number (data[1] as a whole), the
 * CPU (data[2] bits 7:5) and the core (data[2] bits 4:0).
 */
static void logMchChkErr(uint8_t* data, std::string& errLog)
{
    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
    const uint8_t errClass = data[0] & 0x0F;
    const uint8_t severity = (data[1] >> 5) & 0x03;

    const char* summary = "Unknown";
    if (errClass == 0x0B)
    {
        static const char* const uncorrected[] = {
            "Uncorrected Recoverable Error", "Uncorrected Thread Fatal Error",
            "Uncorrected System Fatal Error"};
        if (severity <= 0x02)
        {
            summary = uncorrected[severity];
        }
    }
    else if (errClass == 0x0C)
    {
        if (severity == 0x00)
        {
            summary = "Correctable Error";
        }
        else if (severity == 0x01)
        {
            summary = "Deferred Error";
        }
    }

    errLog = summary;
    errLog += ", Machine Check bank Number ";
    errLog += std::to_string(data[1]);
    errLog += ", CPU ";
    errLog += std::to_string(data[2] >> 5);
    errLog += ", Core ";
    errLog += std::to_string(data[2] & 0x1F);
}
419
/*
 * Decode a PCIE_ERR record.
 *
 * The low nibble of data[0] selects the event. Error events carry a PCI
 * address (data[2] = bus, data[1] bits 7:3 = device, bits 2:0 = function);
 * the ID events (0xD, 0xE, 0xF) carry a 16-bit value with data[1] as the
 * high byte and data[2] as the low byte. All numbers are printed as
 * zero-padded uppercase hex.
 */
static void logPcieErr(uint8_t* data, std::string& errLog)
{
    /* Render value as uppercase hex, zero padded to width digits. */
    const auto hex = [](unsigned int value, int width) {
        std::stringstream out;
        out << std::hex << std::uppercase << std::setfill('0')
            << std::setw(width) << value;
        return out.str();
    };

    const auto location = [&]() {
        return " (Bus " + hex(data[2], 2) + " / Dev " + hex(data[1] >> 3, 2) +
               " / Fun " + hex(data[1] & 0x7, 2) + ")";
    };

    const unsigned int id16 =
        (static_cast<unsigned int>(data[1]) << 8) | data[2];

    switch (data[0] & 0xF)
    {
        case 0x4:
            errLog = "PCI PERR" + location();
            break;
        case 0x5:
            errLog = "PCI SERR" + location();
            break;
        case 0x7:
            errLog = "Correctable" + location();
            break;
        case 0x8:
            errLog = "Uncorrectable" + location();
            break;
        case 0xA:
            errLog = "Bus Fatal" + location();
            break;
        case 0xD:
            errLog = "Vendor ID: 0x" + hex(id16, 4);
            break;
        case 0xE:
            errLog = "Device ID: 0x" + hex(id16, 4);
            break;
        case 0xF:
            errLog = "Error ID from downstream: 0x" + hex(data[1], 2) +
                     hex(data[2], 2);
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
469
logIioErr(uint8_t * data,std::string & errLog)470 static void logIioErr(uint8_t* data, std::string& errLog)
471 {
472 std::vector<std::string> tmpStr = {
473 "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data",
474 "Misc", " DMA", "ITC", "OTC", "CI"};
475
476 if ((data[0] & 0xF) == 0)
477 {
478 errLog += "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" +
479 byteToStr(data[1]) + " - ";
480
481 if ((data[2] & 0xF) <= 0x9)
482 {
483 errLog += tmpStr[(data[2] & 0xF)];
484 }
485 else
486 {
487 errLog += "Reserved";
488 }
489 }
490 else
491 {
492 errLog = "Unknown";
493 }
494 }
495
logMemErr(uint8_t * dataPtr,std::string & errLog)496 [[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog)
497 {
498 uint8_t snrType = dataPtr[0];
499 uint8_t snrNum = dataPtr[1];
500 uint8_t* data = &(dataPtr[3]);
501
502 /* TODO: add pal_add_cri_sel */
503
504 if (snrNum == memoryEccError)
505 {
506 /* SEL from MEMORY_ECC_ERR Sensor */
507 switch (data[0] & 0x0F)
508 {
509 case 0x0:
510 if (snrType == 0x0C)
511 {
512 errLog = "Correctable";
513 }
514 else if (snrType == 0x10)
515 {
516 errLog = "Correctable ECC error Logging Disabled";
517 }
518 break;
519 case 0x1:
520 errLog = "Uncorrectable";
521 break;
522 case 0x5:
523 errLog = "Correctable ECC error Logging Limit Disabled";
524 break;
525 default:
526 errLog = "Unknown";
527 }
528 }
529 else if (snrNum == memoryErrLogDIS)
530 {
531 // SEL from MEMORY_ERR_LOG_DIS Sensor
532 if ((data[0] & 0x0F) == 0x0)
533 {
534 errLog = "Correctable Memory Error Logging Disabled";
535 }
536 else
537 {
538 errLog = "Unknown";
539 }
540 }
541 else
542 {
543 errLog = "Unknown";
544 return;
545 }
546
547 /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */
548
549 errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " +
550 std::to_string(data[1] & 0x03);
551
552 /* DIMM number (data[2]):
553 * Bit[7:5]: Socket number (Range: 0-7)
554 * Bit[4:3]: Channel number (Range: 0-3)
555 * Bit[2:0]: DIMM number (Range: 0-7)
556 */
557
558 /* TODO: Verify these bits */
559 std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5);
560 std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3);
561 std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7);
562
563 switch ((data[1] & 0xC) >> 2)
564 {
565 case 0x0:
566 {
567 /* All Info Valid */
568 [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2;
569 [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3;
570
571 /* TODO: If critical SEL logging is available, do it */
572 if (snrType == 0x0C)
573 {
574 if ((data[0] & 0x0F) == 0x0)
575 {
576 /* TODO: add_cri_sel */
577 /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1"
578 */
579 }
580 else if ((data[0] & 0x0F) == 0x1)
581 {
582 /* TODO: add_cri_sel */
583 /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1"
584 */
585 }
586 }
587 /* Continue to parse the error into a string. All Info Valid
588 */
589 errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")";
590 }
591
592 break;
593 case 0x1:
594
595 /* DIMM info not valid */
596 errLog += " (" + cpuStr + ", " + chStr + ")";
597 break;
598 case 0x2:
599
600 /* CHN info not valid */
601 errLog += " (" + cpuStr + ", " + dimmStr + ")";
602 break;
603 case 0x3:
604
605 /* CPU info not valid */
606 errLog += " (" + chStr + ", " + dimmStr + ")";
607 break;
608 }
609 }
610
/*
 * Decode a PWR_ERR record from data[0]:
 * 0x1 -> SYS_PWROK failure, 0x2 -> PCH_PWROK failure, else "Unknown".
 */
static void logPwrErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x1:
            errLog = "SYS_PWROK failure";
            /* Also try logging to Critical log file, if available */
            /* "SYS_PWROK failure,FRU:1" */
            break;
        case 0x2:
            errLog = "PCH_PWROK failure";
            /* Also try logging to Critical log file, if available */
            /* "PCH_PWROK failure,FRU:1" */
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
630
/*
 * Decode a CATERR record from data[0]:
 * 0x0 -> IERR/CATERR, 0xB -> MCERR/CATERR, else "Unknown".
 */
static void logCatErr(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x0:
            errLog = "IERR/CATERR";
            /* Also try logging to Critical log file, if available */
            /* "IERR,FRU:1 */
            break;
        case 0xB:
            errLog = "MCERR/CATERR";
            /* Also try logging to Critical log file, if available */
            /* "MCERR,FRU:1 */
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
650
/*
 * Decode a CPU_DIMM_HOT record: the exact byte sequence 01 FF FF means
 * "SOC MEMHOT", anything else is "Unknown".
 */
static void logDimmHot(uint8_t* data, std::string& errLog)
{
    const bool isMemHot =
        (data[0] == 0x01) && (data[1] == 0xFF) && (data[2] == 0xFF);

    /* Also try logging to Critical log file, if available */
    /* ""CPU_DIMM_HOT %s,FRU:1" */
    errLog = isMemHot ? "SOC MEMHOT" : "Unknown";
}
664
/*
 * Decode a SOFTWARE_NMI record: the exact byte sequence 03 FF FF means
 * "Software NMI", anything else is "Unknown SW NMI".
 */
static void logSwNMI(uint8_t* data, std::string& errLog)
{
    const bool isSoftwareNmi =
        (data[0] == 0x03) && (data[1] == 0xFF) && (data[2] == 0xFF);

    errLog = isSoftwareNmi ? "Software NMI" : "Unknown SW NMI";
}
676
/*
 * Decode a CPU thermal status record from data[0]:
 * 0 -> critical temperature, 1 -> PROCHOT#, 2 -> TCC activation,
 * anything else -> "Unknown".
 */
static void logCPUThermalSts(uint8_t* data, std::string& errLog)
{
    static const char* const statusNames[] = {
        "CPU Critical Temperature", "PROCHOT#", "TCC Activation"};

    const uint8_t status = data[0];
    errLog = (status <= 0x2) ? statusNames[status] : "Unknown";
}
694
/*
 * Decode an ME_POWER_STATE record from data[0]:
 * 0 -> RUNNING, 2 -> POWER_OFF, any other value is echoed back as
 * "Unknown[<decimal value>]".
 */
static void logMEPwrState(uint8_t* data, std::string& errLog)
{
    const uint8_t state = data[0];

    if (state == 0)
    {
        errLog = "RUNNING";
    }
    else if (state == 2)
    {
        errLog = "POWER_OFF";
    }
    else
    {
        errLog = "Unknown[";
        errLog += std::to_string(state);
        errLog += "]";
    }
}
710
/*
 * Decode an SPS_FW_HEALTH record.
 *
 * The low nibble of data[0] selects the event: 0x0 is a firmware status
 * whose reason is looked up from data[1], 0x1 is an SMBus link failure,
 * everything else is "Unknown".
 */
static void logSPSFwHealth(uint8_t* data, std::string& errLog)
{
    /* Firmware status reasons indexed by data[1]. */
    static const char* const healthReasons[] = {
        "Recovery GPIO forced",
        "Image execution failed",
        "Flash erase error",
        "Flash state information",
        "Internal error",
        "BMC did not respond",
        "Direct Flash update",
        "Manufacturing error",
        "Automatic Restore to Factory Presets",
        "Firmware Exception",
        "Flash Wear-Out Protection Warning",
        "Unknown",
        "Unknown",
        "DMI interface error",
        "MCTP interface error",
        "Auto-configuration finished",
        "Unsupported Segment Defined Feature",
        "Unknown",
        "CPU Debug Capability Disabled",
        "UMA operation error"};
    constexpr unsigned int reasonCount =
        sizeof(healthReasons) / sizeof(healthReasons[0]);

    switch (data[0] & 0x0F)
    {
        case 0x00:
            errLog = (data[1] < reasonCount) ? healthReasons[data[1]]
                                             : "Unknown";
            break;
        case 0x01:
            errLog = "SMBus link failure";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
755
/*
 * Decode an NM_EXCEPTION_A record: data[0] == 0xA8 means the policy
 * correction time was exceeded, anything else is "Unknown".
 */
static void logNmExcA(uint8_t* data, std::string& errLog)
{
    /*NM4.0 #550710, Revision 1.95, and turn to p.155*/
    errLog = (data[0] == 0xA8) ? "Policy Correction Time Exceeded" : "Unknown";
}
768
/*
 * Decode a PCH_THERM_THRESHOLD record.
 *
 * The low nibble of data[0] names the threshold that was crossed, data[1]
 * is the current reading and data[2] the threshold value (both printed in
 * decimal).
 */
static void logPCHThermal(uint8_t* data, std::string& errLog)
{
    /* Threshold event names indexed by the low nibble of data[0]. */
    static const char* const thresholdNames[] = {
        "Lower Non-critical",
        "Unknown",
        "Lower Critical",
        "Unknown",
        "Lower Non-recoverable",
        "Unknown",
        "Unknown",
        "Upper Non-critical",
        "Unknown",
        "Upper Critical",
        "Unknown",
        "Upper Non-recoverable"};

    const uint8_t offset = data[0] & 0x0f;

    errLog = (offset < 12) ? thresholdNames[offset] : "Unknown";
    errLog += ", curr_val: ";
    errLog += std::to_string(data[1]);
    errLog += " C, thresh_val: ";
    errLog += std::to_string(data[2]);
    errLog += " C";
}
797
logNmHealth(uint8_t * data,std::string & errLog)798 static void logNmHealth(uint8_t* data, std::string& errLog)
799 {
800 std::vector<std::string> nmErrType = {
801 "Unknown",
802 "Unknown",
803 "Unknown",
804 "Unknown",
805 "Unknown",
806 "Unknown",
807 "Unknown",
808 "Extended Telemetry Device Reading Failure",
809 "Outlet Temperature Reading Failure",
810 "Volumetric Airflow Reading Failure",
811 "Policy Misconfiguration",
812 "Power Sensor Reading Failure",
813 "Inlet Temperature Reading Failure",
814 "Host Communication Error",
815 "Real-time Clock Synchronization Failure",
816 "Platform Shutdown Initiated by Intel NM Policy",
817 "Unknown"};
818 uint8_t nmTypeIdx = (data[0] & 0xf);
819 uint8_t domIdx = (data[1] & 0xf);
820 uint8_t errIdx = ((data[1] >> 4) & 0xf);
821
822 if (nmTypeIdx == 2)
823 {
824 errLog = "SensorIntelNM";
825 }
826 else
827 {
828 errLog = "Unknown";
829 }
830
831 errLog += ", Domain:" + nmDomName[domIdx] + ", ErrType:" +
832 nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]);
833 }
834
logNmCap(uint8_t * data,std::string & errLog)835 static void logNmCap(uint8_t* data, std::string& errLog)
836 {
837 const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"};
838 if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr
839 // limit and the others are reserved
840 {
841 errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] +
842 ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] +
843 ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)];
844 }
845 else
846 {
847 errLog = "Unknown";
848 }
849 }
850
logNmThreshold(uint8_t * data,std::string & errLog)851 static void logNmThreshold(uint8_t* data, std::string& errLog)
852 {
853 uint8_t thresNum = (data[0] & 0x3);
854 uint8_t domIdx = (data[1] & 0xf);
855 uint8_t polId = data[2];
856 uint8_t polEvtIdx = BIT(data[0], 3);
857 const std::vector<std::string> polEvtStr = {
858 "Threshold Exceeded", "Policy Correction Time Exceeded"};
859
860 errLog = "Threshold Number:" + std::to_string(thresNum) + "-" +
861 polEvtStr[polEvtIdx] + ", Domain:" + nmDomName[domIdx] +
862 ", PolicyID:0x" + byteToStr(polId);
863 }
864
/*
 * Decode a PWR_THRESH_EVT record from data[0]:
 * 0x00 -> limit not exceeded, 0x01 -> limit exceeded, else "Unknown".
 */
static void logPwrThreshold(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "Limit Not Exceeded";
            break;
        case 0x01:
            errLog = "Limit Exceeded";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
880
/*
 * Decode an MSMI record from data[0]:
 * 0x00 -> IERR/MSMI, 0x0B -> MCERR/MSMI, else "Unknown".
 */
static void logMSMI(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x00:
            errLog = "IERR/MSMI";
            break;
        case 0x0B:
            errLog = "MCERR/MSMI";
            break;
        default:
            errLog = "Unknown";
            break;
    }
}
896
/*
 * Decode an HPR_WARNING record.
 *
 * Only data[2] == 0x01 is decoded: data[1] is then a duration in minutes,
 * with 0xFF meaning "Infinite Time". Any other data[2] gives "Unknown".
 */
static void logHprWarn(uint8_t* data, std::string& errLog)
{
    if (data[2] != 0x01)
    {
        errLog = "Unknown";
        return;
    }

    if (data[1] == 0xFF)
    {
        errLog = "Infinite Time";
        return;
    }

    errLog = std::to_string(data[1]);
    errLog += " minutes";
}
915
916 static const boost::container::flat_map<
917 uint8_t,
918 std::pair<std::string, std::function<void(uint8_t*, std::string&)>>>
919 sensorNameTable = {
920 {0xE9, {"SYSTEM_EVENT", logSysEvent}},
921 {0x7D, {"THERM_THRESH_EVT", logThermalEvent}},
922 {0xAA, {"BUTTON", logDefault}},
923 {0xAB, {"POWER_STATE", logDefault}},
924 {0xEA, {"CRITICAL_IRQ", logCritIrq}},
925 {0x2B, {"POST_ERROR", logPostErr}},
926 {0x40, {"MACHINE_CHK_ERR", logMchChkErr}},
927 {0x41, {"PCIE_ERR", logPcieErr}},
928 {0x43, {"IIO_ERR", logIioErr}},
929 {0X63, {"MEMORY_ECC_ERR", logDefault}},
930 {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}},
931 {0X51, {"PROCHOT_EXT", logDefault}},
932 {0X56, {"PWR_ERR", logPwrErr}},
933 {0xE6, {"CATERR_A", logCatErr}},
934 {0xEB, {"CATERR_B", logCatErr}},
935 {0xB3, {"CPU_DIMM_HOT", logDimmHot}},
936 {0x90, {"SOFTWARE_NMI", logSwNMI}},
937 {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}},
938 {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}},
939 {0x16, {"ME_POWER_STATE", logMEPwrState}},
940 {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}},
941 {0x18, {"NM_EXCEPTION_A", logNmExcA}},
942 {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}},
943 {0x19, {"NM_HEALTH", logNmHealth}},
944 {0x1A, {"NM_CAPABILITIES", logNmCap}},
945 {0x1B, {"NM_THRESHOLD", logNmThreshold}},
946 {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}},
947 {0xE7, {"MSMI", logMSMI}},
948 {0xC5, {"HPR_WARNING", logHprWarn}}};
949
parseSelHelper(StdSELEntry * data,std::string & errStr)950 static void parseSelHelper(StdSELEntry* data, std::string& errStr)
951 {
952 /* Check if sensor type is OS_BOOT (0x1f) */
953 if (data->sensorType == 0x1F)
954 {
955 /* OS_BOOT used by OS */
956 switch (data->eventData1 & 0xF)
957 {
958 case 0x07:
959 errStr = "Base OS/Hypervisor Installation started";
960 break;
961 case 0x08:
962 errStr = "Base OS/Hypervisor Installation completed";
963 break;
964 case 0x09:
965 errStr = "Base OS/Hypervisor Installation aborted";
966 break;
967 case 0x0A:
968 errStr = "Base OS/Hypervisor Installation failed";
969 break;
970 default:
971 errStr = "Unknown";
972 }
973 return;
974 }
975
976 auto findSensorName = sensorNameTable.find(data->sensorNum);
977 if (findSensorName == sensorNameTable.end())
978 {
979 errStr = "Unknown";
980 return;
981 }
982 else
983 {
984 switch (data->sensorNum)
985 {
986 /* logMemErr function needs data from sensor type */
987 case memoryEccError:
988 case memoryErrLogDIS:
989 findSensorName->second.second(&(data->sensorType), errStr);
990 break;
991 /* Other sensor function needs only event data for parsing */
992 default:
993 findSensorName->second.second(&(data->eventData1), errStr);
994 }
995 }
996
997 if (((data->eventData3 & 0x80) >> 7) == 0)
998 {
999 errStr += " Assertion";
1000 }
1001 else
1002 {
1003 errStr += " Deassertion";
1004 }
1005 }
1006
parseDimmPhyloc(StdSELEntry * data,std::string & errStr)1007 static void parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
1008 {
1009 // Log when " All info available"
1010 uint8_t chNum = (data->eventData3 & 0x18) >> 3;
1011 uint8_t dimmNum = data->eventData3 & 0x7;
1012 uint8_t rankNum = data->eventData2 & 0x03;
1013 uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;
1014
1015 if (chNum == 3 && dimmNum == 0)
1016 {
1017 errStr += " Node: " + std::to_string(nodeNum) + "," +
1018 " Card: " + std::to_string(chNum) + "," +
1019 " Module: " + std::to_string(dimmNum) + "," +
1020 " Rank Number: " + std::to_string(rankNum) + "," +
1021 " Location: DIMM A0";
1022 }
1023 else if (chNum == 2 && dimmNum == 0)
1024 {
1025 errStr += " Node: " + std::to_string(nodeNum) + "," +
1026 " Card: " + std::to_string(chNum) + "," +
1027 " Module: " + std::to_string(dimmNum) + "," +
1028 " Rank Number: " + std::to_string(rankNum) + "," +
1029 " Location: DIMM B0";
1030 }
1031 else if (chNum == 4 && dimmNum == 0)
1032 {
1033 errStr += " Node: " + std::to_string(nodeNum) + "," +
1034 " Card: " + std::to_string(chNum) + "," +
1035 " Module: " + std::to_string(dimmNum) + "," +
1036 " Rank Number: " + std::to_string(rankNum) + "," +
1037 " Location: DIMM C0 ";
1038 }
1039 else if (chNum == 5 && dimmNum == 0)
1040 {
1041 errStr += " Node: " + std::to_string(nodeNum) + "," +
1042 " Card: " + std::to_string(chNum) + "," +
1043 " Module: " + std::to_string(dimmNum) + "," +
1044 " Rank Number: " + std::to_string(rankNum) + "," +
1045 " Location: DIMM D0";
1046 }
1047 else
1048 {
1049 errStr += " Node: " + std::to_string(nodeNum) + "," +
1050 " Card: " + std::to_string(chNum) + "," +
1051 " Module: " + std::to_string(dimmNum) + "," +
1052 " Rank Number: " + std::to_string(rankNum) + "," +
1053 " Location: DIMM Unknown";
1054 }
1055 }
1056
parseStdSel(StdSELEntry * data,std::string & errStr)1057 static void parseStdSel(StdSELEntry* data, std::string& errStr)
1058 {
1059 std::stringstream tmpStream;
1060 tmpStream << std::hex << std::uppercase;
1061
1062 /* TODO: add pal_add_cri_sel */
1063 switch (data->sensorNum)
1064 {
1065 case memoryEccError:
1066 switch (data->eventData1 & 0x0F)
1067 {
1068 case 0x00:
1069 errStr = "Correctable";
1070 tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1071 << data->eventData3 << " ECC err";
1072 parseDimmPhyloc(data, errStr);
1073 break;
1074 case 0x01:
1075 errStr = "Uncorrectable";
1076 tmpStream << "DIMM" << std::setw(2) << std::setfill('0')
1077 << data->eventData3 << " UECC err";
1078 parseDimmPhyloc(data, errStr);
1079 break;
1080 case 0x02:
1081 errStr = "Parity";
1082 break;
1083 case 0x05:
1084 errStr = "Correctable ECC error Logging Limit Reached";
1085 break;
1086 default:
1087 errStr = "Unknown";
1088 }
1089 break;
1090 case memoryErrLogDIS:
1091 if ((data->eventData1 & 0x0F) == 0)
1092 {
1093 errStr = "Correctable Memory Error Logging Disabled";
1094 }
1095 else
1096 {
1097 errStr = "Unknown";
1098 }
1099 break;
1100 default:
1101 parseSelHelper(data, errStr);
1102 return;
1103 }
1104
1105 errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
1106 errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);
1107
1108 switch ((data->eventData2 & 0x0C) >> 2)
1109 {
1110 case 0x00:
1111 // Ignore when " All info available"
1112 break;
1113 case 0x01:
1114 errStr += " DIMM info not valid";
1115 break;
1116 case 0x02:
1117 errStr += " CHN info not valid";
1118 break;
1119 case 0x03:
1120 errStr += " CPU info not valid";
1121 break;
1122 default:
1123 errStr += " Unknown";
1124 }
1125
1126 if (((data->eventType & 0x80) >> 7) == 0)
1127 {
1128 errStr += " Assertion";
1129 }
1130 else
1131 {
1132 errStr += " Deassertion";
1133 }
1134
1135 return;
1136 }
1137
parseOemSel(TsOemSELEntry * data,std::string & errStr)1138 static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
1139 {
1140 std::stringstream tmpStream;
1141 tmpStream << std::hex << std::uppercase << std::setfill('0');
1142
1143 switch (data->recordType)
1144 {
1145 case 0xC0:
1146 tmpStream << "VID:0x" << std::setw(2) << (int)data->oemData[1]
1147 << std::setw(2) << (int)data->oemData[0] << " DID:0x"
1148 << std::setw(2) << (int)data->oemData[3] << std::setw(2)
1149 << (int)data->oemData[2] << " Slot:0x" << std::setw(2)
1150 << (int)data->oemData[4] << " Error ID:0x" << std::setw(2)
1151 << (int)data->oemData[5];
1152 break;
1153 case 0xC2:
1154 tmpStream << "Extra info:0x" << std::setw(2)
1155 << (int)data->oemData[1] << " MSCOD:0x" << std::setw(2)
1156 << (int)data->oemData[3] << std::setw(2)
1157 << (int)data->oemData[2] << " MCACOD:0x" << std::setw(2)
1158 << (int)data->oemData[5] << std::setw(2)
1159 << (int)data->oemData[4];
1160 break;
1161 case 0xC3:
1162 int bank = (data->oemData[1] & 0xf0) >> 4;
1163 int col = ((data->oemData[1] & 0x0f) << 8) | data->oemData[2];
1164
1165 tmpStream << "Fail Device:0x" << std::setw(2)
1166 << (int)data->oemData[0] << " Bank:0x" << std::setw(2)
1167 << bank << " Column:0x" << std::setw(2) << col
1168 << " Failed Row:0x" << std::setw(2)
1169 << (int)data->oemData[3] << std::setw(2)
1170 << (int)data->oemData[4] << std::setw(2)
1171 << (int)data->oemData[5];
1172 }
1173
1174 errStr = tmpStream.str();
1175
1176 return;
1177 }
1178
dimmLocationStr(uint8_t socket,uint8_t channel,uint8_t slot)1179 static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
1180 uint8_t slot)
1181 {
1182 uint8_t sled = (socket >> 4) & 0x3;
1183
1184 socket &= 0xf;
1185 if (channel == 0xFF && slot == 0xFF)
1186 {
1187 return std::format(
1188 "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
1189 ", Slot unknown, DIMM unknown",
1190 sled, socket);
1191 }
1192 else
1193 {
1194 channel &= 0xf;
1195 slot &= 0xf;
1196 const char label[] = {'A', 'C', 'B', 'D'};
1197 uint8_t idx = socket * 2 + slot;
1198 return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
1199 ", Channel {:02}, Slot {:02} DIMM {}",
1200 sled, socket, channel, slot,
1201 (idx < sizeof(label))
1202 ? label[idx] + std::to_string(channel)
1203 : "NA");
1204 }
1205 }
1206
/*
 * Decode the OEM data of a unified SEL record into a human-readable string.
 *
 * data   - record whose oemData bytes are decoded. The low nibble of
 *          oemData[0] selects the error type; the remaining bytes are
 *          interpreted per error type as shown in each case below.
 * errStr - receives the decoded text (any previous content is replaced).
 *
 * Every lookup table ends with a "Reserved" entry; out-of-range indices are
 * clamped to that last entry. The tables are function-local statics so they
 * are built once instead of on every call.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    static const std::vector<std::string> dimmErr = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static const std::vector<std::string> postEvent = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static const std::vector<std::string> certErr = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static const std::vector<std::string> pcieEvent = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static const std::vector<std::string> memEvent = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static const std::vector<std::string> memPprTime = {
        "Boot time", "Autonomous", "Run time", "Reserved"};
    static const std::vector<std::string> memPpr = {
        "PPR success", "PPR fail", "PPR request", "Reserved"};
    static const std::vector<std::string> memAdddc = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static const std::vector<std::string> pprEvent = {
        "PPR disable", "Soft PPR", "Hard PPR", "Reserved"};

    // Table lookup that clamps the index to the final ("Reserved") entry.
    const auto lookup = [](const std::vector<std::string>& table,
                           uint8_t idx) -> const std::string& {
        return table[std::min<std::size_t>(idx, table.size() - 1)];
    };

    uint8_t* ptr = data->oemData;
    // Default event type; the memory error/event cases re-read it from ptr[9].
    uint8_t eventType = ptr[5] & 0xf;
    int genInfo = ptr[0];
    int errType = genInfo & 0x0f;

    std::string msg;

    switch (errType)
    {
        case unifiedPcieErr:
            msg = std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;
        case unifiedMemErr:
            eventType = ptr[9] & 0xf;
            msg = std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                lookup(dimmErr, eventType));

            if (static_cast<MemErrType>(eventType) == MemErrType::memTrainErr ||
                static_cast<MemErrType>(eventType) == MemErrType::memPmicErr)
            {
                // Bit 7 of ptr[9] selects a 16-bit minor code (ptr[12]:ptr[11])
                // instead of the 8-bit one in ptr[11].
                bool amd = ptr[9] & 0x80;
                msg += std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        case unifiedIioErr:
            msg = std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;
        case unifiedPostEvt:
            msg = std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                lookup(postEvent, eventType));

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    uint8_t failType = ptr[10] & 0xf;
                    msg += std::format(
                        ", Fail Type: {}, Error Code: 0x{:02X}",
                        (failType == 4 || failType == 6)
                            ? std::format("IPv{} fail", failType)
                            : std::format("0x{:02X}", ptr[10]),
                        ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    msg += std::format(", Failure Detail: {}",
                                       lookup(certErr, ptr[9]));
                    break;
                case PostEvtType::amdAblFail:
                    msg += std::format(", ABL Error Code: 0x{:04X}",
                                       (ptr[12] << 8) | ptr[11]);
                    break;
                default:
                    // Other POST events carry no extra detail.
                    break;
            }
            break;
        case unifiedPcieEvt:
            msg = std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo, lookup(pcieEvent, eventType));

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                msg += std::format(
                    ", Status: 0x{:04X}, Source ID: 0x{:04X}",
                    (ptr[8] << 8) | ptr[7], (ptr[10] << 8) | ptr[9]);
            }
            break;
        case unifiedMemEvt:
            eventType = ptr[9] & 0xf;
            msg = std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo);
            // The DIMM location is omitted for the "No DIMM" event.
            if (static_cast<MemEvtType>(eventType) != MemEvtType::noDimm)
            {
                msg += std::format(", {}",
                                   dimmLocationStr(ptr[5], ptr[6], ptr[7]));
            }
            msg += ", DIMM Failure Event: ";

            switch (static_cast<MemEvtType>(eventType))
            {
                case MemEvtType::ppr:
                    msg += std::format("{} {}",
                                       memPprTime[(ptr[10] >> 2) & 0x3],
                                       memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    msg += std::format(
                        "{} {}", lookup(memEvent, eventType),
                        lookup(memAdddc,
                               static_cast<uint8_t>(ptr[11] & 0xf)));
                    break;
                default:
                    msg += std::format("{}", lookup(memEvent, eventType));
                    break;
            }
            break;
        case unifiedBootGuard:
            msg = std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;
        case unifiedPprEvt:
            msg = std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo, lookup(pprEvent, eventType), ptr[6], ptr[7], ptr[8],
                ptr[9], ptr[10], ptr[11], ptr[12]);
            break;
        default:
        {
            // Unknown error type: dump the 13 OEM bytes as hex.
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            msg = std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                              errType, oemDataStr);
            break;
        }
    }

    errStr = std::move(msg);
}
1413
/*
 * Turn a raw SEL record into the log message filed for it.
 *
 * fruId   - number printed after "FRU:" in the message.
 * reqData - raw SEL record bytes; byte 2 is read as the record type and the
 *           buffer is reinterpreted as the matching record layout.
 * msgLog  - receives the complete message (previous content is replaced).
 *
 * The record type selects one of: standard record, timestamped OEM record,
 * unified SEL record, non-timestamped OEM record, or an unknown record
 * (logged as a raw hex dump).
 */
static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
                         std::string& msgLog)
{
    const int recType = reqData[2];
    const std::string recTypeHex = std::format("{:02X}", recType);

    // Render `count` bytes starting at `first` through toHexStr().
    const auto hexOf = [](uint8_t* first, std::size_t count) {
        std::vector<uint8_t> bytes(first, first + count);
        std::string text;
        toHexStr(bytes, text);
        return text;
    };

    std::string recName;
    std::string detail;

    msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: ";

    if (recType == stdErrType)
    {
        auto* entry = reinterpret_cast<StdSELEntry*>(reqData.data());
        recName = stdErr;

        // Sensor type 0x1F is reported as "OS"; everything else is looked up
        // by sensor number.
        std::string sensorName;
        if (entry->sensorType == 0x1F)
        {
            sensorName = "OS";
        }
        else
        {
            auto found = sensorNameTable.find(entry->sensorNum);
            sensorName = (found == sensorNameTable.end())
                             ? "Unknown"
                             : found->second.first;
        }

        parseStdSel(entry, detail);

        // Event data bytes 1..3 are contiguous starting at eventData1.
        const std::string eventData = hexOf(&(entry->eventData1), 3);
        const std::string sensorNumHex =
            std::format("{:02X}", static_cast<int>(entry->sensorNum));

        msgLog += recName + " (0x" + recTypeHex + "), Sensor: " + sensorName +
                  " (0x" + sensorNumHex + "), Event Data: (" + eventData +
                  ") " + detail;
        return;
    }

    if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax))
    {
        /* timestamped OEM SEL records */
        auto* entry = reinterpret_cast<TsOemSELEntry*>(reqData.data());
        const std::string mfrIdStr = hexOf(entry->mfrId, 3);
        const std::string oemDataStr = hexOf(entry->oemData, 6);

        recName = oemTSErr;
        parseOemSel(entry, detail);

        msgLog += recName + " (0x" + recTypeHex + "), MFG ID: " + mfrIdStr +
                  ", OEM Data: (" + oemDataStr + ") " + detail;
        return;
    }

    if (recType == fbUniErrType)
    {
        auto* entry = reinterpret_cast<NtsOemSELEntry*>(reqData.data());
        recName = fbUniSELErr;
        parseOemUnifiedSel(entry, detail);
        msgLog += recName + " (0x" + recTypeHex + "), " + detail;
        return;
    }

    if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax))
    {
        /* Non timestamped OEM SEL records */
        auto* entry = reinterpret_cast<NtsOemSELEntry*>(reqData.data());
        recName = oemNTSErr;

        const std::string oemDataStr = hexOf(entry->oemData, 13);

        // parseOemSel() takes the timestamped layout; the same buffer is
        // handed over reinterpreted as that type.
        parseOemSel(reinterpret_cast<TsOemSELEntry*>(entry), detail);
        msgLog += recName + " (0x" + recTypeHex + "), OEM Data: (" +
                  oemDataStr + ") " + detail;
        return;
    }

    // Unrecognised record type: log the whole record as hex.
    recName = unknownErr;
    toHexStr(reqData, detail);
    msgLog += recName + " (0x" + recTypeHex + ") RawData: " + detail;
}
1516
1517 } // namespace fb_oem::ipmi::sel
1518
1519 namespace ipmi
1520 {
1521
1522 namespace storage
1523 {
1524
/* The constructor attribute makes the toolchain call registerSELFunctions()
 * automatically before main(), so the SEL handlers below get registered
 * without an explicit call.
 */
static void registerSELFunctions() __attribute__((constructor));
/* SEL store shared by every handler in this file. init_priority(101) asks
 * for this object to be constructed ahead of default-priority statics
 * (lower numbers are initialized first) -- presumably so it is ready
 * before the handlers can run; confirm against the SELData definition.
 */
static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101)));
1527
1528 ipmi::RspType<uint8_t, // SEL version
1529 uint16_t, // SEL entry count
1530 uint16_t, // free space
1531 uint32_t, // last add timestamp
1532 uint32_t, // last erase timestamp
1533 uint8_t> // operation support
ipmiStorageGetSELInfo()1534 ipmiStorageGetSELInfo()
1535 {
1536 fb_oem::ipmi::sel::GetSELInfoData info;
1537
1538 selObj.getInfo(info);
1539 return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace,
1540 info.addTimeStamp, info.eraseTimeStamp,
1541 info.operationSupport);
1542 }
1543
ipmiStorageGetSELEntry(std::vector<uint8_t> data)1544 ipmi::RspType<uint16_t, std::vector<uint8_t>> ipmiStorageGetSELEntry(
1545 std::vector<uint8_t> data)
1546 {
1547 if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest))
1548 {
1549 return ipmi::responseReqDataLenInvalid();
1550 }
1551
1552 fb_oem::ipmi::sel::GetSELEntryRequest* reqData =
1553 reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]);
1554
1555 if (reqData->reservID != 0)
1556 {
1557 if (!checkSELReservation(reqData->reservID))
1558 {
1559 return ipmi::responseInvalidReservationId();
1560 }
1561 }
1562
1563 uint16_t selCnt = selObj.getCount();
1564 if (selCnt == 0)
1565 {
1566 return ipmi::responseSensorInvalid();
1567 }
1568
1569 /* If it is asked for first entry */
1570 if (reqData->recordID == fb_oem::ipmi::sel::firstEntry)
1571 {
1572 /* First Entry (0x0000) as per Spec */
1573 reqData->recordID = 1;
1574 }
1575 else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry)
1576 {
1577 /* Last entry (0xFFFF) as per Spec */
1578 reqData->recordID = selCnt;
1579 }
1580
1581 std::string ipmiRaw;
1582
1583 if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0)
1584 {
1585 return ipmi::responseSensorInvalid();
1586 }
1587
1588 std::vector<uint8_t> recDataBytes;
1589 if (fromHexStr(ipmiRaw, recDataBytes) < 0)
1590 {
1591 return ipmi::responseUnspecifiedError();
1592 }
1593
1594 /* Identify the next SEL record ID. If recordID is same as
1595 * total SeL count then next id should be last entry else
1596 * it should be incremented by 1 to current RecordID
1597 */
1598 uint16_t nextRecord;
1599 if (reqData->recordID == selCnt)
1600 {
1601 nextRecord = fb_oem::ipmi::sel::lastEntry;
1602 }
1603 else
1604 {
1605 nextRecord = reqData->recordID + 1;
1606 }
1607
1608 if (reqData->readLen == fb_oem::ipmi::sel::entireRecord)
1609 {
1610 return ipmi::responseSuccess(nextRecord, recDataBytes);
1611 }
1612 else
1613 {
1614 if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize ||
1615 reqData->readLen > fb_oem::ipmi::sel::selRecordSize)
1616 {
1617 return ipmi::responseUnspecifiedError();
1618 }
1619 std::vector<uint8_t> recPartData;
1620
1621 auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset;
1622 auto readLength = std::min(diff, static_cast<int>(reqData->readLen));
1623
1624 for (int i = 0; i < readLength; i++)
1625 {
1626 recPartData.push_back(recDataBytes[i + reqData->offset]);
1627 }
1628 return ipmi::responseSuccess(nextRecord, recPartData);
1629 }
1630 }
1631
1632 // Main function to add SEL entry
ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,std::vector<uint8_t> data)1633 ipmi::RspType<uint16_t> ipmiStorageAddSELEntry(ipmi::Context::ptr ctx,
1634 std::vector<uint8_t> data)
1635 {
1636 /* Per the IPMI spec, need to cancel any reservation when a
1637 * SEL entry is added
1638 */
1639 cancelSELReservation();
1640
1641 if (data.size() != fb_oem::ipmi::sel::selRecordSize)
1642 {
1643 return ipmi::responseReqDataLenInvalid();
1644 }
1645
1646 std::string ipmiRaw, logErr;
1647 toHexStr(data, ipmiRaw);
1648
1649 /* Parse sel data and get an error log to be filed */
1650 fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr);
1651
1652 std::string source = "/xyz/openbmc_project/state/host0";
1653 // Launch the logging thread
1654 std::thread([=]() {
1655 namespace Errors = sdbusplus::error::com::meta::ipmi::UnifiedSEL;
1656 lg2::commit(Errors::UnifiedSELEvent("SOURCE", source, "EVENT", logErr,
1657 "RAW_EVENT", ipmiRaw));
1658 }).detach();
1659
1660 int responseID = selObj.addEntry(ipmiRaw.c_str());
1661 if (responseID < 0)
1662 {
1663 return ipmi::responseUnspecifiedError();
1664 }
1665 return ipmi::responseSuccess(static_cast<uint16_t>(responseID));
1666 }
1667
ipmiStorageClearSEL(uint16_t reservationID,const std::array<uint8_t,3> & clr,uint8_t eraseOperation)1668 ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID,
1669 const std::array<uint8_t, 3>& clr,
1670 uint8_t eraseOperation)
1671 {
1672 if (!checkSELReservation(reservationID))
1673 {
1674 return ipmi::responseInvalidReservationId();
1675 }
1676
1677 static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'};
1678 if (clr != clrExpected)
1679 {
1680 return ipmi::responseInvalidFieldRequest();
1681 }
1682
1683 /* If there is no sel then return erase complete */
1684 if (selObj.getCount() == 0)
1685 {
1686 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1687 }
1688
1689 /* Erasure status cannot be fetched, so always return erasure
1690 * status as `erase completed`.
1691 */
1692 if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus)
1693 {
1694 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1695 }
1696
1697 /* Check that initiate erase is correct */
1698 if (eraseOperation != fb_oem::ipmi::sel::initiateErase)
1699 {
1700 return ipmi::responseInvalidFieldRequest();
1701 }
1702
1703 /* Per the IPMI spec, need to cancel any reservation when the
1704 * SEL is cleared
1705 */
1706 cancelSELReservation();
1707
1708 /* Clear the complete Sel Json object */
1709 if (selObj.clear() < 0)
1710 {
1711 return ipmi::responseUnspecifiedError();
1712 }
1713
1714 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete);
1715 }
1716
ipmiStorageGetSELTime()1717 ipmi::RspType<uint32_t> ipmiStorageGetSELTime()
1718 {
1719 struct timespec selTime = {};
1720
1721 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
1722 {
1723 return ipmi::responseUnspecifiedError();
1724 }
1725
1726 return ipmi::responseSuccess(selTime.tv_sec);
1727 }
1728
ipmiStorageSetSELTime(uint32_t)1729 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t)
1730 {
1731 // Set SEL Time is not supported
1732 return ipmi::responseInvalidCommand();
1733 }
1734
ipmiStorageGetSELTimeUtcOffset()1735 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset()
1736 {
1737 /* TODO: For now, the SEL time stamp is based on UTC time,
1738 * so return 0x0000 as offset. Might need to change once
1739 * supporting zones in SEL time stamps
1740 */
1741
1742 uint16_t utcOffset = 0x0000;
1743 return ipmi::responseSuccess(utcOffset);
1744 }
1745
/*
 * Register every SEL command handler defined in this file with ipmid.
 *
 * All handlers are registered under ipmi::netFnStorage at priority
 * ipmi::prioOpenBmcBase. The read-only commands (Get SEL Info / Entry /
 * Time / Time UTC Offset) are registered with User privilege; the commands
 * that modify state (Add SEL Entry, Clear SEL, Set SEL Time) with Operator
 * privilege.
 */
void registerSELFunctions()
{
    // <Get SEL Info>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User,
                          ipmiStorageGetSELInfo);

    // <Get SEL Entry>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User,
                          ipmiStorageGetSELEntry);

    // <Add SEL Entry>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdAddSelEntry,
                          ipmi::Privilege::Operator, ipmiStorageAddSELEntry);

    // <Clear SEL>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdClearSel, ipmi::Privilege::Operator,
                          ipmiStorageClearSEL);

    // <Get SEL Time>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdGetSelTime, ipmi::Privilege::User,
                          ipmiStorageGetSELTime);

    // <Set SEL Time> (the handler rejects the command as unsupported)
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdSetSelTime,
                          ipmi::Privilege::Operator, ipmiStorageSetSELTime);

    // <Get SEL Time UTC Offset>
    ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage,
                          ipmi::storage::cmdGetSelTimeUtcOffset,
                          ipmi::Privilege::User,
                          ipmiStorageGetSELTimeUtcOffset);

    return;
}
1786
1787 } // namespace storage
1788 } // namespace ipmi
1789