1 #include <fstream> 2 #include <future> 3 #include <iostream> 4 #include <chrono> 5 #include <cstdio> 6 #include <poll.h> 7 #include <set> 8 #include <string> 9 #include <vector> 10 #include <sdbusplus/vtable.hpp> 11 #include <sys/inotify.h> 12 #include <systemd/sd-bus.h> 13 #include <systemd/sd-journal.h> 14 #include <unistd.h> 15 #include "config.h" 16 #include "elog_entry.hpp" 17 #include <phosphor-logging/log.hpp> 18 #include "log_manager.hpp" 19 #include "elog_meta.hpp" 20 #include "elog_serialize.hpp" 21 22 using namespace phosphor::logging; 23 using namespace std::chrono; 24 extern const std::map<metadata::Metadata, 25 std::function<metadata::associations::Type>> meta; 26 27 namespace phosphor 28 { 29 namespace logging 30 { 31 namespace internal 32 { 33 inline auto getLevel(const std::string& errMsg) 34 { 35 auto reqLevel = Entry::Level::Error; // Default to Error 36 37 auto levelmap = g_errLevelMap.find(errMsg); 38 if (levelmap != g_errLevelMap.end()) 39 { 40 reqLevel = static_cast<Entry::Level>(levelmap->second); 41 } 42 43 return reqLevel; 44 } 45 46 void Manager::commit(uint64_t transactionId, std::string errMsg) 47 { 48 auto level = getLevel(errMsg); 49 _commit(transactionId, std::move(errMsg), level); 50 } 51 52 void Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 53 uint32_t errLvl) 54 { 55 _commit(transactionId, std::move(errMsg), 56 static_cast<Entry::Level>(errLvl)); 57 } 58 59 void Manager::_commit(uint64_t transactionId, std::string&& errMsg, 60 Entry::Level errLvl) 61 { 62 if (errLvl < Entry::sevLowerLimit) 63 { 64 if (realErrors.size() >= ERROR_CAP) 65 { 66 erase(realErrors.front()); 67 } 68 } 69 else 70 { 71 if (infoErrors.size() >= ERROR_INFO_CAP) 72 { 73 erase(infoErrors.front()); 74 } 75 } 76 constexpr const auto transactionIdVar = "TRANSACTION_ID"; 77 // Length of 'TRANSACTION_ID' string. 78 constexpr const auto transactionIdVarSize = strlen(transactionIdVar); 79 // Length of 'TRANSACTION_ID=' string. 80 constexpr const auto transactionIdVarOffset = transactionIdVarSize + 1; 81 82 // Flush all the pending log messages into the journal 83 journalSync(); 84 85 sd_journal *j = nullptr; 86 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 87 if (rc < 0) 88 { 89 logging::log<logging::level::ERR>("Failed to open journal", 90 logging::entry("DESCRIPTION=%s", strerror(-rc))); 91 return; 92 } 93 94 std::string transactionIdStr = std::to_string(transactionId); 95 std::set<std::string> metalist; 96 auto metamap = g_errMetaMap.find(errMsg); 97 if (metamap != g_errMetaMap.end()) 98 { 99 metalist.insert(metamap->second.begin(), metamap->second.end()); 100 } 101 102 //Add _PID field information in AdditionalData. 103 metalist.insert("_PID"); 104 105 std::vector<std::string> additionalData; 106 107 // Read the journal from the end to get the most recent entry first. 108 // The result from the sd_journal_get_data() is of the form VARIABLE=value. 109 SD_JOURNAL_FOREACH_BACKWARDS(j) 110 { 111 const char *data = nullptr; 112 size_t length = 0; 113 114 // Look for the transaction id metadata variable 115 rc = sd_journal_get_data(j, transactionIdVar, (const void **)&data, 116 &length); 117 if (rc < 0) 118 { 119 // This journal entry does not have the TRANSACTION_ID 120 // metadata variable. 121 continue; 122 } 123 124 // journald does not guarantee that sd_journal_get_data() returns NULL 125 // terminated strings, so need to specify the size to use to compare, 126 // use the returned length instead of anything that relies on NULL 127 // terminators like strlen(). 128 // The data variable is in the form of 'TRANSACTION_ID=1234'. Remove 129 // the TRANSACTION_ID characters plus the (=) sign to do the comparison. 130 // 'data + transactionIdVarOffset' will be in the form of '1234'. 131 // 'length - transactionIdVarOffset' will be the length of '1234'. 132 if ((length <= (transactionIdVarOffset)) || 133 (transactionIdStr.compare(0, 134 transactionIdStr.size(), 135 data + transactionIdVarOffset, 136 length - transactionIdVarOffset) != 0)) 137 { 138 // The value of the TRANSACTION_ID metadata is not the requested 139 // transaction id number. 140 continue; 141 } 142 143 // Search for all metadata variables in the current journal entry. 144 for (auto i = metalist.cbegin(); i != metalist.cend();) 145 { 146 rc = sd_journal_get_data(j, (*i).c_str(), 147 (const void **)&data, &length); 148 if (rc < 0) 149 { 150 // Metadata variable not found, check next metadata variable. 151 i++; 152 continue; 153 } 154 155 // Metadata variable found, save it and remove it from the set. 156 additionalData.emplace_back(data, length); 157 i = metalist.erase(i); 158 } 159 if (metalist.empty()) 160 { 161 // All metadata variables found, break out of journal loop. 162 break; 163 } 164 } 165 if (!metalist.empty()) 166 { 167 // Not all the metadata variables were found in the journal. 168 for (auto& metaVarStr : metalist) 169 { 170 logging::log<logging::level::INFO>("Failed to find metadata", 171 logging::entry("META_FIELD=%s", metaVarStr.c_str())); 172 } 173 } 174 175 sd_journal_close(j); 176 177 // Create error Entry dbus object 178 entryId++; 179 if (errLvl >= Entry::sevLowerLimit) 180 { 181 infoErrors.push_back(entryId); 182 } 183 else 184 { 185 realErrors.push_back(entryId); 186 } 187 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 188 std::chrono::system_clock::now().time_since_epoch()).count(); 189 auto objPath = std::string(OBJ_ENTRY) + '/' + 190 std::to_string(entryId); 191 192 AssociationList objects {}; 193 processMetadata(errMsg, additionalData, objects); 194 195 auto e = std::make_unique<Entry>( 196 busLog, 197 objPath, 198 entryId, 199 ms, // Milliseconds since 1970 200 errLvl, 201 std::move(errMsg), 202 std::move(additionalData), 203 std::move(objects), 204 fwVersion, 205 *this); 206 serialize(*e); 207 entries.insert(std::make_pair(entryId, std::move(e))); 208 } 209 210 void Manager::processMetadata(const std::string& errorName, 211 const std::vector<std::string>& additionalData, 212 AssociationList& objects) const 213 { 214 // additionalData is a list of "metadata=value" 215 constexpr auto separator = '='; 216 for(const auto& entry: additionalData) 217 { 218 auto found = entry.find(separator); 219 if(std::string::npos != found) 220 { 221 auto metadata = entry.substr(0, found); 222 auto iter = meta.find(metadata); 223 if(meta.end() != iter) 224 { 225 (iter->second)(metadata, additionalData, objects); 226 } 227 } 228 } 229 } 230 231 void Manager::erase(uint32_t entryId) 232 { 233 auto entry = entries.find(entryId); 234 if(entries.end() != entry) 235 { 236 // Delete the persistent representation of this error. 237 fs::path errorPath(ERRLOG_PERSIST_PATH); 238 errorPath /= std::to_string(entryId); 239 fs::remove(errorPath); 240 241 auto removeId = [](std::list<uint32_t>& ids , uint32_t id) 242 { 243 auto it = std::find(ids.begin(), ids.end(), id); 244 if (it != ids.end()) 245 { 246 ids.erase(it); 247 } 248 }; 249 if (entry->second->severity() >= Entry::sevLowerLimit) 250 { 251 removeId(infoErrors, entryId); 252 } 253 else 254 { 255 removeId(realErrors, entryId); 256 } 257 entries.erase(entry); 258 } 259 else 260 { 261 logging::log<level::ERR>("Invalid entry ID to delete", 262 logging::entry("ID=%d", entryId)); 263 } 264 } 265 266 void Manager::restore() 267 { 268 auto sanity = [](const auto& id, const auto& restoredId) 269 { 270 return id == restoredId; 271 }; 272 std::vector<uint32_t> errorIds; 273 274 fs::path dir(ERRLOG_PERSIST_PATH); 275 if (!fs::exists(dir) || fs::is_empty(dir)) 276 { 277 return; 278 } 279 280 for(auto& file: fs::directory_iterator(dir)) 281 { 282 auto id = file.path().filename().c_str(); 283 auto idNum = std::stol(id); 284 auto e = std::make_unique<Entry>( 285 busLog, 286 std::string(OBJ_ENTRY) + '/' + id, 287 idNum, 288 *this); 289 if (deserialize(file.path(), *e)) 290 { 291 //validate the restored error entry id 292 if (sanity(static_cast<uint32_t>(idNum), e->id())) 293 { 294 e->emit_object_added(); 295 if (e->severity() >= Entry::sevLowerLimit) 296 { 297 infoErrors.push_back(idNum); 298 } 299 else 300 { 301 realErrors.push_back(idNum); 302 } 303 304 entries.insert(std::make_pair(idNum, std::move(e))); 305 errorIds.push_back(idNum); 306 } 307 else 308 { 309 logging::log<logging::level::ERR>( 310 "Failed in sanity check while restoring error entry. " 311 "Ignoring error entry", 312 logging::entry("ID_NUM=%d", idNum), 313 logging::entry("ENTRY_ID=%d", e->id())); 314 } 315 } 316 } 317 318 if (!errorIds.empty()) 319 { 320 entryId = *(std::max_element(errorIds.begin(), errorIds.end())); 321 } 322 } 323 324 void Manager::journalSync() 325 { 326 bool syncRequested = false; 327 auto fd = -1; 328 auto rc = -1; 329 auto wd = -1; 330 auto bus = sdbusplus::bus::new_default(); 331 332 auto start = 333 duration_cast<microseconds>(steady_clock::now().time_since_epoch()) 334 .count(); 335 336 constexpr auto maxRetry = 2; 337 for (int i = 0; i < maxRetry; i++) 338 { 339 // Read timestamp from synced file 340 constexpr auto syncedPath = "/run/systemd/journal/synced"; 341 std::ifstream syncedFile(syncedPath); 342 if (syncedFile.fail()) 343 { 344 log<level::ERR>("Failed to open journal synced file", 345 entry("FILENAME=%s", syncedPath), 346 entry("ERRNO=%d", errno)); 347 return; 348 } 349 350 // See if a sync happened by now 351 std::string timestampStr; 352 std::getline(syncedFile, timestampStr); 353 auto timestamp = stoll(timestampStr); 354 if (timestamp >= start) 355 { 356 return; 357 } 358 359 // Let's ask for a sync, but only once 360 if (!syncRequested) 361 { 362 syncRequested = true; 363 364 constexpr auto SYSTEMD_BUSNAME = "org.freedesktop.systemd1"; 365 constexpr auto SYSTEMD_PATH = "/org/freedesktop/systemd1"; 366 constexpr auto SYSTEMD_INTERFACE = 367 "org.freedesktop.systemd1.Manager"; 368 constexpr auto JOURNAL_UNIT = "systemd-journald.service"; 369 auto signal = SIGRTMIN + 1; 370 371 auto method = bus.new_method_call(SYSTEMD_BUSNAME, SYSTEMD_PATH, 372 SYSTEMD_INTERFACE, "KillUnit"); 373 method.append(JOURNAL_UNIT, "main", signal); 374 bus.call(method); 375 if (method.is_method_error()) 376 { 377 log<level::ERR>("Failed to kill journal service"); 378 return; 379 } 380 continue; 381 } 382 383 // Let's install the inotify watch, if we didn't do that yet. This watch 384 // monitors the syncedFile for when journald updates it with a newer 385 // timestamp. This means the journal has been flushed. 386 if (fd < 0) 387 { 388 fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC); 389 if (fd < 0) 390 { 391 log<level::ERR>("Failed to create inotify watch", 392 entry("ERRNO=%d", errno)); 393 return; 394 } 395 396 constexpr auto JOURNAL_RUN_PATH = "/run/systemd/journal"; 397 wd = inotify_add_watch(fd, JOURNAL_RUN_PATH, 398 IN_MOVED_TO | IN_DONT_FOLLOW | IN_ONLYDIR); 399 if (wd < 0) 400 { 401 log<level::ERR>("Failed to watch journal directory", 402 entry("PATH=%s", JOURNAL_RUN_PATH), 403 entry("ERRNO=%d", errno)); 404 close(fd); 405 return; 406 } 407 continue; 408 } 409 410 // Let's wait until inotify reports an event 411 struct pollfd fds = { 412 .fd = fd, 413 .events = POLLIN, 414 }; 415 constexpr auto pollTimeout = 5; // 5 seconds 416 rc = poll(&fds, 1, pollTimeout * 1000); 417 if (rc < 0) 418 { 419 log<level::ERR>("Failed to add event", entry("ERRNO=%d", errno), 420 entry("ERR=%s", strerror(-rc))); 421 inotify_rm_watch(fd, wd); 422 close(fd); 423 return; 424 } 425 else if (rc == 0) 426 { 427 log<level::INFO>("Poll timeout, no new journal synced data", 428 entry("TIMEOUT=%d", pollTimeout)); 429 break; 430 } 431 432 // Read from the specified file descriptor until there is no new data, 433 // throwing away everything read since the timestamp will be read at the 434 // beginning of the loop. 435 constexpr auto maxBytes = 64; 436 uint8_t buffer[maxBytes]; 437 while (read(fd, buffer, maxBytes) > 0) 438 ; 439 } 440 441 if (fd != -1) 442 { 443 if (wd != -1) 444 { 445 inotify_rm_watch(fd, wd); 446 } 447 close(fd); 448 } 449 450 return; 451 } 452 453 std::string Manager::readFWVersion() 454 { 455 std::string version; 456 std::ifstream versionFile{BMC_VERSION_FILE}; 457 std::string line; 458 static constexpr auto VERSION_ID = "VERSION_ID="; 459 460 while (std::getline(versionFile, line)) 461 { 462 if (line.find(VERSION_ID) != std::string::npos) 463 { 464 auto pos = line.find_first_of('"') + 1; 465 version = line.substr(pos, line.find_last_of('"') - pos); 466 break; 467 } 468 } 469 470 if (version.empty()) 471 { 472 log<level::ERR>("Unable to read BMC firmware version"); 473 } 474 475 return version; 476 } 477 478 } // namespace internal 479 } // namespace logging 480 } // namepsace phosphor 481