1 /**
2 * Copyright © 2021 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "config.h"
17
18 #include "shutdown_alarm_monitor.hpp"
19
20 #include <unistd.h>
21
22 #include <phosphor-logging/log.hpp>
23 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
24
25 #include <format>
26
27 namespace sensor::monitor
28 {
29 using namespace phosphor::logging;
30 using namespace phosphor::fan::util;
31 using namespace phosphor::fan;
32 namespace fs = std::filesystem;
33
34 const std::map<ShutdownType, std::string> shutdownInterfaces{
35 {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"},
36 {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}};
37
38 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{
39 {ShutdownType::hard,
40 {{AlarmType::low, "HardShutdownAlarmLow"},
41 {AlarmType::high, "HardShutdownAlarmHigh"}}},
42 {ShutdownType::soft,
43 {{AlarmType::low, "SoftShutdownAlarmLow"},
44 {AlarmType::high, "SoftShutdownAlarmHigh"}}}};
45
46 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{
47 {ShutdownType::hard,
48 std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}},
49 {ShutdownType::soft,
50 std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}};
51
52 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{
53 {ShutdownType::hard,
54 {{AlarmType::high,
55 "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"},
56 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
57 "HardShutdownAlarmLow"}}},
58 {ShutdownType::soft,
59 {{AlarmType::high,
60 "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"},
61 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
62 "SoftShutdownAlarmLow"}}}};
63
64 const std::map<ShutdownType, std::map<AlarmType, std::string>>
65 alarmClearEventLogs{
66 {ShutdownType::hard,
67 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
68 "HardShutdownAlarmHighClear"},
69 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
70 "HardShutdownAlarmLowClear"}}},
71 {ShutdownType::soft,
72 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
73 "SoftShutdownAlarmHighClear"},
74 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
75 "SoftShutdownAlarmLowClear"}}}};
76
// systemd manager D-Bus endpoint, used to start the power off target.
constexpr auto systemdService = "org.freedesktop.systemd1";
constexpr auto systemdPath = "/org/freedesktop/systemd1";
constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager";

// Interface and property read to get a sensor's current reading for the
// journal messages and event logs.
constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr auto valueProperty = "Value";

// Logging service D-Bus endpoint, used to create event logs.
// These are constexpr like the constants above so all of the string
// constants in this file are declared the same way.
constexpr auto loggingService = "xyz.openbmc_project.Logging";
constexpr auto loggingPath = "/xyz/openbmc_project/logging";
constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create";
85
86 using namespace sdbusplus::bus::match;
87
ShutdownAlarmMonitor(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)88 ShutdownAlarmMonitor::ShutdownAlarmMonitor(
89 sdbusplus::bus_t& bus, sdeventplus::Event& event,
90 std::shared_ptr<PowerState> powerState) :
91 bus(bus), event(event), _powerState(std::move(powerState)),
92 hardShutdownMatch(bus,
93 "type='signal',member='PropertiesChanged',"
94 "path_namespace='/xyz/openbmc_project/sensors',"
95 "arg0='" +
96 shutdownInterfaces.at(ShutdownType::hard) + "'",
97 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
98 std::placeholders::_1)),
99 softShutdownMatch(bus,
100 "type='signal',member='PropertiesChanged',"
101 "path_namespace='/xyz/openbmc_project/sensors',"
102 "arg0='" +
103 shutdownInterfaces.at(ShutdownType::soft) + "'",
104 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
105 std::placeholders::_1))
106 {
107 _powerState->addCallback("shutdownMon",
108 std::bind(&ShutdownAlarmMonitor::powerStateChanged,
109 this, std::placeholders::_1));
110 findAlarms();
111
112 if (_powerState->isPowerOn())
113 {
114 checkAlarms();
115
116 // Get rid of any previous saved timestamps that don't
117 // apply anymore.
118 timestamps.prune(alarms);
119 }
120 else
121 {
122 timestamps.clear();
123 }
124 }
125
findAlarms()126 void ShutdownAlarmMonitor::findAlarms()
127 {
128 // Find all shutdown threshold ifaces currently on D-Bus.
129 for (const auto& [shutdownType, interface] : shutdownInterfaces)
130 {
131 auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0);
132
133 auto shutdownType2 = shutdownType;
134
135 std::for_each(
136 paths.begin(), paths.end(),
137 [this, shutdownType2](const auto& path) {
138 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high},
139 nullptr);
140 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low},
141 nullptr);
142 });
143 }
144 }
145
/**
 * @brief Reads the current value of every known alarm property from D-Bus
 *        and starts or stops the matching shutdown timer via checkAlarm().
 *
 * Alarms whose property read fails with a DBusServiceError are skipped.
 */
void ShutdownAlarmMonitor::checkAlarms()
{
    for (const auto& alarm : alarms)
    {
        const auto& alarmKey = alarm.first;
        const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
        const auto& interface = shutdownInterfaces.at(shutdownType);
        // Bind by reference, there is no need to copy the name every pass.
        const auto& propertyName =
            alarmProperties.at(shutdownType).at(alarmType);
        bool alarmAsserted = false;

        try
        {
            alarmAsserted = SDBusPlus::getProperty<bool>(
                bus, sensorPath, interface, propertyName);
        }
        catch (const DBusServiceError&)
        {
            // The sensor isn't on D-Bus anymore
            log<level::INFO>(std::format("No {} interface on {} anymore.",
                                         interface, sensorPath)
                                 .c_str());
            continue;
        }

        checkAlarm(alarmAsserted, alarmKey);
    }
}
172
propertiesChanged(sdbusplus::message_t & message)173 void ShutdownAlarmMonitor::propertiesChanged(sdbusplus::message_t& message)
174 {
175 std::map<std::string, std::variant<bool>> properties;
176 std::string interface;
177
178 if (!_powerState->isPowerOn())
179 {
180 return;
181 }
182
183 message.read(interface, properties);
184
185 auto type = getShutdownType(interface);
186 if (!type)
187 {
188 return;
189 }
190
191 std::string sensorPath = message.get_path();
192
193 const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low);
194 if (properties.count(lowAlarmName) > 0)
195 {
196 AlarmKey alarmKey{sensorPath, *type, AlarmType::low};
197 auto alarm = alarms.find(alarmKey);
198 if (alarm == alarms.end())
199 {
200 alarms.emplace(alarmKey, nullptr);
201 }
202 checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey);
203 }
204
205 const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high);
206 if (properties.count(highAlarmName) > 0)
207 {
208 AlarmKey alarmKey{sensorPath, *type, AlarmType::high};
209 auto alarm = alarms.find(alarmKey);
210 if (alarm == alarms.end())
211 {
212 alarms.emplace(alarmKey, nullptr);
213 }
214 checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey);
215 }
216 }
217
checkAlarm(bool value,const AlarmKey & alarmKey)218 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey)
219 {
220 auto alarm = alarms.find(alarmKey);
221 if (alarm == alarms.end())
222 {
223 return;
224 }
225
226 // Start or stop the timer if necessary.
227 auto& timer = alarm->second;
228 if (value)
229 {
230 if (!timer)
231 {
232 startTimer(alarmKey);
233 }
234 }
235 else
236 {
237 if (timer)
238 {
239 stopTimer(alarmKey);
240 }
241 }
242 }
243
/**
 * @brief Creates an event log for the asserted alarm and starts its
 *        shutdown timer.
 *
 * If a start time was saved for this alarm, the timer is started with only
 * the remaining part of the shutdown delay.
 *
 * @param[in] alarmKey - The alarm key
 *
 * @throws std::runtime_error if the alarm isn't in the alarms map
 */
void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);
    std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)};
    std::optional<double> value;

    auto alarm = alarms.find(alarmKey);
    if (alarm == alarms.end())
    {
        throw std::runtime_error("Couldn't find alarm inside startTimer");
    }

    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const DBusServiceError&)
    {
        // If the sensor was just added, the Value interface for it may
        // not be in the mapper yet.  This could only happen if the sensor
        // application was started with power up and the value exceeded the
        // threshold immediately.
    }

    createEventLog(alarmKey, true, value);

    const uint64_t now =
        std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::system_clock::now().time_since_epoch())
            .count();

    // If there is a saved timestamp for this timer, then we were restarted
    // while the timer was running.  Calculate the remaining time to use
    // for the timer.
    auto previousStartTime = timestamps.get().find(alarmKey);
    if (previousStartTime != timestamps.get().end())
    {
        const uint64_t& original = previousStartTime->second;

        log<level::INFO>(std::format("Found previously running {} timer "
                                     "for {} with start time {}",
                                     propertyName, sensorPath, original)
                             .c_str());

        // Sanity check it isn't total garbage.
        if (now > original)
        {
            uint64_t remainingTime = 0;
            const auto elapsedTime = now - original;
            const auto fullDelay =
                static_cast<uint64_t>(shutdownDelay.count());

            // If the delay has already fully elapsed, the remaining time
            // stays zero.
            if (elapsedTime < fullDelay)
            {
                remainingTime = fullDelay - elapsedTime;
            }

            shutdownDelay = std::chrono::milliseconds{remainingTime};
        }
        else
        {
            // NOTE(review): per the note at the end of this function the
            // saved (bad) timestamp appears to be kept in this case -
            // confirm that is intended.
            log<level::WARNING>(
                std::format(
                    "Restarting {} shutdown timer for {} for full "
                    "time because saved time {} is after current time {}",
                    propertyName, sensorPath, original, now)
                    .c_str());
        }
    }

    // The sensor value is empty when the read above failed, so it must not
    // be dereferenced unconditionally.
    const std::string valueText =
        value ? std::format("{}", *value) : std::string{"unavailable"};

    log<level::INFO>(
        std::format("Starting {}ms {} shutdown timer due to sensor {} value {}",
                    shutdownDelay.count(), propertyName, sensorPath,
                    valueText)
            .c_str());

    auto& timer = alarm->second;

    timer = std::make_unique<
        sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
        event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey));

    timer->restartOnce(shutdownDelay);

    // Note that if this key is already in the timestamps map because
    // the timer was already running the timestamp will not be updated.
    timestamps.add(alarmKey, now);
}
331
/**
 * @brief Stops and deletes the shutdown timer of a deasserted alarm,
 *        removes its saved timestamp and creates the alarm clear event log.
 *
 * The timer is cancelled before any D-Bus call is made so that a failure
 * to read the sensor value or to create the event log cannot leave the
 * shutdown timer running after the alarm has cleared.
 *
 * @param[in] alarmKey - The alarm key
 *
 * @throws std::runtime_error if the alarm isn't in the alarms map
 */
void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);

    auto alarm = alarms.find(alarmKey);
    if (alarm == alarms.end())
    {
        throw std::runtime_error("Couldn't find alarm inside stopTimer");
    }

    if (auto& timer = alarm->second; timer)
    {
        timer->setEnabled(false);
        timer.reset();
    }

    timestamps.erase(alarmKey);

    // The sensor value is only used for the event log and the journal, so
    // handle a failed read the same way startTimer() does.
    std::optional<double> value;
    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const DBusServiceError&)
    {
        // The Value interface isn't available; continue without a value.
    }

    createEventLog(alarmKey, false, value);

    const std::string valueText =
        value ? std::format("{}", *value) : std::string{"unavailable"};

    log<level::INFO>(
        std::format("Stopping {} shutdown timer due to sensor {} value {}",
                    propertyName, sensorPath, valueText)
            .c_str());
}
359
/**
 * @brief Asks the dump manager to create a BMC dump.
 *
 * Any exception raised by the request is logged and otherwise ignored.
 */
void ShutdownAlarmMonitor::createBmcDump() const
{
    // The CreateDump method is called with an empty parameter list.
    using DumpParameters =
        std::vector<std::pair<std::string, std::variant<std::string, uint64_t>>>;

    try
    {
        util::SDBusPlus::callMethod(
            "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc",
            "xyz.openbmc_project.Dump.Create", "CreateDump", DumpParameters{});
    }
    catch (const std::exception& e)
    {
        log<level::ERR>(
            std::format("Caught exception while creating BMC dump: {}",
                        e.what())
                .c_str());
    }
}
378
/**
 * @brief Called when a shutdown timer expires: re-sends the event log as a
 *        power off error, starts the power off target, removes the saved
 *        timestamp and requests a BMC dump.
 *
 * Reading the sensor value and creating the event log are best effort.
 * A failure in either is logged and must not prevent the power off.
 *
 * @param[in] alarmKey - The alarm key
 */
void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);

    // The value only goes into the event log, so it is optional.
    std::optional<double> value;
    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const std::exception& e)
    {
        log<level::ERR>(
            std::format("Could not read the value of sensor {}: {}",
                        sensorPath, e.what())
                .c_str());
    }

    log<level::ERR>(
        std::format(
            "The {} shutdown timer expired for sensor {}, shutting down",
            propertyName, sensorPath)
            .c_str());

    // Re-send the event log.  If someone didn't want this it could be
    // wrapped by a compile option.
    try
    {
        createEventLog(alarmKey, true, value, true);
    }
    catch (const std::exception& e)
    {
        log<level::ERR>(
            std::format("Could not create the power off event log for "
                        "sensor {}: {}",
                        sensorPath, e.what())
                .c_str());
    }

    // NOTE(review): the hard power off target is started for soft shutdown
    // alarms as well - confirm that is intended.
    SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface,
                          "StartUnit", "obmc-chassis-hard-poweroff@0.target",
                          "replace");

    timestamps.erase(alarmKey);
    createBmcDump();
}
404
powerStateChanged(bool powerStateOn)405 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn)
406 {
407 if (powerStateOn)
408 {
409 checkAlarms();
410 }
411 else
412 {
413 timestamps.clear();
414
415 // Cancel and delete all timers
416 std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) {
417 auto& timer = alarm.second;
418 if (timer)
419 {
420 timer->setEnabled(false);
421 timer.reset();
422 }
423 });
424 }
425 }
426
createEventLog(const AlarmKey & alarmKey,bool alarmValue,const std::optional<double> & sensorValue,bool isPowerOffError)427 void ShutdownAlarmMonitor::createEventLog(
428 const AlarmKey& alarmKey, bool alarmValue,
429 const std::optional<double>& sensorValue, bool isPowerOffError)
430 {
431 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
432 const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
433 std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath},
434 {"_PID", std::to_string(getpid())}};
435
436 std::string errorName =
437 (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType)
438 : alarmClearEventLogs.at(shutdownType).at(alarmType);
439
440 // severity = Critical if a power off
441 // severity = Error if alarm was asserted
442 // severity = Informational if alarm was deasserted
443 Entry::Level severity = Entry::Level::Error;
444 if (isPowerOffError)
445 {
446 severity = Entry::Level::Critical;
447 }
448 else if (!alarmValue)
449 {
450 severity = Entry::Level::Informational;
451 }
452
453 if (sensorValue)
454 {
455 ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue));
456 }
457
458 // If this is a power off, specify that it's a power
459 // fault and a system termination. This is used by some
460 // implementations for service reasons.
461 if (isPowerOffError)
462 {
463 ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM");
464 }
465
466 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
467 "Create", errorName, convertForMessage(severity), ad);
468 }
469
470 std::optional<ShutdownType>
getShutdownType(const std::string & interface) const471 ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const
472 {
473 auto it = std::find_if(
474 shutdownInterfaces.begin(), shutdownInterfaces.end(),
475 [interface](const auto& a) { return a.second == interface; });
476
477 if (it == shutdownInterfaces.end())
478 {
479 return std::nullopt;
480 }
481
482 return it->first;
483 }
484
485 } // namespace sensor::monitor
486