xref: /openbmc/phosphor-fan-presence/sensor-monitor/shutdown_alarm_monitor.cpp (revision 61b7329603e737b76b04b98746d69c1f410761b8)
1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "shutdown_alarm_monitor.hpp"
19 
20 #include <fmt/format.h>
21 #include <unistd.h>
22 
23 #include <phosphor-logging/log.hpp>
24 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
25 
26 namespace sensor::monitor
27 {
28 using namespace phosphor::logging;
29 using namespace phosphor::fan::util;
30 using namespace phosphor::fan;
31 namespace fs = std::filesystem;
32 
// The D-Bus threshold interface name for each shutdown type.  These names
// are used both to find the sensors that implement a shutdown threshold and
// to build the PropertiesChanged match rules.
const std::map<ShutdownType, std::string> shutdownInterfaces{
    {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"},
    {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}};
36 
// The name of the boolean alarm property on the shutdown threshold
// interface, looked up by shutdown type and then by alarm type (low/high).
const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{
    {ShutdownType::hard,
     {{AlarmType::low, "HardShutdownAlarmLow"},
      {AlarmType::high, "HardShutdownAlarmHigh"}}},
    {ShutdownType::soft,
     {{AlarmType::low, "SoftShutdownAlarmLow"},
      {AlarmType::high, "SoftShutdownAlarmHigh"}}}};
44 
// The full length of the shutdown timer for each shutdown type.  The values
// come from the SHUTDOWN_ALARM_*_DELAY_MS definitions (presumably provided
// by config.h at build time).
const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{
    {ShutdownType::hard,
     std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}},
    {ShutdownType::soft,
     std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}};
50 
// The event log error name to create when an alarm is asserted (and again
// when its shutdown timer expires), looked up by shutdown type and then by
// alarm type.
const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{
    {ShutdownType::hard,
     {{AlarmType::high,
       "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"},
      {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
                       "HardShutdownAlarmLow"}}},
    {ShutdownType::soft,
     {{AlarmType::high,
       "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"},
      {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
                       "SoftShutdownAlarmLow"}}}};
62 
// The event log error name to create when an alarm is deasserted, looked up
// by shutdown type and then by alarm type.
const std::map<ShutdownType, std::map<AlarmType, std::string>>
    alarmClearEventLogs{
        {ShutdownType::hard,
         {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
                            "HardShutdownAlarmHighClear"},
          {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
                           "HardShutdownAlarmLowClear"}}},
        {ShutdownType::soft,
         {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
                            "SoftShutdownAlarmHighClear"},
          {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
                           "SoftShutdownAlarmLowClear"}}}};
75 
// systemd manager D-Bus endpoint, used to start the power off target.
constexpr auto systemdService = "org.freedesktop.systemd1";
constexpr auto systemdPath = "/org/freedesktop/systemd1";
constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager";

// Sensor value interface and property, read for logging purposes.
constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr auto valueProperty = "Value";

// Logging service D-Bus endpoint, used to create event logs.
constexpr auto loggingService = "xyz.openbmc_project.Logging";
constexpr auto loggingPath = "/xyz/openbmc_project/logging";
constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create";
84 
85 using namespace sdbusplus::bus::match;
86 
87 ShutdownAlarmMonitor::ShutdownAlarmMonitor(
88     sdbusplus::bus_t& bus, sdeventplus::Event& event,
89     std::shared_ptr<PowerState> powerState) :
90     bus(bus),
91     event(event), _powerState(std::move(powerState)),
92     hardShutdownMatch(bus,
93                       "type='signal',member='PropertiesChanged',"
94                       "path_namespace='/xyz/openbmc_project/sensors',"
95                       "arg0='" +
96                           shutdownInterfaces.at(ShutdownType::hard) + "'",
97                       std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
98                                 std::placeholders::_1)),
99     softShutdownMatch(bus,
100                       "type='signal',member='PropertiesChanged',"
101                       "path_namespace='/xyz/openbmc_project/sensors',"
102                       "arg0='" +
103                           shutdownInterfaces.at(ShutdownType::soft) + "'",
104                       std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
105                                 std::placeholders::_1))
106 {
107     _powerState->addCallback("shutdownMon",
108                              std::bind(&ShutdownAlarmMonitor::powerStateChanged,
109                                        this, std::placeholders::_1));
110     findAlarms();
111 
112     if (_powerState->isPowerOn())
113     {
114         checkAlarms();
115 
116         // Get rid of any previous saved timestamps that don't
117         // apply anymore.
118         timestamps.prune(alarms);
119     }
120     else
121     {
122         timestamps.clear();
123     }
124 }
125 
126 void ShutdownAlarmMonitor::findAlarms()
127 {
128     // Find all shutdown threshold ifaces currently on D-Bus.
129     for (const auto& [shutdownType, interface] : shutdownInterfaces)
130     {
131         auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0);
132 
133         auto shutdownType2 = shutdownType;
134 
135         std::for_each(paths.begin(), paths.end(),
136                       [this, shutdownType2](const auto& path) {
137             alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high},
138                            nullptr);
139             alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low},
140                            nullptr);
141         });
142     }
143 }
144 
145 void ShutdownAlarmMonitor::checkAlarms()
146 {
147     for (auto& [alarmKey, timer] : alarms)
148     {
149         const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
150         const auto& interface = shutdownInterfaces.at(shutdownType);
151         auto propertyName = alarmProperties.at(shutdownType).at(alarmType);
152         bool value;
153 
154         try
155         {
156             value = SDBusPlus::getProperty<bool>(bus, sensorPath, interface,
157                                                  propertyName);
158         }
159         catch (const DBusServiceError& e)
160         {
161             // The sensor isn't on D-Bus anymore
162             log<level::INFO>(fmt::format("No {} interface on {} anymore.",
163                                          interface, sensorPath)
164                                  .c_str());
165             continue;
166         }
167 
168         checkAlarm(value, alarmKey);
169     }
170 }
171 
172 void ShutdownAlarmMonitor::propertiesChanged(sdbusplus::message_t& message)
173 {
174     std::map<std::string, std::variant<bool>> properties;
175     std::string interface;
176 
177     if (!_powerState->isPowerOn())
178     {
179         return;
180     }
181 
182     message.read(interface, properties);
183 
184     auto type = getShutdownType(interface);
185     if (!type)
186     {
187         return;
188     }
189 
190     std::string sensorPath = message.get_path();
191 
192     const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low);
193     if (properties.count(lowAlarmName) > 0)
194     {
195         AlarmKey alarmKey{sensorPath, *type, AlarmType::low};
196         auto alarm = alarms.find(alarmKey);
197         if (alarm == alarms.end())
198         {
199             alarms.emplace(alarmKey, nullptr);
200         }
201         checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey);
202     }
203 
204     const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high);
205     if (properties.count(highAlarmName) > 0)
206     {
207         AlarmKey alarmKey{sensorPath, *type, AlarmType::high};
208         auto alarm = alarms.find(alarmKey);
209         if (alarm == alarms.end())
210         {
211             alarms.emplace(alarmKey, nullptr);
212         }
213         checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey);
214     }
215 }
216 
217 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey)
218 {
219     auto alarm = alarms.find(alarmKey);
220     if (alarm == alarms.end())
221     {
222         return;
223     }
224 
225     // Start or stop the timer if necessary.
226     auto& timer = alarm->second;
227     if (value)
228     {
229         if (!timer)
230         {
231             startTimer(alarmKey);
232         }
233     }
234     else
235     {
236         if (timer)
237         {
238             stopTimer(alarmKey);
239         }
240     }
241 }
242 
243 void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey)
244 {
245     const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
246     const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);
247     std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)};
248     std::optional<double> value;
249 
250     auto alarm = alarms.find(alarmKey);
251     if (alarm == alarms.end())
252     {
253         throw std::runtime_error("Couldn't find alarm inside startTimer");
254     }
255 
256     try
257     {
258         value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
259                                                valueProperty);
260     }
261     catch (const DBusServiceError& e)
262     {
263         // If the sensor was just added, the Value interface for it may
264         // not be in the mapper yet.  This could only happen if the sensor
265         // application was started with power up and the value exceeded the
266         // threshold immediately.
267     }
268 
269     createEventLog(alarmKey, true, value);
270 
271     uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>(
272                        std::chrono::system_clock::now().time_since_epoch())
273                        .count();
274 
275     // If there is a saved timestamp for this timer, then we were restarted
276     // while the timer was running.  Calculate the remaining time to use
277     // for the timer.
278     auto previousStartTime = timestamps.get().find(alarmKey);
279     if (previousStartTime != timestamps.get().end())
280     {
281         const uint64_t& original = previousStartTime->second;
282 
283         log<level::INFO>(fmt::format("Found previously running {} timer "
284                                      "for {} with start time {}",
285                                      propertyName, sensorPath, original)
286                              .c_str());
287 
288         // Sanity check it isn't total garbage.
289         if (now > original)
290         {
291             uint64_t remainingTime = 0;
292             auto elapsedTime = now - original;
293 
294             if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count()))
295             {
296                 remainingTime = static_cast<uint64_t>(shutdownDelay.count()) -
297                                 elapsedTime;
298             }
299 
300             shutdownDelay = std::chrono::milliseconds{remainingTime};
301         }
302         else
303         {
304             log<level::WARNING>(
305                 fmt::format(
306                     "Restarting {} shutdown timer for {} for full "
307                     "time because saved time {} is after current time {}",
308                     propertyName, sensorPath, original, now)
309                     .c_str());
310         }
311     }
312 
313     log<level::INFO>(
314         fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}",
315                     shutdownDelay.count(), propertyName, sensorPath, *value)
316             .c_str());
317 
318     auto& timer = alarm->second;
319 
320     timer = std::make_unique<
321         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
322         event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey));
323 
324     timer->restartOnce(shutdownDelay);
325 
326     // Note that if this key is already in the timestamps map because
327     // the timer was already running the timestamp wil not be updated.
328     timestamps.add(alarmKey, now);
329 }
330 
331 void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey)
332 {
333     const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
334     const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);
335 
336     auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
337                                                 valueProperty);
338 
339     auto alarm = alarms.find(alarmKey);
340     if (alarm == alarms.end())
341     {
342         throw std::runtime_error("Couldn't find alarm inside stopTimer");
343     }
344 
345     createEventLog(alarmKey, false, value);
346 
347     log<level::INFO>(
348         fmt::format("Stopping {} shutdown timer due to sensor {} value {}",
349                     propertyName, sensorPath, value)
350             .c_str());
351 
352     auto& timer = alarm->second;
353     timer->setEnabled(false);
354     timer.reset();
355 
356     timestamps.erase(alarmKey);
357 }
358 
359 void ShutdownAlarmMonitor::createBmcDump() const
360 {
361     try
362     {
363         util::SDBusPlus::callMethod(
364             "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc",
365             "xyz.openbmc_project.Dump.Create", "CreateDump",
366             std::vector<
367                 std::pair<std::string, std::variant<std::string, uint64_t>>>());
368     }
369     catch (const std::exception& e)
370     {
371         auto message = fmt::format(
372             "Caught exception while creating BMC dump: {}", e.what());
373 
374         log<level::ERR>(message.c_str());
375     }
376 }
377 
378 void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey)
379 {
380     const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
381     const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);
382 
383     auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
384                                                 valueProperty);
385 
386     log<level::ERR>(
387         fmt::format(
388             "The {} shutdown timer expired for sensor {}, shutting down",
389             propertyName, sensorPath)
390             .c_str());
391 
392     // Re-send the event log.  If someone didn't want this it could be
393     // wrapped by a compile option.
394     createEventLog(alarmKey, true, value, true);
395 
396     SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface,
397                           "StartUnit", "obmc-chassis-hard-poweroff@0.target",
398                           "replace");
399 
400     timestamps.erase(alarmKey);
401     createBmcDump();
402 }
403 
404 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn)
405 {
406     if (powerStateOn)
407     {
408         checkAlarms();
409     }
410     else
411     {
412         timestamps.clear();
413 
414         // Cancel and delete all timers
415         std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) {
416             auto& timer = alarm.second;
417             if (timer)
418             {
419                 timer->setEnabled(false);
420                 timer.reset();
421             }
422         });
423     }
424 }
425 
426 void ShutdownAlarmMonitor::createEventLog(
427     const AlarmKey& alarmKey, bool alarmValue,
428     const std::optional<double>& sensorValue, bool isPowerOffError)
429 {
430     using namespace sdbusplus::xyz::openbmc_project::Logging::server;
431     const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
432     std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath},
433                                           {"_PID", std::to_string(getpid())}};
434 
435     std::string errorName =
436         (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType)
437                      : alarmClearEventLogs.at(shutdownType).at(alarmType);
438 
439     // severity = Critical if a power off
440     // severity = Error if alarm was asserted
441     // severity = Informational if alarm was deasserted
442     Entry::Level severity = Entry::Level::Error;
443     if (isPowerOffError)
444     {
445         severity = Entry::Level::Critical;
446     }
447     else if (!alarmValue)
448     {
449         severity = Entry::Level::Informational;
450     }
451 
452     if (sensorValue)
453     {
454         ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue));
455     }
456 
457     // If this is a power off, specify that it's a power
458     // fault and a system termination.  This is used by some
459     // implementations for service reasons.
460     if (isPowerOffError)
461     {
462         ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM");
463     }
464 
465     SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
466                           "Create", errorName, convertForMessage(severity), ad);
467 }
468 
469 std::optional<ShutdownType>
470     ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const
471 {
472     auto it = std::find_if(shutdownInterfaces.begin(), shutdownInterfaces.end(),
473                            [interface](const auto& a) {
474         return a.second == interface;
475     });
476 
477     if (it == shutdownInterfaces.end())
478     {
479         return std::nullopt;
480     }
481 
482     return it->first;
483 }
484 
485 } // namespace sensor::monitor
486