xref: /openbmc/phosphor-fan-presence/sensor-monitor/shutdown_alarm_monitor.cpp (revision 3420426c93fe195e148b2ae9aefe2eed5afa2f47)
1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "shutdown_alarm_monitor.hpp"
19 
20 #include <unistd.h>
21 
22 #include <phosphor-logging/log.hpp>
23 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
24 
25 #include <format>
26 
27 namespace sensor::monitor
28 {
29 using namespace phosphor::logging;
30 using namespace phosphor::fan::util;
31 using namespace phosphor::fan;
32 namespace fs = std::filesystem;
33 
34 const std::map<ShutdownType, std::string> shutdownInterfaces{
35     {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"},
36     {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}};
37 
38 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{
39     {ShutdownType::hard,
40      {{AlarmType::low, "HardShutdownAlarmLow"},
41       {AlarmType::high, "HardShutdownAlarmHigh"}}},
42     {ShutdownType::soft,
43      {{AlarmType::low, "SoftShutdownAlarmLow"},
44       {AlarmType::high, "SoftShutdownAlarmHigh"}}}};
45 
46 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{
47     {ShutdownType::hard,
48      std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}},
49     {ShutdownType::soft,
50      std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}};
51 
52 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{
53     {ShutdownType::hard,
54      {{AlarmType::high,
55        "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"},
56       {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
57                        "HardShutdownAlarmLow"}}},
58     {ShutdownType::soft,
59      {{AlarmType::high,
60        "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"},
61       {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
62                        "SoftShutdownAlarmLow"}}}};
63 
64 const std::map<ShutdownType, std::map<AlarmType, std::string>>
65     alarmClearEventLogs{
66         {ShutdownType::hard,
67          {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
68                             "HardShutdownAlarmHighClear"},
69           {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
70                            "HardShutdownAlarmLowClear"}}},
71         {ShutdownType::soft,
72          {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error."
73                             "SoftShutdownAlarmHighClear"},
74           {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error."
75                            "SoftShutdownAlarmLowClear"}}}};
76 
// systemd manager object used to request the power off target.
constexpr auto systemdService = "org.freedesktop.systemd1";
constexpr auto systemdPath = "/org/freedesktop/systemd1";
constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager";

// Sensor interface/property read to report the current sensor value.
constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr auto valueProperty = "Value";

// Logging object used to create event logs.
constexpr auto loggingService = "xyz.openbmc_project.Logging";
constexpr auto loggingPath = "/xyz/openbmc_project/logging";
constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create";
85 
86 using namespace sdbusplus::bus::match;
87 
88 ShutdownAlarmMonitor::ShutdownAlarmMonitor(
89     sdbusplus::bus_t& bus, sdeventplus::Event& event,
90     std::shared_ptr<PowerState> powerState) :
91     bus(bus),
92     event(event), _powerState(std::move(powerState)),
93     hardShutdownMatch(bus,
94                       "type='signal',member='PropertiesChanged',"
95                       "path_namespace='/xyz/openbmc_project/sensors',"
96                       "arg0='" +
97                           shutdownInterfaces.at(ShutdownType::hard) + "'",
98                       std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
99                                 std::placeholders::_1)),
100     softShutdownMatch(bus,
101                       "type='signal',member='PropertiesChanged',"
102                       "path_namespace='/xyz/openbmc_project/sensors',"
103                       "arg0='" +
104                           shutdownInterfaces.at(ShutdownType::soft) + "'",
105                       std::bind(&ShutdownAlarmMonitor::propertiesChanged, this,
106                                 std::placeholders::_1))
107 {
108     _powerState->addCallback("shutdownMon",
109                              std::bind(&ShutdownAlarmMonitor::powerStateChanged,
110                                        this, std::placeholders::_1));
111     findAlarms();
112 
113     if (_powerState->isPowerOn())
114     {
115         checkAlarms();
116 
117         // Get rid of any previous saved timestamps that don't
118         // apply anymore.
119         timestamps.prune(alarms);
120     }
121     else
122     {
123         timestamps.clear();
124     }
125 }
126 
127 void ShutdownAlarmMonitor::findAlarms()
128 {
129     // Find all shutdown threshold ifaces currently on D-Bus.
130     for (const auto& [shutdownType, interface] : shutdownInterfaces)
131     {
132         auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0);
133 
134         auto shutdownType2 = shutdownType;
135 
136         std::for_each(paths.begin(), paths.end(),
137                       [this, shutdownType2](const auto& path) {
138             alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high},
139                            nullptr);
140             alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low},
141                            nullptr);
142         });
143     }
144 }
145 
/**
 * @brief Reads the current value of every known alarm property from
 *        D-Bus and starts or stops the matching shutdown timer.
 *
 * Alarms whose property read fails with a DBusServiceError are skipped.
 */
void ShutdownAlarmMonitor::checkAlarms()
{
    for (const auto& alarmEntry : alarms)
    {
        const auto& alarmKey = alarmEntry.first;
        const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
        const auto& interface = shutdownInterfaces.at(shutdownType);
        const auto& propertyName =
            alarmProperties.at(shutdownType).at(alarmType);

        bool alarmValue = false;
        try
        {
            alarmValue = SDBusPlus::getProperty<bool>(bus, sensorPath,
                                                      interface, propertyName);
        }
        catch (const DBusServiceError&)
        {
            // The sensor isn't on D-Bus anymore
            const auto msg = std::format("No {} interface on {} anymore.",
                                         interface, sensorPath);
            log<level::INFO>(msg.c_str());
            continue;
        }

        checkAlarm(alarmValue, alarmKey);
    }
}
172 
173 void ShutdownAlarmMonitor::propertiesChanged(sdbusplus::message_t& message)
174 {
175     std::map<std::string, std::variant<bool>> properties;
176     std::string interface;
177 
178     if (!_powerState->isPowerOn())
179     {
180         return;
181     }
182 
183     message.read(interface, properties);
184 
185     auto type = getShutdownType(interface);
186     if (!type)
187     {
188         return;
189     }
190 
191     std::string sensorPath = message.get_path();
192 
193     const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low);
194     if (properties.count(lowAlarmName) > 0)
195     {
196         AlarmKey alarmKey{sensorPath, *type, AlarmType::low};
197         auto alarm = alarms.find(alarmKey);
198         if (alarm == alarms.end())
199         {
200             alarms.emplace(alarmKey, nullptr);
201         }
202         checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey);
203     }
204 
205     const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high);
206     if (properties.count(highAlarmName) > 0)
207     {
208         AlarmKey alarmKey{sensorPath, *type, AlarmType::high};
209         auto alarm = alarms.find(alarmKey);
210         if (alarm == alarms.end())
211         {
212             alarms.emplace(alarmKey, nullptr);
213         }
214         checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey);
215     }
216 }
217 
218 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey)
219 {
220     auto alarm = alarms.find(alarmKey);
221     if (alarm == alarms.end())
222     {
223         return;
224     }
225 
226     // Start or stop the timer if necessary.
227     auto& timer = alarm->second;
228     if (value)
229     {
230         if (!timer)
231         {
232             startTimer(alarmKey);
233         }
234     }
235     else
236     {
237         if (timer)
238         {
239             stopTimer(alarmKey);
240         }
241     }
242 }
243 
/**
 * @brief Creates the alarm event log and starts the shutdown timer for
 *        an alarm.
 *
 * If a start timestamp was saved for this alarm, the timer is started
 * with only the remaining time, or with zero if the delay has already
 * elapsed.  The timestamp is saved after the timer is started.
 *
 * @param[in] alarmKey - The alarm key
 *
 * @throw std::runtime_error if the alarm is not in the alarms map
 */
void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);
    std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)};
    std::optional<double> value;

    auto alarm = alarms.find(alarmKey);
    if (alarm == alarms.end())
    {
        throw std::runtime_error("Couldn't find alarm inside startTimer");
    }

    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const DBusServiceError& e)
    {
        // If the sensor was just added, the Value interface for it may
        // not be in the mapper yet.  This could only happen if the sensor
        // application was started with power up and the value exceeded the
        // threshold immediately.  'value' stays empty in that case.
    }

    createEventLog(alarmKey, true, value);

    uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();

    // If there is a saved timestamp for this timer, then we were restarted
    // while the timer was running.  Calculate the remaining time to use
    // for the timer.
    auto previousStartTime = timestamps.get().find(alarmKey);
    if (previousStartTime != timestamps.get().end())
    {
        const uint64_t& original = previousStartTime->second;

        log<level::INFO>(std::format("Found previously running {} timer "
                                     "for {} with start time {}",
                                     propertyName, sensorPath, original)
                             .c_str());

        // Sanity check it isn't total garbage.
        if (now > original)
        {
            uint64_t remainingTime = 0;
            auto elapsedTime = now - original;

            if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count()))
            {
                remainingTime = static_cast<uint64_t>(shutdownDelay.count()) -
                                elapsedTime;
            }

            shutdownDelay = std::chrono::milliseconds{remainingTime};
        }
        else
        {
            log<level::WARNING>(
                std::format(
                    "Restarting {} shutdown timer for {} for full "
                    "time because saved time {} is after current time {}",
                    propertyName, sensorPath, original, now)
                    .c_str());
        }
    }

    // The sensor value may not have been readable (see the catch above),
    // so the optional must not be dereferenced unconditionally.
    const std::string valueText = value ? std::format("{}", *value)
                                        : std::string{"unknown"};

    log<level::INFO>(
        std::format("Starting {}ms {} shutdown timer due to sensor {} value {}",
                    shutdownDelay.count(), propertyName, sensorPath, valueText)
            .c_str());

    auto& timer = alarm->second;

    timer = std::make_unique<
        sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
        event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey));

    timer->restartOnce(shutdownDelay);

    // Note that if this key is already in the timestamps map because
    // the timer was already running the timestamp will not be updated.
    timestamps.add(alarmKey, now);
}
331 
/**
 * @brief Stops the shutdown timer for an alarm, removes its saved
 *        timestamp and creates the alarm-cleared event log.
 *
 * The timer is cancelled before anything else so that a failure while
 * reading the sensor value or creating the event log cannot leave it
 * running.  The sensor value is only used for reporting, so a failed
 * read is tolerated and the value is left out of the event log.
 *
 * @param[in] alarmKey - The alarm key
 *
 * @throw std::runtime_error if the alarm is not in the alarms map
 */
void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);

    auto alarm = alarms.find(alarmKey);
    if (alarm == alarms.end())
    {
        throw std::runtime_error("Couldn't find alarm inside stopTimer");
    }

    // Cancel the shutdown first; everything after this is reporting.
    auto& timer = alarm->second;
    if (timer)
    {
        timer->setEnabled(false);
        timer.reset();
    }

    timestamps.erase(alarmKey);

    std::optional<double> value;
    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const std::exception& e)
    {
        log<level::INFO>(
            std::format("Unable to read value of sensor {}: {}", sensorPath,
                        e.what())
                .c_str());
    }

    createEventLog(alarmKey, false, value);

    const std::string valueText = value ? std::format("{}", *value)
                                        : std::string{"unknown"};

    log<level::INFO>(
        std::format("Stopping {} shutdown timer due to sensor {} value {}",
                    propertyName, sensorPath, valueText)
            .c_str());
}
359 
/**
 * @brief Asks the dump manager to create a BMC dump.
 *
 * Any failure is logged and otherwise ignored.
 */
void ShutdownAlarmMonitor::createBmcDump() const
{
    // CreateDump is called with an empty parameter list.
    using DumpParams = std::vector<
        std::pair<std::string, std::variant<std::string, uint64_t>>>;

    try
    {
        util::SDBusPlus::callMethod(
            "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc",
            "xyz.openbmc_project.Dump.Create", "CreateDump", DumpParams());
    }
    catch (const std::exception& e)
    {
        log<level::ERR>(
            std::format("Caught exception while creating BMC dump: {}",
                        e.what())
                .c_str());
    }
}
378 
/**
 * @brief Shutdown timer callback.  Creates the power off event log,
 *        requests the hard power off target from systemd, removes the
 *        saved timestamp and requests a BMC dump.
 *
 * The sensor value is only used in the event log, so a failure to read
 * it must not prevent the power off from being requested.  In that case
 * the event log is created without a sensor value.
 *
 * @param[in] alarmKey - The alarm key
 */
void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey)
{
    const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
    const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType);

    std::optional<double> value;
    try
    {
        value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface,
                                               valueProperty);
    }
    catch (const std::exception& e)
    {
        log<level::INFO>(
            std::format("Unable to read value of sensor {}: {}", sensorPath,
                        e.what())
                .c_str());
    }

    log<level::ERR>(
        std::format(
            "The {} shutdown timer expired for sensor {}, shutting down",
            propertyName, sensorPath)
            .c_str());

    // Re-send the event log.  If someone didn't want this it could be
    // wrapped by a compile option.
    createEventLog(alarmKey, true, value, true);

    SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface,
                          "StartUnit", "obmc-chassis-hard-poweroff@0.target",
                          "replace");

    timestamps.erase(alarmKey);
    createBmcDump();
}
404 
405 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn)
406 {
407     if (powerStateOn)
408     {
409         checkAlarms();
410     }
411     else
412     {
413         timestamps.clear();
414 
415         // Cancel and delete all timers
416         std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) {
417             auto& timer = alarm.second;
418             if (timer)
419             {
420                 timer->setEnabled(false);
421                 timer.reset();
422             }
423         });
424     }
425 }
426 
427 void ShutdownAlarmMonitor::createEventLog(
428     const AlarmKey& alarmKey, bool alarmValue,
429     const std::optional<double>& sensorValue, bool isPowerOffError)
430 {
431     using namespace sdbusplus::xyz::openbmc_project::Logging::server;
432     const auto& [sensorPath, shutdownType, alarmType] = alarmKey;
433     std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath},
434                                           {"_PID", std::to_string(getpid())}};
435 
436     std::string errorName =
437         (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType)
438                      : alarmClearEventLogs.at(shutdownType).at(alarmType);
439 
440     // severity = Critical if a power off
441     // severity = Error if alarm was asserted
442     // severity = Informational if alarm was deasserted
443     Entry::Level severity = Entry::Level::Error;
444     if (isPowerOffError)
445     {
446         severity = Entry::Level::Critical;
447     }
448     else if (!alarmValue)
449     {
450         severity = Entry::Level::Informational;
451     }
452 
453     if (sensorValue)
454     {
455         ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue));
456     }
457 
458     // If this is a power off, specify that it's a power
459     // fault and a system termination.  This is used by some
460     // implementations for service reasons.
461     if (isPowerOffError)
462     {
463         ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM");
464     }
465 
466     SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
467                           "Create", errorName, convertForMessage(severity), ad);
468 }
469 
470 std::optional<ShutdownType>
471     ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const
472 {
473     auto it = std::find_if(
474         shutdownInterfaces.begin(), shutdownInterfaces.end(),
475         [interface](const auto& a) { return a.second == interface; });
476 
477     if (it == shutdownInterfaces.end())
478     {
479         return std::nullopt;
480     }
481 
482     return it->first;
483 }
484 
485 } // namespace sensor::monitor
486