xref: /openbmc/openpower-occ-control/occ_errors.cpp (revision f6992a4d60297e991d413f8f5f537dd1054030dd)
1 #include "occ_errors.hpp"
2 
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <sys/ioctl.h>
6 #include <unistd.h>
7 
8 #include <org/open_power/OCC/Device/error.hpp>
9 #include <phosphor-logging/elog-errors.hpp>
10 #include <phosphor-logging/elog.hpp>
11 #include <phosphor-logging/log.hpp>
12 #include <xyz/openbmc_project/Common/error.hpp>
13 
14 #include <format>
15 namespace open_power
16 {
17 namespace occ
18 {
19 
20 using namespace phosphor::logging;
21 using namespace sdbusplus::org::open_power::OCC::Device::Error;
22 using InternalFailure =
23     sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
24 
// Opens the error file read-only and non-blocking, storing the resulting
// descriptor in `fd`.
//
// When open() fails, the errno it produced is logged and then reported
// through elog<OpenFailure>, with that errno and the file path attached as
// callout metadata.
void Error::openFile()
{
    namespace DeviceMeta = phosphor::logging::org::open_power::OCC::Device;

    fd = open(file.c_str(), O_RDONLY | O_NONBLOCK);
    // Snapshot errno immediately so the logging below cannot disturb it.
    const int open_errno = errno;
    if (fd >= 0)
    {
        return;
    }

    const auto message =
        std::format("Error::openFile: open failed (errno={})", open_errno);
    log<level::ERR>(message.c_str());
    elog<OpenFailure>(
        DeviceMeta::OpenFailure::CALLOUT_ERRNO(open_errno),
        DeviceMeta::OpenFailure::CALLOUT_DEVICE_PATH(file.c_str()));
}
43 
44 // Attaches the FD to event loop and registers the callback handler
45 void Error::registerCallBack()
46 {
47     decltype(eventSource.get()) sourcePtr = nullptr;
48     auto r = sd_event_add_io(event.get(), &sourcePtr, fd, EPOLLPRI | EPOLLERR,
49                              processEvents, this);
50     eventSource.reset(sourcePtr);
51 
52     if (r < 0)
53     {
54         log<level::ERR>("Failed to register callback handler",
55                         entry("ERROR=%s", strerror(-r)));
56         elog<InternalFailure>();
57     }
58 }
59 
60 // Starts to watch for errors
61 void Error::addWatch(bool poll)
62 {
63     if (!watching)
64     {
65         // Open the file
66         openFile();
67 
68         if (poll)
69         {
70             // register the callback handler
71             registerCallBack();
72         }
73 
74         // Set we are watching the error
75         watching = true;
76     }
77 }
78 
79 // Stops watching for errors
80 void Error::removeWatch()
81 {
82     if (watching)
83     {
84         // Close the file
85         if (fd >= 0)
86         {
87             close(fd);
88         }
89 
90         // Reduce the reference count. Since there is only one instances
91         // of add_io, this will result empty loop
92         eventSource.reset();
93 
94         // We are no more watching the error
95         watching = false;
96     }
97 }
98 
99 // Callback handler when there is an activity on the FD
100 int Error::processEvents(sd_event_source* /*es*/, int /*fd*/,
101                          uint32_t /*revents*/, void* userData)
102 {
103     auto error = static_cast<Error*>(userData);
104 
105     error->analyzeEvent();
106     return 0;
107 }
108 
109 // Reads the error file and analyzes the data
110 void Error::analyzeEvent()
111 {
112     // Get the number of bytes to read
113     int err = 0;
114     int len = -1;
115     auto r = ioctl(fd, FIONREAD, &len);
116     if (r < 0)
117     {
118         elog<ConfigFailure>(
119             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
120                 CALLOUT_ERRNO(errno),
121             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
122                 CALLOUT_DEVICE_PATH(file.c_str()));
123     }
124 
125     // A non-zero data indicates an error condition
126     // Let the caller take appropriate action on this
127     auto data = readFile(len);
128     if (!data.empty())
129         err = std::stoi(data, nullptr, 0);
130     if (callBack)
131     {
132         callBack(err);
133     }
134     return;
135 }
136 
137 // Reads so many bytes as passed in
138 std::string Error::readFile(int len) const
139 {
140     auto data = std::make_unique<char[]>(len + 1);
141     auto retries = 3;
142     auto delay = std::chrono::milliseconds{100};
143 
144     // OCC / FSI have intermittent issues so retry all reads
145     while (true)
146     {
147         // This file get created soon after binding. A value of 0 is
148         // deemed success and anything else is a Failure
149         // Since all the sysfs files would have size of 4096, if we read 0
150         // bytes -or- value '0', then it just means we are fine
151         auto r = read(fd, data.get(), len);
152         if (r < 0)
153         {
154             retries--;
155             if (retries == 0)
156             {
157                 elog<ReadFailure>(
158                     phosphor::logging::org::open_power::OCC::Device::
159                         ReadFailure::CALLOUT_ERRNO(errno),
160                     phosphor::logging::org::open_power::OCC::Device::
161                         ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
162                 break;
163             }
164             std::this_thread::sleep_for(delay);
165             continue;
166         }
167         break;
168     }
169     // Need to seek to START, else the poll returns immediately telling
170     // there is data to be read
171     auto r = lseek(fd, 0, SEEK_SET);
172     if (r < 0)
173     {
174         log<level::ERR>("Failure seeking error file to START");
175         elog<ConfigFailure>(
176             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
177                 CALLOUT_ERRNO(errno),
178             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
179                 CALLOUT_DEVICE_PATH(file.c_str()));
180     }
181     return std::string(data.get());
182 }
183 
184 } // namespace occ
185 } // namespace open_power
186