xref: /openbmc/openpower-occ-control/occ_errors.cpp (revision 9ed399d987706c5816f0f2ef850af347eac988d9)
1 #include "occ_errors.hpp"
2 
3 #include "elog-errors.hpp"
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <fmt/core.h>
8 #include <sys/ioctl.h>
9 #include <unistd.h>
10 
11 #include <org/open_power/OCC/Device/error.hpp>
12 #include <phosphor-logging/elog.hpp>
13 #include <phosphor-logging/log.hpp>
14 #include <xyz/openbmc_project/Common/error.hpp>
15 namespace open_power
16 {
17 namespace occ
18 {
19 
// Character found at the start of the error file when the OCC reports success
constexpr char NO_ERROR = '0';
22 
23 using namespace phosphor::logging;
24 using namespace sdbusplus::org::open_power::OCC::Device::Error;
25 using InternalFailure =
26     sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
27 
28 // Populate the file descriptor on the error file
29 void Error::openFile()
30 {
31     using namespace phosphor::logging;
32 
33     fd = open(file.c_str(), O_RDONLY | O_NONBLOCK);
34     const int open_errno = errno;
35     if (fd < 0)
36     {
37         log<level::ERR>(
38             fmt::format("Error::openFile: open failed (errno={})", open_errno)
39                 .c_str());
40         elog<OpenFailure>(phosphor::logging::org::open_power::OCC::Device::
41                               OpenFailure::CALLOUT_ERRNO(open_errno),
42                           phosphor::logging::org::open_power::OCC::Device::
43                               OpenFailure::CALLOUT_DEVICE_PATH(file.c_str()));
44     }
45 }
46 
47 // Attaches the FD to event loop and registers the callback handler
48 void Error::registerCallBack()
49 {
50     decltype(eventSource.get()) sourcePtr = nullptr;
51     auto r = sd_event_add_io(event.get(), &sourcePtr, fd, EPOLLPRI | EPOLLERR,
52                              processEvents, this);
53     eventSource.reset(sourcePtr);
54 
55     if (r < 0)
56     {
57         log<level::ERR>("Failed to register callback handler",
58                         entry("ERROR=%s", strerror(-r)));
59         elog<InternalFailure>();
60     }
61 }
62 
63 // Starts to watch for errors
64 void Error::addWatch(bool poll)
65 {
66     if (!watching)
67     {
68         // Open the file
69         openFile();
70 
71         if (poll)
72         {
73             // register the callback handler
74             registerCallBack();
75         }
76 
77         // Set we are watching the error
78         watching = true;
79     }
80 }
81 
82 // Stops watching for errors
83 void Error::removeWatch()
84 {
85     if (watching)
86     {
87         // Close the file
88         if (fd >= 0)
89         {
90             close(fd);
91         }
92 
93         // Reduce the reference count. Since there is only one instances
94         // of add_io, this will result empty loop
95         eventSource.reset();
96 
97         // We are no more watching the error
98         watching = false;
99     }
100 }
101 
102 // Callback handler when there is an activity on the FD
103 int Error::processEvents(sd_event_source* /*es*/, int /*fd*/,
104                          uint32_t /*revents*/, void* userData)
105 {
106     auto error = static_cast<Error*>(userData);
107 
108     error->analyzeEvent();
109     return 0;
110 }
111 
112 // Reads the error file and analyzes the data
113 void Error::analyzeEvent()
114 {
115     // Get the number of bytes to read
116     int len = -1;
117     auto r = ioctl(fd, FIONREAD, &len);
118     if (r < 0)
119     {
120         elog<ConfigFailure>(
121             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
122                 CALLOUT_ERRNO(errno),
123             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
124                 CALLOUT_DEVICE_PATH(file.c_str()));
125     }
126 
127     // A non-zero data indicates an error condition
128     // Let the caller take appropriate action on this
129     auto data = readFile(len);
130     bool error = !(data.empty() || data.front() == NO_ERROR);
131     if (callBack)
132     {
133         callBack(error);
134     }
135     return;
136 }
137 
138 // Reads so many bytes as passed in
139 std::string Error::readFile(int len) const
140 {
141     auto data = std::make_unique<char[]>(len + 1);
142     auto retries = 3;
143     auto delay = std::chrono::milliseconds{100};
144 
145     // OCC / FSI have intermittent issues so retry all reads
146     while (true)
147     {
148         // This file get created soon after binding. A value of 0 is
149         // deemed success and anything else is a Failure
150         // Since all the sysfs files would have size of 4096, if we read 0
151         // bytes -or- value '0', then it just means we are fine
152         auto r = read(fd, data.get(), len);
153         if (r < 0)
154         {
155             retries--;
156             if (retries == 0)
157             {
158                 elog<ReadFailure>(
159                     phosphor::logging::org::open_power::OCC::Device::
160                         ReadFailure::CALLOUT_ERRNO(errno),
161                     phosphor::logging::org::open_power::OCC::Device::
162                         ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
163                 break;
164             }
165             std::this_thread::sleep_for(delay);
166             continue;
167         }
168         break;
169     }
170     // Need to seek to START, else the poll returns immediately telling
171     // there is data to be read
172     auto r = lseek(fd, 0, SEEK_SET);
173     if (r < 0)
174     {
175         log<level::ERR>("Failure seeking error file to START");
176         elog<ConfigFailure>(
177             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
178                 CALLOUT_ERRNO(errno),
179             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
180                 CALLOUT_DEVICE_PATH(file.c_str()));
181     }
182     return std::string(data.get());
183 }
184 
185 } // namespace occ
186 } // namespace open_power
187