xref: /openbmc/openpower-occ-control/occ_errors.cpp (revision fa10621c961794ef39938d8812d205ef532e6c6a)
1 #include "occ_errors.hpp"
2 
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <fmt/core.h>
6 #include <sys/ioctl.h>
7 #include <unistd.h>
8 
9 #include <org/open_power/OCC/Device/error.hpp>
10 #include <phosphor-logging/elog-errors.hpp>
11 #include <phosphor-logging/elog.hpp>
12 #include <phosphor-logging/log.hpp>
13 #include <xyz/openbmc_project/Common/error.hpp>
14 namespace open_power
15 {
16 namespace occ
17 {
18 
19 using namespace phosphor::logging;
20 using namespace sdbusplus::org::open_power::OCC::Device::Error;
21 using InternalFailure =
22     sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
23 
24 // Populate the file descriptor on the error file
25 void Error::openFile()
26 {
27     using namespace phosphor::logging;
28 
29     fd = open(file.c_str(), O_RDONLY | O_NONBLOCK);
30     const int open_errno = errno;
31     if (fd < 0)
32     {
33         log<level::ERR>(
34             fmt::format("Error::openFile: open failed (errno={})", open_errno)
35                 .c_str());
36         elog<OpenFailure>(phosphor::logging::org::open_power::OCC::Device::
37                               OpenFailure::CALLOUT_ERRNO(open_errno),
38                           phosphor::logging::org::open_power::OCC::Device::
39                               OpenFailure::CALLOUT_DEVICE_PATH(file.c_str()));
40     }
41 }
42 
43 // Attaches the FD to event loop and registers the callback handler
44 void Error::registerCallBack()
45 {
46     decltype(eventSource.get()) sourcePtr = nullptr;
47     auto r = sd_event_add_io(event.get(), &sourcePtr, fd, EPOLLPRI | EPOLLERR,
48                              processEvents, this);
49     eventSource.reset(sourcePtr);
50 
51     if (r < 0)
52     {
53         log<level::ERR>("Failed to register callback handler",
54                         entry("ERROR=%s", strerror(-r)));
55         elog<InternalFailure>();
56     }
57 }
58 
59 // Starts to watch for errors
60 void Error::addWatch(bool poll)
61 {
62     if (!watching)
63     {
64         // Open the file
65         openFile();
66 
67         if (poll)
68         {
69             // register the callback handler
70             registerCallBack();
71         }
72 
73         // Set we are watching the error
74         watching = true;
75     }
76 }
77 
78 // Stops watching for errors
79 void Error::removeWatch()
80 {
81     if (watching)
82     {
83         // Close the file
84         if (fd >= 0)
85         {
86             close(fd);
87         }
88 
89         // Reduce the reference count. Since there is only one instances
90         // of add_io, this will result empty loop
91         eventSource.reset();
92 
93         // We are no more watching the error
94         watching = false;
95     }
96 }
97 
98 // Callback handler when there is an activity on the FD
99 int Error::processEvents(sd_event_source* /*es*/, int /*fd*/,
100                          uint32_t /*revents*/, void* userData)
101 {
102     auto error = static_cast<Error*>(userData);
103 
104     error->analyzeEvent();
105     return 0;
106 }
107 
108 // Reads the error file and analyzes the data
109 void Error::analyzeEvent()
110 {
111     // Get the number of bytes to read
112     int err = 0;
113     int len = -1;
114     auto r = ioctl(fd, FIONREAD, &len);
115     if (r < 0)
116     {
117         elog<ConfigFailure>(
118             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
119                 CALLOUT_ERRNO(errno),
120             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
121                 CALLOUT_DEVICE_PATH(file.c_str()));
122     }
123 
124     // A non-zero data indicates an error condition
125     // Let the caller take appropriate action on this
126     auto data = readFile(len);
127     if (!data.empty())
128         err = std::stoi(data, nullptr, 0);
129     if (callBack)
130     {
131         callBack(err);
132     }
133     return;
134 }
135 
136 // Reads so many bytes as passed in
137 std::string Error::readFile(int len) const
138 {
139     auto data = std::make_unique<char[]>(len + 1);
140     auto retries = 3;
141     auto delay = std::chrono::milliseconds{100};
142 
143     // OCC / FSI have intermittent issues so retry all reads
144     while (true)
145     {
146         // This file get created soon after binding. A value of 0 is
147         // deemed success and anything else is a Failure
148         // Since all the sysfs files would have size of 4096, if we read 0
149         // bytes -or- value '0', then it just means we are fine
150         auto r = read(fd, data.get(), len);
151         if (r < 0)
152         {
153             retries--;
154             if (retries == 0)
155             {
156                 elog<ReadFailure>(
157                     phosphor::logging::org::open_power::OCC::Device::
158                         ReadFailure::CALLOUT_ERRNO(errno),
159                     phosphor::logging::org::open_power::OCC::Device::
160                         ReadFailure::CALLOUT_DEVICE_PATH(file.c_str()));
161                 break;
162             }
163             std::this_thread::sleep_for(delay);
164             continue;
165         }
166         break;
167     }
168     // Need to seek to START, else the poll returns immediately telling
169     // there is data to be read
170     auto r = lseek(fd, 0, SEEK_SET);
171     if (r < 0)
172     {
173         log<level::ERR>("Failure seeking error file to START");
174         elog<ConfigFailure>(
175             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
176                 CALLOUT_ERRNO(errno),
177             phosphor::logging::org::open_power::OCC::Device::ConfigFailure::
178                 CALLOUT_DEVICE_PATH(file.c_str()));
179     }
180     return std::string(data.get());
181 }
182 
183 } // namespace occ
184 } // namespace open_power
185