1 #include "NVMeBasicContext.hpp"
2
3 #include "NVMeContext.hpp"
4 #include "NVMeSensor.hpp"
5
6 #include <endian.h>
7 #include <sys/ioctl.h>
8 #include <unistd.h>
9
10 #include <FileHandle.hpp>
11 #include <boost/asio/buffer.hpp>
12 #include <boost/asio/error.hpp>
13 #include <boost/asio/io_context.hpp>
14 #include <boost/asio/read.hpp>
15 #include <boost/asio/streambuf.hpp>
16 #include <boost/asio/write.hpp>
17 #include <phosphor-logging/lg2.hpp>
18 #include <phosphor-logging/lg2/flags.hpp>
19
20 #include <array>
21 #include <cerrno>
22 #include <chrono>
23 #include <cmath>
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstring>
27 #include <filesystem>
28 #include <iostream>
29 #include <iterator>
30 #include <limits>
31 #include <memory>
32 #include <stdexcept>
33 #include <string>
34 #include <system_error>
35 #include <thread>
36 #include <utility>
37 #include <vector>
38
39 extern "C"
40 {
41 #include <i2c/smbus.h>
42 #include <linux/i2c-dev.h>
43 }
44
45 /*
46 * NVMe-MI Basic Management Command
47 *
48 * https://nvmexpress.org/wp-content/uploads/NVMe_Management_-_Technical_Note_on_Basic_Management_Command.pdf
49 */
50
/**
 * Serialise a basic-management query into its fixed six-byte request frame.
 *
 * Frame layout: the bus number as a 32-bit little-endian integer, then the
 * device byte, then the offset byte.
 *
 * @param bus     bus number to encode; must not be negative
 * @param device  byte stored immediately after the bus number
 * @param offset  byte stored last in the frame
 * @return shared buffer owning the encoded frame
 * @throws std::domain_error if bus is negative
 */
static std::shared_ptr<std::array<uint8_t, 6>> encodeBasicQuery(
    int bus, uint8_t device, uint8_t offset)
{
    if (bus < 0)
    {
        throw std::domain_error("Invalid bus argument");
    }

    constexpr auto busFieldLen = sizeof(uint32_t);
    const uint32_t wireBus = htole32(static_cast<uint32_t>(bus));

    /* bus + address + command */
    auto frame = std::make_shared<std::array<uint8_t, busFieldLen + 2>>();
    uint8_t* const bytes = frame->data();
    memcpy(bytes, &wireBus, busFieldLen);
    bytes[busFieldLen] = device;
    bytes[busFieldLen + 1] = offset;

    return frame;
}
69
/**
 * Unpack a six-byte request frame into its three fields.
 *
 * The first four bytes are read as a little-endian 32-bit bus number; the
 * following two bytes are the device and offset values respectively.
 *
 * @param req     frame to decode
 * @param bus     receives the bus number converted to host byte order
 * @param device  receives the fifth byte of the frame
 * @param offset  receives the sixth byte of the frame
 */
static void decodeBasicQuery(const std::array<uint8_t, 6>& req, int& bus,
                             uint8_t& device, uint8_t& offset)
{
    constexpr auto busFieldLen = sizeof(uint32_t);

    /* Copy out of the byte array rather than casting: no alignment guarantee */
    uint32_t wireBus = 0;
    memcpy(&wireBus, req.data(), busFieldLen);

    bus = static_cast<int>(le32toh(wireBus));
    device = req[busFieldLen];
    offset = req[busFieldLen + 1];
}
80
execBasicQuery(int bus,uint8_t addr,uint8_t cmd,std::vector<uint8_t> & resp)81 static void execBasicQuery(int bus, uint8_t addr, uint8_t cmd,
82 std::vector<uint8_t>& resp)
83 {
84 int32_t size = 0;
85 std::filesystem::path devpath = "/dev/i2c-" + std::to_string(bus);
86
87 try
88 {
89 FileHandle fileHandle(devpath);
90
91 /* Select the target device */
92 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
93 if (::ioctl(fileHandle.handle(), I2C_SLAVE, addr) == -1)
94 {
95 lg2::error(
96 "Failed to configure device address '{ADDR}' for bus '{BUS}': ERRNO",
97 "ADDR", lg2::hex, addr, "BUS", bus, "ERRNO", lg2::hex, errno);
98 resp.resize(0);
99 return;
100 }
101
102 resp.resize(UINT8_MAX + 1);
103
104 /* Issue the NVMe MI basic command */
105 size = i2c_smbus_read_block_data(fileHandle.handle(), cmd, resp.data());
106 if (size < 0)
107 {
108 lg2::error(
109 "Failed to read block data from device '{ADDR}' on bus '{BUS}': ERRNO",
110 "ADDR", lg2::hex, addr, "BUS", bus, "ERRNO", lg2::hex, errno);
111 resp.resize(0);
112 }
113 else if (size > UINT8_MAX + 1)
114 {
115 lg2::error(
116 "Unexpected message length from device '{ADDR}' on bus '{BUS}': '{SIZE}' ({MAX})",
117 "ADDR", lg2::hex, addr, "BUS", bus, "SIZE", size, "MAX",
118 UINT8_MAX);
119 resp.resize(0);
120 }
121 else
122 {
123 resp.resize(size);
124 }
125 }
126 catch (const std::out_of_range& e)
127 {
128 lg2::error("Failed to create file handle for bus '{BUS}': '{ERR}'",
129 "BUS", bus, "ERR", e);
130 resp.resize(0);
131 }
132 }
133
processBasicQueryStream(FileHandle & in,FileHandle & out)134 static ssize_t processBasicQueryStream(FileHandle& in, FileHandle& out)
135 {
136 std::vector<uint8_t> resp{};
137 ssize_t rc = 0;
138
139 while (true)
140 {
141 uint8_t device = 0;
142 uint8_t offset = 0;
143 uint8_t len = 0;
144 int bus = 0;
145
146 /* bus + address + command */
147 std::array<uint8_t, sizeof(uint32_t) + 1 + 1> req{};
148
149 /* Read the command parameters */
150 ssize_t rc = ::read(in.handle(), req.data(), req.size());
151 if (rc != static_cast<ssize_t>(req.size()))
152 {
153 lg2::error(
154 "Failed to read request from in descriptor '{ERROR_MESSAGE}'",
155 "ERROR_MESSAGE", strerror(errno));
156 if (rc != 0)
157 {
158 return -errno;
159 }
160 return -EIO;
161 }
162
163 decodeBasicQuery(req, bus, device, offset);
164
165 /* Execute the query */
166 execBasicQuery(bus, device, offset, resp);
167
168 /* Write out the response length */
169 len = resp.size();
170 rc = ::write(out.handle(), &len, sizeof(len));
171 if (rc != sizeof(len))
172 {
173 lg2::error(
174 "Failed to write block ({LEN}) length to out descriptor: '{ERRNO}'",
175 "LEN", len, "ERRNO", strerror(static_cast<int>(-rc)));
176 if (rc != 0)
177 {
178 return -errno;
179 }
180 return -EIO;
181 }
182
183 /* Write out the response data */
184 std::vector<uint8_t>::iterator cursor = resp.begin();
185 while (cursor != resp.end())
186 {
187 size_t lenRemaining = std::distance(cursor, resp.end());
188 ssize_t egress = ::write(out.handle(), &(*cursor), lenRemaining);
189 if (egress == -1)
190 {
191 lg2::error(
192 "Failed to write block data of length '{LEN}' to out pipe: '{ERROR_MESSAGE}'",
193 "LEN", lenRemaining, "ERROR_MESSAGE", strerror(errno));
194 if (rc != 0)
195 {
196 return -errno;
197 }
198 return -EIO;
199 }
200
201 cursor += egress;
202 }
203 }
204
205 return rc;
206 }
207
208 /* Throws std::error_code on failure */
209 /* FIXME: Probably shouldn't do fallible stuff in a constructor */
NVMeBasicContext(boost::asio::io_context & io,int rootBus)210 NVMeBasicContext::NVMeBasicContext(boost::asio::io_context& io, int rootBus) :
211 NVMeContext::NVMeContext(io, rootBus), io(io), reqStream(io), respStream(io)
212 {
213 std::array<int, 2> responsePipe{};
214 std::array<int, 2> requestPipe{};
215
216 /* Set up inter-thread communication */
217 if (::pipe(requestPipe.data()) == -1)
218 {
219 lg2::error("Failed to create request pipe: '{ERROR}'", "ERROR",
220 strerror(errno));
221 throw std::error_code(errno, std::system_category());
222 }
223
224 if (::pipe(responsePipe.data()) == -1)
225 {
226 lg2::error("Failed to create response pipe: '{ERROR}'", "ERROR",
227 strerror(errno));
228
229 if (::close(requestPipe[0]) == -1)
230 {
231 lg2::error("Failed to close write fd of request pipe '{ERROR}'",
232 "ERROR", strerror(errno));
233 }
234
235 if (::close(requestPipe[1]) == -1)
236 {
237 lg2::error("Failed to close read fd of request pipe '{ERROR}'",
238 "ERROR", strerror(errno));
239 }
240
241 throw std::error_code(errno, std::system_category());
242 }
243
244 reqStream.assign(requestPipe[1]);
245 FileHandle streamIn(requestPipe[0]);
246 FileHandle streamOut(responsePipe[1]);
247 respStream.assign(responsePipe[0]);
248
249 thread = std::jthread([streamIn{std::move(streamIn)},
250 streamOut{std::move(streamOut)}]() mutable {
251 ssize_t rc = processBasicQueryStream(streamIn, streamOut);
252
253 if (rc < 0)
254 {
255 lg2::error("Failure while processing query stream: '{ERROR}'",
256 "ERROR", strerror(static_cast<int>(-rc)));
257 }
258
259 lg2::error("Terminating basic query thread");
260 });
261 }
262
readAndProcessNVMeSensor()263 void NVMeBasicContext::readAndProcessNVMeSensor()
264 {
265 if (pollCursor == sensors.end())
266 {
267 this->pollNVMeDevices();
268 return;
269 }
270
271 std::shared_ptr<NVMeSensor> sensor = *pollCursor++;
272
273 if (!sensor->readingStateGood())
274 {
275 sensor->markAvailable(false);
276 sensor->updateValue(std::numeric_limits<double>::quiet_NaN());
277 readAndProcessNVMeSensor();
278 return;
279 }
280
281 /* Potentially defer sampling the sensor sensor if it is in error */
282 if (!sensor->sample())
283 {
284 readAndProcessNVMeSensor();
285 return;
286 }
287
288 auto command = encodeBasicQuery(sensor->bus, sensor->address, 0x00);
289
290 /* Issue the request */
291 boost::asio::async_write(
292 reqStream, boost::asio::buffer(command->data(), command->size()),
293 [command](boost::system::error_code ec, std::size_t) {
294 if (ec)
295 {
296 lg2::error("Got error writing basic query: '{ERROR_MESSAGE}'",
297 "ERROR_MESSAGE", ec.message());
298 }
299 });
300
301 auto response = std::make_shared<boost::asio::streambuf>();
302 response->prepare(1);
303
304 /* Gather the response and dispatch for parsing */
305 boost::asio::async_read(
306 respStream, *response,
307 [response](const boost::system::error_code& ec, std::size_t n) {
308 if (ec)
309 {
310 lg2::error(
311 "Got error completing basic query: '{ERROR_MESSAGE}'",
312 "ERROR_MESSAGE", ec.message());
313 return static_cast<std::size_t>(0);
314 }
315
316 if (n == 0)
317 {
318 return static_cast<std::size_t>(1);
319 }
320
321 std::istream is(response.get());
322 size_t len = static_cast<std::size_t>(is.peek());
323
324 if (n > len + 1)
325 {
326 lg2::error(
327 "Query stream has become unsynchronised: n: {N}, len: {LEN}",
328 "N", n, "LEN", len);
329 return static_cast<std::size_t>(0);
330 }
331
332 if (n == len + 1)
333 {
334 return static_cast<std::size_t>(0);
335 }
336
337 if (n > 1)
338 {
339 return len + 1 - n;
340 }
341
342 response->prepare(len);
343 return len;
344 },
345 [weakSelf{weak_from_this()}, sensor, response](
346 const boost::system::error_code& ec, std::size_t length) mutable {
347 if (ec)
348 {
349 lg2::error("Got error reading basic query: '{ERROR_MESSAGE}'",
350 "ERROR_MESSAGE", ec.message());
351 return;
352 }
353
354 if (length == 0)
355 {
356 lg2::error("Invalid message length: '{LEN}'", "LEN", length);
357 return;
358 }
359
360 if (auto self = weakSelf.lock())
361 {
362 /* Deserialise the response */
363 response->consume(1); /* Drop the length byte */
364 std::istream is(response.get());
365 std::vector<char> data(response->size());
366 is.read(data.data(), response->size());
367
368 /* Update the sensor */
369 self->processResponse(sensor, data.data(), data.size());
370
371 /* Enqueue processing of the next sensor */
372 self->readAndProcessNVMeSensor();
373 }
374 });
375 }
376
pollNVMeDevices()377 void NVMeBasicContext::pollNVMeDevices()
378 {
379 pollCursor = sensors.begin();
380
381 scanTimer.expires_after(std::chrono::seconds(1));
382 scanTimer.async_wait([weakSelf{weak_from_this()}](
383 const boost::system::error_code errorCode) {
384 if (errorCode == boost::asio::error::operation_aborted)
385 {
386 return;
387 }
388
389 if (errorCode)
390 {
391 lg2::error("error code: '{ERROR_MESSAGE}'", "ERROR_MESSAGE",
392 errorCode.message());
393 return;
394 }
395
396 if (auto self = weakSelf.lock())
397 {
398 self->readAndProcessNVMeSensor();
399 }
400 });
401 }
402
/**
 * Convert the composite temperature byte of a basic management response to
 * degrees Celsius.
 *
 * @param reading  temperature byte interpreted as a two's complement value
 * @return the temperature in degrees Celsius, or a quiet NaN when the byte
 *         is not a measurement (0x80 through 0xC3)
 */
static double getTemperatureReading(int8_t reading)
{
    /* Compare against the raw byte so the ranges read as in the spec table */
    const auto raw = static_cast<uint8_t>(reading);

    // 0x80        = No temperature data or temperature data is more than
    //               5 s old
    // 0x81        = Temperature sensor failure
    // 0x82 - 0xC3 = Reserved; would otherwise decode as -126..-61 degrees
    constexpr uint8_t firstInvalid = 0x80;
    constexpr uint8_t lastInvalid = 0xC3;
    if (raw >= firstInvalid && raw <= lastInvalid)
    {
        return std::numeric_limits<double>::quiet_NaN();
    }

    // 0x00 - 0x7F and 0xC4 - 0xFF are the two's complement temperature
    return reading;
}
415
processResponse(std::shared_ptr<NVMeSensor> & sensor,void * msg,size_t len)416 void NVMeBasicContext::processResponse(std::shared_ptr<NVMeSensor>& sensor,
417 void* msg, size_t len)
418 {
419 if (msg == nullptr || len < 6)
420 {
421 sensor->incrementError();
422 return;
423 }
424
425 uint8_t* messageData = static_cast<uint8_t*>(msg);
426
427 uint8_t status = messageData[0];
428 if (((status & NVME_MI_BASIC_SFLGS_DRIVE_NOT_READY) != 0) ||
429 ((status & NVME_MI_BASIC_SFLGS_DRIVE_FUNCTIONAL) == 0))
430 {
431 sensor->markFunctional(false);
432 return;
433 }
434
435 double value = getTemperatureReading(messageData[2]);
436 if (!std::isfinite(value))
437 {
438 sensor->incrementError();
439 return;
440 }
441
442 sensor->updateValue(value);
443 }
444