1 #include "NVMeBasicContext.hpp" 2 3 #include "NVMeContext.hpp" 4 #include "NVMeSensor.hpp" 5 6 #include <endian.h> 7 #include <sys/ioctl.h> 8 #include <unistd.h> 9 10 #include <FileHandle.hpp> 11 #include <boost/asio/buffer.hpp> 12 #include <boost/asio/error.hpp> 13 #include <boost/asio/io_context.hpp> 14 #include <boost/asio/read.hpp> 15 #include <boost/asio/streambuf.hpp> 16 #include <boost/asio/write.hpp> 17 18 #include <array> 19 #include <cerrno> 20 #include <chrono> 21 #include <cmath> 22 #include <cstdint> 23 #include <cstdio> 24 #include <cstring> 25 #include <filesystem> 26 #include <ios> 27 #include <iostream> 28 #include <iterator> 29 #include <limits> 30 #include <memory> 31 #include <stdexcept> 32 #include <string> 33 #include <system_error> 34 #include <thread> 35 #include <utility> 36 #include <vector> 37 38 extern "C" 39 { 40 #include <i2c/smbus.h> 41 #include <linux/i2c-dev.h> 42 } 43 44 /* 45 * NVMe-MI Basic Management Command 46 * 47 * https://nvmexpress.org/wp-content/uploads/NVMe_Management_-_Technical_Note_on_Basic_Management_Command.pdf 48 */ 49 50 static std::shared_ptr<std::array<uint8_t, 6>> 51 encodeBasicQuery(int bus, uint8_t device, uint8_t offset) 52 { 53 if (bus < 0) 54 { 55 throw std::domain_error("Invalid bus argument"); 56 } 57 58 /* bus + address + command */ 59 uint32_t busle = htole32(static_cast<uint32_t>(bus)); 60 auto command = 61 std::make_shared<std::array<uint8_t, sizeof(busle) + 1 + 1>>(); 62 memcpy(command->data(), &busle, sizeof(busle)); 63 (*command)[sizeof(busle) + 0] = device; 64 (*command)[sizeof(busle) + 1] = offset; 65 66 return command; 67 } 68 69 static void decodeBasicQuery(const std::array<uint8_t, 6>& req, int& bus, 70 uint8_t& device, uint8_t& offset) 71 { 72 uint32_t busle = 0; 73 74 memcpy(&busle, req.data(), sizeof(busle)); 75 bus = le32toh(busle); 76 device = req[sizeof(busle) + 0]; 77 offset = req[sizeof(busle) + 1]; 78 } 79 80 static void execBasicQuery(int bus, uint8_t addr, uint8_t cmd, 81 std::vector<uint8_t>& resp) 82 { 83 int32_t size = 0; 84 std::filesystem::path devpath = "/dev/i2c-" + std::to_string(bus); 85 86 try 87 { 88 FileHandle fileHandle(devpath); 89 90 /* Select the target device */ 91 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) 92 if (::ioctl(fileHandle.handle(), I2C_SLAVE, addr) == -1) 93 { 94 std::cerr << "Failed to configure device address 0x" << std::hex 95 << (int)addr << " for bus " << std::dec << bus << ": " 96 << strerror(errno) << "\n"; 97 resp.resize(0); 98 return; 99 } 100 101 resp.resize(UINT8_MAX + 1); 102 103 /* Issue the NVMe MI basic command */ 104 size = i2c_smbus_read_block_data(fileHandle.handle(), cmd, resp.data()); 105 if (size < 0) 106 { 107 std::cerr << "Failed to read block data from device 0x" << std::hex 108 << (int)addr << " on bus " << std::dec << bus << ": " 109 << strerror(errno) << "\n"; 110 resp.resize(0); 111 } 112 else if (size > UINT8_MAX + 1) 113 { 114 std::cerr << "Unexpected message length from device 0x" << std::hex 115 << (int)addr << " on bus " << std::dec << bus << ": " 116 << size << " (" << UINT8_MAX << ")\n"; 117 resp.resize(0); 118 } 119 else 120 { 121 resp.resize(size); 122 } 123 } 124 catch (const std::out_of_range& e) 125 { 126 std::cerr << "Failed to create file handle for bus " << std::dec << bus 127 << ": " << e.what() << "\n"; 128 resp.resize(0); 129 } 130 } 131 132 static ssize_t processBasicQueryStream(FileHandle& in, FileHandle& out) 133 { 134 std::vector<uint8_t> resp{}; 135 ssize_t rc = 0; 136 137 while (true) 138 { 139 uint8_t device = 0; 140 uint8_t offset = 0; 141 uint8_t len = 0; 142 int bus = 0; 143 144 /* bus + address + command */ 145 std::array<uint8_t, sizeof(uint32_t) + 1 + 1> req{}; 146 147 /* Read the command parameters */ 148 ssize_t rc = ::read(in.handle(), req.data(), req.size()); 149 if (rc != static_cast<ssize_t>(req.size())) 150 { 151 std::cerr << "Failed to read request from in descriptor " 152 << strerror(errno) << "\n"; 153 if (rc != 0) 154 { 155 return -errno; 156 } 157 return -EIO; 158 } 159 160 decodeBasicQuery(req, bus, device, offset); 161 162 /* Execute the query */ 163 execBasicQuery(bus, device, offset, resp); 164 165 /* Write out the response length */ 166 len = resp.size(); 167 rc = ::write(out.handle(), &len, sizeof(len)); 168 if (rc != sizeof(len)) 169 { 170 std::cerr << "Failed to write block (" << std::dec << len 171 << ") length to out descriptor: " 172 << strerror(static_cast<int>(-rc)) << "\n"; 173 if (rc != 0) 174 { 175 return -errno; 176 } 177 return -EIO; 178 } 179 180 /* Write out the response data */ 181 std::vector<uint8_t>::iterator cursor = resp.begin(); 182 while (cursor != resp.end()) 183 { 184 size_t lenRemaining = std::distance(cursor, resp.end()); 185 ssize_t egress = ::write(out.handle(), &(*cursor), lenRemaining); 186 if (egress == -1) 187 { 188 std::cerr << "Failed to write block data of length " << std::dec 189 << lenRemaining << " to out pipe: " << strerror(errno) 190 << "\n"; 191 if (rc != 0) 192 { 193 return -errno; 194 } 195 return -EIO; 196 } 197 198 cursor += egress; 199 } 200 } 201 202 return rc; 203 } 204 205 /* Throws std::error_code on failure */ 206 /* FIXME: Probably shouldn't do fallible stuff in a constructor */ 207 NVMeBasicContext::NVMeBasicContext(boost::asio::io_context& io, int rootBus) : 208 NVMeContext::NVMeContext(io, rootBus), io(io), reqStream(io), respStream(io) 209 { 210 std::array<int, 2> responsePipe{}; 211 std::array<int, 2> requestPipe{}; 212 213 /* Set up inter-thread communication */ 214 if (::pipe(requestPipe.data()) == -1) 215 { 216 std::cerr << "Failed to create request pipe: " << strerror(errno) 217 << "\n"; 218 throw std::error_code(errno, std::system_category()); 219 } 220 221 if (::pipe(responsePipe.data()) == -1) 222 { 223 std::cerr << "Failed to create response pipe: " << strerror(errno) 224 << "\n"; 225 226 if (::close(requestPipe[0]) == -1) 227 { 228 std::cerr << "Failed to close write fd of request pipe: " 229 << strerror(errno) << "\n"; 230 } 231 232 if (::close(requestPipe[1]) == -1) 233 { 234 std::cerr << "Failed to close read fd of request pipe: " 235 << strerror(errno) << "\n"; 236 } 237 238 throw std::error_code(errno, std::system_category()); 239 } 240 241 reqStream.assign(requestPipe[1]); 242 FileHandle streamIn(requestPipe[0]); 243 FileHandle streamOut(responsePipe[1]); 244 respStream.assign(responsePipe[0]); 245 246 thread = std::jthread([streamIn{std::move(streamIn)}, 247 streamOut{std::move(streamOut)}]() mutable { 248 ssize_t rc = processBasicQueryStream(streamIn, streamOut); 249 250 if (rc < 0) 251 { 252 std::cerr << "Failure while processing query stream: " 253 << strerror(static_cast<int>(-rc)) << "\n"; 254 } 255 256 std::cerr << "Terminating basic query thread\n"; 257 }); 258 } 259 260 void NVMeBasicContext::readAndProcessNVMeSensor() 261 { 262 if (pollCursor == sensors.end()) 263 { 264 this->pollNVMeDevices(); 265 return; 266 } 267 268 std::shared_ptr<NVMeSensor> sensor = *pollCursor++; 269 270 if (!sensor->readingStateGood()) 271 { 272 sensor->markAvailable(false); 273 sensor->updateValue(std::numeric_limits<double>::quiet_NaN()); 274 readAndProcessNVMeSensor(); 275 return; 276 } 277 278 /* Potentially defer sampling the sensor sensor if it is in error */ 279 if (!sensor->sample()) 280 { 281 readAndProcessNVMeSensor(); 282 return; 283 } 284 285 auto command = encodeBasicQuery(sensor->bus, sensor->address, 0x00); 286 287 /* Issue the request */ 288 boost::asio::async_write( 289 reqStream, boost::asio::buffer(command->data(), command->size()), 290 [command](boost::system::error_code ec, std::size_t) { 291 if (ec) 292 { 293 std::cerr << "Got error writing basic query: " << ec << "\n"; 294 } 295 }); 296 297 auto response = std::make_shared<boost::asio::streambuf>(); 298 response->prepare(1); 299 300 /* Gather the response and dispatch for parsing */ 301 boost::asio::async_read( 302 respStream, *response, 303 [response](const boost::system::error_code& ec, std::size_t n) { 304 if (ec) 305 { 306 std::cerr << "Got error completing basic query: " << ec << "\n"; 307 return static_cast<std::size_t>(0); 308 } 309 310 if (n == 0) 311 { 312 return static_cast<std::size_t>(1); 313 } 314 315 std::istream is(response.get()); 316 size_t len = static_cast<std::size_t>(is.peek()); 317 318 if (n > len + 1) 319 { 320 std::cerr << "Query stream has become unsynchronised: " 321 << "n: " << n << ", " 322 << "len: " << len << "\n"; 323 return static_cast<std::size_t>(0); 324 } 325 326 if (n == len + 1) 327 { 328 return static_cast<std::size_t>(0); 329 } 330 331 if (n > 1) 332 { 333 return len + 1 - n; 334 } 335 336 response->prepare(len); 337 return len; 338 }, 339 [weakSelf{weak_from_this()}, sensor, response]( 340 const boost::system::error_code& ec, std::size_t length) mutable { 341 if (ec) 342 { 343 std::cerr << "Got error reading basic query: " << ec << "\n"; 344 return; 345 } 346 347 if (length == 0) 348 { 349 std::cerr << "Invalid message length: " << length << "\n"; 350 return; 351 } 352 353 if (auto self = weakSelf.lock()) 354 { 355 /* Deserialise the response */ 356 response->consume(1); /* Drop the length byte */ 357 std::istream is(response.get()); 358 std::vector<char> data(response->size()); 359 is.read(data.data(), response->size()); 360 361 /* Update the sensor */ 362 self->processResponse(sensor, data.data(), data.size()); 363 364 /* Enqueue processing of the next sensor */ 365 self->readAndProcessNVMeSensor(); 366 } 367 }); 368 } 369 370 void NVMeBasicContext::pollNVMeDevices() 371 { 372 pollCursor = sensors.begin(); 373 374 scanTimer.expires_after(std::chrono::seconds(1)); 375 scanTimer.async_wait([weakSelf{weak_from_this()}]( 376 const boost::system::error_code errorCode) { 377 if (errorCode == boost::asio::error::operation_aborted) 378 { 379 return; 380 } 381 382 if (errorCode) 383 { 384 std::cerr << errorCode.message() << "\n"; 385 return; 386 } 387 388 if (auto self = weakSelf.lock()) 389 { 390 self->readAndProcessNVMeSensor(); 391 } 392 }); 393 } 394 395 static double getTemperatureReading(int8_t reading) 396 { 397 if (reading == static_cast<int8_t>(0x80) || 398 reading == static_cast<int8_t>(0x81)) 399 { 400 // 0x80 = No temperature data or temperature data is more the 5 s 401 // old 0x81 = Temperature sensor failure 402 return std::numeric_limits<double>::quiet_NaN(); 403 } 404 405 return reading; 406 } 407 408 void NVMeBasicContext::processResponse(std::shared_ptr<NVMeSensor>& sensor, 409 void* msg, size_t len) 410 { 411 if (msg == nullptr || len < 6) 412 { 413 sensor->incrementError(); 414 return; 415 } 416 417 uint8_t* messageData = static_cast<uint8_t*>(msg); 418 419 uint8_t status = messageData[0]; 420 if (((status & NVME_MI_BASIC_SFLGS_DRIVE_NOT_READY) != 0) || 421 ((status & NVME_MI_BASIC_SFLGS_DRIVE_FUNCTIONAL) == 0)) 422 { 423 sensor->markFunctional(false); 424 return; 425 } 426 427 double value = getTemperatureReading(messageData[2]); 428 if (!std::isfinite(value)) 429 { 430 sensor->incrementError(); 431 return; 432 } 433 434 sensor->updateValue(value); 435 } 436