/*
 * NVMe admin command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/rculist.h>

#include <generated/utsrelease.h>
#include <asm/unaligned.h>
#include "nvmet.h"

u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
	u32 len = le16_to_cpu(cmd->get_log_page.numdu);

	len <<= 16;
	len += le16_to_cpu(cmd->get_log_page.numdl);
	/* NUMD is a 0's based value */
	len += 1;
	len *= sizeof(u32);

	return len;
}

static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
		struct nvme_smart_log *slog)
{
	struct nvmet_ns *ns;
	u64 host_reads, host_writes, data_units_read, data_units_written;

	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
	if (!ns) {
		pr_err("nvmet : Could not find namespace id : %u\n",
				le32_to_cpu(req->cmd->get_log_page.nsid));
		return NVME_SC_INVALID_NS;
	}

	host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
	data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
	host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
	data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]);

	put_unaligned_le64(host_reads, &slog->host_reads[0]);
	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
	put_unaligned_le64(host_writes, &slog->host_writes[0]);
	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
	nvmet_put_namespace(ns);

	return NVME_SC_SUCCESS;
}

static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
		struct nvme_smart_log *slog)
{
	u64 host_reads = 0, host_writes = 0;
	u64 data_units_read = 0, data_units_written = 0;
	struct nvmet_ns *ns;
	struct nvmet_ctrl *ctrl;

	ctrl = req->sq->ctrl;

	rcu_read_lock();
	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
		data_units_read +=
			part_stat_read(ns->bdev->bd_part, sectors[READ]);
		host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]);
		data_units_written +=
			part_stat_read(ns->bdev->bd_part, sectors[WRITE]);
	}
	rcu_read_unlock();

	put_unaligned_le64(host_reads, &slog->host_reads[0]);
	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
	put_unaligned_le64(host_writes, &slog->host_writes[0]);
	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);

	return NVME_SC_SUCCESS;
}

static u16 nvmet_get_smart_log(struct nvmet_req *req,
		struct nvme_smart_log *slog)
{
	u16 status;

	WARN_ON(req == NULL || slog == NULL);
	if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
		status = nvmet_get_smart_log_all(req, slog);
	else
		status = nvmet_get_smart_log_nsid(req, slog);
	return status;
}

static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
	struct nvme_smart_log *smart_log;
	size_t data_len = nvmet_get_log_page_len(req->cmd);
	void *buf;
	u16 status = 0;

	buf = kzalloc(data_len, GFP_KERNEL);
	if (!buf) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	switch (req->cmd->get_log_page.lid) {
	case NVME_LOG_ERROR:
		/*
		 * We currently never set the More bit in the status field,
		 * so all error log entries are invalid and can be zeroed out.
		 * This is called a minimum viable implementation (TM) of this
		 * mandatory log page.
		 */
		break;
	case NVME_LOG_SMART:
		/*
		 * XXX: fill out actual smart log
		 *
		 * We might have a hard time coming up with useful values for
		 * many of the fields, and even when we have useful data
		 * available (e.g. units or commands read/written) those aren't
		 * persistent over power loss.
		 */
		if (data_len != sizeof(*smart_log)) {
			status = NVME_SC_INTERNAL;
			goto err;
		}
		smart_log = buf;
		status = nvmet_get_smart_log(req, smart_log);
		if (status)
			goto err;
		break;
	case NVME_LOG_FW_SLOT:
		/*
		 * We only support a single firmware slot which always is
		 * active, so we can zero out the whole firmware slot log and
		 * still claim to fully implement this mandatory log page.
		 */
		break;
	default:
		BUG();
	}

	status = nvmet_copy_to_sgl(req, 0, buf, data_len);

err:
	kfree(buf);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_id_ctrl *id;
	u16 status = 0;
	const char model[] = "Linux";

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	/* XXX: figure out how to assign real vendor IDs. */
	id->vid = 0;
	id->ssvid = 0;

	bin2hex(id->sn, &ctrl->subsys->serial,
		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
	memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
	memcpy_and_pad(id->fr, sizeof(id->fr),
		       UTS_RELEASE, strlen(UTS_RELEASE), ' ');

	id->rab = 6;

	/*
	 * XXX: figure out how we can assign an IEEE OUI, but until then
	 * the safest is to leave it as zeroes.
	 */

	/* we support multiple ports and multiple hosts: */
	id->cmic = (1 << 0) | (1 << 1);

	/* no limit on data transfer sizes for now */
	id->mdts = 0;
	id->cntlid = cpu_to_le16(ctrl->cntlid);
	id->ver = cpu_to_le32(ctrl->subsys->ver);

	/* XXX: figure out what to do about RTD3R/RTD3 */
	id->oaes = cpu_to_le32(1 << 8);
	id->ctratt = cpu_to_le32(1 << 0);

	id->oacs = 0;

	/*
	 * We don't really have a practical limit on the number of abort
	 * commands.  But we don't do anything useful for abort either, so
	 * no point in allowing more abort commands than the spec requires.
	 */
	id->acl = 3;

	id->aerl = NVMET_ASYNC_EVENTS - 1;

	/* first slot is read-only, only one slot supported */
	id->frmw = (1 << 0) | (1 << 1);
	id->lpa = (1 << 0) | (1 << 2);
	id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
	id->npss = 0;

	/* We support keep-alive timeout in granularity of seconds */
	id->kas = cpu_to_le16(NVMET_KAS);

	id->sqes = (0x6 << 4) | 0x6;
	id->cqes = (0x4 << 4) | 0x4;

	/* no enforcement soft-limit for maxcmd - pick an arbitrary high value */
	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
			NVME_CTRL_ONCS_WRITE_ZEROES);

	/* XXX: don't report vwc if the underlying device is write through */
	id->vwc = NVME_CTRL_VWC_PRESENT;

	/*
	 * We can't support atomic writes bigger than an LBA without support
	 * from the backend device.
	 */
	id->awun = 0;
	id->awupf = 0;

	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
	if (ctrl->ops->has_keyed_sgls)
		id->sgls |= cpu_to_le32(1 << 2);
	if (ctrl->ops->sqe_inline_size)
		id->sgls |= cpu_to_le32(1 << 20);

	strcpy(id->subnqn, ctrl->subsys->subsysnqn);

	/* Max command capsule size is sqe + single page of in-capsule data */
	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
				  ctrl->ops->sqe_inline_size) / 16);
	/* Max response capsule size is cqe */
	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);

	id->msdbd = ctrl->ops->msdbd;

	/*
	 * Meh, we don't really support any power state.  Fake up the same
	 * values that qemu does.
	 */
	id->psd[0].max_power = cpu_to_le16(0x9c4);
	id->psd[0].entry_lat = cpu_to_le32(0x10);
	id->psd[0].exit_lat = cpu_to_le32(0x4);

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

	kfree(id);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
	struct nvmet_ns *ns;
	struct nvme_id_ns *id;
	u16 status = 0;

	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
	if (!ns) {
		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
		goto out;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		status = NVME_SC_INTERNAL;
		goto out_put_ns;
	}

	/*
	 * nuse = ncap = nsze isn't always true, but we have no way to find
	 * that out from the underlying device.
	 */
	id->ncap = id->nuse = id->nsze =
		cpu_to_le64(ns->size >> ns->blksize_shift);

	/*
	 * We just provide a single LBA format that matches what the
	 * underlying device reports.
	 */
	id->nlbaf = 0;
	id->flbas = 0;

	/*
	 * Our namespace might always be shared.  Not just with other
	 * controllers, but also with any other user of the block device.
	 */
	id->nmic = (1 << 0);

	memcpy(&id->nguid, &ns->nguid, sizeof(uuid_le));

	id->lbaf[0].ds = ns->blksize_shift;

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

	kfree(id);
out_put_ns:
	nvmet_put_namespace(ns);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_ns *ns;
	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
	__le32 *list;
	u16 status = 0;
	int i = 0;

	list = kzalloc(buf_size, GFP_KERNEL);
	if (!list) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid <= min_nsid)
			continue;
		list[i++] = cpu_to_le32(ns->nsid);
		if (i == buf_size / sizeof(__le32))
			break;
	}
	rcu_read_unlock();

	status = nvmet_copy_to_sgl(req, 0, list, buf_size);

	kfree(list);
out:
	nvmet_req_complete(req, status);
}

/*
 * Emit a single Namespace Identification Descriptor (type, length, payload)
 * into the Identify response buffer and advance the output offset.
 */
static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
				    void *id, off_t *off)
{
	struct nvme_ns_id_desc desc = {
		.nidt = type,
		.nidl = len,
	};
	u16 status;

	status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
	if (status)
		return status;
	*off += sizeof(desc);

	status = nvmet_copy_to_sgl(req, *off, id, len);
	if (status)
		return status;
	*off += len;

	return 0;
}

static void nvmet_execute_identify_desclist(struct nvmet_req *req)
{
	struct nvmet_ns *ns;
	u16 status = 0;
	off_t off = 0;

	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
	if (!ns) {
		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
		goto out;
	}

	if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
		status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
						  NVME_NIDT_UUID_LEN,
						  &ns->uuid, &off);
		if (status)
			goto out_put_ns;
	}
	if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
		status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
						  NVME_NIDT_NGUID_LEN,
						  &ns->nguid, &off);
		if (status)
			goto out_put_ns;
	}

	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
			off) != NVME_IDENTIFY_DATA_SIZE - off)
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
out_put_ns:
	nvmet_put_namespace(ns);
out:
	nvmet_req_complete(req, status);
}

/*
 * A "minimum viable" abort implementation: the command is mandatory in the
 * spec, but we are not required to do any useful work.  We couldn't really
 * do a useful abort, so don't even bother waiting for the command to be
 * executed; return immediately, indicating that the command to abort was
 * not found.
 */
static void nvmet_execute_abort(struct nvmet_req *req)
{
	nvmet_set_result(req, 1);
	nvmet_req_complete(req, 0);
}

static void nvmet_execute_set_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
	u32 val32;
	u16 status = 0;

	switch (cdw10 & 0xff) {
	case NVME_FEAT_NUM_QUEUES:
		nvmet_set_result(req,
			(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
		break;
	case NVME_FEAT_KATO:
		val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
		req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
		nvmet_set_result(req, req->sq->ctrl->kato);
		break;
	case NVME_FEAT_HOST_ID:
		status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
		break;
	default:
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}

static void nvmet_execute_get_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
	u16 status = 0;

	switch (cdw10 & 0xff) {
	/*
	 * These features are mandatory in the spec, but we don't
	 * have a useful way to implement them.  We'll eventually
	 * need to come up with some fake values for these.
	 */
#if 0
	case NVME_FEAT_ARBITRATION:
		break;
	case NVME_FEAT_POWER_MGMT:
		break;
	case NVME_FEAT_TEMP_THRESH:
		break;
	case NVME_FEAT_ERR_RECOVERY:
		break;
	case NVME_FEAT_IRQ_COALESCE:
		break;
	case NVME_FEAT_IRQ_CONFIG:
		break;
	case NVME_FEAT_WRITE_ATOMIC:
		break;
	case NVME_FEAT_ASYNC_EVENT:
		break;
#endif
	case NVME_FEAT_VOLATILE_WC:
		nvmet_set_result(req, 1);
		break;
	case NVME_FEAT_NUM_QUEUES:
		nvmet_set_result(req,
			(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
		break;
	case NVME_FEAT_KATO:
		nvmet_set_result(req, req->sq->ctrl->kato * 1000);
		break;
	case NVME_FEAT_HOST_ID:
		/* need 128-bit host identifier flag */
		if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			break;
		}

		status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
				sizeof(req->sq->ctrl->hostid));
		break;
	default:
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}

static void nvmet_execute_async_event(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_SC_DNR);
		return;
	}
	ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static void nvmet_execute_keep_alive(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;

	pr_debug("ctrl %d update keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
	nvmet_req_complete(req, 0);
}

u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	req->ns = NULL;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	switch (cmd->common.opcode) {
	case nvme_admin_get_log_page:
		req->data_len = nvmet_get_log_page_len(cmd);

		switch (cmd->get_log_page.lid) {
		case NVME_LOG_ERROR:
		case NVME_LOG_SMART:
		case NVME_LOG_FW_SLOT:
			req->execute = nvmet_execute_get_log_page;
			return 0;
		}
		break;
	case nvme_admin_identify:
		req->data_len = NVME_IDENTIFY_DATA_SIZE;
		switch (cmd->identify.cns) {
		case NVME_ID_CNS_NS:
			req->execute = nvmet_execute_identify_ns;
			return 0;
		case NVME_ID_CNS_CTRL:
			req->execute = nvmet_execute_identify_ctrl;
			return 0;
		case NVME_ID_CNS_NS_ACTIVE_LIST:
			req->execute = nvmet_execute_identify_nslist;
			return 0;
		case NVME_ID_CNS_NS_DESC_LIST:
			req->execute = nvmet_execute_identify_desclist;
			return 0;
		}
		break;
	case nvme_admin_abort_cmd:
		req->execute = nvmet_execute_abort;
		req->data_len = 0;
		return 0;
	case nvme_admin_set_features:
		req->execute = nvmet_execute_set_features;
		req->data_len = 0;
		return 0;
	case nvme_admin_get_features:
		req->execute = nvmet_execute_get_features;
		req->data_len = 0;
		return 0;
	case nvme_admin_async_event:
		req->execute = nvmet_execute_async_event;
		req->data_len = 0;
		return 0;
	case nvme_admin_keep_alive:
		req->execute = nvmet_execute_keep_alive;
		req->data_len = 0;
		return 0;
	}

	pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
	       req->sq->qid);
	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}