1 /* 2 * Common code for the NVMe target. 3 * Copyright (c) 2015-2016 HGST, a Western Digital Company. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 */ 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 #include <linux/module.h> 16 #include <linux/random.h> 17 #include <linux/rculist.h> 18 19 #include "nvmet.h" 20 21 struct workqueue_struct *buffered_io_wq; 22 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; 23 static DEFINE_IDA(cntlid_ida); 24 25 /* 26 * This read/write semaphore is used to synchronize access to configuration 27 * information on a target system that will result in discovery log page 28 * information change for at least one host. 29 * The full list of resources to protected by this semaphore is: 30 * 31 * - subsystems list 32 * - per-subsystem allowed hosts list 33 * - allow_any_host subsystem attribute 34 * - nvmet_genctr 35 * - the nvmet_transports array 36 * 37 * When updating any of those lists/structures write lock should be obtained, 38 * while when reading (popolating discovery log page or checking host-subsystem 39 * link) read lock is obtained to allow concurrent reads. 40 */ 41 DECLARE_RWSEM(nvmet_config_sem); 42 43 u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; 44 u64 nvmet_ana_chgcnt; 45 DECLARE_RWSEM(nvmet_ana_sem); 46 47 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 48 const char *subsysnqn); 49 50 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, 51 size_t len) 52 { 53 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) 54 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; 55 return 0; 56 } 57 58 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len) 59 { 60 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) 61 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; 62 return 0; 63 } 64 65 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) 66 { 67 if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) 68 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR; 69 return 0; 70 } 71 72 static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) 73 { 74 struct nvmet_ns *ns; 75 76 if (list_empty(&subsys->namespaces)) 77 return 0; 78 79 ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link); 80 return ns->nsid; 81 } 82 83 static u32 nvmet_async_event_result(struct nvmet_async_event *aen) 84 { 85 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); 86 } 87 88 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 89 { 90 struct nvmet_req *req; 91 92 while (1) { 93 mutex_lock(&ctrl->lock); 94 if (!ctrl->nr_async_event_cmds) { 95 mutex_unlock(&ctrl->lock); 96 return; 97 } 98 99 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 100 mutex_unlock(&ctrl->lock); 101 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); 102 } 103 } 104 105 static void nvmet_async_event_work(struct work_struct *work) 106 { 107 struct nvmet_ctrl *ctrl = 108 container_of(work, struct nvmet_ctrl, async_event_work); 109 struct nvmet_async_event *aen; 110 struct nvmet_req *req; 111 112 while (1) { 113 mutex_lock(&ctrl->lock); 114 aen = list_first_entry_or_null(&ctrl->async_events, 115 struct nvmet_async_event, entry); 116 if (!aen || !ctrl->nr_async_event_cmds) { 117 mutex_unlock(&ctrl->lock); 118 return; 119 } 120 121 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 122 nvmet_set_result(req, nvmet_async_event_result(aen)); 123 124 list_del(&aen->entry); 125 kfree(aen); 126 127 mutex_unlock(&ctrl->lock); 128 nvmet_req_complete(req, 0); 129 } 130 } 131 132 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, 133 u8 event_info, u8 log_page) 134 { 135 struct nvmet_async_event *aen; 136 137 aen = kmalloc(sizeof(*aen), GFP_KERNEL); 138 if (!aen) 139 return; 140 141 aen->event_type = event_type; 142 aen->event_info = event_info; 143 aen->log_page = log_page; 144 145 mutex_lock(&ctrl->lock); 146 list_add_tail(&aen->entry, &ctrl->async_events); 147 mutex_unlock(&ctrl->lock); 148 149 schedule_work(&ctrl->async_event_work); 150 } 151 152 static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) 153 { 154 if (!(READ_ONCE(ctrl->aen_enabled) & aen)) 155 return true; 156 return test_and_set_bit(aen, &ctrl->aen_masked); 157 } 158 159 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) 160 { 161 u32 i; 162 163 mutex_lock(&ctrl->lock); 164 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES) 165 goto out_unlock; 166 167 for (i = 0; i < ctrl->nr_changed_ns; i++) { 168 if (ctrl->changed_ns_list[i] == nsid) 169 goto out_unlock; 170 } 171 172 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) { 173 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff); 174 ctrl->nr_changed_ns = U32_MAX; 175 goto out_unlock; 176 } 177 178 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid; 179 out_unlock: 180 mutex_unlock(&ctrl->lock); 181 } 182 183 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) 184 { 185 struct nvmet_ctrl *ctrl; 186 187 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 188 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); 189 if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) 190 continue; 191 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 192 NVME_AER_NOTICE_NS_CHANGED, 193 NVME_LOG_CHANGED_NS); 194 } 195 } 196 197 void nvmet_send_ana_event(struct nvmet_subsys *subsys, 198 struct nvmet_port *port) 199 { 200 struct nvmet_ctrl *ctrl; 201 202 mutex_lock(&subsys->lock); 203 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 204 if (port && ctrl->port != port) 205 continue; 206 if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE)) 207 continue; 208 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 209 NVME_AER_NOTICE_ANA, NVME_LOG_ANA); 210 } 211 mutex_unlock(&subsys->lock); 212 } 213 214 void nvmet_port_send_ana_event(struct nvmet_port *port) 215 { 216 struct nvmet_subsys_link *p; 217 218 down_read(&nvmet_config_sem); 219 list_for_each_entry(p, &port->subsystems, entry) 220 nvmet_send_ana_event(p->subsys, port); 221 up_read(&nvmet_config_sem); 222 } 223 224 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) 225 { 226 int ret = 0; 227 228 down_write(&nvmet_config_sem); 229 if (nvmet_transports[ops->type]) 230 ret = -EINVAL; 231 else 232 nvmet_transports[ops->type] = ops; 233 up_write(&nvmet_config_sem); 234 235 return ret; 236 } 237 EXPORT_SYMBOL_GPL(nvmet_register_transport); 238 239 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops) 240 { 241 down_write(&nvmet_config_sem); 242 nvmet_transports[ops->type] = NULL; 243 up_write(&nvmet_config_sem); 244 } 245 EXPORT_SYMBOL_GPL(nvmet_unregister_transport); 246 247 int nvmet_enable_port(struct nvmet_port *port) 248 { 249 const struct nvmet_fabrics_ops *ops; 250 int ret; 251 252 lockdep_assert_held(&nvmet_config_sem); 253 254 ops = nvmet_transports[port->disc_addr.trtype]; 255 if (!ops) { 256 up_write(&nvmet_config_sem); 257 request_module("nvmet-transport-%d", port->disc_addr.trtype); 258 down_write(&nvmet_config_sem); 259 ops = nvmet_transports[port->disc_addr.trtype]; 260 if (!ops) { 261 pr_err("transport type %d not supported\n", 262 port->disc_addr.trtype); 263 return -EINVAL; 264 } 265 } 266 267 if (!try_module_get(ops->owner)) 268 return -EINVAL; 269 270 ret = ops->add_port(port); 271 if (ret) { 272 module_put(ops->owner); 273 return ret; 274 } 275 276 /* If the transport didn't set inline_data_size, then disable it. */ 277 if (port->inline_data_size < 0) 278 port->inline_data_size = 0; 279 280 port->enabled = true; 281 return 0; 282 } 283 284 void nvmet_disable_port(struct nvmet_port *port) 285 { 286 const struct nvmet_fabrics_ops *ops; 287 288 lockdep_assert_held(&nvmet_config_sem); 289 290 port->enabled = false; 291 292 ops = nvmet_transports[port->disc_addr.trtype]; 293 ops->remove_port(port); 294 module_put(ops->owner); 295 } 296 297 static void nvmet_keep_alive_timer(struct work_struct *work) 298 { 299 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), 300 struct nvmet_ctrl, ka_work); 301 302 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", 303 ctrl->cntlid, ctrl->kato); 304 305 nvmet_ctrl_fatal_error(ctrl); 306 } 307 308 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) 309 { 310 pr_debug("ctrl %d start keep-alive timer for %d secs\n", 311 ctrl->cntlid, ctrl->kato); 312 313 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); 314 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); 315 } 316 317 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) 318 { 319 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid); 320 321 cancel_delayed_work_sync(&ctrl->ka_work); 322 } 323 324 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl, 325 __le32 nsid) 326 { 327 struct nvmet_ns *ns; 328 329 list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { 330 if (ns->nsid == le32_to_cpu(nsid)) 331 return ns; 332 } 333 334 return NULL; 335 } 336 337 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid) 338 { 339 struct nvmet_ns *ns; 340 341 rcu_read_lock(); 342 ns = __nvmet_find_namespace(ctrl, nsid); 343 if (ns) 344 percpu_ref_get(&ns->ref); 345 rcu_read_unlock(); 346 347 return ns; 348 } 349 350 static void nvmet_destroy_namespace(struct percpu_ref *ref) 351 { 352 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref); 353 354 complete(&ns->disable_done); 355 } 356 357 void nvmet_put_namespace(struct nvmet_ns *ns) 358 { 359 percpu_ref_put(&ns->ref); 360 } 361 362 static void nvmet_ns_dev_disable(struct nvmet_ns *ns) 363 { 364 nvmet_bdev_ns_disable(ns); 365 nvmet_file_ns_disable(ns); 366 } 367 368 int nvmet_ns_enable(struct nvmet_ns *ns) 369 { 370 struct nvmet_subsys *subsys = ns->subsys; 371 int ret; 372 373 mutex_lock(&subsys->lock); 374 ret = -EMFILE; 375 if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) 376 goto out_unlock; 377 ret = 0; 378 if (ns->enabled) 379 goto out_unlock; 380 381 ret = nvmet_bdev_ns_enable(ns); 382 if (ret == -ENOTBLK) 383 ret = nvmet_file_ns_enable(ns); 384 if (ret) 385 goto out_unlock; 386 387 ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 388 0, GFP_KERNEL); 389 if (ret) 390 goto out_dev_put; 391 392 if (ns->nsid > subsys->max_nsid) 393 subsys->max_nsid = ns->nsid; 394 395 /* 396 * The namespaces list needs to be sorted to simplify the implementation 397 * of the Identify Namepace List subcommand. 398 */ 399 if (list_empty(&subsys->namespaces)) { 400 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces); 401 } else { 402 struct nvmet_ns *old; 403 404 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) { 405 BUG_ON(ns->nsid == old->nsid); 406 if (ns->nsid < old->nsid) 407 break; 408 } 409 410 list_add_tail_rcu(&ns->dev_link, &old->dev_link); 411 } 412 subsys->nr_namespaces++; 413 414 nvmet_ns_changed(subsys, ns->nsid); 415 ns->enabled = true; 416 ret = 0; 417 out_unlock: 418 mutex_unlock(&subsys->lock); 419 return ret; 420 out_dev_put: 421 nvmet_ns_dev_disable(ns); 422 goto out_unlock; 423 } 424 425 void nvmet_ns_disable(struct nvmet_ns *ns) 426 { 427 struct nvmet_subsys *subsys = ns->subsys; 428 429 mutex_lock(&subsys->lock); 430 if (!ns->enabled) 431 goto out_unlock; 432 433 ns->enabled = false; 434 list_del_rcu(&ns->dev_link); 435 if (ns->nsid == subsys->max_nsid) 436 subsys->max_nsid = nvmet_max_nsid(subsys); 437 mutex_unlock(&subsys->lock); 438 439 /* 440 * Now that we removed the namespaces from the lookup list, we 441 * can kill the per_cpu ref and wait for any remaining references 442 * to be dropped, as well as a RCU grace period for anyone only 443 * using the namepace under rcu_read_lock(). Note that we can't 444 * use call_rcu here as we need to ensure the namespaces have 445 * been fully destroyed before unloading the module. 446 */ 447 percpu_ref_kill(&ns->ref); 448 synchronize_rcu(); 449 wait_for_completion(&ns->disable_done); 450 percpu_ref_exit(&ns->ref); 451 452 mutex_lock(&subsys->lock); 453 subsys->nr_namespaces--; 454 nvmet_ns_changed(subsys, ns->nsid); 455 nvmet_ns_dev_disable(ns); 456 out_unlock: 457 mutex_unlock(&subsys->lock); 458 } 459 460 void nvmet_ns_free(struct nvmet_ns *ns) 461 { 462 nvmet_ns_disable(ns); 463 464 down_write(&nvmet_ana_sem); 465 nvmet_ana_group_enabled[ns->anagrpid]--; 466 up_write(&nvmet_ana_sem); 467 468 kfree(ns->device_path); 469 kfree(ns); 470 } 471 472 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) 473 { 474 struct nvmet_ns *ns; 475 476 ns = kzalloc(sizeof(*ns), GFP_KERNEL); 477 if (!ns) 478 return NULL; 479 480 INIT_LIST_HEAD(&ns->dev_link); 481 init_completion(&ns->disable_done); 482 483 ns->nsid = nsid; 484 ns->subsys = subsys; 485 486 down_write(&nvmet_ana_sem); 487 ns->anagrpid = NVMET_DEFAULT_ANA_GRPID; 488 nvmet_ana_group_enabled[ns->anagrpid]++; 489 up_write(&nvmet_ana_sem); 490 491 uuid_gen(&ns->uuid); 492 ns->buffered_io = false; 493 494 return ns; 495 } 496 497 static void __nvmet_req_complete(struct nvmet_req *req, u16 status) 498 { 499 u32 old_sqhd, new_sqhd; 500 u16 sqhd; 501 502 if (status) 503 nvmet_set_status(req, status); 504 505 if (req->sq->size) { 506 do { 507 old_sqhd = req->sq->sqhd; 508 new_sqhd = (old_sqhd + 1) % req->sq->size; 509 } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != 510 old_sqhd); 511 } 512 sqhd = req->sq->sqhd & 0x0000FFFF; 513 req->rsp->sq_head = cpu_to_le16(sqhd); 514 req->rsp->sq_id = cpu_to_le16(req->sq->qid); 515 req->rsp->command_id = req->cmd->common.command_id; 516 517 if (req->ns) 518 nvmet_put_namespace(req->ns); 519 req->ops->queue_response(req); 520 } 521 522 void nvmet_req_complete(struct nvmet_req *req, u16 status) 523 { 524 __nvmet_req_complete(req, status); 525 percpu_ref_put(&req->sq->ref); 526 } 527 EXPORT_SYMBOL_GPL(nvmet_req_complete); 528 529 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, 530 u16 qid, u16 size) 531 { 532 cq->qid = qid; 533 cq->size = size; 534 535 ctrl->cqs[qid] = cq; 536 } 537 538 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 539 u16 qid, u16 size) 540 { 541 sq->sqhd = 0; 542 sq->qid = qid; 543 sq->size = size; 544 545 ctrl->sqs[qid] = sq; 546 } 547 548 static void nvmet_confirm_sq(struct percpu_ref *ref) 549 { 550 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 551 552 complete(&sq->confirm_done); 553 } 554 555 void nvmet_sq_destroy(struct nvmet_sq *sq) 556 { 557 /* 558 * If this is the admin queue, complete all AERs so that our 559 * queue doesn't have outstanding requests on it. 560 */ 561 if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) 562 nvmet_async_events_free(sq->ctrl); 563 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); 564 wait_for_completion(&sq->confirm_done); 565 wait_for_completion(&sq->free_done); 566 percpu_ref_exit(&sq->ref); 567 568 if (sq->ctrl) { 569 nvmet_ctrl_put(sq->ctrl); 570 sq->ctrl = NULL; /* allows reusing the queue later */ 571 } 572 } 573 EXPORT_SYMBOL_GPL(nvmet_sq_destroy); 574 575 static void nvmet_sq_free(struct percpu_ref *ref) 576 { 577 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); 578 579 complete(&sq->free_done); 580 } 581 582 int nvmet_sq_init(struct nvmet_sq *sq) 583 { 584 int ret; 585 586 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL); 587 if (ret) { 588 pr_err("percpu_ref init failed!\n"); 589 return ret; 590 } 591 init_completion(&sq->free_done); 592 init_completion(&sq->confirm_done); 593 594 return 0; 595 } 596 EXPORT_SYMBOL_GPL(nvmet_sq_init); 597 598 static inline u16 nvmet_check_ana_state(struct nvmet_port *port, 599 struct nvmet_ns *ns) 600 { 601 enum nvme_ana_state state = port->ana_state[ns->anagrpid]; 602 603 if (unlikely(state == NVME_ANA_INACCESSIBLE)) 604 return NVME_SC_ANA_INACCESSIBLE; 605 if (unlikely(state == NVME_ANA_PERSISTENT_LOSS)) 606 return NVME_SC_ANA_PERSISTENT_LOSS; 607 if (unlikely(state == NVME_ANA_CHANGE)) 608 return NVME_SC_ANA_TRANSITION; 609 return 0; 610 } 611 612 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req) 613 { 614 if (unlikely(req->ns->readonly)) { 615 switch (req->cmd->common.opcode) { 616 case nvme_cmd_read: 617 case nvme_cmd_flush: 618 break; 619 default: 620 return NVME_SC_NS_WRITE_PROTECTED; 621 } 622 } 623 624 return 0; 625 } 626 627 static u16 nvmet_parse_io_cmd(struct nvmet_req *req) 628 { 629 struct nvme_command *cmd = req->cmd; 630 u16 ret; 631 632 ret = nvmet_check_ctrl_status(req, cmd); 633 if (unlikely(ret)) 634 return ret; 635 636 req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); 637 if (unlikely(!req->ns)) 638 return NVME_SC_INVALID_NS | NVME_SC_DNR; 639 ret = nvmet_check_ana_state(req->port, req->ns); 640 if (unlikely(ret)) 641 return ret; 642 ret = nvmet_io_cmd_check_access(req); 643 if (unlikely(ret)) 644 return ret; 645 646 if (req->ns->file) 647 return nvmet_file_parse_io_cmd(req); 648 else 649 return nvmet_bdev_parse_io_cmd(req); 650 } 651 652 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 653 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) 654 { 655 u8 flags = req->cmd->common.flags; 656 u16 status; 657 658 req->cq = cq; 659 req->sq = sq; 660 req->ops = ops; 661 req->sg = NULL; 662 req->sg_cnt = 0; 663 req->transfer_len = 0; 664 req->rsp->status = 0; 665 req->ns = NULL; 666 667 /* no support for fused commands yet */ 668 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { 669 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 670 goto fail; 671 } 672 673 /* 674 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that 675 * contains an address of a single contiguous physical buffer that is 676 * byte aligned. 677 */ 678 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) { 679 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 680 goto fail; 681 } 682 683 if (unlikely(!req->sq->ctrl)) 684 /* will return an error for any Non-connect command: */ 685 status = nvmet_parse_connect_cmd(req); 686 else if (likely(req->sq->qid != 0)) 687 status = nvmet_parse_io_cmd(req); 688 else if (req->cmd->common.opcode == nvme_fabrics_command) 689 status = nvmet_parse_fabrics_cmd(req); 690 else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) 691 status = nvmet_parse_discovery_cmd(req); 692 else 693 status = nvmet_parse_admin_cmd(req); 694 695 if (status) 696 goto fail; 697 698 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { 699 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 700 goto fail; 701 } 702 703 return true; 704 705 fail: 706 __nvmet_req_complete(req, status); 707 return false; 708 } 709 EXPORT_SYMBOL_GPL(nvmet_req_init); 710 711 void nvmet_req_uninit(struct nvmet_req *req) 712 { 713 percpu_ref_put(&req->sq->ref); 714 if (req->ns) 715 nvmet_put_namespace(req->ns); 716 } 717 EXPORT_SYMBOL_GPL(nvmet_req_uninit); 718 719 void nvmet_req_execute(struct nvmet_req *req) 720 { 721 if (unlikely(req->data_len != req->transfer_len)) 722 nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); 723 else 724 req->execute(req); 725 } 726 EXPORT_SYMBOL_GPL(nvmet_req_execute); 727 728 static inline bool nvmet_cc_en(u32 cc) 729 { 730 return (cc >> NVME_CC_EN_SHIFT) & 0x1; 731 } 732 733 static inline u8 nvmet_cc_css(u32 cc) 734 { 735 return (cc >> NVME_CC_CSS_SHIFT) & 0x7; 736 } 737 738 static inline u8 nvmet_cc_mps(u32 cc) 739 { 740 return (cc >> NVME_CC_MPS_SHIFT) & 0xf; 741 } 742 743 static inline u8 nvmet_cc_ams(u32 cc) 744 { 745 return (cc >> NVME_CC_AMS_SHIFT) & 0x7; 746 } 747 748 static inline u8 nvmet_cc_shn(u32 cc) 749 { 750 return (cc >> NVME_CC_SHN_SHIFT) & 0x3; 751 } 752 753 static inline u8 nvmet_cc_iosqes(u32 cc) 754 { 755 return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf; 756 } 757 758 static inline u8 nvmet_cc_iocqes(u32 cc) 759 { 760 return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; 761 } 762 763 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) 764 { 765 lockdep_assert_held(&ctrl->lock); 766 767 if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || 768 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES || 769 nvmet_cc_mps(ctrl->cc) != 0 || 770 nvmet_cc_ams(ctrl->cc) != 0 || 771 nvmet_cc_css(ctrl->cc) != 0) { 772 ctrl->csts = NVME_CSTS_CFS; 773 return; 774 } 775 776 ctrl->csts = NVME_CSTS_RDY; 777 778 /* 779 * Controllers that are not yet enabled should not really enforce the 780 * keep alive timeout, but we still want to track a timeout and cleanup 781 * in case a host died before it enabled the controller. Hence, simply 782 * reset the keep alive timer when the controller is enabled. 783 */ 784 mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); 785 } 786 787 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) 788 { 789 lockdep_assert_held(&ctrl->lock); 790 791 /* XXX: tear down queues? */ 792 ctrl->csts &= ~NVME_CSTS_RDY; 793 ctrl->cc = 0; 794 } 795 796 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new) 797 { 798 u32 old; 799 800 mutex_lock(&ctrl->lock); 801 old = ctrl->cc; 802 ctrl->cc = new; 803 804 if (nvmet_cc_en(new) && !nvmet_cc_en(old)) 805 nvmet_start_ctrl(ctrl); 806 if (!nvmet_cc_en(new) && nvmet_cc_en(old)) 807 nvmet_clear_ctrl(ctrl); 808 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) { 809 nvmet_clear_ctrl(ctrl); 810 ctrl->csts |= NVME_CSTS_SHST_CMPLT; 811 } 812 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old)) 813 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; 814 mutex_unlock(&ctrl->lock); 815 } 816 817 static void nvmet_init_cap(struct nvmet_ctrl *ctrl) 818 { 819 /* command sets supported: NVMe command set: */ 820 ctrl->cap = (1ULL << 37); 821 /* CC.EN timeout in 500msec units: */ 822 ctrl->cap |= (15ULL << 24); 823 /* maximum queue entries supported: */ 824 ctrl->cap |= NVMET_QUEUE_SIZE - 1; 825 } 826 827 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, 828 struct nvmet_req *req, struct nvmet_ctrl **ret) 829 { 830 struct nvmet_subsys *subsys; 831 struct nvmet_ctrl *ctrl; 832 u16 status = 0; 833 834 subsys = nvmet_find_get_subsys(req->port, subsysnqn); 835 if (!subsys) { 836 pr_warn("connect request for invalid subsystem %s!\n", 837 subsysnqn); 838 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); 839 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 840 } 841 842 mutex_lock(&subsys->lock); 843 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 844 if (ctrl->cntlid == cntlid) { 845 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) { 846 pr_warn("hostnqn mismatch.\n"); 847 continue; 848 } 849 if (!kref_get_unless_zero(&ctrl->ref)) 850 continue; 851 852 *ret = ctrl; 853 goto out; 854 } 855 } 856 857 pr_warn("could not find controller %d for subsys %s / host %s\n", 858 cntlid, subsysnqn, hostnqn); 859 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); 860 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 861 862 out: 863 mutex_unlock(&subsys->lock); 864 nvmet_subsys_put(subsys); 865 return status; 866 } 867 868 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) 869 { 870 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { 871 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", 872 cmd->common.opcode, req->sq->qid); 873 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; 874 } 875 876 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { 877 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", 878 cmd->common.opcode, req->sq->qid); 879 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; 880 } 881 return 0; 882 } 883 884 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, 885 const char *hostnqn) 886 { 887 struct nvmet_host_link *p; 888 889 if (subsys->allow_any_host) 890 return true; 891 892 list_for_each_entry(p, &subsys->hosts, entry) { 893 if (!strcmp(nvmet_host_name(p->host), hostnqn)) 894 return true; 895 } 896 897 return false; 898 } 899 900 static bool nvmet_host_discovery_allowed(struct nvmet_req *req, 901 const char *hostnqn) 902 { 903 struct nvmet_subsys_link *s; 904 905 list_for_each_entry(s, &req->port->subsystems, entry) { 906 if (__nvmet_host_allowed(s->subsys, hostnqn)) 907 return true; 908 } 909 910 return false; 911 } 912 913 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, 914 const char *hostnqn) 915 { 916 lockdep_assert_held(&nvmet_config_sem); 917 918 if (subsys->type == NVME_NQN_DISC) 919 return nvmet_host_discovery_allowed(req, hostnqn); 920 else 921 return __nvmet_host_allowed(subsys, hostnqn); 922 } 923 924 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, 925 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp) 926 { 927 struct nvmet_subsys *subsys; 928 struct nvmet_ctrl *ctrl; 929 int ret; 930 u16 status; 931 932 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 933 subsys = nvmet_find_get_subsys(req->port, subsysnqn); 934 if (!subsys) { 935 pr_warn("connect request for invalid subsystem %s!\n", 936 subsysnqn); 937 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); 938 goto out; 939 } 940 941 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 942 down_read(&nvmet_config_sem); 943 if (!nvmet_host_allowed(req, subsys, hostnqn)) { 944 pr_info("connect by host %s for subsystem %s not allowed\n", 945 hostnqn, subsysnqn); 946 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); 947 up_read(&nvmet_config_sem); 948 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; 949 goto out_put_subsystem; 950 } 951 up_read(&nvmet_config_sem); 952 953 status = NVME_SC_INTERNAL; 954 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 955 if (!ctrl) 956 goto out_put_subsystem; 957 mutex_init(&ctrl->lock); 958 959 nvmet_init_cap(ctrl); 960 961 ctrl->port = req->port; 962 963 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); 964 INIT_LIST_HEAD(&ctrl->async_events); 965 966 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); 967 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); 968 969 kref_init(&ctrl->ref); 970 ctrl->subsys = subsys; 971 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); 972 973 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, 974 sizeof(__le32), GFP_KERNEL); 975 if (!ctrl->changed_ns_list) 976 goto out_free_ctrl; 977 978 ctrl->cqs = kcalloc(subsys->max_qid + 1, 979 sizeof(struct nvmet_cq *), 980 GFP_KERNEL); 981 if (!ctrl->cqs) 982 goto out_free_changed_ns_list; 983 984 ctrl->sqs = kcalloc(subsys->max_qid + 1, 985 sizeof(struct nvmet_sq *), 986 GFP_KERNEL); 987 if (!ctrl->sqs) 988 goto out_free_cqs; 989 990 ret = ida_simple_get(&cntlid_ida, 991 NVME_CNTLID_MIN, NVME_CNTLID_MAX, 992 GFP_KERNEL); 993 if (ret < 0) { 994 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; 995 goto out_free_sqs; 996 } 997 ctrl->cntlid = ret; 998 999 ctrl->ops = req->ops; 1000 if (ctrl->subsys->type == NVME_NQN_DISC) { 1001 /* Don't accept keep-alive timeout for discovery controllers */ 1002 if (kato) { 1003 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 1004 goto out_remove_ida; 1005 } 1006 1007 /* 1008 * Discovery controllers use some arbitrary high value in order 1009 * to cleanup stale discovery sessions 1010 * 1011 * From the latest base diff RC: 1012 * "The Keep Alive command is not supported by 1013 * Discovery controllers. A transport may specify a 1014 * fixed Discovery controller activity timeout value 1015 * (e.g., 2 minutes). If no commands are received 1016 * by a Discovery controller within that time 1017 * period, the controller may perform the 1018 * actions for Keep Alive Timer expiration". 1019 */ 1020 ctrl->kato = NVMET_DISC_KATO; 1021 } else { 1022 /* keep-alive timeout in seconds */ 1023 ctrl->kato = DIV_ROUND_UP(kato, 1000); 1024 } 1025 nvmet_start_keep_alive_timer(ctrl); 1026 1027 mutex_lock(&subsys->lock); 1028 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 1029 mutex_unlock(&subsys->lock); 1030 1031 *ctrlp = ctrl; 1032 return 0; 1033 1034 out_remove_ida: 1035 ida_simple_remove(&cntlid_ida, ctrl->cntlid); 1036 out_free_sqs: 1037 kfree(ctrl->sqs); 1038 out_free_cqs: 1039 kfree(ctrl->cqs); 1040 out_free_changed_ns_list: 1041 kfree(ctrl->changed_ns_list); 1042 out_free_ctrl: 1043 kfree(ctrl); 1044 out_put_subsystem: 1045 nvmet_subsys_put(subsys); 1046 out: 1047 return status; 1048 } 1049 1050 static void nvmet_ctrl_free(struct kref *ref) 1051 { 1052 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); 1053 struct nvmet_subsys *subsys = ctrl->subsys; 1054 1055 mutex_lock(&subsys->lock); 1056 list_del(&ctrl->subsys_entry); 1057 mutex_unlock(&subsys->lock); 1058 1059 nvmet_stop_keep_alive_timer(ctrl); 1060 1061 flush_work(&ctrl->async_event_work); 1062 cancel_work_sync(&ctrl->fatal_err_work); 1063 1064 ida_simple_remove(&cntlid_ida, ctrl->cntlid); 1065 1066 kfree(ctrl->sqs); 1067 kfree(ctrl->cqs); 1068 kfree(ctrl->changed_ns_list); 1069 kfree(ctrl); 1070 1071 nvmet_subsys_put(subsys); 1072 } 1073 1074 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) 1075 { 1076 kref_put(&ctrl->ref, nvmet_ctrl_free); 1077 } 1078 1079 static void nvmet_fatal_error_handler(struct work_struct *work) 1080 { 1081 struct nvmet_ctrl *ctrl = 1082 container_of(work, struct nvmet_ctrl, fatal_err_work); 1083 1084 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1085 ctrl->ops->delete_ctrl(ctrl); 1086 } 1087 1088 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) 1089 { 1090 mutex_lock(&ctrl->lock); 1091 if (!(ctrl->csts & NVME_CSTS_CFS)) { 1092 ctrl->csts |= NVME_CSTS_CFS; 1093 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1094 schedule_work(&ctrl->fatal_err_work); 1095 } 1096 mutex_unlock(&ctrl->lock); 1097 } 1098 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); 1099 1100 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, 1101 const char *subsysnqn) 1102 { 1103 struct nvmet_subsys_link *p; 1104 1105 if (!port) 1106 return NULL; 1107 1108 if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, 1109 NVMF_NQN_SIZE)) { 1110 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) 1111 return NULL; 1112 return nvmet_disc_subsys; 1113 } 1114 1115 down_read(&nvmet_config_sem); 1116 list_for_each_entry(p, &port->subsystems, entry) { 1117 if (!strncmp(p->subsys->subsysnqn, subsysnqn, 1118 NVMF_NQN_SIZE)) { 1119 if (!kref_get_unless_zero(&p->subsys->ref)) 1120 break; 1121 up_read(&nvmet_config_sem); 1122 return p->subsys; 1123 } 1124 } 1125 up_read(&nvmet_config_sem); 1126 return NULL; 1127 } 1128 1129 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, 1130 enum nvme_subsys_type type) 1131 { 1132 struct nvmet_subsys *subsys; 1133 1134 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); 1135 if (!subsys) 1136 return NULL; 1137 1138 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */ 1139 /* generate a random serial number as our controllers are ephemeral: */ 1140 get_random_bytes(&subsys->serial, sizeof(subsys->serial)); 1141 1142 switch (type) { 1143 case NVME_NQN_NVME: 1144 subsys->max_qid = NVMET_NR_QUEUES; 1145 break; 1146 case NVME_NQN_DISC: 1147 subsys->max_qid = 0; 1148 break; 1149 default: 1150 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); 1151 kfree(subsys); 1152 return NULL; 1153 } 1154 subsys->type = type; 1155 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, 1156 GFP_KERNEL); 1157 if (!subsys->subsysnqn) { 1158 kfree(subsys); 1159 return NULL; 1160 } 1161 1162 kref_init(&subsys->ref); 1163 1164 mutex_init(&subsys->lock); 1165 INIT_LIST_HEAD(&subsys->namespaces); 1166 INIT_LIST_HEAD(&subsys->ctrls); 1167 INIT_LIST_HEAD(&subsys->hosts); 1168 1169 return subsys; 1170 } 1171 1172 static void nvmet_subsys_free(struct kref *ref) 1173 { 1174 struct nvmet_subsys *subsys = 1175 container_of(ref, struct nvmet_subsys, ref); 1176 1177 WARN_ON_ONCE(!list_empty(&subsys->namespaces)); 1178 1179 kfree(subsys->subsysnqn); 1180 kfree(subsys); 1181 } 1182 1183 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) 1184 { 1185 struct nvmet_ctrl *ctrl; 1186 1187 mutex_lock(&subsys->lock); 1188 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 1189 ctrl->ops->delete_ctrl(ctrl); 1190 mutex_unlock(&subsys->lock); 1191 } 1192 1193 void nvmet_subsys_put(struct nvmet_subsys *subsys) 1194 { 1195 kref_put(&subsys->ref, nvmet_subsys_free); 1196 } 1197 1198 static int __init nvmet_init(void) 1199 { 1200 int error; 1201 1202 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; 1203 1204 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", 1205 WQ_MEM_RECLAIM, 0); 1206 if (!buffered_io_wq) { 1207 error = -ENOMEM; 1208 goto out; 1209 } 1210 1211 error = nvmet_init_discovery(); 1212 if (error) 1213 goto out_free_work_queue; 1214 1215 error = nvmet_init_configfs(); 1216 if (error) 1217 goto out_exit_discovery; 1218 return 0; 1219 1220 out_exit_discovery: 1221 nvmet_exit_discovery(); 1222 out_free_work_queue: 1223 destroy_workqueue(buffered_io_wq); 1224 out: 1225 return error; 1226 } 1227 1228 static void __exit nvmet_exit(void) 1229 { 1230 nvmet_exit_configfs(); 1231 nvmet_exit_discovery(); 1232 ida_destroy(&cntlid_ida); 1233 destroy_workqueue(buffered_io_wq); 1234 1235 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); 1236 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); 1237 } 1238 1239 module_init(nvmet_init); 1240 module_exit(nvmet_exit); 1241 1242 MODULE_LICENSE("GPL v2"); 1243