/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
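			/* no AEN queued or no AER command available: nothing to do */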
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
{
	if (!(READ_ONCE(ctrl->aen_enabled) & aen))
		return true;
	return test_and_set_bit(aen, &ctrl->aen_masked);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
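	/*
	 * Note (added commentary): nvmet_config_sem is held for writing here,
	 * so the transport entry looked up below cannot be unregistered while
	 * the port is being torn down.
	 */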

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
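	/*
	 * Illustration (added, not in the original source): with active NSIDs
	 * {1, 2, 5}, enabling NSID 3 walks the list until it reaches 5 and
	 * links the new entry in front of it, so the Active Namespace ID list
	 * (Identify, CNS 02h) can be built with a single in-order walk.
	 */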
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	uuid_gen(&ns->uuid);

	return ns;
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

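/*
 * Teardown note (added commentary): percpu_ref_kill_and_confirm() below calls
 * nvmet_confirm_sq() once the ref has been switched away from percpu mode,
 * while nvmet_sq_free() runs when the last reference is dropped.  Waiting for
 * both completions guarantees that no request still holds sq->ref by the time
 * the queue is released.
 */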
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns))
		return NVME_SC_INVALID_NS | NVME_SC_DNR;

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->ns = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_remove_ida;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to cleanup stale discovery sessions
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_remove_ida:
	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
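	/* the NQN is duplicated here and released in nvmet_subsys_free() */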
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");