/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures write lock should be obtained,
 * while when reading (populating discovery log page or checking host-subsystem
 * link) read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen ||
		    !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
{
	if (!(READ_ONCE(ctrl->aen_enabled) & aen))
		return true;
	return test_and_set_bit(aen, &ctrl->aen_masked);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the implementation
	 * of the Identify Namespace List subcommand.
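	 * A new namespace is therefore inserted in front of the first
	 * existing entry with a larger NSID, keeping the list ordered by
	 * NSID without a separate sort pass.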
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	uuid_gen(&ns->uuid);

	return ns;
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

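/*
 * Tear down a submission queue: outstanding AERs on the admin queue are
 * completed first, then the queue's percpu reference is killed and we wait
 * for both the kill confirmation and the final release before dropping the
 * controller reference.
 */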
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns))
		return NVME_SC_INVALID_NS | NVME_SC_DNR;

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->ns = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
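	 * The check below therefore rejects any SGL/PSDT setting other
	 * than NVME_CMD_SGL_METABUF.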
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_remove_ida;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to clean up stale discovery sessions.
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_remove_ida:
	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
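	/* keep a private, NUL-terminated copy of the subsystem NQN */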
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");