/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

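/*
 * AEN handling: nvmet_add_async_event() queues an event on ctrl->async_events
 * and kicks async_event_work; the work function below pairs each queued event
 * with an outstanding Asynchronous Event Request command
 * (ctrl->async_event_cmds) and completes it with the encoded result.
 */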
static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

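/*
 * Illustrative sketch (not part of this file, names are hypothetical): a
 * fabrics transport module fills in a struct nvmet_fabrics_ops and hands it
 * to nvmet_register_transport() from its module init, e.g.:
 *
 *	static struct nvmet_fabrics_ops nvmet_foo_ops = {
 *		.owner		= THIS_MODULE,
 *		.type		= NVMF_TRTYPE_RDMA,
 *		.add_port	= nvmet_foo_add_port,
 *		.remove_port	= nvmet_foo_remove_port,
 *		.queue_response	= nvmet_foo_queue_response,
 *		.delete_ctrl	= nvmet_foo_delete_ctrl,
 *	};
 *
 * Registration fails with -EINVAL if another transport already claimed the
 * same ->type slot; nvmet_unregister_transport() releases the slot again.
 */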
int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

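/*
 * Keep-alive: ctrl->kato is kept in seconds; the delayed work below fires
 * kato * HZ jiffies after it was armed and treats expiry as a controller
 * fatal error.
 */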
static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
			NULL);
	if (IS_ERR(ns->bdev)) {
		pr_err("failed to open block device %s: (%ld)\n",
		       ns->device_path, PTR_ERR(ns->bdev));
		ret = PTR_ERR(ns->bdev);
		ns->bdev = NULL;
		goto out_unlock;
	}

	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_blkdev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_blkdev_put:
	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
	ns->bdev = NULL;
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	if (ns->bdev)
		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	uuid_gen(&ns->uuid);

	return ns;
}

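/*
 * Completion path: advance the submission queue head lock-free (cmpxchg loop,
 * wrapping modulo the queue size) and report it in the CQE together with the
 * SQ id and command id, then hand the response to the transport via
 * ->queue_response().
 */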
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->ns = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/* either variant of SGLs is fine, as we don't support metadata */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

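/*
 * Helpers extracting the individual fields (EN, CSS, MPS, AMS, SHN, IOSQES,
 * IOCQES) from a Controller Configuration (CC) register value.
 */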
static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		req->ns = NULL;
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

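/*
 * Host access control: a host NQN is admitted to a subsystem either because
 * allow_any_host is set or because it appears on the subsystem's allowed
 * hosts list; for the discovery subsystem it is enough that any subsystem
 * on the port would accept the host.
 */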
static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_remove_ida;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to cleanup stale discovery sessions
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_remove_ida:
	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

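/*
 * Look up a subsystem by NQN for a connect on @port and take a reference:
 * the well-known discovery NQN maps to the global discovery subsystem,
 * everything else must be linked to the port (checked under
 * nvmet_config_sem).
 */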
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");