/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}
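/*
 * Work item that delivers queued async events to the host: each pending AEN
 * is paired with an outstanding Asynchronous Event Request command and
 * completed with the encoded event result.  Processing stops as soon as
 * either no AENs are queued or no AER commands are left to complete.
 */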
static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}
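/*
 * Look up a namespace by NSID under rcu_read_lock() and, if found, take a
 * percpu reference on it.  The caller must drop the reference with
 * nvmet_put_namespace() once it is done with the namespace.
 */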
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
			NULL);
	if (IS_ERR(ns->bdev)) {
		pr_err("failed to open block device %s: (%ld)\n",
			ns->device_path, PTR_ERR(ns->bdev));
		ret = PTR_ERR(ns->bdev);
		ns->bdev = NULL;
		goto out_unlock;
	}

	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_blkdev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_blkdev_put:
	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
	ns->bdev = NULL;
	goto out_unlock;
}
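/*
 * Disable a namespace: unlink it from the subsystem, wait for all in-flight
 * references to drain, notify the attached controllers with an AEN and
 * release the backing block device.  Calling this on a namespace that is
 * not enabled is a no-op.
 */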
void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	if (ns->bdev)
		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	uuid_gen(&ns->uuid);

	return ns;
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}
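/*
 * Queue teardown waits on two completions: confirm_done fires from
 * nvmet_confirm_sq() once the percpu ref has been killed and no new
 * requests can enter the queue, and free_done fires from nvmet_sq_free()
 * once the last outstanding request reference has been dropped.
 */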
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/* either variant of SGLs is fine, as we don't support metadata */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
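/*
 * Helpers to extract the individual fields of the Controller Configuration
 * (CC) property written by the host: enable (EN), command set selected (CSS),
 * memory page size (MPS), arbitration mechanism (AMS), shutdown notification
 * (SHN) and the I/O submission/completion queue entry sizes (IOSQES/IOCQES).
 */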
static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		req->ns = NULL;
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}
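/*
 * A host may connect to a subsystem if the subsystem accepts any host, or
 * if the host NQN is on the subsystem's allowed-hosts list.  Called with
 * nvmet_config_sem held (see nvmet_host_allowed below).
 */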
static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}
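/*
 * Allocate and initialize a controller for a new association: check that the
 * host is allowed to connect, set up the CAP property and async event state,
 * allocate the per-queue arrays and a controller ID, and start the keep-alive
 * timer.  On success the new controller is linked into the subsystem's
 * controller list and returned through *ctrlp.
 */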
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_free_sqs;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to clean up stale discovery sessions.
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	nvmet_stop_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
	nvmet_subsys_put(subsys);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}
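/*
 * Allocate a subsystem of the given type (NVMe or discovery), report NVMe
 * 1.3.0 as the supported version and generate a random serial number, as
 * target controllers are ephemeral.
 */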
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");