/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures write lock should be obtained,
 * while when reading (populating discovery log page or checking host-subsystem
 * link) read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

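/*
 * Queue an asynchronous event for @ctrl and kick the worker that pairs
 * pending events with outstanding Asynchronous Event Request commands.
 */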
static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

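/*
 * percpu_ref release callback: all references to the namespace are gone,
 * so let nvmet_ns_disable() finish the teardown.
 */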
static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
			NULL);
	if (IS_ERR(ns->bdev)) {
		pr_err("failed to open block device %s: (%ld)\n",
			ns->device_path, PTR_ERR(ns->bdev));
		ret = PTR_ERR(ns->bdev);
		ns->bdev = NULL;
		goto out_unlock;
	}

	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_blkdev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the implementation
	 * of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_blkdev_put:
	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
	ns->bdev = NULL;
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock(). Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	if (ns->bdev)
		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
out_unlock:
	mutex_unlock(&subsys->lock);
}

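/*
 * Final teardown of a namespace: disable it if it is still enabled, then
 * release the memory that backs it.
 */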
void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;
	uuid_gen(&ns->uuid);

	return ns;
}

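/*
 * Build the completion queue entry for @req. The submission queue head
 * pointer is advanced with a lockless cmpxchg() loop, as completions may
 * run concurrently; only the low 16 bits are reported back to the host
 * in the CQE.
 */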
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	u32 old_sqhd, new_sqhd;
	u16 sqhd;

	if (status)
		nvmet_set_status(req, status);

	if (req->sq->size) {
		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	sqhd = req->sq->sqhd & 0x0000FFFF;
	req->rsp->sq_head = cpu_to_le16(sqhd);
	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->rsp->status = 0;
	req->ns = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len))
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

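/*
 * The host has enabled the controller via CC.EN. Validate the CC fields
 * against the values this target supports (fixed queue entry sizes, and
 * MPS, AMS and CSS of zero) and report ready or controller fatal status
 * accordingly.
 */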
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

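/*
 * Look up an existing controller in the subsystem identified by @subsysnqn
 * by its controller ID. The connecting host's NQN must match the one that
 * created the controller; a reference is taken on success.
 */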
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
			cmd->common.opcode, req->sq->qid);
		req->ns = NULL;
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

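/*
 * Allocate and set up a new controller: verify that the connecting host is
 * allowed to access the subsystem, assign a controller ID from cntlid_ida
 * and start the keep-alive timer.
 */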
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_remove_ida;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to clean up stale discovery sessions
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_remove_ida:
	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

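/*
 * Resolve @subsysnqn to a subsystem reachable through @port and take a
 * reference on it. The well-known discovery subsystem NQN maps to the
 * global discovery subsystem without consulting the port's subsystem list.
 */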
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");