// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * held; when reading (populating the discovery log page or checking a
 * host-subsystem link), the read lock is taken to allow concurrent readers.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case 0:
		status = NVME_SC_SUCCESS;
		break;
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		/* FALLTHRU */
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}
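/*
 * Return the highest NSID currently in use by the subsystem, or 0 if no
 * namespaces are configured.  The namespaces list is kept sorted by NSID,
 * so the last entry always holds the maximum.
 */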
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}
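/*
 * Notify every controller connected through @port, on any subsystem exported
 * via this port, that the ANA state of one of its groups has changed.
 */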
void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->port == port)
			ctrl->ops->delete_ctrl(ctrl);
	}
	mutex_unlock(&subsys->lock);
}
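/*
 * Bring up a port: look up the transport for the configured trtype (loading
 * the nvmet-transport-<trtype> module on demand), take a reference on the
 * transport module and call its add_port() callback.  Called with
 * nvmet_config_sem held for writing.
 */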
int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	port->tr_ops = ops;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not.  We'll find the actual device to use once we
		 * set up the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}
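/*
 * Pick a P2P memory device usable by both the controller's port device and
 * the namespace's backing device, take a reference on it, and record it in
 * the controller's p2p_ns_map radix tree (indexed by NSID) for later SGL
 * allocations.
 */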
/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}
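/*
 * Enable a configured namespace: set up the block-device or file backend,
 * optionally validate peer-to-peer memory, initialize the per-namespace
 * percpu reference, insert the namespace into the subsystem's NSID-sorted
 * list and notify connected hosts of the namespace change.
 */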
int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;
	if (ns->enabled)
		goto out_unlock;

	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as a RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}
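/*
 * Fill in the completion queue entry (SQ head pointer, SQ id, command id),
 * record any error status in the controller's error log, drop the namespace
 * reference and hand the response back to the transport driver.
 */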
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}
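/*
 * Tear down a submission queue: fail any outstanding AER commands on the
 * admin queue, kill the queue's percpu reference and wait until all in-flight
 * requests have completed before dropping the controller reference.
 */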
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}
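/*
 * Initialize a request received from the transport driver and parse the
 * command: reject fused commands and invalid SGL descriptors, dispatch to the
 * connect, admin or I/O parser, and take a reference on the submission queue.
 * Returns false (and completes the request with an error) if the command
 * cannot be executed.
 */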
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	trace_nvmet_req_init(req, req->cmd);

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
{
	if (unlikely(data_len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_data_len);

int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = NULL;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
		if (req->sq->ctrl && req->ns)
			p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
						    req->ns->nsid);

		req->p2p_dev = NULL;
		if (req->sq->qid && p2p_dev) {
			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
						       req->transfer_len);
			if (req->sg) {
				req->p2p_dev = p2p_dev;
				return 0;
			}
		}

		/*
		 * If no P2P memory was available we fall back to using
		 * regular memory.
		 */
	}

	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
	if (unlikely(!req->sg))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

void nvmet_req_free_sgl(struct nvmet_req *req)
{
	if (req->p2p_dev)
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
	else
		sgl_free(req->sg);

	req->sg = NULL;
	req->sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
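/*
 * Called when the host sets CC.EN: validate the queue entry sizes, arbitration
 * mechanism, memory page size and command set selected in CC, then report
 * CSTS.RDY (or CSTS.CFS on invalid settings) and re-arm the keep-alive timer.
 */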
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and clean
	 * up in case a host died before it enabled the controller.  Hence,
	 * simply reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}
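/*
 * Check whether @hostnqn may connect to @subsys: discovery subsystems and
 * subsystems with allow_any_host set accept every host, otherwise the host
 * must appear on the subsystem's allowed-hosts list.
 */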
bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct nvmet_req *req)
{
	struct nvmet_ns *ns;

	if (!req->p2p_client)
		return;

	ctrl->p2p_client = get_device(req->p2p_client);

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	ctrl->port = req->port;

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;
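	/* Allocate a unique controller ID for this association. */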
	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to clean up stale discovery sessions
	 */
	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}
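/*
 * Allocate and initialize a subsystem of the given type (NVMe I/O or
 * discovery), assign a random serial number, duplicate the subsystem NQN and
 * set up the namespace, controller and allowed-host lists.
 */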
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return ERR_PTR(-EINVAL);
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");