// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures, the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		/* FALLTHRU */
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

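	/*
	 * The namespaces list is kept sorted by ascending NSID (see
	 * nvmet_ns_enable()), so the last entry carries the highest NSID.
	 */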
	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

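/*
 * Fan an ANA change event out to every subsystem exported through this port.
 * The port->subsystems list is walked with the configuration semaphore held
 * for read, matching the other readers of that list in this file.
 */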
void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
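	/*
	 * Take the per-cpu reference while still inside the RCU read-side
	 * critical section, so the namespace cannot be torn down between the
	 * lookup and the reference acquisition (see nvmet_ns_disable()).
	 */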
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * setup the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
				    struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;
	if (ns->enabled)
		goto out_unlock;

	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespaces from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

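	/*
	 * Record the failure in the controller's circular error log, which
	 * backs the Error Information log page; err_counter only ever grows,
	 * so the modulo below picks the next slot to overwrite.
	 */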
	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);
	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

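/*
 * Queue teardown waits for two completions: confirm_done fires from
 * nvmet_confirm_sq() once the percpu ref has been switched to atomic mode,
 * and free_done fires from nvmet_sq_free() once the last reference is gone.
 */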
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	} else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = NULL;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
		if (req->sq->ctrl && req->ns)
			p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
						    req->ns->nsid);

		req->p2p_dev = NULL;
		if (req->sq->qid && p2p_dev) {
			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
						       req->transfer_len);
			if (req->sg) {
				req->p2p_dev = p2p_dev;
				return 0;
			}
		}

		/*
		 * If no P2P memory was available we fallback to using
		 * regular memory
		 */
	}

	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
	if (!req->sg)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

void nvmet_req_free_sgl(struct nvmet_req *req)
{
	if (req->p2p_dev)
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
	else
		sgl_free(req->sg);

	req->sg = NULL;
	req->sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

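/*
 * Example of the CC decoding above: a CC value of 0x00460001 has EN = 1,
 * IOSQES = 6 (64-byte SQ entries), IOCQES = 4 (16-byte CQ entries) and
 * MPS = AMS = CSS = 0, which is exactly the combination nvmet_start_ctrl()
 * below accepts from a spec-compliant NVMe host.
 */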
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct nvmet_req *req)
{
	struct nvmet_ns *ns;

	if (!req->p2p_client)
		return;

	ctrl->p2p_client = get_device(req->p2p_client);

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	ctrl->port = req->port;

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

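	/*
	 * One pointer slot per queue ID, including the admin queue (qid 0),
	 * hence max_qid + 1 entries for both the CQ and SQ arrays below.
	 */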
	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to cleanup stale discovery sessions
	 */
	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return ERR_PTR(-EINVAL);
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");