/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>

#include "nvme.h"

#define NVME_MINORS		(1U << MINORBITS)

unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
EXPORT_SYMBOL_GPL(admin_timeout);

unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);

unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");

static int nvme_major;
module_param(nvme_major, int, 0);

static int nvme_char_major;
module_param(nvme_char_major, int, 0);

static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);

static struct class *nvme_class;

static void nvme_free_ns(struct kref *kref)
{
        struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);

        if (ns->type == NVME_NS_LIGHTNVM)
                nvme_nvm_unregister(ns->queue, ns->disk->disk_name);

        spin_lock(&dev_list_lock);
        ns->disk->private_data = NULL;
        spin_unlock(&dev_list_lock);

        put_disk(ns->disk);
        ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
        nvme_put_ctrl(ns->ctrl);
        kfree(ns);
}

static void nvme_put_ns(struct nvme_ns *ns)
{
        kref_put(&ns->kref, nvme_free_ns);
}

static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
{
        struct nvme_ns *ns;

        spin_lock(&dev_list_lock);
        ns = disk->private_data;
        if (ns) {
                if (!kref_get_unless_zero(&ns->kref))
                        goto fail;
                if (!try_module_get(ns->ctrl->ops->module))
                        goto fail_put_ns;
        }
        spin_unlock(&dev_list_lock);

        return ns;

fail_put_ns:
        kref_put(&ns->kref, nvme_free_ns);
fail:
        spin_unlock(&dev_list_lock);
        return NULL;
}

void nvme_requeue_req(struct request *req)
{
        unsigned long flags;

        blk_mq_requeue_request(req);
        spin_lock_irqsave(req->q->queue_lock, flags);
        if (!blk_queue_stopped(req->q))
                blk_mq_kick_requeue_list(req->q);
        spin_unlock_irqrestore(req->q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(nvme_requeue_req);
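
/*
 * NVMe encodes the data transfer direction in the two low bits of the
 * opcode: bit 0 is set for commands that transfer data from the host to
 * the controller (e.g. write), which is why nvme_alloc_request() below
 * treats any odd opcode as a write when allocating the request.
 */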
struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags)
{
        bool write = cmd->common.opcode & 1;
        struct request *req;

        req = blk_mq_alloc_request(q, write, flags);
        if (IS_ERR(req))
                return req;

        req->cmd_type = REQ_TYPE_DRV_PRIV;
        req->cmd_flags |= REQ_FAILFAST_DRIVER;
        req->__data_len = 0;
        req->__sector = (sector_t) -1;
        req->bio = req->biotail = NULL;

        req->cmd = (unsigned char *)cmd;
        req->cmd_len = sizeof(struct nvme_command);

        return req;
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);

/*
 * Returns 0 on success.  If the result is negative, it's a Linux error code;
 * if the result is positive, it's an NVM Express status code
 */
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                struct nvme_completion *cqe, void *buffer, unsigned bufflen,
                unsigned timeout)
{
        struct request *req;
        int ret;

        req = nvme_alloc_request(q, cmd, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
        req->special = cqe;

        if (buffer && bufflen) {
                ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
                if (ret)
                        goto out;
        }

        blk_execute_rq(req->q, NULL, req, 0);
        ret = req->errors;
out:
        blk_mq_free_request(req);
        return ret;
}

int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buffer, unsigned bufflen)
{
        return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0);
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
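
/*
 * Typical use of the synchronous helpers above, as in the identify
 * helpers further down (illustrative sketch only; real callers check
 * for a negative errno or a positive NVMe status code on return):
 *
 *	struct nvme_command c = { };
 *
 *	c.identify.opcode = nvme_admin_identify;
 *	c.identify.cns = cpu_to_le32(1);
 *	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, buf, buf_len);
 */
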
int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen,
                void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
                u32 *result, unsigned timeout)
{
        bool write = cmd->common.opcode & 1;
        struct nvme_completion cqe;
        struct nvme_ns *ns = q->queuedata;
        struct gendisk *disk = ns ? ns->disk : NULL;
        struct request *req;
        struct bio *bio = NULL;
        void *meta = NULL;
        int ret;

        req = nvme_alloc_request(q, cmd, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
        req->special = &cqe;

        if (ubuffer && bufflen) {
                ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
                                GFP_KERNEL);
                if (ret)
                        goto out;
                bio = req->bio;

                if (!disk)
                        goto submit;
                bio->bi_bdev = bdget_disk(disk, 0);
                if (!bio->bi_bdev) {
                        ret = -ENODEV;
                        goto out_unmap;
                }

                if (meta_buffer && meta_len) {
                        struct bio_integrity_payload *bip;

                        meta = kmalloc(meta_len, GFP_KERNEL);
                        if (!meta) {
                                ret = -ENOMEM;
                                goto out_unmap;
                        }

                        if (write) {
                                if (copy_from_user(meta, meta_buffer,
                                                meta_len)) {
                                        ret = -EFAULT;
                                        goto out_free_meta;
                                }
                        }

                        bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
                        if (IS_ERR(bip)) {
                                ret = PTR_ERR(bip);
                                goto out_free_meta;
                        }

                        bip->bip_iter.bi_size = meta_len;
                        bip->bip_iter.bi_sector = meta_seed;

                        ret = bio_integrity_add_page(bio, virt_to_page(meta),
                                        meta_len, offset_in_page(meta));
                        if (ret != meta_len) {
                                ret = -ENOMEM;
                                goto out_free_meta;
                        }
                }
        }
submit:
        blk_execute_rq(req->q, disk, req, 0);
        ret = req->errors;
        if (result)
                *result = le32_to_cpu(cqe.result);
        if (meta && !ret && !write) {
                if (copy_to_user(meta_buffer, meta, meta_len))
                        ret = -EFAULT;
        }
out_free_meta:
        kfree(meta);
out_unmap:
        if (bio) {
                if (disk && bio->bi_bdev)
                        bdput(bio->bi_bdev);
                blk_rq_unmap_user(bio);
        }
out:
        blk_mq_free_request(req);
        return ret;
}

int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen, u32 *result,
                unsigned timeout)
{
        return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
                        result, timeout);
}

int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
        struct nvme_command c = { };
        int error;

        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
        c.identify.cns = cpu_to_le32(1);

        *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
                        sizeof(struct nvme_id_ctrl));
        if (error)
                kfree(*id);
        return error;
}

static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid,
                __le32 *ns_list)
{
        struct nvme_command c = { };

        c.identify.opcode = nvme_admin_identify;
        c.identify.cns = cpu_to_le32(2);
        c.identify.nsid = cpu_to_le32(nsid);
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}

int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id)
{
        struct nvme_command c = { };
        int error;

        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
        c.identify.nsid = cpu_to_le32(nsid);

        *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
                        sizeof(struct nvme_id_ns));
        if (error)
                kfree(*id);
        return error;
}
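
/*
 * Identify CNS values used by the helpers above: CNS 0 (the default left
 * by the zero initializer) returns the Identify Namespace structure,
 * CNS 1 the Identify Controller structure, and CNS 2 (NVMe 1.1+) a 4KB
 * list of up to 1024 active namespace IDs greater than the NSID passed
 * in the command.
 */
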
int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
                dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;
        struct nvme_completion cqe;
        int ret;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
        c.features.nsid = cpu_to_le32(nsid);
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);

        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
        if (ret >= 0)
                *result = le32_to_cpu(cqe.result);
        return ret;
}

int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;
        struct nvme_completion cqe;
        int ret;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_set_features;
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);

        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
        if (ret >= 0)
                *result = le32_to_cpu(cqe.result);
        return ret;
}

int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
{
        struct nvme_command c = { };
        int error;

        c.common.opcode = nvme_admin_get_log_page;
        c.common.nsid = cpu_to_le32(0xFFFFFFFF);
        c.common.cdw10[0] = cpu_to_le32(
                        (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
                         NVME_LOG_SMART);

        *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
        if (!*log)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
                        sizeof(struct nvme_smart_log));
        if (error)
                kfree(*log);
        return error;
}

int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
        u32 q_count = (*count - 1) | ((*count - 1) << 16);
        u32 result;
        int status, nr_io_queues;

        status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
                        &result);
        if (status)
                return status;

        nr_io_queues = min(result & 0xffff, result >> 16) + 1;
        *count = min(*count, nr_io_queues);
        return 0;
}
EXPORT_SYMBOL_GPL(nvme_set_queue_count);

static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
        void __user *metadata;

        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
        if (io.flags)
                return -EINVAL;

        switch (io.opcode) {
        case nvme_cmd_write:
        case nvme_cmd_read:
        case nvme_cmd_compare:
                break;
        default:
                return -EINVAL;
        }

        length = (io.nblocks + 1) << ns->lba_shift;
        meta_len = (io.nblocks + 1) * ns->ms;
        metadata = (void __user *)(uintptr_t)io.metadata;

        if (ns->ext) {
                length += meta_len;
                meta_len = 0;
        } else if (meta_len) {
                if ((io.metadata & 3) || !io.metadata)
                        return -EINVAL;
        }

        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
        c.rw.nsid = cpu_to_le32(ns->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
        c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
        c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.apptag = cpu_to_le16(io.apptag);
        c.rw.appmask = cpu_to_le16(io.appmask);

        return __nvme_submit_user_cmd(ns->queue, &c,
                        (void __user *)(uintptr_t)io.addr, length,
                        metadata, meta_len, io.slba, NULL, 0);
}
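
/*
 * Illustrative user-space use of the NVME_IOCTL_SUBMIT_IO path handled
 * above (sketch only, error handling omitted; "fd" is an open namespace
 * block device such as /dev/nvme0n1):
 *
 *	struct nvme_user_io io = { 0 };
 *
 *	io.opcode  = nvme_cmd_read;
 *	io.slba    = 0;				// starting LBA
 *	io.nblocks = 7;				// 0's based: 8 blocks
 *	io.addr    = (__u64)(uintptr_t)buf;	// 8 * block size bytes
 *	ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
 */
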
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                struct nvme_passthru_cmd __user *ucmd)
{
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        int status;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
                return -EINVAL;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
        c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
        c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
        c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
        c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
        c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        &cmd.result, timeout);
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;

        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
                return ns->ns_id;
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
        case NVME_IOCTL_IO_CMD:
                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
#ifdef CONFIG_BLK_DEV_NVME_SCSI
        case SG_GET_VERSION_NUM:
                return nvme_sg_get_version_num((void __user *)arg);
        case SG_IO:
                return nvme_sg_io(ns, (void __user *)arg);
#endif
        default:
                return -ENOTTY;
        }
}

#ifdef CONFIG_COMPAT
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case SG_IO:
                return -ENOIOCTLCMD;
        }
        return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif
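
/*
 * nvme_open()/nvme_release() below pair two references: a kref on the
 * namespace (dropped again via nvme_put_ns()) and a module reference on
 * the transport driver, so neither the namespace nor the ops provider
 * can go away while the block device is held open.
 */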
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
        return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
}

static void nvme_release(struct gendisk *disk, fmode_t mode)
{
        struct nvme_ns *ns = disk->private_data;

        module_put(ns->ctrl->ops->module);
        nvme_put_ns(ns);
}

static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
        /* some standard values */
        geo->heads = 1 << 6;
        geo->sectors = 1 << 5;
        geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
        return 0;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct nvme_ns *ns)
{
        struct blk_integrity integrity;

        memset(&integrity, 0, sizeof(integrity));
        switch (ns->pi_type) {
        case NVME_NS_DPS_PI_TYPE3:
                integrity.profile = &t10_pi_type3_crc;
                break;
        case NVME_NS_DPS_PI_TYPE1:
        case NVME_NS_DPS_PI_TYPE2:
                integrity.profile = &t10_pi_type1_crc;
                break;
        default:
                integrity.profile = NULL;
                break;
        }
        integrity.tuple_size = ns->ms;
        blk_integrity_register(ns->disk, &integrity);
        blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvme_config_discard(struct nvme_ns *ns)
{
        struct nvme_ctrl *ctrl = ns->ctrl;
        u32 logical_block_size = queue_logical_block_size(ns->queue);

        if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
                ns->queue->limits.discard_zeroes_data = 1;
        else
                ns->queue->limits.discard_zeroes_data = 0;

        ns->queue->limits.discard_alignment = logical_block_size;
        ns->queue->limits.discard_granularity = logical_block_size;
        blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}
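
/*
 * nvme_revalidate_disk() below re-reads the Identify Namespace data and
 * rebuilds the block layer state that depends on it: logical block size,
 * metadata/PI settings, capacity, and the discard configuration above.
 * The queue is frozen while the limits are updated so in-flight requests
 * never observe a half-updated configuration.
 */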
static int nvme_revalidate_disk(struct gendisk *disk)
{
        struct nvme_ns *ns = disk->private_data;
        struct nvme_id_ns *id;
        u8 lbaf, pi_type;
        u16 old_ms;
        unsigned short bs;

        if (test_bit(NVME_NS_DEAD, &ns->flags)) {
                set_capacity(disk, 0);
                return -ENODEV;
        }
        if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
                dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
                                __func__);
                return -ENODEV;
        }
        if (id->ncap == 0) {
                kfree(id);
                return -ENODEV;
        }

        if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
                if (nvme_nvm_register(ns->queue, disk->disk_name)) {
                        dev_warn(disk_to_dev(ns->disk),
                                "%s: LightNVM init failure\n", __func__);
                        kfree(id);
                        return -ENODEV;
                }
                ns->type = NVME_NS_LIGHTNVM;
        }

        if (ns->ctrl->vs >= NVME_VS(1, 1))
                memcpy(ns->eui, id->eui64, sizeof(ns->eui));
        if (ns->ctrl->vs >= NVME_VS(1, 2))
                memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));

        old_ms = ns->ms;
        lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->lba_shift = id->lbaf[lbaf].ds;
        ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
        ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

        /*
         * If identify namespace failed, use default 512 byte block size so
         * block layer can use before failing read/write for 0 capacity.
         */
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        bs = 1 << ns->lba_shift;
        /* XXX: PI implementation requires metadata equal t10 pi tuple size */
        pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
                                        id->dps & NVME_NS_DPS_PI_MASK : 0;

        blk_mq_freeze_queue(disk->queue);
        if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
                                ns->ms != old_ms ||
                                bs != queue_logical_block_size(disk->queue) ||
                                (ns->ms && ns->ext)))
                blk_integrity_unregister(disk);

        ns->pi_type = pi_type;
        blk_queue_logical_block_size(ns->queue, bs);

        if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
                nvme_init_integrity(ns);
        if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
                set_capacity(disk, 0);
        else
                set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));

        if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
                nvme_config_discard(ns);
        blk_mq_unfreeze_queue(disk->queue);

        kfree(id);
        return 0;
}

static char nvme_pr_type(enum pr_type type)
{
        switch (type) {
        case PR_WRITE_EXCLUSIVE:
                return 1;
        case PR_EXCLUSIVE_ACCESS:
                return 2;
        case PR_WRITE_EXCLUSIVE_REG_ONLY:
                return 3;
        case PR_EXCLUSIVE_ACCESS_REG_ONLY:
                return 4;
        case PR_WRITE_EXCLUSIVE_ALL_REGS:
                return 5;
        case PR_EXCLUSIVE_ACCESS_ALL_REGS:
                return 6;
        default:
                return 0;
        }
}

static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
                u64 key, u64 sa_key, u8 op)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;
        struct nvme_command c;
        u8 data[16] = { 0, };

        put_unaligned_le64(key, &data[0]);
        put_unaligned_le64(sa_key, &data[8]);

        memset(&c, 0, sizeof(c));
        c.common.opcode = op;
        c.common.nsid = cpu_to_le32(ns->ns_id);
        c.common.cdw10[0] = cpu_to_le32(cdw10);

        return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
}

static int nvme_pr_register(struct block_device *bdev, u64 old,
                u64 new, unsigned flags)
{
        u32 cdw10;

        if (flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;

        cdw10 = old ? 2 : 0;
        cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
        cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
}

static int nvme_pr_reserve(struct block_device *bdev, u64 key,
                enum pr_type type, unsigned flags)
{
        u32 cdw10;

        if (flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;

        cdw10 = nvme_pr_type(type) << 8;
        cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
}

static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
                enum pr_type type, bool abort)
{
        u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);

        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}

static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
        u32 cdw10 = 1 | (key ? 1 << 3 : 0);

        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
}

static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
        u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);

        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
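
/*
 * Layout of CDW10 for the reservation commands built above: bits 2:0
 * hold the action for the respective command (e.g. 1 = preempt, 2 =
 * preempt and abort for Reservation Acquire), bit 3 is IEKEY ("ignore
 * existing key"), bits 15:8 carry the NVMe reservation type, and for
 * Reservation Register bits 31:30 select the Persist Through Power
 * Loss (PTPL) state.
 */
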
static const struct pr_ops nvme_pr_ops = {
        .pr_register	= nvme_pr_register,
        .pr_reserve	= nvme_pr_reserve,
        .pr_release	= nvme_pr_release,
        .pr_preempt	= nvme_pr_preempt,
        .pr_clear	= nvme_pr_clear,
};

static const struct block_device_operations nvme_fops = {
        .owner		= THIS_MODULE,
        .ioctl		= nvme_ioctl,
        .compat_ioctl	= nvme_compat_ioctl,
        .open		= nvme_open,
        .release	= nvme_release,
        .getgeo		= nvme_getgeo,
        .revalidate_disk = nvme_revalidate_disk,
        .pr_ops		= &nvme_pr_ops,
};

static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
        unsigned long timeout =
                ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
        u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
        int ret;

        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
                if ((csts & NVME_CSTS_RDY) == bit)
                        break;

                msleep(100);
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(ctrl->device,
                                "Device not ready; aborting %s\n", enabled ?
                                                "initialisation" : "reset");
                        return -ENODEV;
                }
        }

        return ret;
}

/*
 * If the device has been passed off to us in an enabled state, just clear
 * the enabled bit.  The spec says we should set the 'shutdown notification
 * bits', but doing so may cause the device to complete commands to the
 * admin queue ... and we don't know what memory that might be pointing at!
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
        int ret;

        ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
        ctrl->ctrl_config &= ~NVME_CC_ENABLE;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
        return nvme_wait_ready(ctrl, cap, false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
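
/*
 * Controller bring-up: the page size (CC.MPS), arbitration method and
 * queue entry sizes must be configured before the enable bit is turned
 * on, which is why nvme_enable_ctrl() below builds the complete CC value
 * including NVME_CC_ENABLE and writes it with a single register write
 * before polling for CSTS.RDY via nvme_wait_ready().
 */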
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
        /*
         * Default to a 4K page size, with the intention to update this
         * path in the future to accommodate architectures with differing
         * kernel and IO page sizes.
         */
        unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
        int ret;

        if (page_shift < dev_page_min) {
                dev_err(ctrl->device,
                        "Minimum device page size %u too large for host (%u)\n",
                        1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
        }

        ctrl->page_size = 1 << page_shift;

        ctrl->ctrl_config = NVME_CC_CSS_NVM;
        ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
        ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
        ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
        ctrl->ctrl_config |= NVME_CC_ENABLE;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
        return nvme_wait_ready(ctrl, cap, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);

int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
        unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
        u32 csts;
        int ret;

        ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
        ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;

        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
                if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
                        break;

                msleep(100);
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(ctrl->device,
                                "Device shutdown incomplete; abort shutdown\n");
                        return -ENODEV;
                }
        }

        return ret;
}
EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);

static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                struct request_queue *q)
{
        if (ctrl->max_hw_sectors) {
                u32 max_segments =
                        (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;

                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
        if (ctrl->stripe_size)
                blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
                blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
        blk_queue_virt_boundary(q, ctrl->page_size - 1);
}
/*
 * Initialize the cached copies of the Identify data and various controller
 * registers in our nvme_ctrl structure.  This should be called as soon as
 * the admin queue is fully up and running.
 */
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
        struct nvme_id_ctrl *id;
        u64 cap;
        int ret, page_shift;

        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
                dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
                return ret;
        }

        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
        if (ret) {
                dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
                return ret;
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;

        if (ctrl->vs >= NVME_VS(1, 1))
                ctrl->subsystem = NVME_CAP_NSSRC(cap);

        ret = nvme_identify_ctrl(ctrl, &id);
        if (ret) {
                dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
                return -EIO;
        }

        ctrl->vid = le16_to_cpu(id->vid);
        ctrl->oncs = le16_to_cpup(&id->oncs);
        atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
        ctrl->cntlid = le16_to_cpup(&id->cntlid);
        memcpy(ctrl->serial, id->sn, sizeof(id->sn));
        memcpy(ctrl->model, id->mn, sizeof(id->mn));
        memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
        if (id->mdts)
                ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
        else
                ctrl->max_hw_sectors = UINT_MAX;

        if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
                unsigned int max_hw_sectors;

                ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
                max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
                if (ctrl->max_hw_sectors) {
                        ctrl->max_hw_sectors = min(max_hw_sectors,
                                                        ctrl->max_hw_sectors);
                } else {
                        ctrl->max_hw_sectors = max_hw_sectors;
                }
        }

        nvme_set_queue_limits(ctrl, ctrl->admin_q);

        kfree(id);
        return 0;
}
EXPORT_SYMBOL_GPL(nvme_init_identify);

static int nvme_dev_open(struct inode *inode, struct file *file)
{
        struct nvme_ctrl *ctrl;
        int instance = iminor(inode);
        int ret = -ENODEV;

        spin_lock(&dev_list_lock);
        list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
                if (ctrl->instance != instance)
                        continue;

                if (!ctrl->admin_q) {
                        ret = -EWOULDBLOCK;
                        break;
                }
                if (!kref_get_unless_zero(&ctrl->kref))
                        break;
                file->private_data = ctrl;
                ret = 0;
                break;
        }
        spin_unlock(&dev_list_lock);

        return ret;
}

static int nvme_dev_release(struct inode *inode, struct file *file)
{
        nvme_put_ctrl(file->private_data);
        return 0;
}

static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
        struct nvme_ns *ns;
        int ret;

        mutex_lock(&ctrl->namespaces_mutex);
        if (list_empty(&ctrl->namespaces)) {
                ret = -ENOTTY;
                goto out_unlock;
        }

        ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
                dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }

        dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        kref_get(&ns->kref);
        mutex_unlock(&ctrl->namespaces_mutex);

        ret = nvme_user_cmd(ctrl, ns, argp);
        nvme_put_ns(ns);
        return ret;

out_unlock:
        mutex_unlock(&ctrl->namespaces_mutex);
        return ret;
}
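
/*
 * Entry points for /dev/nvmeX, the per-controller character device.
 * Admin passthrough and controller/subsystem reset are handled here;
 * I/O passthrough on the char device is only kept for backwards
 * compatibility and is routed through nvme_dev_user_cmd() above.
 */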
static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        struct nvme_ctrl *ctrl = file->private_data;
        void __user *argp = (void __user *)arg;

        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp);
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
                dev_warn(ctrl->device, "resetting controller\n");
                return ctrl->ops->reset_ctrl(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
        default:
                return -ENOTTY;
        }
}

static const struct file_operations nvme_dev_fops = {
        .owner		= THIS_MODULE,
        .open		= nvme_dev_open,
        .release	= nvme_dev_release,
        .unlocked_ioctl	= nvme_dev_ioctl,
        .compat_ioctl	= nvme_dev_ioctl,
};

static ssize_t nvme_sysfs_reset(struct device *dev,
                                struct device_attribute *attr, const char *buf,
                                size_t count)
{
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
        int ret;

        ret = ctrl->ops->reset_ctrl(ctrl);
        if (ret < 0)
                return ret;
        return count;
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);

static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;
        struct nvme_ctrl *ctrl = ns->ctrl;
        int serial_len = sizeof(ctrl->serial);
        int model_len = sizeof(ctrl->model);

        if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
                return sprintf(buf, "eui.%16phN\n", ns->uuid);

        if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
                return sprintf(buf, "eui.%8phN\n", ns->eui);

        while (ctrl->serial[serial_len - 1] == ' ')
                serial_len--;
        while (ctrl->model[model_len - 1] == ' ')
                model_len--;

        return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
                serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id);
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;

        return sprintf(buf, "%pU\n", ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);

static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;

        return sprintf(buf, "%8phd\n", ns->eui);
}
static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);

static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;

        return sprintf(buf, "%d\n", ns->ns_id);
}
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);

static struct attribute *nvme_ns_attrs[] = {
        &dev_attr_wwid.attr,
        &dev_attr_uuid.attr,
        &dev_attr_eui.attr,
        &dev_attr_nsid.attr,
        NULL,
};

static umode_t nvme_attrs_are_visible(struct kobject *kobj,
                struct attribute *a, int n)
{
        struct device *dev = container_of(kobj, struct device, kobj);
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;

        if (a == &dev_attr_uuid.attr) {
                if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
                        return 0;
        }
        if (a == &dev_attr_eui.attr) {
                if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
                        return 0;
        }
        return a->mode;
}

static const struct attribute_group nvme_ns_attr_group = {
        .attrs		= nvme_ns_attrs,
        .is_visible	= nvme_attrs_are_visible,
};
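
/*
 * The namespace attributes above appear directly under the block
 * device, e.g. /sys/block/nvme0n1/wwid, while the controller attributes
 * defined below appear under the character device class directory,
 * e.g. /sys/class/nvme/nvme0/model.
 */
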
#define nvme_show_str_function(field)					\
static ssize_t field##_show(struct device *dev,				\
		struct device_attribute *attr, char *buf)		\
{									\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);			\
	return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \
}									\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

#define nvme_show_int_function(field)					\
static ssize_t field##_show(struct device *dev,				\
		struct device_attribute *attr, char *buf)		\
{									\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);			\
	return sprintf(buf, "%d\n", ctrl->field);			\
}									\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_str_function(model);
nvme_show_str_function(serial);
nvme_show_str_function(firmware_rev);
nvme_show_int_function(cntlid);

static struct attribute *nvme_dev_attrs[] = {
        &dev_attr_reset_controller.attr,
        &dev_attr_model.attr,
        &dev_attr_serial.attr,
        &dev_attr_firmware_rev.attr,
        &dev_attr_cntlid.attr,
        NULL
};

static struct attribute_group nvme_dev_attrs_group = {
        .attrs = nvme_dev_attrs,
};

static const struct attribute_group *nvme_dev_attr_groups[] = {
        &nvme_dev_attrs_group,
        NULL,
};

static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
        struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);

        return nsa->ns_id - nsb->ns_id;
}

static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;

        lockdep_assert_held(&ctrl->namespaces_mutex);

        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (ns->ns_id == nsid)
                        return ns;
                if (ns->ns_id > nsid)
                        break;
        }
        return NULL;
}
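
/*
 * ctrl->namespaces is kept sorted by nsid (see the list_sort() call in
 * nvme_scan_namespaces()), which is what allows nvme_find_ns() above to
 * stop searching as soon as it sees a larger nsid.
 */
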
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;
        struct gendisk *disk;
        int node = dev_to_node(ctrl->dev);

        lockdep_assert_held(&ctrl->namespaces_mutex);

        ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
        if (!ns)
                return;

        ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
        if (ns->instance < 0)
                goto out_free_ns;

        ns->queue = blk_mq_init_queue(ctrl->tagset);
        if (IS_ERR(ns->queue))
                goto out_release_instance;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
        ns->queue->queuedata = ns;
        ns->ctrl = ctrl;

        disk = alloc_disk_node(0, node);
        if (!disk)
                goto out_free_queue;

        kref_init(&ns->kref);
        ns->ns_id = nsid;
        ns->disk = disk;
        ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */

        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);

        disk->major = nvme_major;
        disk->first_minor = 0;
        disk->fops = &nvme_fops;
        disk->private_data = ns;
        disk->queue = ns->queue;
        disk->driverfs_dev = ctrl->device;
        disk->flags = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);

        if (nvme_revalidate_disk(ns->disk))
                goto out_free_disk;

        list_add_tail(&ns->list, &ctrl->namespaces);
        kref_get(&ctrl->kref);
        if (ns->type == NVME_NS_LIGHTNVM)
                return;

        add_disk(ns->disk);
        if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_attr_group))
                pr_warn("%s: failed to create sysfs group for identification\n",
                        ns->disk->disk_name);
        return;
out_free_disk:
        kfree(disk);
out_free_queue:
        blk_cleanup_queue(ns->queue);
out_release_instance:
        ida_simple_remove(&ctrl->ns_ida, ns->instance);
out_free_ns:
        kfree(ns);
}

static void nvme_ns_remove(struct nvme_ns *ns)
{
        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;

        if (ns->disk->flags & GENHD_FL_UP) {
                if (blk_get_integrity(ns->disk))
                        blk_integrity_unregister(ns->disk);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_attr_group);
                del_gendisk(ns->disk);
                blk_mq_abort_requeue_list(ns->queue);
                blk_cleanup_queue(ns->queue);
        }
        mutex_lock(&ns->ctrl->namespaces_mutex);
        list_del_init(&ns->list);
        mutex_unlock(&ns->ctrl->namespaces_mutex);
        nvme_put_ns(ns);
}

static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;

        ns = nvme_find_ns(ctrl, nsid);
        if (ns) {
                if (revalidate_disk(ns->disk))
                        nvme_ns_remove(ns);
        } else
                nvme_alloc_ns(ctrl, nsid);
}

static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
        struct nvme_ns *ns;
        __le32 *ns_list;
        unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
        int ret = 0;

        ns_list = kzalloc(0x1000, GFP_KERNEL);
        if (!ns_list)
                return -ENOMEM;

        for (i = 0; i < num_lists; i++) {
                ret = nvme_identify_ns_list(ctrl, prev, ns_list);
                if (ret)
                        goto out;

                for (j = 0; j < min(nn, 1024U); j++) {
                        nsid = le32_to_cpu(ns_list[j]);
                        if (!nsid)
                                goto out;

                        nvme_validate_ns(ctrl, nsid);

                        while (++prev < nsid) {
                                ns = nvme_find_ns(ctrl, prev);
                                if (ns)
                                        nvme_ns_remove(ns);
                        }
                }
                nn -= j;
        }
out:
        kfree(ns_list);
        return ret;
}

static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
{
        struct nvme_ns *ns, *next;
        unsigned i;

        lockdep_assert_held(&ctrl->namespaces_mutex);

        for (i = 1; i <= nn; i++)
                nvme_validate_ns(ctrl, i);

        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
                if (ns->ns_id > nn)
                        nvme_ns_remove(ns);
        }
}

void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
{
        struct nvme_id_ctrl *id;
        unsigned nn;

        if (nvme_identify_ctrl(ctrl, &id))
                return;

        mutex_lock(&ctrl->namespaces_mutex);
        nn = le32_to_cpu(id->nn);
        if (ctrl->vs >= NVME_VS(1, 1) &&
            !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
                if (!nvme_scan_ns_list(ctrl, nn))
                        goto done;
        }
        __nvme_scan_namespaces(ctrl, nn);
done:
        list_sort(NULL, &ctrl->namespaces, ns_cmp);
        mutex_unlock(&ctrl->namespaces_mutex);
        kfree(id);
}
EXPORT_SYMBOL_GPL(nvme_scan_namespaces);

void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns, *next;

        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
                nvme_ns_remove(ns);
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
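
/*
 * Controller instance numbers come from the legacy two-step IDA API
 * used below: ida_pre_get() preallocates memory outside the lock, and
 * ida_get_new() then does the actual, non-sleeping allocation under
 * dev_list_lock, retrying on -EAGAIN if a racing allocator consumed
 * the preallocation.
 */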
static DEFINE_IDA(nvme_instance_ida);

static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
        int instance, error;

        do {
                if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
                        return -ENODEV;

                spin_lock(&dev_list_lock);
                error = ida_get_new(&nvme_instance_ida, &instance);
                spin_unlock(&dev_list_lock);
        } while (error == -EAGAIN);

        if (error)
                return -ENODEV;

        ctrl->instance = instance;
        return 0;
}

static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
        spin_lock(&dev_list_lock);
        ida_remove(&nvme_instance_ida, ctrl->instance);
        spin_unlock(&dev_list_lock);
}

void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
        device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));

        spin_lock(&dev_list_lock);
        list_del(&ctrl->node);
        spin_unlock(&dev_list_lock);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);

static void nvme_free_ctrl(struct kref *kref)
{
        struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);

        put_device(ctrl->device);
        nvme_release_instance(ctrl);
        ida_destroy(&ctrl->ns_ida);

        ctrl->ops->free_ctrl(ctrl);
}

void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
        kref_put(&ctrl->kref, nvme_free_ctrl);
}
EXPORT_SYMBOL_GPL(nvme_put_ctrl);

/*
 * Initialize an NVMe controller structure.  This needs to be called during
 * earliest initialization so that we have the initialized structure around
 * during probing.
 */
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                const struct nvme_ctrl_ops *ops, unsigned long quirks)
{
        int ret;

        INIT_LIST_HEAD(&ctrl->namespaces);
        mutex_init(&ctrl->namespaces_mutex);
        kref_init(&ctrl->kref);
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;

        ret = nvme_set_instance(ctrl);
        if (ret)
                goto out;

        ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
                                MKDEV(nvme_char_major, ctrl->instance),
                                ctrl, nvme_dev_attr_groups,
                                "nvme%d", ctrl->instance);
        if (IS_ERR(ctrl->device)) {
                ret = PTR_ERR(ctrl->device);
                goto out_release_instance;
        }
        get_device(ctrl->device);
        ida_init(&ctrl->ns_ida);

        spin_lock(&dev_list_lock);
        list_add_tail(&ctrl->node, &nvme_ctrl_list);
        spin_unlock(&dev_list_lock);

        return 0;
out_release_instance:
        nvme_release_instance(ctrl);
out:
        return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
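
/*
 * A transport driver is expected to call nvme_init_ctrl() early in its
 * probe path and nvme_uninit_ctrl()/nvme_put_ctrl() on teardown; the
 * PCIe driver (pci.c) is an example of an in-tree user of this
 * interface.
 */
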
/**
 * nvme_kill_queues(): Ends all namespace queues
 * @ctrl: the dead controller that needs to end
 *
 * Call this function when the driver determines it is unable to get the
 * controller in a state capable of servicing IO.
 */
void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (!kref_get_unless_zero(&ns->kref))
                        continue;

                /*
                 * Revalidating a dead namespace sets capacity to 0. This will
                 * end buffered writers dirtying pages that can't be synced.
                 */
                if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
                        revalidate_disk(ns->disk);

                blk_set_queue_dying(ns->queue);
                blk_mq_abort_requeue_list(ns->queue);
                blk_mq_start_stopped_hw_queues(ns->queue, true);

                nvme_put_ns(ns);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);

void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                spin_lock_irq(ns->queue->queue_lock);
                queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
                spin_unlock_irq(ns->queue->queue_lock);

                blk_mq_cancel_requeue_work(ns->queue);
                blk_mq_stop_hw_queues(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);

void nvme_start_queues(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
                blk_mq_start_stopped_hw_queues(ns->queue, true);
                blk_mq_kick_requeue_list(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);

int __init nvme_core_init(void)
{
        int result;

        result = register_blkdev(nvme_major, "nvme");
        if (result < 0)
                return result;
        else if (result > 0)
                nvme_major = result;

        result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
                                                        &nvme_dev_fops);
        if (result < 0)
                goto unregister_blkdev;
        else if (result > 0)
                nvme_char_major = result;

        nvme_class = class_create(THIS_MODULE, "nvme");
        if (IS_ERR(nvme_class)) {
                result = PTR_ERR(nvme_class);
                goto unregister_chrdev;
        }

        return 0;

unregister_chrdev:
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_blkdev:
        unregister_blkdev(nvme_major, "nvme");
        return result;
}

void nvme_core_exit(void)
{
        unregister_blkdev(nvme_major, "nvme");
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}

MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
module_init(nvme_core_init);
module_exit(nvme_core_exit);