1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring.h> 9 #include "nvme.h" 10 11 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 12 fmode_t mode) 13 { 14 if (capable(CAP_SYS_ADMIN)) 15 return true; 16 17 /* 18 * Do not allow unprivileged processes to send vendor specific or fabrics 19 * commands as we can't be sure about their effects. 20 */ 21 if (c->common.opcode >= nvme_cmd_vendor_start || 22 c->common.opcode == nvme_fabrics_command) 23 return false; 24 25 /* 26 * Do not allow unprivileged passthrough of admin commands except 27 * for a subset of identify commands that contain information required 28 * to form proper I/O commands in userspace and do not expose any 29 * potentially sensitive information. 30 */ 31 if (!ns) { 32 if (c->common.opcode == nvme_admin_identify) { 33 switch (c->identify.cns) { 34 case NVME_ID_CNS_NS: 35 case NVME_ID_CNS_CS_NS: 36 case NVME_ID_CNS_NS_CS_INDEP: 37 case NVME_ID_CNS_CS_CTRL: 38 case NVME_ID_CNS_CTRL: 39 return true; 40 } 41 } 42 return false; 43 } 44 45 /* 46 * Only allow I/O commands that transfer data to the controller if the 47 * special file is open for writing, but always allow I/O commands that 48 * transfer data from the controller. 49 */ 50 if (nvme_is_write(c)) 51 return mode & FMODE_WRITE; 52 return true; 53 } 54 55 /* 56 * Convert integer values from ioctl structures to user pointers, silently 57 * ignoring the upper bits in the compat case to match behaviour of 32-bit 58 * kernels. 59 */ 60 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 61 { 62 if (in_compat_syscall()) 63 ptrval = (compat_uptr_t)ptrval; 64 return (void __user *)ptrval; 65 } 66 67 static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, 68 unsigned len, u32 seed) 69 { 70 struct bio_integrity_payload *bip; 71 int ret = -ENOMEM; 72 void *buf; 73 struct bio *bio = req->bio; 74 75 buf = kmalloc(len, GFP_KERNEL); 76 if (!buf) 77 goto out; 78 79 ret = -EFAULT; 80 if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) 81 goto out_free_meta; 82 83 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); 84 if (IS_ERR(bip)) { 85 ret = PTR_ERR(bip); 86 goto out_free_meta; 87 } 88 89 bip->bip_iter.bi_size = len; 90 bip->bip_iter.bi_sector = seed; 91 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 92 offset_in_page(buf)); 93 if (ret != len) { 94 ret = -ENOMEM; 95 goto out_free_meta; 96 } 97 98 req->cmd_flags |= REQ_INTEGRITY; 99 return buf; 100 out_free_meta: 101 kfree(buf); 102 out: 103 return ERR_PTR(ret); 104 } 105 106 static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, 107 void *meta, unsigned len, int ret) 108 { 109 if (!ret && req_op(req) == REQ_OP_DRV_IN && 110 copy_to_user(ubuf, meta, len)) 111 ret = -EFAULT; 112 kfree(meta); 113 return ret; 114 } 115 116 static struct request *nvme_alloc_user_request(struct request_queue *q, 117 struct nvme_command *cmd, blk_opf_t rq_flags, 118 blk_mq_req_flags_t blk_flags) 119 { 120 struct request *req; 121 122 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 123 if (IS_ERR(req)) 124 return req; 125 nvme_init_request(req, cmd); 126 nvme_req(req)->flags |= NVME_REQ_USERCMD; 127 return req; 128 } 129 130 static int nvme_map_user_request(struct request *req, u64 ubuffer, 131 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 132 u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, 133 bool vec) 134 { 135 struct request_queue *q = req->q; 136 struct nvme_ns *ns = q->queuedata; 137 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 138 struct bio *bio = NULL; 139 void *meta = NULL; 140 int ret; 141 142 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 143 struct iov_iter iter; 144 145 /* fixedbufs is only for non-vectored io */ 146 if (WARN_ON_ONCE(vec)) 147 return -EINVAL; 148 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 149 rq_data_dir(req), &iter, ioucmd); 150 if (ret < 0) 151 goto out; 152 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 153 } else { 154 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 155 bufflen, GFP_KERNEL, vec, 0, 0, 156 rq_data_dir(req)); 157 } 158 159 if (ret) 160 goto out; 161 bio = req->bio; 162 if (bdev) 163 bio_set_dev(bio, bdev); 164 165 if (bdev && meta_buffer && meta_len) { 166 meta = nvme_add_user_metadata(req, meta_buffer, meta_len, 167 meta_seed); 168 if (IS_ERR(meta)) { 169 ret = PTR_ERR(meta); 170 goto out_unmap; 171 } 172 *metap = meta; 173 } 174 175 return ret; 176 177 out_unmap: 178 if (bio) 179 blk_rq_unmap_user(bio); 180 out: 181 blk_mq_free_request(req); 182 return ret; 183 } 184 185 static int nvme_submit_user_cmd(struct request_queue *q, 186 struct nvme_command *cmd, u64 ubuffer, 187 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 188 u32 meta_seed, u64 *result, unsigned timeout, bool vec) 189 { 190 struct nvme_ctrl *ctrl; 191 struct request *req; 192 void *meta = NULL; 193 struct bio *bio; 194 u32 effects; 195 int ret; 196 197 req = nvme_alloc_user_request(q, cmd, 0, 0); 198 if (IS_ERR(req)) 199 return PTR_ERR(req); 200 201 req->timeout = timeout; 202 if (ubuffer && bufflen) { 203 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 204 meta_len, meta_seed, &meta, NULL, vec); 205 if (ret) 206 return ret; 207 } 208 209 bio = req->bio; 210 ctrl = nvme_req(req)->ctrl; 211 212 ret = nvme_execute_passthru_rq(req, &effects); 213 214 if (result) 215 *result = le64_to_cpu(nvme_req(req)->result.u64); 216 if (meta) 217 ret = nvme_finish_user_metadata(req, meta_buffer, meta, 218 meta_len, ret); 219 if (bio) 220 blk_rq_unmap_user(bio); 221 blk_mq_free_request(req); 222 223 if (effects) 224 nvme_passthru_end(ctrl, effects, cmd, ret); 225 226 return ret; 227 } 228 229 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 230 { 231 struct nvme_user_io io; 232 struct nvme_command c; 233 unsigned length, meta_len; 234 void __user *metadata; 235 236 if (copy_from_user(&io, uio, sizeof(io))) 237 return -EFAULT; 238 if (io.flags) 239 return -EINVAL; 240 241 switch (io.opcode) { 242 case nvme_cmd_write: 243 case nvme_cmd_read: 244 case nvme_cmd_compare: 245 break; 246 default: 247 return -EINVAL; 248 } 249 250 length = (io.nblocks + 1) << ns->lba_shift; 251 252 if ((io.control & NVME_RW_PRINFO_PRACT) && 253 ns->ms == sizeof(struct t10_pi_tuple)) { 254 /* 255 * Protection information is stripped/inserted by the 256 * controller. 257 */ 258 if (nvme_to_user_ptr(io.metadata)) 259 return -EINVAL; 260 meta_len = 0; 261 metadata = NULL; 262 } else { 263 meta_len = (io.nblocks + 1) * ns->ms; 264 metadata = nvme_to_user_ptr(io.metadata); 265 } 266 267 if (ns->features & NVME_NS_EXT_LBAS) { 268 length += meta_len; 269 meta_len = 0; 270 } else if (meta_len) { 271 if ((io.metadata & 3) || !io.metadata) 272 return -EINVAL; 273 } 274 275 memset(&c, 0, sizeof(c)); 276 c.rw.opcode = io.opcode; 277 c.rw.flags = io.flags; 278 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 279 c.rw.slba = cpu_to_le64(io.slba); 280 c.rw.length = cpu_to_le16(io.nblocks); 281 c.rw.control = cpu_to_le16(io.control); 282 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 283 c.rw.reftag = cpu_to_le32(io.reftag); 284 c.rw.apptag = cpu_to_le16(io.apptag); 285 c.rw.appmask = cpu_to_le16(io.appmask); 286 287 return nvme_submit_user_cmd(ns->queue, &c, 288 io.addr, length, 289 metadata, meta_len, lower_32_bits(io.slba), NULL, 0, 290 false); 291 } 292 293 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 294 struct nvme_ns *ns, __u32 nsid) 295 { 296 if (ns && nsid != ns->head->ns_id) { 297 dev_err(ctrl->device, 298 "%s: nsid (%u) in cmd does not match nsid (%u)" 299 "of namespace\n", 300 current->comm, nsid, ns->head->ns_id); 301 return false; 302 } 303 304 return true; 305 } 306 307 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 308 struct nvme_passthru_cmd __user *ucmd, fmode_t mode) 309 { 310 struct nvme_passthru_cmd cmd; 311 struct nvme_command c; 312 unsigned timeout = 0; 313 u64 result; 314 int status; 315 316 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 317 return -EFAULT; 318 if (cmd.flags) 319 return -EINVAL; 320 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 321 return -EINVAL; 322 323 memset(&c, 0, sizeof(c)); 324 c.common.opcode = cmd.opcode; 325 c.common.flags = cmd.flags; 326 c.common.nsid = cpu_to_le32(cmd.nsid); 327 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 328 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 329 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 330 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 331 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 332 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 333 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 334 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 335 336 if (!nvme_cmd_allowed(ns, &c, mode)) 337 return -EACCES; 338 339 if (cmd.timeout_ms) 340 timeout = msecs_to_jiffies(cmd.timeout_ms); 341 342 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 343 cmd.addr, cmd.data_len, 344 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 345 0, &result, timeout, false); 346 347 if (status >= 0) { 348 if (put_user(result, &ucmd->result)) 349 return -EFAULT; 350 } 351 352 return status; 353 } 354 355 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 356 struct nvme_passthru_cmd64 __user *ucmd, bool vec, 357 fmode_t mode) 358 { 359 struct nvme_passthru_cmd64 cmd; 360 struct nvme_command c; 361 unsigned timeout = 0; 362 int status; 363 364 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 365 return -EFAULT; 366 if (cmd.flags) 367 return -EINVAL; 368 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 369 return -EINVAL; 370 371 memset(&c, 0, sizeof(c)); 372 c.common.opcode = cmd.opcode; 373 c.common.flags = cmd.flags; 374 c.common.nsid = cpu_to_le32(cmd.nsid); 375 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 376 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 377 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 378 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 379 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 380 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 381 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 382 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 383 384 if (!nvme_cmd_allowed(ns, &c, mode)) 385 return -EACCES; 386 387 if (cmd.timeout_ms) 388 timeout = msecs_to_jiffies(cmd.timeout_ms); 389 390 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 391 cmd.addr, cmd.data_len, 392 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 393 0, &cmd.result, timeout, vec); 394 395 if (status >= 0) { 396 if (put_user(cmd.result, &ucmd->result)) 397 return -EFAULT; 398 } 399 400 return status; 401 } 402 403 struct nvme_uring_data { 404 __u64 metadata; 405 __u64 addr; 406 __u32 data_len; 407 __u32 metadata_len; 408 __u32 timeout_ms; 409 }; 410 411 /* 412 * This overlays struct io_uring_cmd pdu. 413 * Expect build errors if this grows larger than that. 414 */ 415 struct nvme_uring_cmd_pdu { 416 union { 417 struct bio *bio; 418 struct request *req; 419 }; 420 u32 meta_len; 421 u32 nvme_status; 422 union { 423 struct { 424 void *meta; /* kernel-resident buffer */ 425 void __user *meta_buffer; 426 }; 427 u64 result; 428 } u; 429 }; 430 431 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 432 struct io_uring_cmd *ioucmd) 433 { 434 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 435 } 436 437 static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) 438 { 439 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 440 struct request *req = pdu->req; 441 int status; 442 u64 result; 443 444 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 445 status = -EINTR; 446 else 447 status = nvme_req(req)->status; 448 449 result = le64_to_cpu(nvme_req(req)->result.u64); 450 451 if (pdu->meta_len) 452 status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, 453 pdu->u.meta, pdu->meta_len, status); 454 if (req->bio) 455 blk_rq_unmap_user(req->bio); 456 blk_mq_free_request(req); 457 458 io_uring_cmd_done(ioucmd, status, result); 459 } 460 461 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) 462 { 463 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 464 465 if (pdu->bio) 466 blk_rq_unmap_user(pdu->bio); 467 468 io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); 469 } 470 471 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 472 blk_status_t err) 473 { 474 struct io_uring_cmd *ioucmd = req->end_io_data; 475 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 476 void *cookie = READ_ONCE(ioucmd->cookie); 477 478 req->bio = pdu->bio; 479 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 480 pdu->nvme_status = -EINTR; 481 else 482 pdu->nvme_status = nvme_req(req)->status; 483 pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); 484 485 /* 486 * For iopoll, complete it directly. 487 * Otherwise, move the completion to task work. 488 */ 489 if (cookie != NULL && blk_rq_is_poll(req)) 490 nvme_uring_task_cb(ioucmd); 491 else 492 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); 493 494 return RQ_END_IO_FREE; 495 } 496 497 static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, 498 blk_status_t err) 499 { 500 struct io_uring_cmd *ioucmd = req->end_io_data; 501 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 502 void *cookie = READ_ONCE(ioucmd->cookie); 503 504 req->bio = pdu->bio; 505 pdu->req = req; 506 507 /* 508 * For iopoll, complete it directly. 509 * Otherwise, move the completion to task work. 510 */ 511 if (cookie != NULL && blk_rq_is_poll(req)) 512 nvme_uring_task_meta_cb(ioucmd); 513 else 514 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); 515 516 return RQ_END_IO_NONE; 517 } 518 519 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 520 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 521 { 522 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 523 const struct nvme_uring_cmd *cmd = ioucmd->cmd; 524 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 525 struct nvme_uring_data d; 526 struct nvme_command c; 527 struct request *req; 528 blk_opf_t rq_flags = 0; 529 blk_mq_req_flags_t blk_flags = 0; 530 void *meta = NULL; 531 int ret; 532 533 c.common.opcode = READ_ONCE(cmd->opcode); 534 c.common.flags = READ_ONCE(cmd->flags); 535 if (c.common.flags) 536 return -EINVAL; 537 538 c.common.command_id = 0; 539 c.common.nsid = cpu_to_le32(cmd->nsid); 540 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 541 return -EINVAL; 542 543 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 544 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 545 c.common.metadata = 0; 546 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 547 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 548 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 549 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 550 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 551 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 552 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 553 554 if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) 555 return -EACCES; 556 557 d.metadata = READ_ONCE(cmd->metadata); 558 d.addr = READ_ONCE(cmd->addr); 559 d.data_len = READ_ONCE(cmd->data_len); 560 d.metadata_len = READ_ONCE(cmd->metadata_len); 561 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 562 563 if (issue_flags & IO_URING_F_NONBLOCK) { 564 rq_flags = REQ_NOWAIT; 565 blk_flags = BLK_MQ_REQ_NOWAIT; 566 } 567 if (issue_flags & IO_URING_F_IOPOLL) 568 rq_flags |= REQ_POLLED; 569 570 retry: 571 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 572 if (IS_ERR(req)) 573 return PTR_ERR(req); 574 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 575 576 if (d.addr && d.data_len) { 577 ret = nvme_map_user_request(req, d.addr, 578 d.data_len, nvme_to_user_ptr(d.metadata), 579 d.metadata_len, 0, &meta, ioucmd, vec); 580 if (ret) 581 return ret; 582 } 583 584 if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { 585 if (unlikely(!req->bio)) { 586 /* we can't poll this, so alloc regular req instead */ 587 blk_mq_free_request(req); 588 rq_flags &= ~REQ_POLLED; 589 goto retry; 590 } else { 591 WRITE_ONCE(ioucmd->cookie, req->bio); 592 req->bio->bi_opf |= REQ_POLLED; 593 } 594 } 595 /* to free bio on completion, as req->bio will be null at that time */ 596 pdu->bio = req->bio; 597 pdu->meta_len = d.metadata_len; 598 req->end_io_data = ioucmd; 599 if (pdu->meta_len) { 600 pdu->u.meta = meta; 601 pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); 602 req->end_io = nvme_uring_cmd_end_io_meta; 603 } else { 604 req->end_io = nvme_uring_cmd_end_io; 605 } 606 blk_execute_rq_nowait(req, false); 607 return -EIOCBQUEUED; 608 } 609 610 static bool is_ctrl_ioctl(unsigned int cmd) 611 { 612 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 613 return true; 614 if (is_sed_ioctl(cmd)) 615 return true; 616 return false; 617 } 618 619 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 620 void __user *argp, fmode_t mode) 621 { 622 switch (cmd) { 623 case NVME_IOCTL_ADMIN_CMD: 624 return nvme_user_cmd(ctrl, NULL, argp, mode); 625 case NVME_IOCTL_ADMIN64_CMD: 626 return nvme_user_cmd64(ctrl, NULL, argp, false, mode); 627 default: 628 return sed_ioctl(ctrl->opal_dev, cmd, argp); 629 } 630 } 631 632 #ifdef COMPAT_FOR_U64_ALIGNMENT 633 struct nvme_user_io32 { 634 __u8 opcode; 635 __u8 flags; 636 __u16 control; 637 __u16 nblocks; 638 __u16 rsvd; 639 __u64 metadata; 640 __u64 addr; 641 __u64 slba; 642 __u32 dsmgmt; 643 __u32 reftag; 644 __u16 apptag; 645 __u16 appmask; 646 } __attribute__((__packed__)); 647 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 648 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 649 650 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 651 void __user *argp, fmode_t mode) 652 { 653 switch (cmd) { 654 case NVME_IOCTL_ID: 655 force_successful_syscall_return(); 656 return ns->head->ns_id; 657 case NVME_IOCTL_IO_CMD: 658 return nvme_user_cmd(ns->ctrl, ns, argp, mode); 659 /* 660 * struct nvme_user_io can have different padding on some 32-bit ABIs. 661 * Just accept the compat version as all fields that are used are the 662 * same size and at the same offset. 663 */ 664 #ifdef COMPAT_FOR_U64_ALIGNMENT 665 case NVME_IOCTL_SUBMIT_IO32: 666 #endif 667 case NVME_IOCTL_SUBMIT_IO: 668 return nvme_submit_io(ns, argp); 669 case NVME_IOCTL_IO64_CMD: 670 return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); 671 case NVME_IOCTL_IO64_CMD_VEC: 672 return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); 673 default: 674 return -ENOTTY; 675 } 676 } 677 678 static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, 679 fmode_t mode) 680 { 681 if (is_ctrl_ioctl(cmd)) 682 return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); 683 return nvme_ns_ioctl(ns, cmd, arg, mode); 684 } 685 686 int nvme_ioctl(struct block_device *bdev, fmode_t mode, 687 unsigned int cmd, unsigned long arg) 688 { 689 struct nvme_ns *ns = bdev->bd_disk->private_data; 690 691 return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); 692 } 693 694 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 695 { 696 struct nvme_ns *ns = 697 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 698 699 return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); 700 } 701 702 static int nvme_uring_cmd_checks(unsigned int issue_flags) 703 { 704 705 /* NVMe passthrough requires big SQE/CQE support */ 706 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 707 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 708 return -EOPNOTSUPP; 709 return 0; 710 } 711 712 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 713 unsigned int issue_flags) 714 { 715 struct nvme_ctrl *ctrl = ns->ctrl; 716 int ret; 717 718 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 719 720 ret = nvme_uring_cmd_checks(issue_flags); 721 if (ret) 722 return ret; 723 724 switch (ioucmd->cmd_op) { 725 case NVME_URING_CMD_IO: 726 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 727 break; 728 case NVME_URING_CMD_IO_VEC: 729 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 730 break; 731 default: 732 ret = -ENOTTY; 733 } 734 735 return ret; 736 } 737 738 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 739 { 740 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 741 struct nvme_ns, cdev); 742 743 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 744 } 745 746 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 747 struct io_comp_batch *iob, 748 unsigned int poll_flags) 749 { 750 struct bio *bio; 751 int ret = 0; 752 struct nvme_ns *ns; 753 struct request_queue *q; 754 755 rcu_read_lock(); 756 bio = READ_ONCE(ioucmd->cookie); 757 ns = container_of(file_inode(ioucmd->file)->i_cdev, 758 struct nvme_ns, cdev); 759 q = ns->queue; 760 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev) 761 ret = bio_poll(bio, iob, poll_flags); 762 rcu_read_unlock(); 763 return ret; 764 } 765 #ifdef CONFIG_NVME_MULTIPATH 766 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 767 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 768 fmode_t mode) 769 __releases(&head->srcu) 770 { 771 struct nvme_ctrl *ctrl = ns->ctrl; 772 int ret; 773 774 nvme_get_ctrl(ns->ctrl); 775 srcu_read_unlock(&head->srcu, srcu_idx); 776 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); 777 778 nvme_put_ctrl(ctrl); 779 return ret; 780 } 781 782 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, 783 unsigned int cmd, unsigned long arg) 784 { 785 struct nvme_ns_head *head = bdev->bd_disk->private_data; 786 void __user *argp = (void __user *)arg; 787 struct nvme_ns *ns; 788 int srcu_idx, ret = -EWOULDBLOCK; 789 790 srcu_idx = srcu_read_lock(&head->srcu); 791 ns = nvme_find_path(head); 792 if (!ns) 793 goto out_unlock; 794 795 /* 796 * Handle ioctls that apply to the controller instead of the namespace 797 * seperately and drop the ns SRCU reference early. This avoids a 798 * deadlock when deleting namespaces using the passthrough interface. 799 */ 800 if (is_ctrl_ioctl(cmd)) 801 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 802 mode); 803 804 ret = nvme_ns_ioctl(ns, cmd, argp, mode); 805 out_unlock: 806 srcu_read_unlock(&head->srcu, srcu_idx); 807 return ret; 808 } 809 810 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 811 unsigned long arg) 812 { 813 struct cdev *cdev = file_inode(file)->i_cdev; 814 struct nvme_ns_head *head = 815 container_of(cdev, struct nvme_ns_head, cdev); 816 void __user *argp = (void __user *)arg; 817 struct nvme_ns *ns; 818 int srcu_idx, ret = -EWOULDBLOCK; 819 820 srcu_idx = srcu_read_lock(&head->srcu); 821 ns = nvme_find_path(head); 822 if (!ns) 823 goto out_unlock; 824 825 if (is_ctrl_ioctl(cmd)) 826 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 827 file->f_mode); 828 829 ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); 830 out_unlock: 831 srcu_read_unlock(&head->srcu, srcu_idx); 832 return ret; 833 } 834 835 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 836 unsigned int issue_flags) 837 { 838 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 839 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 840 int srcu_idx = srcu_read_lock(&head->srcu); 841 struct nvme_ns *ns = nvme_find_path(head); 842 int ret = -EINVAL; 843 844 if (ns) 845 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 846 srcu_read_unlock(&head->srcu, srcu_idx); 847 return ret; 848 } 849 850 int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 851 struct io_comp_batch *iob, 852 unsigned int poll_flags) 853 { 854 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 855 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 856 int srcu_idx = srcu_read_lock(&head->srcu); 857 struct nvme_ns *ns = nvme_find_path(head); 858 struct bio *bio; 859 int ret = 0; 860 struct request_queue *q; 861 862 if (ns) { 863 rcu_read_lock(); 864 bio = READ_ONCE(ioucmd->cookie); 865 q = ns->queue; 866 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio 867 && bio->bi_bdev) 868 ret = bio_poll(bio, iob, poll_flags); 869 rcu_read_unlock(); 870 } 871 srcu_read_unlock(&head->srcu, srcu_idx); 872 return ret; 873 } 874 #endif /* CONFIG_NVME_MULTIPATH */ 875 876 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 877 { 878 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 879 int ret; 880 881 /* IOPOLL not supported yet */ 882 if (issue_flags & IO_URING_F_IOPOLL) 883 return -EOPNOTSUPP; 884 885 ret = nvme_uring_cmd_checks(issue_flags); 886 if (ret) 887 return ret; 888 889 switch (ioucmd->cmd_op) { 890 case NVME_URING_CMD_ADMIN: 891 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 892 break; 893 case NVME_URING_CMD_ADMIN_VEC: 894 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 895 break; 896 default: 897 ret = -ENOTTY; 898 } 899 900 return ret; 901 } 902 903 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 904 fmode_t mode) 905 { 906 struct nvme_ns *ns; 907 int ret; 908 909 down_read(&ctrl->namespaces_rwsem); 910 if (list_empty(&ctrl->namespaces)) { 911 ret = -ENOTTY; 912 goto out_unlock; 913 } 914 915 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); 916 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 917 dev_warn(ctrl->device, 918 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 919 ret = -EINVAL; 920 goto out_unlock; 921 } 922 923 dev_warn(ctrl->device, 924 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 925 kref_get(&ns->kref); 926 up_read(&ctrl->namespaces_rwsem); 927 928 ret = nvme_user_cmd(ctrl, ns, argp, mode); 929 nvme_put_ns(ns); 930 return ret; 931 932 out_unlock: 933 up_read(&ctrl->namespaces_rwsem); 934 return ret; 935 } 936 937 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 938 unsigned long arg) 939 { 940 struct nvme_ctrl *ctrl = file->private_data; 941 void __user *argp = (void __user *)arg; 942 943 switch (cmd) { 944 case NVME_IOCTL_ADMIN_CMD: 945 return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); 946 case NVME_IOCTL_ADMIN64_CMD: 947 return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); 948 case NVME_IOCTL_IO_CMD: 949 return nvme_dev_user_cmd(ctrl, argp, file->f_mode); 950 case NVME_IOCTL_RESET: 951 if (!capable(CAP_SYS_ADMIN)) 952 return -EACCES; 953 dev_warn(ctrl->device, "resetting controller\n"); 954 return nvme_reset_ctrl_sync(ctrl); 955 case NVME_IOCTL_SUBSYS_RESET: 956 if (!capable(CAP_SYS_ADMIN)) 957 return -EACCES; 958 return nvme_reset_subsystem(ctrl); 959 case NVME_IOCTL_RESCAN: 960 if (!capable(CAP_SYS_ADMIN)) 961 return -EACCES; 962 nvme_queue_scan(ctrl); 963 return 0; 964 default: 965 return -ENOTTY; 966 } 967 } 968