1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * Copyright (c) 2017-2021 Christoph Hellwig. 5 */ 6 #include <linux/ptrace.h> /* for force_successful_syscall_return */ 7 #include <linux/nvme_ioctl.h> 8 #include <linux/io_uring.h> 9 #include "nvme.h" 10 11 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, 12 fmode_t mode) 13 { 14 u32 effects; 15 16 if (capable(CAP_SYS_ADMIN)) 17 return true; 18 19 /* 20 * Do not allow unprivileged processes to send vendor specific or fabrics 21 * commands as we can't be sure about their effects. 22 */ 23 if (c->common.opcode >= nvme_cmd_vendor_start || 24 c->common.opcode == nvme_fabrics_command) 25 return false; 26 27 /* 28 * Do not allow unprivileged passthrough of admin commands except 29 * for a subset of identify commands that contain information required 30 * to form proper I/O commands in userspace and do not expose any 31 * potentially sensitive information. 32 */ 33 if (!ns) { 34 if (c->common.opcode == nvme_admin_identify) { 35 switch (c->identify.cns) { 36 case NVME_ID_CNS_NS: 37 case NVME_ID_CNS_CS_NS: 38 case NVME_ID_CNS_NS_CS_INDEP: 39 case NVME_ID_CNS_CS_CTRL: 40 case NVME_ID_CNS_CTRL: 41 return true; 42 } 43 } 44 return false; 45 } 46 47 /* 48 * Check if the controller provides a Commands Supported and Effects log 49 * and marks this command as supported. If not reject unprivileged 50 * passthrough. 51 */ 52 effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); 53 if (!(effects & NVME_CMD_EFFECTS_CSUPP)) 54 return false; 55 56 /* 57 * Don't allow passthrough for command that have intrusive (or unknown) 58 * effects. 59 */ 60 if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | 61 NVME_CMD_EFFECTS_UUID_SEL | 62 NVME_CMD_EFFECTS_SCOPE_MASK)) 63 return false; 64 65 /* 66 * Only allow I/O commands that transfer data to the controller or that 67 * change the logical block contents if the file descriptor is open for 68 * writing. 69 */ 70 if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) 71 return mode & FMODE_WRITE; 72 return true; 73 } 74 75 /* 76 * Convert integer values from ioctl structures to user pointers, silently 77 * ignoring the upper bits in the compat case to match behaviour of 32-bit 78 * kernels. 79 */ 80 static void __user *nvme_to_user_ptr(uintptr_t ptrval) 81 { 82 if (in_compat_syscall()) 83 ptrval = (compat_uptr_t)ptrval; 84 return (void __user *)ptrval; 85 } 86 87 static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, 88 unsigned len, u32 seed) 89 { 90 struct bio_integrity_payload *bip; 91 int ret = -ENOMEM; 92 void *buf; 93 struct bio *bio = req->bio; 94 95 buf = kmalloc(len, GFP_KERNEL); 96 if (!buf) 97 goto out; 98 99 ret = -EFAULT; 100 if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) 101 goto out_free_meta; 102 103 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); 104 if (IS_ERR(bip)) { 105 ret = PTR_ERR(bip); 106 goto out_free_meta; 107 } 108 109 bip->bip_iter.bi_size = len; 110 bip->bip_iter.bi_sector = seed; 111 ret = bio_integrity_add_page(bio, virt_to_page(buf), len, 112 offset_in_page(buf)); 113 if (ret != len) { 114 ret = -ENOMEM; 115 goto out_free_meta; 116 } 117 118 req->cmd_flags |= REQ_INTEGRITY; 119 return buf; 120 out_free_meta: 121 kfree(buf); 122 out: 123 return ERR_PTR(ret); 124 } 125 126 static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, 127 void *meta, unsigned len, int ret) 128 { 129 if (!ret && req_op(req) == REQ_OP_DRV_IN && 130 copy_to_user(ubuf, meta, len)) 131 ret = -EFAULT; 132 kfree(meta); 133 return ret; 134 } 135 136 static struct request *nvme_alloc_user_request(struct request_queue *q, 137 struct nvme_command *cmd, blk_opf_t rq_flags, 138 blk_mq_req_flags_t blk_flags) 139 { 140 struct request *req; 141 142 req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); 143 if (IS_ERR(req)) 144 return req; 145 nvme_init_request(req, cmd); 146 nvme_req(req)->flags |= NVME_REQ_USERCMD; 147 return req; 148 } 149 150 static int nvme_map_user_request(struct request *req, u64 ubuffer, 151 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 152 u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, 153 bool vec) 154 { 155 struct request_queue *q = req->q; 156 struct nvme_ns *ns = q->queuedata; 157 struct block_device *bdev = ns ? ns->disk->part0 : NULL; 158 struct bio *bio = NULL; 159 void *meta = NULL; 160 int ret; 161 162 if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { 163 struct iov_iter iter; 164 165 /* fixedbufs is only for non-vectored io */ 166 if (WARN_ON_ONCE(vec)) 167 return -EINVAL; 168 ret = io_uring_cmd_import_fixed(ubuffer, bufflen, 169 rq_data_dir(req), &iter, ioucmd); 170 if (ret < 0) 171 goto out; 172 ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); 173 } else { 174 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), 175 bufflen, GFP_KERNEL, vec, 0, 0, 176 rq_data_dir(req)); 177 } 178 179 if (ret) 180 goto out; 181 bio = req->bio; 182 if (bdev) 183 bio_set_dev(bio, bdev); 184 185 if (bdev && meta_buffer && meta_len) { 186 meta = nvme_add_user_metadata(req, meta_buffer, meta_len, 187 meta_seed); 188 if (IS_ERR(meta)) { 189 ret = PTR_ERR(meta); 190 goto out_unmap; 191 } 192 *metap = meta; 193 } 194 195 return ret; 196 197 out_unmap: 198 if (bio) 199 blk_rq_unmap_user(bio); 200 out: 201 blk_mq_free_request(req); 202 return ret; 203 } 204 205 static int nvme_submit_user_cmd(struct request_queue *q, 206 struct nvme_command *cmd, u64 ubuffer, 207 unsigned bufflen, void __user *meta_buffer, unsigned meta_len, 208 u32 meta_seed, u64 *result, unsigned timeout, bool vec) 209 { 210 struct nvme_ctrl *ctrl; 211 struct request *req; 212 void *meta = NULL; 213 struct bio *bio; 214 u32 effects; 215 int ret; 216 217 req = nvme_alloc_user_request(q, cmd, 0, 0); 218 if (IS_ERR(req)) 219 return PTR_ERR(req); 220 221 req->timeout = timeout; 222 if (ubuffer && bufflen) { 223 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, 224 meta_len, meta_seed, &meta, NULL, vec); 225 if (ret) 226 return ret; 227 } 228 229 bio = req->bio; 230 ctrl = nvme_req(req)->ctrl; 231 232 ret = nvme_execute_passthru_rq(req, &effects); 233 234 if (result) 235 *result = le64_to_cpu(nvme_req(req)->result.u64); 236 if (meta) 237 ret = nvme_finish_user_metadata(req, meta_buffer, meta, 238 meta_len, ret); 239 if (bio) 240 blk_rq_unmap_user(bio); 241 blk_mq_free_request(req); 242 243 if (effects) 244 nvme_passthru_end(ctrl, effects, cmd, ret); 245 246 return ret; 247 } 248 249 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 250 { 251 struct nvme_user_io io; 252 struct nvme_command c; 253 unsigned length, meta_len; 254 void __user *metadata; 255 256 if (copy_from_user(&io, uio, sizeof(io))) 257 return -EFAULT; 258 if (io.flags) 259 return -EINVAL; 260 261 switch (io.opcode) { 262 case nvme_cmd_write: 263 case nvme_cmd_read: 264 case nvme_cmd_compare: 265 break; 266 default: 267 return -EINVAL; 268 } 269 270 length = (io.nblocks + 1) << ns->lba_shift; 271 272 if ((io.control & NVME_RW_PRINFO_PRACT) && 273 ns->ms == sizeof(struct t10_pi_tuple)) { 274 /* 275 * Protection information is stripped/inserted by the 276 * controller. 277 */ 278 if (nvme_to_user_ptr(io.metadata)) 279 return -EINVAL; 280 meta_len = 0; 281 metadata = NULL; 282 } else { 283 meta_len = (io.nblocks + 1) * ns->ms; 284 metadata = nvme_to_user_ptr(io.metadata); 285 } 286 287 if (ns->features & NVME_NS_EXT_LBAS) { 288 length += meta_len; 289 meta_len = 0; 290 } else if (meta_len) { 291 if ((io.metadata & 3) || !io.metadata) 292 return -EINVAL; 293 } 294 295 memset(&c, 0, sizeof(c)); 296 c.rw.opcode = io.opcode; 297 c.rw.flags = io.flags; 298 c.rw.nsid = cpu_to_le32(ns->head->ns_id); 299 c.rw.slba = cpu_to_le64(io.slba); 300 c.rw.length = cpu_to_le16(io.nblocks); 301 c.rw.control = cpu_to_le16(io.control); 302 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 303 c.rw.reftag = cpu_to_le32(io.reftag); 304 c.rw.apptag = cpu_to_le16(io.apptag); 305 c.rw.appmask = cpu_to_le16(io.appmask); 306 307 return nvme_submit_user_cmd(ns->queue, &c, 308 io.addr, length, 309 metadata, meta_len, lower_32_bits(io.slba), NULL, 0, 310 false); 311 } 312 313 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, 314 struct nvme_ns *ns, __u32 nsid) 315 { 316 if (ns && nsid != ns->head->ns_id) { 317 dev_err(ctrl->device, 318 "%s: nsid (%u) in cmd does not match nsid (%u)" 319 "of namespace\n", 320 current->comm, nsid, ns->head->ns_id); 321 return false; 322 } 323 324 return true; 325 } 326 327 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 328 struct nvme_passthru_cmd __user *ucmd, fmode_t mode) 329 { 330 struct nvme_passthru_cmd cmd; 331 struct nvme_command c; 332 unsigned timeout = 0; 333 u64 result; 334 int status; 335 336 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 337 return -EFAULT; 338 if (cmd.flags) 339 return -EINVAL; 340 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 341 return -EINVAL; 342 343 memset(&c, 0, sizeof(c)); 344 c.common.opcode = cmd.opcode; 345 c.common.flags = cmd.flags; 346 c.common.nsid = cpu_to_le32(cmd.nsid); 347 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 348 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 349 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 350 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 351 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 352 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 353 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 354 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 355 356 if (!nvme_cmd_allowed(ns, &c, mode)) 357 return -EACCES; 358 359 if (cmd.timeout_ms) 360 timeout = msecs_to_jiffies(cmd.timeout_ms); 361 362 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 363 cmd.addr, cmd.data_len, 364 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 365 0, &result, timeout, false); 366 367 if (status >= 0) { 368 if (put_user(result, &ucmd->result)) 369 return -EFAULT; 370 } 371 372 return status; 373 } 374 375 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 376 struct nvme_passthru_cmd64 __user *ucmd, bool vec, 377 fmode_t mode) 378 { 379 struct nvme_passthru_cmd64 cmd; 380 struct nvme_command c; 381 unsigned timeout = 0; 382 int status; 383 384 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 385 return -EFAULT; 386 if (cmd.flags) 387 return -EINVAL; 388 if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid)) 389 return -EINVAL; 390 391 memset(&c, 0, sizeof(c)); 392 c.common.opcode = cmd.opcode; 393 c.common.flags = cmd.flags; 394 c.common.nsid = cpu_to_le32(cmd.nsid); 395 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 396 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 397 c.common.cdw10 = cpu_to_le32(cmd.cdw10); 398 c.common.cdw11 = cpu_to_le32(cmd.cdw11); 399 c.common.cdw12 = cpu_to_le32(cmd.cdw12); 400 c.common.cdw13 = cpu_to_le32(cmd.cdw13); 401 c.common.cdw14 = cpu_to_le32(cmd.cdw14); 402 c.common.cdw15 = cpu_to_le32(cmd.cdw15); 403 404 if (!nvme_cmd_allowed(ns, &c, mode)) 405 return -EACCES; 406 407 if (cmd.timeout_ms) 408 timeout = msecs_to_jiffies(cmd.timeout_ms); 409 410 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, 411 cmd.addr, cmd.data_len, 412 nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 413 0, &cmd.result, timeout, vec); 414 415 if (status >= 0) { 416 if (put_user(cmd.result, &ucmd->result)) 417 return -EFAULT; 418 } 419 420 return status; 421 } 422 423 struct nvme_uring_data { 424 __u64 metadata; 425 __u64 addr; 426 __u32 data_len; 427 __u32 metadata_len; 428 __u32 timeout_ms; 429 }; 430 431 /* 432 * This overlays struct io_uring_cmd pdu. 433 * Expect build errors if this grows larger than that. 434 */ 435 struct nvme_uring_cmd_pdu { 436 union { 437 struct bio *bio; 438 struct request *req; 439 }; 440 u32 meta_len; 441 u32 nvme_status; 442 union { 443 struct { 444 void *meta; /* kernel-resident buffer */ 445 void __user *meta_buffer; 446 }; 447 u64 result; 448 } u; 449 }; 450 451 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( 452 struct io_uring_cmd *ioucmd) 453 { 454 return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; 455 } 456 457 static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) 458 { 459 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 460 struct request *req = pdu->req; 461 int status; 462 u64 result; 463 464 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 465 status = -EINTR; 466 else 467 status = nvme_req(req)->status; 468 469 result = le64_to_cpu(nvme_req(req)->result.u64); 470 471 if (pdu->meta_len) 472 status = nvme_finish_user_metadata(req, pdu->u.meta_buffer, 473 pdu->u.meta, pdu->meta_len, status); 474 if (req->bio) 475 blk_rq_unmap_user(req->bio); 476 blk_mq_free_request(req); 477 478 io_uring_cmd_done(ioucmd, status, result); 479 } 480 481 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) 482 { 483 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 484 485 if (pdu->bio) 486 blk_rq_unmap_user(pdu->bio); 487 488 io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); 489 } 490 491 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, 492 blk_status_t err) 493 { 494 struct io_uring_cmd *ioucmd = req->end_io_data; 495 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 496 void *cookie = READ_ONCE(ioucmd->cookie); 497 498 req->bio = pdu->bio; 499 if (nvme_req(req)->flags & NVME_REQ_CANCELLED) 500 pdu->nvme_status = -EINTR; 501 else 502 pdu->nvme_status = nvme_req(req)->status; 503 pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); 504 505 /* 506 * For iopoll, complete it directly. 507 * Otherwise, move the completion to task work. 508 */ 509 if (cookie != NULL && blk_rq_is_poll(req)) 510 nvme_uring_task_cb(ioucmd); 511 else 512 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); 513 514 return RQ_END_IO_FREE; 515 } 516 517 static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, 518 blk_status_t err) 519 { 520 struct io_uring_cmd *ioucmd = req->end_io_data; 521 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 522 void *cookie = READ_ONCE(ioucmd->cookie); 523 524 req->bio = pdu->bio; 525 pdu->req = req; 526 527 /* 528 * For iopoll, complete it directly. 529 * Otherwise, move the completion to task work. 530 */ 531 if (cookie != NULL && blk_rq_is_poll(req)) 532 nvme_uring_task_meta_cb(ioucmd); 533 else 534 io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); 535 536 return RQ_END_IO_NONE; 537 } 538 539 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 540 struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) 541 { 542 struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); 543 const struct nvme_uring_cmd *cmd = ioucmd->cmd; 544 struct request_queue *q = ns ? ns->queue : ctrl->admin_q; 545 struct nvme_uring_data d; 546 struct nvme_command c; 547 struct request *req; 548 blk_opf_t rq_flags = 0; 549 blk_mq_req_flags_t blk_flags = 0; 550 void *meta = NULL; 551 int ret; 552 553 c.common.opcode = READ_ONCE(cmd->opcode); 554 c.common.flags = READ_ONCE(cmd->flags); 555 if (c.common.flags) 556 return -EINVAL; 557 558 c.common.command_id = 0; 559 c.common.nsid = cpu_to_le32(cmd->nsid); 560 if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) 561 return -EINVAL; 562 563 c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); 564 c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); 565 c.common.metadata = 0; 566 c.common.dptr.prp1 = c.common.dptr.prp2 = 0; 567 c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); 568 c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); 569 c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); 570 c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); 571 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); 572 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); 573 574 if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) 575 return -EACCES; 576 577 d.metadata = READ_ONCE(cmd->metadata); 578 d.addr = READ_ONCE(cmd->addr); 579 d.data_len = READ_ONCE(cmd->data_len); 580 d.metadata_len = READ_ONCE(cmd->metadata_len); 581 d.timeout_ms = READ_ONCE(cmd->timeout_ms); 582 583 if (issue_flags & IO_URING_F_NONBLOCK) { 584 rq_flags = REQ_NOWAIT; 585 blk_flags = BLK_MQ_REQ_NOWAIT; 586 } 587 if (issue_flags & IO_URING_F_IOPOLL) 588 rq_flags |= REQ_POLLED; 589 590 retry: 591 req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); 592 if (IS_ERR(req)) 593 return PTR_ERR(req); 594 req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0; 595 596 if (d.addr && d.data_len) { 597 ret = nvme_map_user_request(req, d.addr, 598 d.data_len, nvme_to_user_ptr(d.metadata), 599 d.metadata_len, 0, &meta, ioucmd, vec); 600 if (ret) 601 return ret; 602 } 603 604 if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { 605 if (unlikely(!req->bio)) { 606 /* we can't poll this, so alloc regular req instead */ 607 blk_mq_free_request(req); 608 rq_flags &= ~REQ_POLLED; 609 goto retry; 610 } else { 611 WRITE_ONCE(ioucmd->cookie, req->bio); 612 req->bio->bi_opf |= REQ_POLLED; 613 } 614 } 615 /* to free bio on completion, as req->bio will be null at that time */ 616 pdu->bio = req->bio; 617 pdu->meta_len = d.metadata_len; 618 req->end_io_data = ioucmd; 619 if (pdu->meta_len) { 620 pdu->u.meta = meta; 621 pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata); 622 req->end_io = nvme_uring_cmd_end_io_meta; 623 } else { 624 req->end_io = nvme_uring_cmd_end_io; 625 } 626 blk_execute_rq_nowait(req, false); 627 return -EIOCBQUEUED; 628 } 629 630 static bool is_ctrl_ioctl(unsigned int cmd) 631 { 632 if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) 633 return true; 634 if (is_sed_ioctl(cmd)) 635 return true; 636 return false; 637 } 638 639 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, 640 void __user *argp, fmode_t mode) 641 { 642 switch (cmd) { 643 case NVME_IOCTL_ADMIN_CMD: 644 return nvme_user_cmd(ctrl, NULL, argp, mode); 645 case NVME_IOCTL_ADMIN64_CMD: 646 return nvme_user_cmd64(ctrl, NULL, argp, false, mode); 647 default: 648 return sed_ioctl(ctrl->opal_dev, cmd, argp); 649 } 650 } 651 652 #ifdef COMPAT_FOR_U64_ALIGNMENT 653 struct nvme_user_io32 { 654 __u8 opcode; 655 __u8 flags; 656 __u16 control; 657 __u16 nblocks; 658 __u16 rsvd; 659 __u64 metadata; 660 __u64 addr; 661 __u64 slba; 662 __u32 dsmgmt; 663 __u32 reftag; 664 __u16 apptag; 665 __u16 appmask; 666 } __attribute__((__packed__)); 667 #define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) 668 #endif /* COMPAT_FOR_U64_ALIGNMENT */ 669 670 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, 671 void __user *argp, fmode_t mode) 672 { 673 switch (cmd) { 674 case NVME_IOCTL_ID: 675 force_successful_syscall_return(); 676 return ns->head->ns_id; 677 case NVME_IOCTL_IO_CMD: 678 return nvme_user_cmd(ns->ctrl, ns, argp, mode); 679 /* 680 * struct nvme_user_io can have different padding on some 32-bit ABIs. 681 * Just accept the compat version as all fields that are used are the 682 * same size and at the same offset. 683 */ 684 #ifdef COMPAT_FOR_U64_ALIGNMENT 685 case NVME_IOCTL_SUBMIT_IO32: 686 #endif 687 case NVME_IOCTL_SUBMIT_IO: 688 return nvme_submit_io(ns, argp); 689 case NVME_IOCTL_IO64_CMD: 690 return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); 691 case NVME_IOCTL_IO64_CMD_VEC: 692 return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); 693 default: 694 return -ENOTTY; 695 } 696 } 697 698 static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, 699 fmode_t mode) 700 { 701 if (is_ctrl_ioctl(cmd)) 702 return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); 703 return nvme_ns_ioctl(ns, cmd, arg, mode); 704 } 705 706 int nvme_ioctl(struct block_device *bdev, fmode_t mode, 707 unsigned int cmd, unsigned long arg) 708 { 709 struct nvme_ns *ns = bdev->bd_disk->private_data; 710 711 return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); 712 } 713 714 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 715 { 716 struct nvme_ns *ns = 717 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); 718 719 return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); 720 } 721 722 static int nvme_uring_cmd_checks(unsigned int issue_flags) 723 { 724 725 /* NVMe passthrough requires big SQE/CQE support */ 726 if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != 727 (IO_URING_F_SQE128|IO_URING_F_CQE32)) 728 return -EOPNOTSUPP; 729 return 0; 730 } 731 732 static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, 733 unsigned int issue_flags) 734 { 735 struct nvme_ctrl *ctrl = ns->ctrl; 736 int ret; 737 738 BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); 739 740 ret = nvme_uring_cmd_checks(issue_flags); 741 if (ret) 742 return ret; 743 744 switch (ioucmd->cmd_op) { 745 case NVME_URING_CMD_IO: 746 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); 747 break; 748 case NVME_URING_CMD_IO_VEC: 749 ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); 750 break; 751 default: 752 ret = -ENOTTY; 753 } 754 755 return ret; 756 } 757 758 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 759 { 760 struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, 761 struct nvme_ns, cdev); 762 763 return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 764 } 765 766 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 767 struct io_comp_batch *iob, 768 unsigned int poll_flags) 769 { 770 struct bio *bio; 771 int ret = 0; 772 struct nvme_ns *ns; 773 struct request_queue *q; 774 775 rcu_read_lock(); 776 bio = READ_ONCE(ioucmd->cookie); 777 ns = container_of(file_inode(ioucmd->file)->i_cdev, 778 struct nvme_ns, cdev); 779 q = ns->queue; 780 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev) 781 ret = bio_poll(bio, iob, poll_flags); 782 rcu_read_unlock(); 783 return ret; 784 } 785 #ifdef CONFIG_NVME_MULTIPATH 786 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, 787 void __user *argp, struct nvme_ns_head *head, int srcu_idx, 788 fmode_t mode) 789 __releases(&head->srcu) 790 { 791 struct nvme_ctrl *ctrl = ns->ctrl; 792 int ret; 793 794 nvme_get_ctrl(ns->ctrl); 795 srcu_read_unlock(&head->srcu, srcu_idx); 796 ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); 797 798 nvme_put_ctrl(ctrl); 799 return ret; 800 } 801 802 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, 803 unsigned int cmd, unsigned long arg) 804 { 805 struct nvme_ns_head *head = bdev->bd_disk->private_data; 806 void __user *argp = (void __user *)arg; 807 struct nvme_ns *ns; 808 int srcu_idx, ret = -EWOULDBLOCK; 809 810 srcu_idx = srcu_read_lock(&head->srcu); 811 ns = nvme_find_path(head); 812 if (!ns) 813 goto out_unlock; 814 815 /* 816 * Handle ioctls that apply to the controller instead of the namespace 817 * seperately and drop the ns SRCU reference early. This avoids a 818 * deadlock when deleting namespaces using the passthrough interface. 819 */ 820 if (is_ctrl_ioctl(cmd)) 821 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 822 mode); 823 824 ret = nvme_ns_ioctl(ns, cmd, argp, mode); 825 out_unlock: 826 srcu_read_unlock(&head->srcu, srcu_idx); 827 return ret; 828 } 829 830 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, 831 unsigned long arg) 832 { 833 struct cdev *cdev = file_inode(file)->i_cdev; 834 struct nvme_ns_head *head = 835 container_of(cdev, struct nvme_ns_head, cdev); 836 void __user *argp = (void __user *)arg; 837 struct nvme_ns *ns; 838 int srcu_idx, ret = -EWOULDBLOCK; 839 840 srcu_idx = srcu_read_lock(&head->srcu); 841 ns = nvme_find_path(head); 842 if (!ns) 843 goto out_unlock; 844 845 if (is_ctrl_ioctl(cmd)) 846 return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, 847 file->f_mode); 848 849 ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); 850 out_unlock: 851 srcu_read_unlock(&head->srcu, srcu_idx); 852 return ret; 853 } 854 855 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 856 unsigned int issue_flags) 857 { 858 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 859 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 860 int srcu_idx = srcu_read_lock(&head->srcu); 861 struct nvme_ns *ns = nvme_find_path(head); 862 int ret = -EINVAL; 863 864 if (ns) 865 ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); 866 srcu_read_unlock(&head->srcu, srcu_idx); 867 return ret; 868 } 869 870 int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, 871 struct io_comp_batch *iob, 872 unsigned int poll_flags) 873 { 874 struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; 875 struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); 876 int srcu_idx = srcu_read_lock(&head->srcu); 877 struct nvme_ns *ns = nvme_find_path(head); 878 struct bio *bio; 879 int ret = 0; 880 struct request_queue *q; 881 882 if (ns) { 883 rcu_read_lock(); 884 bio = READ_ONCE(ioucmd->cookie); 885 q = ns->queue; 886 if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio 887 && bio->bi_bdev) 888 ret = bio_poll(bio, iob, poll_flags); 889 rcu_read_unlock(); 890 } 891 srcu_read_unlock(&head->srcu, srcu_idx); 892 return ret; 893 } 894 #endif /* CONFIG_NVME_MULTIPATH */ 895 896 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) 897 { 898 struct nvme_ctrl *ctrl = ioucmd->file->private_data; 899 int ret; 900 901 /* IOPOLL not supported yet */ 902 if (issue_flags & IO_URING_F_IOPOLL) 903 return -EOPNOTSUPP; 904 905 ret = nvme_uring_cmd_checks(issue_flags); 906 if (ret) 907 return ret; 908 909 switch (ioucmd->cmd_op) { 910 case NVME_URING_CMD_ADMIN: 911 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); 912 break; 913 case NVME_URING_CMD_ADMIN_VEC: 914 ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); 915 break; 916 default: 917 ret = -ENOTTY; 918 } 919 920 return ret; 921 } 922 923 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, 924 fmode_t mode) 925 { 926 struct nvme_ns *ns; 927 int ret; 928 929 down_read(&ctrl->namespaces_rwsem); 930 if (list_empty(&ctrl->namespaces)) { 931 ret = -ENOTTY; 932 goto out_unlock; 933 } 934 935 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); 936 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { 937 dev_warn(ctrl->device, 938 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); 939 ret = -EINVAL; 940 goto out_unlock; 941 } 942 943 dev_warn(ctrl->device, 944 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); 945 kref_get(&ns->kref); 946 up_read(&ctrl->namespaces_rwsem); 947 948 ret = nvme_user_cmd(ctrl, ns, argp, mode); 949 nvme_put_ns(ns); 950 return ret; 951 952 out_unlock: 953 up_read(&ctrl->namespaces_rwsem); 954 return ret; 955 } 956 957 long nvme_dev_ioctl(struct file *file, unsigned int cmd, 958 unsigned long arg) 959 { 960 struct nvme_ctrl *ctrl = file->private_data; 961 void __user *argp = (void __user *)arg; 962 963 switch (cmd) { 964 case NVME_IOCTL_ADMIN_CMD: 965 return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); 966 case NVME_IOCTL_ADMIN64_CMD: 967 return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); 968 case NVME_IOCTL_IO_CMD: 969 return nvme_dev_user_cmd(ctrl, argp, file->f_mode); 970 case NVME_IOCTL_RESET: 971 if (!capable(CAP_SYS_ADMIN)) 972 return -EACCES; 973 dev_warn(ctrl->device, "resetting controller\n"); 974 return nvme_reset_ctrl_sync(ctrl); 975 case NVME_IOCTL_SUBSYS_RESET: 976 if (!capable(CAP_SYS_ADMIN)) 977 return -EACCES; 978 return nvme_reset_subsystem(ctrl); 979 case NVME_IOCTL_RESCAN: 980 if (!capable(CAP_SYS_ADMIN)) 981 return -EACCES; 982 nvme_queue_scan(ctrl); 983 return 0; 984 default: 985 return -ENOTTY; 986 } 987 } 988