1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Network block device - make block devices work over TCP 4 * 5 * Note that you can not swap over this thing, yet. Seems to work but 6 * deadlocks sometimes - you can not swap over TCP in general. 7 * 8 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz> 9 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com> 10 * 11 * (part of code stolen from loop.c) 12 */ 13 14 #include <linux/major.h> 15 16 #include <linux/blkdev.h> 17 #include <linux/module.h> 18 #include <linux/init.h> 19 #include <linux/sched.h> 20 #include <linux/sched/mm.h> 21 #include <linux/fs.h> 22 #include <linux/bio.h> 23 #include <linux/stat.h> 24 #include <linux/errno.h> 25 #include <linux/file.h> 26 #include <linux/ioctl.h> 27 #include <linux/mutex.h> 28 #include <linux/compiler.h> 29 #include <linux/err.h> 30 #include <linux/kernel.h> 31 #include <linux/slab.h> 32 #include <net/sock.h> 33 #include <linux/net.h> 34 #include <linux/kthread.h> 35 #include <linux/types.h> 36 #include <linux/debugfs.h> 37 #include <linux/blk-mq.h> 38 39 #include <linux/uaccess.h> 40 #include <asm/types.h> 41 42 #include <linux/nbd.h> 43 #include <linux/nbd-netlink.h> 44 #include <net/genetlink.h> 45 46 #define CREATE_TRACE_POINTS 47 #include <trace/events/nbd.h> 48 49 static DEFINE_IDR(nbd_index_idr); 50 static DEFINE_MUTEX(nbd_index_mutex); 51 static int nbd_total_devices = 0; 52 53 struct nbd_sock { 54 struct socket *sock; 55 struct mutex tx_lock; 56 struct request *pending; 57 int sent; 58 bool dead; 59 int fallback_index; 60 int cookie; 61 }; 62 63 struct recv_thread_args { 64 struct work_struct work; 65 struct nbd_device *nbd; 66 int index; 67 }; 68 69 struct link_dead_args { 70 struct work_struct work; 71 int index; 72 }; 73 74 #define NBD_TIMEDOUT 0 75 #define NBD_DISCONNECT_REQUESTED 1 76 #define NBD_DISCONNECTED 2 77 #define NBD_HAS_PID_FILE 3 78 #define NBD_HAS_CONFIG_REF 4 79 #define NBD_BOUND 5 80 #define NBD_DESTROY_ON_DISCONNECT 6 81 #define NBD_DISCONNECT_ON_CLOSE 7 82 83 struct nbd_config { 84 u32 flags; 85 unsigned long runtime_flags; 86 u64 dead_conn_timeout; 87 88 struct nbd_sock **socks; 89 int num_connections; 90 atomic_t live_connections; 91 wait_queue_head_t conn_wait; 92 93 atomic_t recv_threads; 94 wait_queue_head_t recv_wq; 95 loff_t blksize; 96 loff_t bytesize; 97 #if IS_ENABLED(CONFIG_DEBUG_FS) 98 struct dentry *dbg_dir; 99 #endif 100 }; 101 102 struct nbd_device { 103 struct blk_mq_tag_set tag_set; 104 105 int index; 106 refcount_t config_refs; 107 refcount_t refs; 108 struct nbd_config *config; 109 struct mutex config_lock; 110 struct gendisk *disk; 111 112 struct list_head list; 113 struct task_struct *task_recv; 114 struct task_struct *task_setup; 115 }; 116 117 #define NBD_CMD_REQUEUED 1 118 119 struct nbd_cmd { 120 struct nbd_device *nbd; 121 struct mutex lock; 122 int index; 123 int cookie; 124 blk_status_t status; 125 unsigned long flags; 126 u32 cmd_cookie; 127 }; 128 129 #if IS_ENABLED(CONFIG_DEBUG_FS) 130 static struct dentry *nbd_dbg_dir; 131 #endif 132 133 #define nbd_name(nbd) ((nbd)->disk->disk_name) 134 135 #define NBD_MAGIC 0x68797548 136 137 #define NBD_DEF_BLKSIZE 1024 138 139 static unsigned int nbds_max = 16; 140 static int max_part = 16; 141 static struct workqueue_struct *recv_workqueue; 142 static int part_shift; 143 144 static int nbd_dev_dbg_init(struct nbd_device *nbd); 145 static void nbd_dev_dbg_close(struct nbd_device *nbd); 146 static void nbd_config_put(struct nbd_device *nbd); 147 static void nbd_connect_reply(struct genl_info *info, int index); 148 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); 149 static void nbd_dead_link_work(struct work_struct *work); 150 static void nbd_disconnect_and_put(struct nbd_device *nbd); 151 152 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 153 { 154 return disk_to_dev(nbd->disk); 155 } 156 157 static void nbd_requeue_cmd(struct nbd_cmd *cmd) 158 { 159 struct request *req = blk_mq_rq_from_pdu(cmd); 160 161 if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) 162 blk_mq_requeue_request(req, true); 163 } 164 165 #define NBD_COOKIE_BITS 32 166 167 static u64 nbd_cmd_handle(struct nbd_cmd *cmd) 168 { 169 struct request *req = blk_mq_rq_from_pdu(cmd); 170 u32 tag = blk_mq_unique_tag(req); 171 u64 cookie = cmd->cmd_cookie; 172 173 return (cookie << NBD_COOKIE_BITS) | tag; 174 } 175 176 static u32 nbd_handle_to_tag(u64 handle) 177 { 178 return (u32)handle; 179 } 180 181 static u32 nbd_handle_to_cookie(u64 handle) 182 { 183 return (u32)(handle >> NBD_COOKIE_BITS); 184 } 185 186 static const char *nbdcmd_to_ascii(int cmd) 187 { 188 switch (cmd) { 189 case NBD_CMD_READ: return "read"; 190 case NBD_CMD_WRITE: return "write"; 191 case NBD_CMD_DISC: return "disconnect"; 192 case NBD_CMD_FLUSH: return "flush"; 193 case NBD_CMD_TRIM: return "trim/discard"; 194 } 195 return "invalid"; 196 } 197 198 static ssize_t pid_show(struct device *dev, 199 struct device_attribute *attr, char *buf) 200 { 201 struct gendisk *disk = dev_to_disk(dev); 202 struct nbd_device *nbd = (struct nbd_device *)disk->private_data; 203 204 return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); 205 } 206 207 static const struct device_attribute pid_attr = { 208 .attr = { .name = "pid", .mode = 0444}, 209 .show = pid_show, 210 }; 211 212 static void nbd_dev_remove(struct nbd_device *nbd) 213 { 214 struct gendisk *disk = nbd->disk; 215 struct request_queue *q; 216 217 if (disk) { 218 q = disk->queue; 219 del_gendisk(disk); 220 blk_cleanup_queue(q); 221 blk_mq_free_tag_set(&nbd->tag_set); 222 disk->private_data = NULL; 223 put_disk(disk); 224 } 225 kfree(nbd); 226 } 227 228 static void nbd_put(struct nbd_device *nbd) 229 { 230 if (refcount_dec_and_mutex_lock(&nbd->refs, 231 &nbd_index_mutex)) { 232 idr_remove(&nbd_index_idr, nbd->index); 233 mutex_unlock(&nbd_index_mutex); 234 nbd_dev_remove(nbd); 235 } 236 } 237 238 static int nbd_disconnected(struct nbd_config *config) 239 { 240 return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || 241 test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 242 } 243 244 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, 245 int notify) 246 { 247 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { 248 struct link_dead_args *args; 249 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); 250 if (args) { 251 INIT_WORK(&args->work, nbd_dead_link_work); 252 args->index = nbd->index; 253 queue_work(system_wq, &args->work); 254 } 255 } 256 if (!nsock->dead) { 257 kernel_sock_shutdown(nsock->sock, SHUT_RDWR); 258 if (atomic_dec_return(&nbd->config->live_connections) == 0) { 259 if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, 260 &nbd->config->runtime_flags)) { 261 set_bit(NBD_DISCONNECTED, 262 &nbd->config->runtime_flags); 263 dev_info(nbd_to_dev(nbd), 264 "Disconnected due to user request.\n"); 265 } 266 } 267 } 268 nsock->dead = true; 269 nsock->pending = NULL; 270 nsock->sent = 0; 271 } 272 273 static void nbd_size_clear(struct nbd_device *nbd) 274 { 275 if (nbd->config->bytesize) { 276 set_capacity(nbd->disk, 0); 277 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 278 } 279 } 280 281 static void nbd_size_update(struct nbd_device *nbd) 282 { 283 struct nbd_config *config = nbd->config; 284 struct block_device *bdev = bdget_disk(nbd->disk, 0); 285 286 if (config->flags & NBD_FLAG_SEND_TRIM) { 287 nbd->disk->queue->limits.discard_granularity = config->blksize; 288 nbd->disk->queue->limits.discard_alignment = config->blksize; 289 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 290 } 291 blk_queue_logical_block_size(nbd->disk->queue, config->blksize); 292 blk_queue_physical_block_size(nbd->disk->queue, config->blksize); 293 set_capacity(nbd->disk, config->bytesize >> 9); 294 if (bdev) { 295 if (bdev->bd_disk) { 296 bd_set_size(bdev, config->bytesize); 297 set_blocksize(bdev, config->blksize); 298 } else 299 bdev->bd_invalidated = 1; 300 bdput(bdev); 301 } 302 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 303 } 304 305 static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, 306 loff_t nr_blocks) 307 { 308 struct nbd_config *config = nbd->config; 309 config->blksize = blocksize; 310 config->bytesize = blocksize * nr_blocks; 311 if (nbd->task_recv != NULL) 312 nbd_size_update(nbd); 313 } 314 315 static void nbd_complete_rq(struct request *req) 316 { 317 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 318 319 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req, 320 cmd->status ? "failed" : "done"); 321 322 blk_mq_end_request(req, cmd->status); 323 } 324 325 /* 326 * Forcibly shutdown the socket causing all listeners to error 327 */ 328 static void sock_shutdown(struct nbd_device *nbd) 329 { 330 struct nbd_config *config = nbd->config; 331 int i; 332 333 if (config->num_connections == 0) 334 return; 335 if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) 336 return; 337 338 for (i = 0; i < config->num_connections; i++) { 339 struct nbd_sock *nsock = config->socks[i]; 340 mutex_lock(&nsock->tx_lock); 341 nbd_mark_nsock_dead(nbd, nsock, 0); 342 mutex_unlock(&nsock->tx_lock); 343 } 344 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); 345 } 346 347 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, 348 bool reserved) 349 { 350 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 351 struct nbd_device *nbd = cmd->nbd; 352 struct nbd_config *config; 353 354 if (!refcount_inc_not_zero(&nbd->config_refs)) { 355 cmd->status = BLK_STS_TIMEOUT; 356 goto done; 357 } 358 config = nbd->config; 359 360 if (!mutex_trylock(&cmd->lock)) 361 return BLK_EH_RESET_TIMER; 362 363 if (config->num_connections > 1) { 364 dev_err_ratelimited(nbd_to_dev(nbd), 365 "Connection timed out, retrying (%d/%d alive)\n", 366 atomic_read(&config->live_connections), 367 config->num_connections); 368 /* 369 * Hooray we have more connections, requeue this IO, the submit 370 * path will put it on a real connection. 371 */ 372 if (config->socks && config->num_connections > 1) { 373 if (cmd->index < config->num_connections) { 374 struct nbd_sock *nsock = 375 config->socks[cmd->index]; 376 mutex_lock(&nsock->tx_lock); 377 /* We can have multiple outstanding requests, so 378 * we don't want to mark the nsock dead if we've 379 * already reconnected with a new socket, so 380 * only mark it dead if its the same socket we 381 * were sent out on. 382 */ 383 if (cmd->cookie == nsock->cookie) 384 nbd_mark_nsock_dead(nbd, nsock, 1); 385 mutex_unlock(&nsock->tx_lock); 386 } 387 mutex_unlock(&cmd->lock); 388 nbd_requeue_cmd(cmd); 389 nbd_config_put(nbd); 390 return BLK_EH_DONE; 391 } 392 } else { 393 dev_err_ratelimited(nbd_to_dev(nbd), 394 "Connection timed out\n"); 395 } 396 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 397 cmd->status = BLK_STS_IOERR; 398 mutex_unlock(&cmd->lock); 399 sock_shutdown(nbd); 400 nbd_config_put(nbd); 401 done: 402 blk_mq_complete_request(req); 403 return BLK_EH_DONE; 404 } 405 406 /* 407 * Send or receive packet. 408 */ 409 static int sock_xmit(struct nbd_device *nbd, int index, int send, 410 struct iov_iter *iter, int msg_flags, int *sent) 411 { 412 struct nbd_config *config = nbd->config; 413 struct socket *sock = config->socks[index]->sock; 414 int result; 415 struct msghdr msg; 416 unsigned int noreclaim_flag; 417 418 if (unlikely(!sock)) { 419 dev_err_ratelimited(disk_to_dev(nbd->disk), 420 "Attempted %s on closed socket in sock_xmit\n", 421 (send ? "send" : "recv")); 422 return -EINVAL; 423 } 424 425 msg.msg_iter = *iter; 426 427 noreclaim_flag = memalloc_noreclaim_save(); 428 do { 429 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; 430 msg.msg_name = NULL; 431 msg.msg_namelen = 0; 432 msg.msg_control = NULL; 433 msg.msg_controllen = 0; 434 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 435 436 if (send) 437 result = sock_sendmsg(sock, &msg); 438 else 439 result = sock_recvmsg(sock, &msg, msg.msg_flags); 440 441 if (result <= 0) { 442 if (result == 0) 443 result = -EPIPE; /* short read */ 444 break; 445 } 446 if (sent) 447 *sent += result; 448 } while (msg_data_left(&msg)); 449 450 memalloc_noreclaim_restore(noreclaim_flag); 451 452 return result; 453 } 454 455 /* 456 * Different settings for sk->sk_sndtimeo can result in different return values 457 * if there is a signal pending when we enter sendmsg, because reasons? 458 */ 459 static inline int was_interrupted(int result) 460 { 461 return result == -ERESTARTSYS || result == -EINTR; 462 } 463 464 /* always call with the tx_lock held */ 465 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) 466 { 467 struct request *req = blk_mq_rq_from_pdu(cmd); 468 struct nbd_config *config = nbd->config; 469 struct nbd_sock *nsock = config->socks[index]; 470 int result; 471 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; 472 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 473 struct iov_iter from; 474 unsigned long size = blk_rq_bytes(req); 475 struct bio *bio; 476 u64 handle; 477 u32 type; 478 u32 nbd_cmd_flags = 0; 479 int sent = nsock->sent, skip = 0; 480 481 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); 482 483 switch (req_op(req)) { 484 case REQ_OP_DISCARD: 485 type = NBD_CMD_TRIM; 486 break; 487 case REQ_OP_FLUSH: 488 type = NBD_CMD_FLUSH; 489 break; 490 case REQ_OP_WRITE: 491 type = NBD_CMD_WRITE; 492 break; 493 case REQ_OP_READ: 494 type = NBD_CMD_READ; 495 break; 496 default: 497 return -EIO; 498 } 499 500 if (rq_data_dir(req) == WRITE && 501 (config->flags & NBD_FLAG_READ_ONLY)) { 502 dev_err_ratelimited(disk_to_dev(nbd->disk), 503 "Write on read-only\n"); 504 return -EIO; 505 } 506 507 if (req->cmd_flags & REQ_FUA) 508 nbd_cmd_flags |= NBD_CMD_FLAG_FUA; 509 510 /* We did a partial send previously, and we at least sent the whole 511 * request struct, so just go and send the rest of the pages in the 512 * request. 513 */ 514 if (sent) { 515 if (sent >= sizeof(request)) { 516 skip = sent - sizeof(request); 517 518 /* initialize handle for tracing purposes */ 519 handle = nbd_cmd_handle(cmd); 520 521 goto send_pages; 522 } 523 iov_iter_advance(&from, sent); 524 } else { 525 cmd->cmd_cookie++; 526 } 527 cmd->index = index; 528 cmd->cookie = nsock->cookie; 529 request.type = htonl(type | nbd_cmd_flags); 530 if (type != NBD_CMD_FLUSH) { 531 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 532 request.len = htonl(size); 533 } 534 handle = nbd_cmd_handle(cmd); 535 memcpy(request.handle, &handle, sizeof(handle)); 536 537 trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd)); 538 539 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 540 req, nbdcmd_to_ascii(type), 541 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); 542 result = sock_xmit(nbd, index, 1, &from, 543 (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); 544 trace_nbd_header_sent(req, handle); 545 if (result <= 0) { 546 if (was_interrupted(result)) { 547 /* If we havne't sent anything we can just return BUSY, 548 * however if we have sent something we need to make 549 * sure we only allow this req to be sent until we are 550 * completely done. 551 */ 552 if (sent) { 553 nsock->pending = req; 554 nsock->sent = sent; 555 } 556 set_bit(NBD_CMD_REQUEUED, &cmd->flags); 557 return BLK_STS_RESOURCE; 558 } 559 dev_err_ratelimited(disk_to_dev(nbd->disk), 560 "Send control failed (result %d)\n", result); 561 return -EAGAIN; 562 } 563 send_pages: 564 if (type != NBD_CMD_WRITE) 565 goto out; 566 567 bio = req->bio; 568 while (bio) { 569 struct bio *next = bio->bi_next; 570 struct bvec_iter iter; 571 struct bio_vec bvec; 572 573 bio_for_each_segment(bvec, bio, iter) { 574 bool is_last = !next && bio_iter_last(bvec, iter); 575 int flags = is_last ? 0 : MSG_MORE; 576 577 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", 578 req, bvec.bv_len); 579 iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len); 580 if (skip) { 581 if (skip >= iov_iter_count(&from)) { 582 skip -= iov_iter_count(&from); 583 continue; 584 } 585 iov_iter_advance(&from, skip); 586 skip = 0; 587 } 588 result = sock_xmit(nbd, index, 1, &from, flags, &sent); 589 if (result <= 0) { 590 if (was_interrupted(result)) { 591 /* We've already sent the header, we 592 * have no choice but to set pending and 593 * return BUSY. 594 */ 595 nsock->pending = req; 596 nsock->sent = sent; 597 set_bit(NBD_CMD_REQUEUED, &cmd->flags); 598 return BLK_STS_RESOURCE; 599 } 600 dev_err(disk_to_dev(nbd->disk), 601 "Send data failed (result %d)\n", 602 result); 603 return -EAGAIN; 604 } 605 /* 606 * The completion might already have come in, 607 * so break for the last one instead of letting 608 * the iterator do it. This prevents use-after-free 609 * of the bio. 610 */ 611 if (is_last) 612 break; 613 } 614 bio = next; 615 } 616 out: 617 trace_nbd_payload_sent(req, handle); 618 nsock->pending = NULL; 619 nsock->sent = 0; 620 return 0; 621 } 622 623 /* NULL returned = something went wrong, inform userspace */ 624 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) 625 { 626 struct nbd_config *config = nbd->config; 627 int result; 628 struct nbd_reply reply; 629 struct nbd_cmd *cmd; 630 struct request *req = NULL; 631 u64 handle; 632 u16 hwq; 633 u32 tag; 634 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; 635 struct iov_iter to; 636 int ret = 0; 637 638 reply.magic = 0; 639 iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); 640 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 641 if (result <= 0) { 642 if (!nbd_disconnected(config)) 643 dev_err(disk_to_dev(nbd->disk), 644 "Receive control failed (result %d)\n", result); 645 return ERR_PTR(result); 646 } 647 648 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 649 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", 650 (unsigned long)ntohl(reply.magic)); 651 return ERR_PTR(-EPROTO); 652 } 653 654 memcpy(&handle, reply.handle, sizeof(handle)); 655 tag = nbd_handle_to_tag(handle); 656 hwq = blk_mq_unique_tag_to_hwq(tag); 657 if (hwq < nbd->tag_set.nr_hw_queues) 658 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], 659 blk_mq_unique_tag_to_tag(tag)); 660 if (!req || !blk_mq_request_started(req)) { 661 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n", 662 tag, req); 663 return ERR_PTR(-ENOENT); 664 } 665 trace_nbd_header_received(req, handle); 666 cmd = blk_mq_rq_to_pdu(req); 667 668 mutex_lock(&cmd->lock); 669 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { 670 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", 671 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); 672 ret = -ENOENT; 673 goto out; 674 } 675 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { 676 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", 677 req); 678 ret = -ENOENT; 679 goto out; 680 } 681 if (ntohl(reply.error)) { 682 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 683 ntohl(reply.error)); 684 cmd->status = BLK_STS_IOERR; 685 goto out; 686 } 687 688 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 689 if (rq_data_dir(req) != WRITE) { 690 struct req_iterator iter; 691 struct bio_vec bvec; 692 693 rq_for_each_segment(bvec, req, iter) { 694 iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); 695 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 696 if (result <= 0) { 697 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 698 result); 699 /* 700 * If we've disconnected or we only have 1 701 * connection then we need to make sure we 702 * complete this request, otherwise error out 703 * and let the timeout stuff handle resubmitting 704 * this request onto another connection. 705 */ 706 if (nbd_disconnected(config) || 707 config->num_connections <= 1) { 708 cmd->status = BLK_STS_IOERR; 709 goto out; 710 } 711 ret = -EIO; 712 goto out; 713 } 714 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 715 req, bvec.bv_len); 716 } 717 } 718 out: 719 trace_nbd_payload_received(req, handle); 720 mutex_unlock(&cmd->lock); 721 return ret ? ERR_PTR(ret) : cmd; 722 } 723 724 static void recv_work(struct work_struct *work) 725 { 726 struct recv_thread_args *args = container_of(work, 727 struct recv_thread_args, 728 work); 729 struct nbd_device *nbd = args->nbd; 730 struct nbd_config *config = nbd->config; 731 struct nbd_cmd *cmd; 732 733 while (1) { 734 cmd = nbd_read_stat(nbd, args->index); 735 if (IS_ERR(cmd)) { 736 struct nbd_sock *nsock = config->socks[args->index]; 737 738 mutex_lock(&nsock->tx_lock); 739 nbd_mark_nsock_dead(nbd, nsock, 1); 740 mutex_unlock(&nsock->tx_lock); 741 break; 742 } 743 744 blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); 745 } 746 atomic_dec(&config->recv_threads); 747 wake_up(&config->recv_wq); 748 nbd_config_put(nbd); 749 kfree(args); 750 } 751 752 static bool nbd_clear_req(struct request *req, void *data, bool reserved) 753 { 754 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 755 756 cmd->status = BLK_STS_IOERR; 757 blk_mq_complete_request(req); 758 return true; 759 } 760 761 static void nbd_clear_que(struct nbd_device *nbd) 762 { 763 blk_mq_quiesce_queue(nbd->disk->queue); 764 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); 765 blk_mq_unquiesce_queue(nbd->disk->queue); 766 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); 767 } 768 769 static int find_fallback(struct nbd_device *nbd, int index) 770 { 771 struct nbd_config *config = nbd->config; 772 int new_index = -1; 773 struct nbd_sock *nsock = config->socks[index]; 774 int fallback = nsock->fallback_index; 775 776 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 777 return new_index; 778 779 if (config->num_connections <= 1) { 780 dev_err_ratelimited(disk_to_dev(nbd->disk), 781 "Attempted send on invalid socket\n"); 782 return new_index; 783 } 784 785 if (fallback >= 0 && fallback < config->num_connections && 786 !config->socks[fallback]->dead) 787 return fallback; 788 789 if (nsock->fallback_index < 0 || 790 nsock->fallback_index >= config->num_connections || 791 config->socks[nsock->fallback_index]->dead) { 792 int i; 793 for (i = 0; i < config->num_connections; i++) { 794 if (i == index) 795 continue; 796 if (!config->socks[i]->dead) { 797 new_index = i; 798 break; 799 } 800 } 801 nsock->fallback_index = new_index; 802 if (new_index < 0) { 803 dev_err_ratelimited(disk_to_dev(nbd->disk), 804 "Dead connection, failed to find a fallback\n"); 805 return new_index; 806 } 807 } 808 new_index = nsock->fallback_index; 809 return new_index; 810 } 811 812 static int wait_for_reconnect(struct nbd_device *nbd) 813 { 814 struct nbd_config *config = nbd->config; 815 if (!config->dead_conn_timeout) 816 return 0; 817 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 818 return 0; 819 return wait_event_timeout(config->conn_wait, 820 atomic_read(&config->live_connections) > 0, 821 config->dead_conn_timeout) > 0; 822 } 823 824 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 825 { 826 struct request *req = blk_mq_rq_from_pdu(cmd); 827 struct nbd_device *nbd = cmd->nbd; 828 struct nbd_config *config; 829 struct nbd_sock *nsock; 830 int ret; 831 832 if (!refcount_inc_not_zero(&nbd->config_refs)) { 833 dev_err_ratelimited(disk_to_dev(nbd->disk), 834 "Socks array is empty\n"); 835 blk_mq_start_request(req); 836 return -EINVAL; 837 } 838 config = nbd->config; 839 840 if (index >= config->num_connections) { 841 dev_err_ratelimited(disk_to_dev(nbd->disk), 842 "Attempted send on invalid socket\n"); 843 nbd_config_put(nbd); 844 blk_mq_start_request(req); 845 return -EINVAL; 846 } 847 cmd->status = BLK_STS_OK; 848 again: 849 nsock = config->socks[index]; 850 mutex_lock(&nsock->tx_lock); 851 if (nsock->dead) { 852 int old_index = index; 853 index = find_fallback(nbd, index); 854 mutex_unlock(&nsock->tx_lock); 855 if (index < 0) { 856 if (wait_for_reconnect(nbd)) { 857 index = old_index; 858 goto again; 859 } 860 /* All the sockets should already be down at this point, 861 * we just want to make sure that DISCONNECTED is set so 862 * any requests that come in that were queue'ed waiting 863 * for the reconnect timer don't trigger the timer again 864 * and instead just error out. 865 */ 866 sock_shutdown(nbd); 867 nbd_config_put(nbd); 868 blk_mq_start_request(req); 869 return -EIO; 870 } 871 goto again; 872 } 873 874 /* Handle the case that we have a pending request that was partially 875 * transmitted that _has_ to be serviced first. We need to call requeue 876 * here so that it gets put _after_ the request that is already on the 877 * dispatch list. 878 */ 879 blk_mq_start_request(req); 880 if (unlikely(nsock->pending && nsock->pending != req)) { 881 nbd_requeue_cmd(cmd); 882 ret = 0; 883 goto out; 884 } 885 /* 886 * Some failures are related to the link going down, so anything that 887 * returns EAGAIN can be retried on a different socket. 888 */ 889 ret = nbd_send_cmd(nbd, cmd, index); 890 if (ret == -EAGAIN) { 891 dev_err_ratelimited(disk_to_dev(nbd->disk), 892 "Request send failed, requeueing\n"); 893 nbd_mark_nsock_dead(nbd, nsock, 1); 894 nbd_requeue_cmd(cmd); 895 ret = 0; 896 } 897 out: 898 mutex_unlock(&nsock->tx_lock); 899 nbd_config_put(nbd); 900 return ret; 901 } 902 903 static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, 904 const struct blk_mq_queue_data *bd) 905 { 906 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 907 int ret; 908 909 /* 910 * Since we look at the bio's to send the request over the network we 911 * need to make sure the completion work doesn't mark this request done 912 * before we are done doing our send. This keeps us from dereferencing 913 * freed data if we have particularly fast completions (ie we get the 914 * completion before we exit sock_xmit on the last bvec) or in the case 915 * that the server is misbehaving (or there was an error) before we're 916 * done sending everything over the wire. 917 */ 918 mutex_lock(&cmd->lock); 919 clear_bit(NBD_CMD_REQUEUED, &cmd->flags); 920 921 /* We can be called directly from the user space process, which means we 922 * could possibly have signals pending so our sendmsg will fail. In 923 * this case we need to return that we are busy, otherwise error out as 924 * appropriate. 925 */ 926 ret = nbd_handle_cmd(cmd, hctx->queue_num); 927 if (ret < 0) 928 ret = BLK_STS_IOERR; 929 else if (!ret) 930 ret = BLK_STS_OK; 931 mutex_unlock(&cmd->lock); 932 933 return ret; 934 } 935 936 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 937 bool netlink) 938 { 939 struct nbd_config *config = nbd->config; 940 struct socket *sock; 941 struct nbd_sock **socks; 942 struct nbd_sock *nsock; 943 int err; 944 945 sock = sockfd_lookup(arg, &err); 946 if (!sock) 947 return err; 948 949 if (!netlink && !nbd->task_setup && 950 !test_bit(NBD_BOUND, &config->runtime_flags)) 951 nbd->task_setup = current; 952 953 if (!netlink && 954 (nbd->task_setup != current || 955 test_bit(NBD_BOUND, &config->runtime_flags))) { 956 dev_err(disk_to_dev(nbd->disk), 957 "Device being setup by another task"); 958 sockfd_put(sock); 959 return -EBUSY; 960 } 961 962 socks = krealloc(config->socks, (config->num_connections + 1) * 963 sizeof(struct nbd_sock *), GFP_KERNEL); 964 if (!socks) { 965 sockfd_put(sock); 966 return -ENOMEM; 967 } 968 nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); 969 if (!nsock) { 970 sockfd_put(sock); 971 return -ENOMEM; 972 } 973 974 config->socks = socks; 975 976 nsock->fallback_index = -1; 977 nsock->dead = false; 978 mutex_init(&nsock->tx_lock); 979 nsock->sock = sock; 980 nsock->pending = NULL; 981 nsock->sent = 0; 982 nsock->cookie = 0; 983 socks[config->num_connections++] = nsock; 984 atomic_inc(&config->live_connections); 985 986 return 0; 987 } 988 989 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) 990 { 991 struct nbd_config *config = nbd->config; 992 struct socket *sock, *old; 993 struct recv_thread_args *args; 994 int i; 995 int err; 996 997 sock = sockfd_lookup(arg, &err); 998 if (!sock) 999 return err; 1000 1001 args = kzalloc(sizeof(*args), GFP_KERNEL); 1002 if (!args) { 1003 sockfd_put(sock); 1004 return -ENOMEM; 1005 } 1006 1007 for (i = 0; i < config->num_connections; i++) { 1008 struct nbd_sock *nsock = config->socks[i]; 1009 1010 if (!nsock->dead) 1011 continue; 1012 1013 mutex_lock(&nsock->tx_lock); 1014 if (!nsock->dead) { 1015 mutex_unlock(&nsock->tx_lock); 1016 continue; 1017 } 1018 sk_set_memalloc(sock->sk); 1019 if (nbd->tag_set.timeout) 1020 sock->sk->sk_sndtimeo = nbd->tag_set.timeout; 1021 atomic_inc(&config->recv_threads); 1022 refcount_inc(&nbd->config_refs); 1023 old = nsock->sock; 1024 nsock->fallback_index = -1; 1025 nsock->sock = sock; 1026 nsock->dead = false; 1027 INIT_WORK(&args->work, recv_work); 1028 args->index = i; 1029 args->nbd = nbd; 1030 nsock->cookie++; 1031 mutex_unlock(&nsock->tx_lock); 1032 sockfd_put(old); 1033 1034 clear_bit(NBD_DISCONNECTED, &config->runtime_flags); 1035 1036 /* We take the tx_mutex in an error path in the recv_work, so we 1037 * need to queue_work outside of the tx_mutex. 1038 */ 1039 queue_work(recv_workqueue, &args->work); 1040 1041 atomic_inc(&config->live_connections); 1042 wake_up(&config->conn_wait); 1043 return 0; 1044 } 1045 sockfd_put(sock); 1046 kfree(args); 1047 return -ENOSPC; 1048 } 1049 1050 static void nbd_bdev_reset(struct block_device *bdev) 1051 { 1052 if (bdev->bd_openers > 1) 1053 return; 1054 bd_set_size(bdev, 0); 1055 } 1056 1057 static void nbd_parse_flags(struct nbd_device *nbd) 1058 { 1059 struct nbd_config *config = nbd->config; 1060 if (config->flags & NBD_FLAG_READ_ONLY) 1061 set_disk_ro(nbd->disk, true); 1062 else 1063 set_disk_ro(nbd->disk, false); 1064 if (config->flags & NBD_FLAG_SEND_TRIM) 1065 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1066 if (config->flags & NBD_FLAG_SEND_FLUSH) { 1067 if (config->flags & NBD_FLAG_SEND_FUA) 1068 blk_queue_write_cache(nbd->disk->queue, true, true); 1069 else 1070 blk_queue_write_cache(nbd->disk->queue, true, false); 1071 } 1072 else 1073 blk_queue_write_cache(nbd->disk->queue, false, false); 1074 } 1075 1076 static void send_disconnects(struct nbd_device *nbd) 1077 { 1078 struct nbd_config *config = nbd->config; 1079 struct nbd_request request = { 1080 .magic = htonl(NBD_REQUEST_MAGIC), 1081 .type = htonl(NBD_CMD_DISC), 1082 }; 1083 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 1084 struct iov_iter from; 1085 int i, ret; 1086 1087 for (i = 0; i < config->num_connections; i++) { 1088 struct nbd_sock *nsock = config->socks[i]; 1089 1090 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); 1091 mutex_lock(&nsock->tx_lock); 1092 ret = sock_xmit(nbd, i, 1, &from, 0, NULL); 1093 if (ret <= 0) 1094 dev_err(disk_to_dev(nbd->disk), 1095 "Send disconnect failed %d\n", ret); 1096 mutex_unlock(&nsock->tx_lock); 1097 } 1098 } 1099 1100 static int nbd_disconnect(struct nbd_device *nbd) 1101 { 1102 struct nbd_config *config = nbd->config; 1103 1104 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); 1105 set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 1106 send_disconnects(nbd); 1107 return 0; 1108 } 1109 1110 static void nbd_clear_sock(struct nbd_device *nbd) 1111 { 1112 sock_shutdown(nbd); 1113 nbd_clear_que(nbd); 1114 nbd->task_setup = NULL; 1115 } 1116 1117 static void nbd_config_put(struct nbd_device *nbd) 1118 { 1119 if (refcount_dec_and_mutex_lock(&nbd->config_refs, 1120 &nbd->config_lock)) { 1121 struct nbd_config *config = nbd->config; 1122 nbd_dev_dbg_close(nbd); 1123 nbd_size_clear(nbd); 1124 if (test_and_clear_bit(NBD_HAS_PID_FILE, 1125 &config->runtime_flags)) 1126 device_remove_file(disk_to_dev(nbd->disk), &pid_attr); 1127 nbd->task_recv = NULL; 1128 nbd_clear_sock(nbd); 1129 if (config->num_connections) { 1130 int i; 1131 for (i = 0; i < config->num_connections; i++) { 1132 sockfd_put(config->socks[i]->sock); 1133 kfree(config->socks[i]); 1134 } 1135 kfree(config->socks); 1136 } 1137 kfree(nbd->config); 1138 nbd->config = NULL; 1139 1140 nbd->tag_set.timeout = 0; 1141 nbd->disk->queue->limits.discard_granularity = 0; 1142 nbd->disk->queue->limits.discard_alignment = 0; 1143 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 1144 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1145 1146 mutex_unlock(&nbd->config_lock); 1147 nbd_put(nbd); 1148 module_put(THIS_MODULE); 1149 } 1150 } 1151 1152 static int nbd_start_device(struct nbd_device *nbd) 1153 { 1154 struct nbd_config *config = nbd->config; 1155 int num_connections = config->num_connections; 1156 int error = 0, i; 1157 1158 if (nbd->task_recv) 1159 return -EBUSY; 1160 if (!config->socks) 1161 return -EINVAL; 1162 if (num_connections > 1 && 1163 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { 1164 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); 1165 return -EINVAL; 1166 } 1167 1168 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); 1169 nbd->task_recv = current; 1170 1171 nbd_parse_flags(nbd); 1172 1173 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); 1174 if (error) { 1175 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); 1176 return error; 1177 } 1178 set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); 1179 1180 nbd_dev_dbg_init(nbd); 1181 for (i = 0; i < num_connections; i++) { 1182 struct recv_thread_args *args; 1183 1184 args = kzalloc(sizeof(*args), GFP_KERNEL); 1185 if (!args) { 1186 sock_shutdown(nbd); 1187 return -ENOMEM; 1188 } 1189 sk_set_memalloc(config->socks[i]->sock->sk); 1190 if (nbd->tag_set.timeout) 1191 config->socks[i]->sock->sk->sk_sndtimeo = 1192 nbd->tag_set.timeout; 1193 atomic_inc(&config->recv_threads); 1194 refcount_inc(&nbd->config_refs); 1195 INIT_WORK(&args->work, recv_work); 1196 args->nbd = nbd; 1197 args->index = i; 1198 queue_work(recv_workqueue, &args->work); 1199 } 1200 nbd_size_update(nbd); 1201 return error; 1202 } 1203 1204 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) 1205 { 1206 struct nbd_config *config = nbd->config; 1207 int ret; 1208 1209 ret = nbd_start_device(nbd); 1210 if (ret) 1211 return ret; 1212 1213 if (max_part) 1214 bdev->bd_invalidated = 1; 1215 mutex_unlock(&nbd->config_lock); 1216 ret = wait_event_interruptible(config->recv_wq, 1217 atomic_read(&config->recv_threads) == 0); 1218 if (ret) 1219 sock_shutdown(nbd); 1220 mutex_lock(&nbd->config_lock); 1221 nbd_bdev_reset(bdev); 1222 /* user requested, ignore socket errors */ 1223 if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) 1224 ret = 0; 1225 if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) 1226 ret = -ETIMEDOUT; 1227 return ret; 1228 } 1229 1230 static void nbd_clear_sock_ioctl(struct nbd_device *nbd, 1231 struct block_device *bdev) 1232 { 1233 sock_shutdown(nbd); 1234 __invalidate_device(bdev, true); 1235 nbd_bdev_reset(bdev); 1236 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1237 &nbd->config->runtime_flags)) 1238 nbd_config_put(nbd); 1239 } 1240 1241 static bool nbd_is_valid_blksize(unsigned long blksize) 1242 { 1243 if (!blksize || !is_power_of_2(blksize) || blksize < 512 || 1244 blksize > PAGE_SIZE) 1245 return false; 1246 return true; 1247 } 1248 1249 /* Must be called with config_lock held */ 1250 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, 1251 unsigned int cmd, unsigned long arg) 1252 { 1253 struct nbd_config *config = nbd->config; 1254 1255 switch (cmd) { 1256 case NBD_DISCONNECT: 1257 return nbd_disconnect(nbd); 1258 case NBD_CLEAR_SOCK: 1259 nbd_clear_sock_ioctl(nbd, bdev); 1260 return 0; 1261 case NBD_SET_SOCK: 1262 return nbd_add_socket(nbd, arg, false); 1263 case NBD_SET_BLKSIZE: 1264 if (!arg) 1265 arg = NBD_DEF_BLKSIZE; 1266 if (!nbd_is_valid_blksize(arg)) 1267 return -EINVAL; 1268 nbd_size_set(nbd, arg, 1269 div_s64(config->bytesize, arg)); 1270 return 0; 1271 case NBD_SET_SIZE: 1272 nbd_size_set(nbd, config->blksize, 1273 div_s64(arg, config->blksize)); 1274 return 0; 1275 case NBD_SET_SIZE_BLOCKS: 1276 nbd_size_set(nbd, config->blksize, arg); 1277 return 0; 1278 case NBD_SET_TIMEOUT: 1279 if (arg) { 1280 nbd->tag_set.timeout = arg * HZ; 1281 blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); 1282 } 1283 return 0; 1284 1285 case NBD_SET_FLAGS: 1286 config->flags = arg; 1287 return 0; 1288 case NBD_DO_IT: 1289 return nbd_start_device_ioctl(nbd, bdev); 1290 case NBD_CLEAR_QUE: 1291 /* 1292 * This is for compatibility only. The queue is always cleared 1293 * by NBD_DO_IT or NBD_CLEAR_SOCK. 1294 */ 1295 return 0; 1296 case NBD_PRINT_DEBUG: 1297 /* 1298 * For compatibility only, we no longer keep a list of 1299 * outstanding requests. 1300 */ 1301 return 0; 1302 } 1303 return -ENOTTY; 1304 } 1305 1306 static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 1307 unsigned int cmd, unsigned long arg) 1308 { 1309 struct nbd_device *nbd = bdev->bd_disk->private_data; 1310 struct nbd_config *config = nbd->config; 1311 int error = -EINVAL; 1312 1313 if (!capable(CAP_SYS_ADMIN)) 1314 return -EPERM; 1315 1316 /* The block layer will pass back some non-nbd ioctls in case we have 1317 * special handling for them, but we don't so just return an error. 1318 */ 1319 if (_IOC_TYPE(cmd) != 0xab) 1320 return -EINVAL; 1321 1322 mutex_lock(&nbd->config_lock); 1323 1324 /* Don't allow ioctl operations on a nbd device that was created with 1325 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. 1326 */ 1327 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1328 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) 1329 error = __nbd_ioctl(bdev, nbd, cmd, arg); 1330 else 1331 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); 1332 mutex_unlock(&nbd->config_lock); 1333 return error; 1334 } 1335 1336 static struct nbd_config *nbd_alloc_config(void) 1337 { 1338 struct nbd_config *config; 1339 1340 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); 1341 if (!config) 1342 return NULL; 1343 atomic_set(&config->recv_threads, 0); 1344 init_waitqueue_head(&config->recv_wq); 1345 init_waitqueue_head(&config->conn_wait); 1346 config->blksize = NBD_DEF_BLKSIZE; 1347 atomic_set(&config->live_connections, 0); 1348 try_module_get(THIS_MODULE); 1349 return config; 1350 } 1351 1352 static int nbd_open(struct block_device *bdev, fmode_t mode) 1353 { 1354 struct nbd_device *nbd; 1355 int ret = 0; 1356 1357 mutex_lock(&nbd_index_mutex); 1358 nbd = bdev->bd_disk->private_data; 1359 if (!nbd) { 1360 ret = -ENXIO; 1361 goto out; 1362 } 1363 if (!refcount_inc_not_zero(&nbd->refs)) { 1364 ret = -ENXIO; 1365 goto out; 1366 } 1367 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1368 struct nbd_config *config; 1369 1370 mutex_lock(&nbd->config_lock); 1371 if (refcount_inc_not_zero(&nbd->config_refs)) { 1372 mutex_unlock(&nbd->config_lock); 1373 goto out; 1374 } 1375 config = nbd->config = nbd_alloc_config(); 1376 if (!config) { 1377 ret = -ENOMEM; 1378 mutex_unlock(&nbd->config_lock); 1379 goto out; 1380 } 1381 refcount_set(&nbd->config_refs, 1); 1382 refcount_inc(&nbd->refs); 1383 mutex_unlock(&nbd->config_lock); 1384 bdev->bd_invalidated = 1; 1385 } else if (nbd_disconnected(nbd->config)) { 1386 bdev->bd_invalidated = 1; 1387 } 1388 out: 1389 mutex_unlock(&nbd_index_mutex); 1390 return ret; 1391 } 1392 1393 static void nbd_release(struct gendisk *disk, fmode_t mode) 1394 { 1395 struct nbd_device *nbd = disk->private_data; 1396 struct block_device *bdev = bdget_disk(disk, 0); 1397 1398 if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1399 bdev->bd_openers == 0) 1400 nbd_disconnect_and_put(nbd); 1401 1402 nbd_config_put(nbd); 1403 nbd_put(nbd); 1404 } 1405 1406 static const struct block_device_operations nbd_fops = 1407 { 1408 .owner = THIS_MODULE, 1409 .open = nbd_open, 1410 .release = nbd_release, 1411 .ioctl = nbd_ioctl, 1412 .compat_ioctl = nbd_ioctl, 1413 }; 1414 1415 #if IS_ENABLED(CONFIG_DEBUG_FS) 1416 1417 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) 1418 { 1419 struct nbd_device *nbd = s->private; 1420 1421 if (nbd->task_recv) 1422 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); 1423 1424 return 0; 1425 } 1426 1427 static int nbd_dbg_tasks_open(struct inode *inode, struct file *file) 1428 { 1429 return single_open(file, nbd_dbg_tasks_show, inode->i_private); 1430 } 1431 1432 static const struct file_operations nbd_dbg_tasks_ops = { 1433 .open = nbd_dbg_tasks_open, 1434 .read = seq_read, 1435 .llseek = seq_lseek, 1436 .release = single_release, 1437 }; 1438 1439 static int nbd_dbg_flags_show(struct seq_file *s, void *unused) 1440 { 1441 struct nbd_device *nbd = s->private; 1442 u32 flags = nbd->config->flags; 1443 1444 seq_printf(s, "Hex: 0x%08x\n\n", flags); 1445 1446 seq_puts(s, "Known flags:\n"); 1447 1448 if (flags & NBD_FLAG_HAS_FLAGS) 1449 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n"); 1450 if (flags & NBD_FLAG_READ_ONLY) 1451 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1452 if (flags & NBD_FLAG_SEND_FLUSH) 1453 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1454 if (flags & NBD_FLAG_SEND_FUA) 1455 seq_puts(s, "NBD_FLAG_SEND_FUA\n"); 1456 if (flags & NBD_FLAG_SEND_TRIM) 1457 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1458 1459 return 0; 1460 } 1461 1462 static int nbd_dbg_flags_open(struct inode *inode, struct file *file) 1463 { 1464 return single_open(file, nbd_dbg_flags_show, inode->i_private); 1465 } 1466 1467 static const struct file_operations nbd_dbg_flags_ops = { 1468 .open = nbd_dbg_flags_open, 1469 .read = seq_read, 1470 .llseek = seq_lseek, 1471 .release = single_release, 1472 }; 1473 1474 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1475 { 1476 struct dentry *dir; 1477 struct nbd_config *config = nbd->config; 1478 1479 if (!nbd_dbg_dir) 1480 return -EIO; 1481 1482 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir); 1483 if (!dir) { 1484 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n", 1485 nbd_name(nbd)); 1486 return -EIO; 1487 } 1488 config->dbg_dir = dir; 1489 1490 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); 1491 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); 1492 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); 1493 debugfs_create_u64("blocksize", 0444, dir, &config->blksize); 1494 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); 1495 1496 return 0; 1497 } 1498 1499 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1500 { 1501 debugfs_remove_recursive(nbd->config->dbg_dir); 1502 } 1503 1504 static int nbd_dbg_init(void) 1505 { 1506 struct dentry *dbg_dir; 1507 1508 dbg_dir = debugfs_create_dir("nbd", NULL); 1509 if (!dbg_dir) 1510 return -EIO; 1511 1512 nbd_dbg_dir = dbg_dir; 1513 1514 return 0; 1515 } 1516 1517 static void nbd_dbg_close(void) 1518 { 1519 debugfs_remove_recursive(nbd_dbg_dir); 1520 } 1521 1522 #else /* IS_ENABLED(CONFIG_DEBUG_FS) */ 1523 1524 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1525 { 1526 return 0; 1527 } 1528 1529 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1530 { 1531 } 1532 1533 static int nbd_dbg_init(void) 1534 { 1535 return 0; 1536 } 1537 1538 static void nbd_dbg_close(void) 1539 { 1540 } 1541 1542 #endif 1543 1544 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, 1545 unsigned int hctx_idx, unsigned int numa_node) 1546 { 1547 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); 1548 cmd->nbd = set->driver_data; 1549 cmd->flags = 0; 1550 mutex_init(&cmd->lock); 1551 return 0; 1552 } 1553 1554 static const struct blk_mq_ops nbd_mq_ops = { 1555 .queue_rq = nbd_queue_rq, 1556 .complete = nbd_complete_rq, 1557 .init_request = nbd_init_request, 1558 .timeout = nbd_xmit_timeout, 1559 }; 1560 1561 static int nbd_dev_add(int index) 1562 { 1563 struct nbd_device *nbd; 1564 struct gendisk *disk; 1565 struct request_queue *q; 1566 int err = -ENOMEM; 1567 1568 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); 1569 if (!nbd) 1570 goto out; 1571 1572 disk = alloc_disk(1 << part_shift); 1573 if (!disk) 1574 goto out_free_nbd; 1575 1576 if (index >= 0) { 1577 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, 1578 GFP_KERNEL); 1579 if (err == -ENOSPC) 1580 err = -EEXIST; 1581 } else { 1582 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); 1583 if (err >= 0) 1584 index = err; 1585 } 1586 if (err < 0) 1587 goto out_free_disk; 1588 1589 nbd->index = index; 1590 nbd->disk = disk; 1591 nbd->tag_set.ops = &nbd_mq_ops; 1592 nbd->tag_set.nr_hw_queues = 1; 1593 nbd->tag_set.queue_depth = 128; 1594 nbd->tag_set.numa_node = NUMA_NO_NODE; 1595 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); 1596 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | 1597 BLK_MQ_F_BLOCKING; 1598 nbd->tag_set.driver_data = nbd; 1599 1600 err = blk_mq_alloc_tag_set(&nbd->tag_set); 1601 if (err) 1602 goto out_free_idr; 1603 1604 q = blk_mq_init_queue(&nbd->tag_set); 1605 if (IS_ERR(q)) { 1606 err = PTR_ERR(q); 1607 goto out_free_tags; 1608 } 1609 disk->queue = q; 1610 1611 /* 1612 * Tell the block layer that we are not a rotational device 1613 */ 1614 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); 1615 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1616 disk->queue->limits.discard_granularity = 0; 1617 disk->queue->limits.discard_alignment = 0; 1618 blk_queue_max_discard_sectors(disk->queue, 0); 1619 blk_queue_max_segment_size(disk->queue, UINT_MAX); 1620 blk_queue_max_segments(disk->queue, USHRT_MAX); 1621 blk_queue_max_hw_sectors(disk->queue, 65536); 1622 disk->queue->limits.max_sectors = 256; 1623 1624 mutex_init(&nbd->config_lock); 1625 refcount_set(&nbd->config_refs, 0); 1626 refcount_set(&nbd->refs, 1); 1627 INIT_LIST_HEAD(&nbd->list); 1628 disk->major = NBD_MAJOR; 1629 disk->first_minor = index << part_shift; 1630 disk->fops = &nbd_fops; 1631 disk->private_data = nbd; 1632 sprintf(disk->disk_name, "nbd%d", index); 1633 add_disk(disk); 1634 nbd_total_devices++; 1635 return index; 1636 1637 out_free_tags: 1638 blk_mq_free_tag_set(&nbd->tag_set); 1639 out_free_idr: 1640 idr_remove(&nbd_index_idr, index); 1641 out_free_disk: 1642 put_disk(disk); 1643 out_free_nbd: 1644 kfree(nbd); 1645 out: 1646 return err; 1647 } 1648 1649 static int find_free_cb(int id, void *ptr, void *data) 1650 { 1651 struct nbd_device *nbd = ptr; 1652 struct nbd_device **found = data; 1653 1654 if (!refcount_read(&nbd->config_refs)) { 1655 *found = nbd; 1656 return 1; 1657 } 1658 return 0; 1659 } 1660 1661 /* Netlink interface. */ 1662 static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { 1663 [NBD_ATTR_INDEX] = { .type = NLA_U32 }, 1664 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, 1665 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, 1666 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, 1667 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, 1668 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, 1669 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, 1670 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, 1671 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, 1672 }; 1673 1674 static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { 1675 [NBD_SOCK_FD] = { .type = NLA_U32 }, 1676 }; 1677 1678 /* We don't use this right now since we don't parse the incoming list, but we 1679 * still want it here so userspace knows what to expect. 1680 */ 1681 static const struct nla_policy __attribute__((unused)) 1682 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { 1683 [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, 1684 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, 1685 }; 1686 1687 static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd) 1688 { 1689 struct nbd_config *config = nbd->config; 1690 u64 bsize = config->blksize; 1691 u64 bytes = config->bytesize; 1692 1693 if (info->attrs[NBD_ATTR_SIZE_BYTES]) 1694 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1695 1696 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { 1697 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1698 if (!bsize) 1699 bsize = NBD_DEF_BLKSIZE; 1700 if (!nbd_is_valid_blksize(bsize)) { 1701 printk(KERN_ERR "Invalid block size %llu\n", bsize); 1702 return -EINVAL; 1703 } 1704 } 1705 1706 if (bytes != config->bytesize || bsize != config->blksize) 1707 nbd_size_set(nbd, bsize, div64_u64(bytes, bsize)); 1708 return 0; 1709 } 1710 1711 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) 1712 { 1713 struct nbd_device *nbd = NULL; 1714 struct nbd_config *config; 1715 int index = -1; 1716 int ret; 1717 bool put_dev = false; 1718 1719 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1720 return -EPERM; 1721 1722 if (info->attrs[NBD_ATTR_INDEX]) 1723 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1724 if (!info->attrs[NBD_ATTR_SOCKETS]) { 1725 printk(KERN_ERR "nbd: must specify at least one socket\n"); 1726 return -EINVAL; 1727 } 1728 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { 1729 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); 1730 return -EINVAL; 1731 } 1732 again: 1733 mutex_lock(&nbd_index_mutex); 1734 if (index == -1) { 1735 ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); 1736 if (ret == 0) { 1737 int new_index; 1738 new_index = nbd_dev_add(-1); 1739 if (new_index < 0) { 1740 mutex_unlock(&nbd_index_mutex); 1741 printk(KERN_ERR "nbd: failed to add new device\n"); 1742 return new_index; 1743 } 1744 nbd = idr_find(&nbd_index_idr, new_index); 1745 } 1746 } else { 1747 nbd = idr_find(&nbd_index_idr, index); 1748 if (!nbd) { 1749 ret = nbd_dev_add(index); 1750 if (ret < 0) { 1751 mutex_unlock(&nbd_index_mutex); 1752 printk(KERN_ERR "nbd: failed to add new device\n"); 1753 return ret; 1754 } 1755 nbd = idr_find(&nbd_index_idr, index); 1756 } 1757 } 1758 if (!nbd) { 1759 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1760 index); 1761 mutex_unlock(&nbd_index_mutex); 1762 return -EINVAL; 1763 } 1764 if (!refcount_inc_not_zero(&nbd->refs)) { 1765 mutex_unlock(&nbd_index_mutex); 1766 if (index == -1) 1767 goto again; 1768 printk(KERN_ERR "nbd: device at index %d is going down\n", 1769 index); 1770 return -EINVAL; 1771 } 1772 mutex_unlock(&nbd_index_mutex); 1773 1774 mutex_lock(&nbd->config_lock); 1775 if (refcount_read(&nbd->config_refs)) { 1776 mutex_unlock(&nbd->config_lock); 1777 nbd_put(nbd); 1778 if (index == -1) 1779 goto again; 1780 printk(KERN_ERR "nbd: nbd%d already in use\n", index); 1781 return -EBUSY; 1782 } 1783 if (WARN_ON(nbd->config)) { 1784 mutex_unlock(&nbd->config_lock); 1785 nbd_put(nbd); 1786 return -EINVAL; 1787 } 1788 config = nbd->config = nbd_alloc_config(); 1789 if (!nbd->config) { 1790 mutex_unlock(&nbd->config_lock); 1791 nbd_put(nbd); 1792 printk(KERN_ERR "nbd: couldn't allocate config\n"); 1793 return -ENOMEM; 1794 } 1795 refcount_set(&nbd->config_refs, 1); 1796 set_bit(NBD_BOUND, &config->runtime_flags); 1797 1798 ret = nbd_genl_size_set(info, nbd); 1799 if (ret) 1800 goto out; 1801 1802 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1803 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1804 nbd->tag_set.timeout = timeout * HZ; 1805 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1806 } 1807 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1808 config->dead_conn_timeout = 1809 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1810 config->dead_conn_timeout *= HZ; 1811 } 1812 if (info->attrs[NBD_ATTR_SERVER_FLAGS]) 1813 config->flags = 1814 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); 1815 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1816 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1817 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1818 set_bit(NBD_DESTROY_ON_DISCONNECT, 1819 &config->runtime_flags); 1820 put_dev = true; 1821 } 1822 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1823 set_bit(NBD_DISCONNECT_ON_CLOSE, 1824 &config->runtime_flags); 1825 } 1826 } 1827 1828 if (info->attrs[NBD_ATTR_SOCKETS]) { 1829 struct nlattr *attr; 1830 int rem, fd; 1831 1832 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1833 rem) { 1834 struct nlattr *socks[NBD_SOCK_MAX+1]; 1835 1836 if (nla_type(attr) != NBD_SOCK_ITEM) { 1837 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1838 ret = -EINVAL; 1839 goto out; 1840 } 1841 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX, 1842 attr, 1843 nbd_sock_policy, 1844 info->extack); 1845 if (ret != 0) { 1846 printk(KERN_ERR "nbd: error processing sock list\n"); 1847 ret = -EINVAL; 1848 goto out; 1849 } 1850 if (!socks[NBD_SOCK_FD]) 1851 continue; 1852 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1853 ret = nbd_add_socket(nbd, fd, true); 1854 if (ret) 1855 goto out; 1856 } 1857 } 1858 ret = nbd_start_device(nbd); 1859 out: 1860 mutex_unlock(&nbd->config_lock); 1861 if (!ret) { 1862 set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); 1863 refcount_inc(&nbd->config_refs); 1864 nbd_connect_reply(info, nbd->index); 1865 } 1866 nbd_config_put(nbd); 1867 if (put_dev) 1868 nbd_put(nbd); 1869 return ret; 1870 } 1871 1872 static void nbd_disconnect_and_put(struct nbd_device *nbd) 1873 { 1874 mutex_lock(&nbd->config_lock); 1875 nbd_disconnect(nbd); 1876 nbd_clear_sock(nbd); 1877 mutex_unlock(&nbd->config_lock); 1878 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1879 &nbd->config->runtime_flags)) 1880 nbd_config_put(nbd); 1881 } 1882 1883 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1884 { 1885 struct nbd_device *nbd; 1886 int index; 1887 1888 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1889 return -EPERM; 1890 1891 if (!info->attrs[NBD_ATTR_INDEX]) { 1892 printk(KERN_ERR "nbd: must specify an index to disconnect\n"); 1893 return -EINVAL; 1894 } 1895 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1896 mutex_lock(&nbd_index_mutex); 1897 nbd = idr_find(&nbd_index_idr, index); 1898 if (!nbd) { 1899 mutex_unlock(&nbd_index_mutex); 1900 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1901 index); 1902 return -EINVAL; 1903 } 1904 if (!refcount_inc_not_zero(&nbd->refs)) { 1905 mutex_unlock(&nbd_index_mutex); 1906 printk(KERN_ERR "nbd: device at index %d is going down\n", 1907 index); 1908 return -EINVAL; 1909 } 1910 mutex_unlock(&nbd_index_mutex); 1911 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1912 nbd_put(nbd); 1913 return 0; 1914 } 1915 nbd_disconnect_and_put(nbd); 1916 nbd_config_put(nbd); 1917 nbd_put(nbd); 1918 return 0; 1919 } 1920 1921 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) 1922 { 1923 struct nbd_device *nbd = NULL; 1924 struct nbd_config *config; 1925 int index; 1926 int ret = 0; 1927 bool put_dev = false; 1928 1929 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1930 return -EPERM; 1931 1932 if (!info->attrs[NBD_ATTR_INDEX]) { 1933 printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); 1934 return -EINVAL; 1935 } 1936 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1937 mutex_lock(&nbd_index_mutex); 1938 nbd = idr_find(&nbd_index_idr, index); 1939 if (!nbd) { 1940 mutex_unlock(&nbd_index_mutex); 1941 printk(KERN_ERR "nbd: couldn't find a device at index %d\n", 1942 index); 1943 return -EINVAL; 1944 } 1945 if (!refcount_inc_not_zero(&nbd->refs)) { 1946 mutex_unlock(&nbd_index_mutex); 1947 printk(KERN_ERR "nbd: device at index %d is going down\n", 1948 index); 1949 return -EINVAL; 1950 } 1951 mutex_unlock(&nbd_index_mutex); 1952 1953 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1954 dev_err(nbd_to_dev(nbd), 1955 "not configured, cannot reconfigure\n"); 1956 nbd_put(nbd); 1957 return -EINVAL; 1958 } 1959 1960 mutex_lock(&nbd->config_lock); 1961 config = nbd->config; 1962 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1963 !nbd->task_recv) { 1964 dev_err(nbd_to_dev(nbd), 1965 "not configured, cannot reconfigure\n"); 1966 ret = -EINVAL; 1967 goto out; 1968 } 1969 1970 ret = nbd_genl_size_set(info, nbd); 1971 if (ret) 1972 goto out; 1973 1974 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1975 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1976 nbd->tag_set.timeout = timeout * HZ; 1977 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1978 } 1979 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1980 config->dead_conn_timeout = 1981 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1982 config->dead_conn_timeout *= HZ; 1983 } 1984 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1985 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1986 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1987 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, 1988 &config->runtime_flags)) 1989 put_dev = true; 1990 } else { 1991 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1992 &config->runtime_flags)) 1993 refcount_inc(&nbd->refs); 1994 } 1995 1996 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1997 set_bit(NBD_DISCONNECT_ON_CLOSE, 1998 &config->runtime_flags); 1999 } else { 2000 clear_bit(NBD_DISCONNECT_ON_CLOSE, 2001 &config->runtime_flags); 2002 } 2003 } 2004 2005 if (info->attrs[NBD_ATTR_SOCKETS]) { 2006 struct nlattr *attr; 2007 int rem, fd; 2008 2009 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 2010 rem) { 2011 struct nlattr *socks[NBD_SOCK_MAX+1]; 2012 2013 if (nla_type(attr) != NBD_SOCK_ITEM) { 2014 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 2015 ret = -EINVAL; 2016 goto out; 2017 } 2018 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX, 2019 attr, 2020 nbd_sock_policy, 2021 info->extack); 2022 if (ret != 0) { 2023 printk(KERN_ERR "nbd: error processing sock list\n"); 2024 ret = -EINVAL; 2025 goto out; 2026 } 2027 if (!socks[NBD_SOCK_FD]) 2028 continue; 2029 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 2030 ret = nbd_reconnect_socket(nbd, fd); 2031 if (ret) { 2032 if (ret == -ENOSPC) 2033 ret = 0; 2034 goto out; 2035 } 2036 dev_info(nbd_to_dev(nbd), "reconnected socket\n"); 2037 } 2038 } 2039 out: 2040 mutex_unlock(&nbd->config_lock); 2041 nbd_config_put(nbd); 2042 nbd_put(nbd); 2043 if (put_dev) 2044 nbd_put(nbd); 2045 return ret; 2046 } 2047 2048 static const struct genl_ops nbd_connect_genl_ops[] = { 2049 { 2050 .cmd = NBD_CMD_CONNECT, 2051 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2052 .doit = nbd_genl_connect, 2053 }, 2054 { 2055 .cmd = NBD_CMD_DISCONNECT, 2056 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2057 .doit = nbd_genl_disconnect, 2058 }, 2059 { 2060 .cmd = NBD_CMD_RECONFIGURE, 2061 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2062 .doit = nbd_genl_reconfigure, 2063 }, 2064 { 2065 .cmd = NBD_CMD_STATUS, 2066 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 2067 .doit = nbd_genl_status, 2068 }, 2069 }; 2070 2071 static const struct genl_multicast_group nbd_mcast_grps[] = { 2072 { .name = NBD_GENL_MCAST_GROUP_NAME, }, 2073 }; 2074 2075 static struct genl_family nbd_genl_family __ro_after_init = { 2076 .hdrsize = 0, 2077 .name = NBD_GENL_FAMILY_NAME, 2078 .version = NBD_GENL_VERSION, 2079 .module = THIS_MODULE, 2080 .ops = nbd_connect_genl_ops, 2081 .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), 2082 .maxattr = NBD_ATTR_MAX, 2083 .policy = nbd_attr_policy, 2084 .mcgrps = nbd_mcast_grps, 2085 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), 2086 }; 2087 2088 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) 2089 { 2090 struct nlattr *dev_opt; 2091 u8 connected = 0; 2092 int ret; 2093 2094 /* This is a little racey, but for status it's ok. The 2095 * reason we don't take a ref here is because we can't 2096 * take a ref in the index == -1 case as we would need 2097 * to put under the nbd_index_mutex, which could 2098 * deadlock if we are configured to remove ourselves 2099 * once we're disconnected. 2100 */ 2101 if (refcount_read(&nbd->config_refs)) 2102 connected = 1; 2103 dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM); 2104 if (!dev_opt) 2105 return -EMSGSIZE; 2106 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); 2107 if (ret) 2108 return -EMSGSIZE; 2109 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, 2110 connected); 2111 if (ret) 2112 return -EMSGSIZE; 2113 nla_nest_end(reply, dev_opt); 2114 return 0; 2115 } 2116 2117 static int status_cb(int id, void *ptr, void *data) 2118 { 2119 struct nbd_device *nbd = ptr; 2120 return populate_nbd_status(nbd, (struct sk_buff *)data); 2121 } 2122 2123 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) 2124 { 2125 struct nlattr *dev_list; 2126 struct sk_buff *reply; 2127 void *reply_head; 2128 size_t msg_size; 2129 int index = -1; 2130 int ret = -ENOMEM; 2131 2132 if (info->attrs[NBD_ATTR_INDEX]) 2133 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 2134 2135 mutex_lock(&nbd_index_mutex); 2136 2137 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + 2138 nla_attr_size(sizeof(u8))); 2139 msg_size *= (index == -1) ? nbd_total_devices : 1; 2140 2141 reply = genlmsg_new(msg_size, GFP_KERNEL); 2142 if (!reply) 2143 goto out; 2144 reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, 2145 NBD_CMD_STATUS); 2146 if (!reply_head) { 2147 nlmsg_free(reply); 2148 goto out; 2149 } 2150 2151 dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); 2152 if (index == -1) { 2153 ret = idr_for_each(&nbd_index_idr, &status_cb, reply); 2154 if (ret) { 2155 nlmsg_free(reply); 2156 goto out; 2157 } 2158 } else { 2159 struct nbd_device *nbd; 2160 nbd = idr_find(&nbd_index_idr, index); 2161 if (nbd) { 2162 ret = populate_nbd_status(nbd, reply); 2163 if (ret) { 2164 nlmsg_free(reply); 2165 goto out; 2166 } 2167 } 2168 } 2169 nla_nest_end(reply, dev_list); 2170 genlmsg_end(reply, reply_head); 2171 ret = genlmsg_reply(reply, info); 2172 out: 2173 mutex_unlock(&nbd_index_mutex); 2174 return ret; 2175 } 2176 2177 static void nbd_connect_reply(struct genl_info *info, int index) 2178 { 2179 struct sk_buff *skb; 2180 void *msg_head; 2181 int ret; 2182 2183 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2184 if (!skb) 2185 return; 2186 msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, 2187 NBD_CMD_CONNECT); 2188 if (!msg_head) { 2189 nlmsg_free(skb); 2190 return; 2191 } 2192 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2193 if (ret) { 2194 nlmsg_free(skb); 2195 return; 2196 } 2197 genlmsg_end(skb, msg_head); 2198 genlmsg_reply(skb, info); 2199 } 2200 2201 static void nbd_mcast_index(int index) 2202 { 2203 struct sk_buff *skb; 2204 void *msg_head; 2205 int ret; 2206 2207 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2208 if (!skb) 2209 return; 2210 msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, 2211 NBD_CMD_LINK_DEAD); 2212 if (!msg_head) { 2213 nlmsg_free(skb); 2214 return; 2215 } 2216 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2217 if (ret) { 2218 nlmsg_free(skb); 2219 return; 2220 } 2221 genlmsg_end(skb, msg_head); 2222 genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); 2223 } 2224 2225 static void nbd_dead_link_work(struct work_struct *work) 2226 { 2227 struct link_dead_args *args = container_of(work, struct link_dead_args, 2228 work); 2229 nbd_mcast_index(args->index); 2230 kfree(args); 2231 } 2232 2233 static int __init nbd_init(void) 2234 { 2235 int i; 2236 2237 BUILD_BUG_ON(sizeof(struct nbd_request) != 28); 2238 2239 if (max_part < 0) { 2240 printk(KERN_ERR "nbd: max_part must be >= 0\n"); 2241 return -EINVAL; 2242 } 2243 2244 part_shift = 0; 2245 if (max_part > 0) { 2246 part_shift = fls(max_part); 2247 2248 /* 2249 * Adjust max_part according to part_shift as it is exported 2250 * to user space so that user can know the max number of 2251 * partition kernel should be able to manage. 2252 * 2253 * Note that -1 is required because partition 0 is reserved 2254 * for the whole disk. 2255 */ 2256 max_part = (1UL << part_shift) - 1; 2257 } 2258 2259 if ((1UL << part_shift) > DISK_MAX_PARTS) 2260 return -EINVAL; 2261 2262 if (nbds_max > 1UL << (MINORBITS - part_shift)) 2263 return -EINVAL; 2264 recv_workqueue = alloc_workqueue("knbd-recv", 2265 WQ_MEM_RECLAIM | WQ_HIGHPRI | 2266 WQ_UNBOUND, 0); 2267 if (!recv_workqueue) 2268 return -ENOMEM; 2269 2270 if (register_blkdev(NBD_MAJOR, "nbd")) { 2271 destroy_workqueue(recv_workqueue); 2272 return -EIO; 2273 } 2274 2275 if (genl_register_family(&nbd_genl_family)) { 2276 unregister_blkdev(NBD_MAJOR, "nbd"); 2277 destroy_workqueue(recv_workqueue); 2278 return -EINVAL; 2279 } 2280 nbd_dbg_init(); 2281 2282 mutex_lock(&nbd_index_mutex); 2283 for (i = 0; i < nbds_max; i++) 2284 nbd_dev_add(i); 2285 mutex_unlock(&nbd_index_mutex); 2286 return 0; 2287 } 2288 2289 static int nbd_exit_cb(int id, void *ptr, void *data) 2290 { 2291 struct list_head *list = (struct list_head *)data; 2292 struct nbd_device *nbd = ptr; 2293 2294 list_add_tail(&nbd->list, list); 2295 return 0; 2296 } 2297 2298 static void __exit nbd_cleanup(void) 2299 { 2300 struct nbd_device *nbd; 2301 LIST_HEAD(del_list); 2302 2303 nbd_dbg_close(); 2304 2305 mutex_lock(&nbd_index_mutex); 2306 idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list); 2307 mutex_unlock(&nbd_index_mutex); 2308 2309 while (!list_empty(&del_list)) { 2310 nbd = list_first_entry(&del_list, struct nbd_device, list); 2311 list_del_init(&nbd->list); 2312 if (refcount_read(&nbd->refs) != 1) 2313 printk(KERN_ERR "nbd: possibly leaking a device\n"); 2314 nbd_put(nbd); 2315 } 2316 2317 idr_destroy(&nbd_index_idr); 2318 genl_unregister_family(&nbd_genl_family); 2319 destroy_workqueue(recv_workqueue); 2320 unregister_blkdev(NBD_MAJOR, "nbd"); 2321 } 2322 2323 module_init(nbd_init); 2324 module_exit(nbd_cleanup); 2325 2326 MODULE_DESCRIPTION("Network Block Device"); 2327 MODULE_LICENSE("GPL"); 2328 2329 module_param(nbds_max, int, 0444); 2330 MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)"); 2331 module_param(max_part, int, 0444); 2332 MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)"); 2333