/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
	int cookie;
};

struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

struct link_dead_args {
	struct work_struct work;
	int index;
};

#define NBD_TIMEDOUT			0
#define NBD_DISCONNECT_REQUESTED	1
#define NBD_DISCONNECTED		2
#define NBD_HAS_PID_FILE		3
#define NBD_HAS_CONFIG_REF		4
#define NBD_BOUND			5
#define NBD_DESTROY_ON_DISCONNECT	6
#define NBD_DISCONNECT_ON_CLOSE		7

struct nbd_config {
	u32 flags;
	unsigned long runtime_flags;
	u64 dead_conn_timeout;

	struct nbd_sock **socks;
	int num_connections;
	atomic_t live_connections;
	wait_queue_head_t conn_wait;

	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	loff_t blksize;
	loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};

struct nbd_device {
	struct blk_mq_tag_set tag_set;

	int index;
	refcount_t config_refs;
	refcount_t refs;
	struct nbd_config *config;
	struct mutex config_lock;
	struct gendisk *disk;

	struct list_head list;
	struct task_struct *task_recv;
	struct task_struct *task_setup;
};

#define NBD_CMD_REQUEUED	1

struct nbd_cmd {
	struct nbd_device *nbd;
	struct mutex lock;
	int index;
	int cookie;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

static unsigned int nbds_max = 16;
static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);

static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
		blk_mq_requeue_request(req, true);
}

#define NBD_COOKIE_BITS 32

static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

static u32 nbd_handle_to_cookie(u64 handle)
{
	return (u32)(handle >> NBD_COOKIE_BITS);
}

static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}

static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static const struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = 0444},
	.show = pid_show,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *disk = nbd->disk;
	struct request_queue *q;

	if (disk) {
		q = disk->queue;
		del_gendisk(disk);
		blk_cleanup_queue(q);
		blk_mq_free_tag_set(&nbd->tag_set);
		disk->private_data = NULL;
		put_disk(disk);
	}
	kfree(nbd);
}

static void nbd_put(struct nbd_device *nbd)
{
	if (refcount_dec_and_mutex_lock(&nbd->refs,
					&nbd_index_mutex)) {
		idr_remove(&nbd_index_idr, nbd->index);
		mutex_unlock(&nbd_index_mutex);
		nbd_dev_remove(nbd);
	}
}

static int nbd_disconnected(struct nbd_config *config)
{
	return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
		test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
}

static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
				int notify)
{
	if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
		struct link_dead_args *args;
		args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
		if (args) {
			INIT_WORK(&args->work, nbd_dead_link_work);
			args->index = nbd->index;
			queue_work(system_wq, &args->work);
		}
	}
	if (!nsock->dead) {
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
					       &nbd->config->runtime_flags)) {
				set_bit(NBD_DISCONNECTED,
					&nbd->config->runtime_flags);
				dev_info(nbd_to_dev(nbd),
					 "Disconnected due to user request.\n");
			}
		}
	}
	nsock->dead = true;
	nsock->pending = NULL;
	nsock->sent = 0;
}

static void nbd_size_clear(struct nbd_device *nbd)
{
	if (nbd->config->bytesize) {
		set_capacity(nbd->disk, 0);
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	}
}

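/*
 * Push the currently configured block size and capacity out to the queue
 * limits, the gendisk and, if one is attached, the block device.
 */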
static void nbd_size_update(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	struct block_device *bdev = bdget_disk(nbd->disk, 0);

	if (config->flags & NBD_FLAG_SEND_TRIM) {
		nbd->disk->queue->limits.discard_granularity = config->blksize;
		nbd->disk->queue->limits.discard_alignment = config->blksize;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
	}
	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
	set_capacity(nbd->disk, config->bytesize >> 9);
	if (bdev) {
		if (bdev->bd_disk)
			bd_set_size(bdev, config->bytesize);
		else
			bdev->bd_invalidated = 1;
		bdput(bdev);
	}
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
			 loff_t nr_blocks)
{
	struct nbd_config *config = nbd->config;
	config->blksize = blocksize;
	config->bytesize = blocksize * nr_blocks;
	if (nbd->task_recv != NULL)
		nbd_size_update(nbd);
}

static void nbd_complete_rq(struct request *req)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
		cmd->status ? "failed" : "done");

	blk_mq_end_request(req, cmd->status);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int i;

	if (config->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];
		mutex_lock(&nsock->tx_lock);
		nbd_mark_nsock_dead(nbd, nsock, 0);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

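/*
 * blk-mq timeout handler: if other connections are still alive, mark the
 * timed-out socket dead and requeue the command so the submit path can pick
 * a working socket; otherwise fail the request and shut all sockets down.
 */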
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		cmd->status = BLK_STS_TIMEOUT;
		goto done;
	}
	config = nbd->config;

	if (!mutex_trylock(&cmd->lock))
		return BLK_EH_RESET_TIMER;

	if (config->num_connections > 1) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying (%d/%d alive)\n",
				    atomic_read(&config->live_connections),
				    config->num_connections);
		/*
		 * Hooray we have more connections, requeue this IO, the submit
		 * path will put it on a real connection.
		 */
		if (config->socks && config->num_connections > 1) {
			if (cmd->index < config->num_connections) {
				struct nbd_sock *nsock =
					config->socks[cmd->index];
				mutex_lock(&nsock->tx_lock);
				/* We can have multiple outstanding requests, so
				 * we don't want to mark the nsock dead if we've
				 * already reconnected with a new socket, so
				 * only mark it dead if it's the same socket we
				 * were sent out on.
				 */
				if (cmd->cookie == nsock->cookie)
					nbd_mark_nsock_dead(nbd, nsock, 1);
				mutex_unlock(&nsock->tx_lock);
			}
			mutex_unlock(&cmd->lock);
			nbd_requeue_cmd(cmd);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
	} else {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out\n");
	}
	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
	sock_shutdown(nbd);
	nbd_config_put(nbd);
done:
	blk_mq_complete_request(req);
	return BLK_EH_DONE;
}

/*
 * Send or receive packet.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags, int *sent)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock = config->socks[index]->sock;
	int result;
	struct msghdr msg;
	unsigned int noreclaim_flag;

	if (unlikely(!sock)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted %s on closed socket in sock_xmit\n",
				    (send ? "send" : "recv"));
		return -EINVAL;
	}

	msg.msg_iter = *iter;

	noreclaim_flag = memalloc_noreclaim_save();
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = sock_sendmsg(sock, &msg);
		else
			result = sock_recvmsg(sock, &msg, msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		if (sent)
			*sent += result;
	} while (msg_data_left(&msg));

	memalloc_noreclaim_restore(noreclaim_flag);

	return result;
}

/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, because reasons?
 */
static inline int was_interrupted(int result)
{
	return result == -ERESTARTSYS || result == -EINTR;
}

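/*
 * Build the NBD request header and, for writes, stream the bio pages over
 * the socket.  A partial send is recorded in nsock->pending/nsock->sent so
 * a later attempt can resume where this one stopped.
 */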
/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_config *config = nbd->config;
	struct nbd_sock *nsock = config->socks[index];
	int result;
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	unsigned long size = blk_rq_bytes(req);
	struct bio *bio;
	u64 handle;
	u32 type;
	u32 nbd_cmd_flags = 0;
	int sent = nsock->sent, skip = 0;

	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));

	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		type = NBD_CMD_TRIM;
		break;
	case REQ_OP_FLUSH:
		type = NBD_CMD_FLUSH;
		break;
	case REQ_OP_WRITE:
		type = NBD_CMD_WRITE;
		break;
	case REQ_OP_READ:
		type = NBD_CMD_READ;
		break;
	default:
		return -EIO;
	}

	if (rq_data_dir(req) == WRITE &&
	    (config->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

	if (req->cmd_flags & REQ_FUA)
		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;

	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);
			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	} else {
		cmd->cmd_cookie++;
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			   (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
			return BLK_STS_RESOURCE;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				req, bvec.bv_len);
			iov_iter_bvec(&from, ITER_BVEC | WRITE,
				      &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result <= 0) {
				if (was_interrupted(result)) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
					return BLK_STS_RESOURCE;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
		}
		bio = next;
	}
out:
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
}

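/*
 * Read one reply from the server: validate the magic, map the 64-bit handle
 * back to a request via its blk-mq tag, and check the per-command cookie so
 * a stale reply for a request that timed out and was reissued is not
 * completed twice.
 */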
/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int result;
	struct nbd_reply reply;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u64 handle;
	u16 hwq;
	u32 tag;
	struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
	struct iov_iter to;
	int ret = 0;

	reply.magic = 0;
	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result <= 0) {
		if (!nbd_disconnected(config))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
		return ERR_PTR(result);
	}

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
			(unsigned long)ntohl(reply.magic));
		return ERR_PTR(-EPROTO);
	}

	memcpy(&handle, reply.handle, sizeof(handle));
	tag = nbd_handle_to_tag(handle);
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	cmd = blk_mq_rq_to_pdu(req);

	mutex_lock(&cmd->lock);
	if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
		dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
			req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
		ret = -ENOENT;
		goto out;
	}
	if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
		dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (ntohl(reply.error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply.error));
		cmd->status = BLK_STS_IOERR;
		goto out;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, req, iter) {
			iov_iter_bvec(&to, ITER_BVEC | READ,
				      &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
				 * If we've disconnected or we only have 1
				 * connection then we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
				if (nbd_disconnected(config) ||
				    config->num_connections <= 1) {
					cmd->status = BLK_STS_IOERR;
					goto out;
				}
				ret = -EIO;
				goto out;
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				req, bvec.bv_len);
		}
	}
out:
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
}

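/* Per-connection receive worker: completes replies until the socket dies. */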
static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct nbd_config *config = nbd->config;
	struct nbd_cmd *cmd;

	while (1) {
		cmd = nbd_read_stat(nbd, args->index);
		if (IS_ERR(cmd)) {
			struct nbd_sock *nsock = config->socks[args->index];

			mutex_lock(&nsock->tx_lock);
			nbd_mark_nsock_dead(nbd, nsock, 1);
			mutex_unlock(&nsock->tx_lock);
			break;
		}

		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
	}
	atomic_dec(&config->recv_threads);
	wake_up(&config->recv_wq);
	nbd_config_put(nbd);
	kfree(args);
}

static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	cmd->status = BLK_STS_IOERR;
	blk_mq_complete_request(req);
}

static void nbd_clear_que(struct nbd_device *nbd)
{
	blk_mq_quiesce_queue(nbd->disk->queue);
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	blk_mq_unquiesce_queue(nbd->disk->queue);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

static int find_fallback(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int new_index = -1;
	struct nbd_sock *nsock = config->socks[index];
	int fallback = nsock->fallback_index;

	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return new_index;

	if (config->num_connections <= 1) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		return new_index;
	}

	if (fallback >= 0 && fallback < config->num_connections &&
	    !config->socks[fallback]->dead)
		return fallback;

	if (nsock->fallback_index < 0 ||
	    nsock->fallback_index >= config->num_connections ||
	    config->socks[nsock->fallback_index]->dead) {
		int i;
		for (i = 0; i < config->num_connections; i++) {
			if (i == index)
				continue;
			if (!config->socks[i]->dead) {
				new_index = i;
				break;
			}
		}
		nsock->fallback_index = new_index;
		if (new_index < 0) {
			dev_err_ratelimited(disk_to_dev(nbd->disk),
					    "Dead connection, failed to find a fallback\n");
			return new_index;
		}
	}
	new_index = nsock->fallback_index;
	return new_index;
}

static int wait_for_reconnect(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	if (!config->dead_conn_timeout)
		return 0;
	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return 0;
	return wait_event_timeout(config->conn_wait,
				  atomic_read(&config->live_connections) > 0,
				  config->dead_conn_timeout) > 0;
}

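/*
 * Pick a live socket (falling back, or waiting for a reconnect, if needed)
 * and send the request.  Called from nbd_queue_rq() with cmd->lock held.
 */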
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;
	struct nbd_sock *nsock;
	int ret;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Socks array is empty\n");
		blk_mq_start_request(req);
		return -EINVAL;
	}
	config = nbd->config;

	if (index >= config->num_connections) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		nbd_config_put(nbd);
		blk_mq_start_request(req);
		return -EINVAL;
	}
	cmd->status = BLK_STS_OK;
again:
	nsock = config->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (nsock->dead) {
		int old_index = index;
		index = find_fallback(nbd, index);
		mutex_unlock(&nsock->tx_lock);
		if (index < 0) {
			if (wait_for_reconnect(nbd)) {
				index = old_index;
				goto again;
			}
			/* All the sockets should already be down at this point,
			 * we just want to make sure that DISCONNECTED is set so
			 * any requests that come in that were queued waiting
			 * for the reconnect timer don't trigger the timer again
			 * and instead just error out.
			 */
			sock_shutdown(nbd);
			nbd_config_put(nbd);
			blk_mq_start_request(req);
			return -EIO;
		}
		goto again;
	}

	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * dispatch list.
	 */
	blk_mq_start_request(req);
	if (unlikely(nsock->pending && nsock->pending != req)) {
		nbd_requeue_cmd(cmd);
		ret = 0;
		goto out;
	}
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
	ret = nbd_send_cmd(nbd, cmd, index);
	if (ret == -EAGAIN) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed, requeueing\n");
		nbd_mark_nsock_dead(nbd, nsock, 1);
		nbd_requeue_cmd(cmd);
		ret = 0;
	}
out:
	mutex_unlock(&nsock->tx_lock);
	nbd_config_put(nbd);
	return ret;
}

static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	int ret;

	/*
	 * Since we look at the bios to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (i.e. we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	mutex_lock(&cmd->lock);
	clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
	 */
	ret = nbd_handle_cmd(cmd, hctx->queue_num);
	if (ret < 0)
		ret = BLK_STS_IOERR;
	else if (!ret)
		ret = BLK_STS_OK;
	mutex_unlock(&cmd->lock);

	return ret;
}

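/* Attach a userspace-supplied socket (by fd) to the device's connection array. */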
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
			  bool netlink)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock;
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;

	if (!netlink && !nbd->task_setup &&
	    !test_bit(NBD_BOUND, &config->runtime_flags))
		nbd->task_setup = current;

	if (!netlink &&
	    (nbd->task_setup != current ||
	     test_bit(NBD_BOUND, &config->runtime_flags))) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		sockfd_put(sock);
		return -EBUSY;
	}

	socks = krealloc(config->socks, (config->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks) {
		sockfd_put(sock);
		return -ENOMEM;
	}
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	config->socks = socks;

	nsock->fallback_index = -1;
	nsock->dead = false;
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	nsock->pending = NULL;
	nsock->sent = 0;
	nsock->cookie = 0;
	socks[config->num_connections++] = nsock;
	atomic_inc(&config->live_connections);

	return 0;
}

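/*
 * Swap the first dead connection for a freshly supplied socket and kick off
 * a new receive worker for it.
 */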
static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock, *old;
	struct recv_thread_args *args;
	int i;
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;

	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];

		if (!nsock->dead)
			continue;

		mutex_lock(&nsock->tx_lock);
		if (!nsock->dead) {
			mutex_unlock(&nsock->tx_lock);
			continue;
		}
		sk_set_memalloc(sock->sk);
		if (nbd->tag_set.timeout)
			sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
		atomic_inc(&config->recv_threads);
		refcount_inc(&nbd->config_refs);
		old = nsock->sock;
		nsock->fallback_index = -1;
		nsock->sock = sock;
		nsock->dead = false;
		INIT_WORK(&args->work, recv_work);
		args->index = i;
		args->nbd = nbd;
		nsock->cookie++;
		mutex_unlock(&nsock->tx_lock);
		sockfd_put(old);

		clear_bit(NBD_DISCONNECTED, &config->runtime_flags);

		/* We take the tx_mutex in an error path in the recv_work, so we
		 * need to queue_work outside of the tx_mutex.
		 */
		queue_work(recv_workqueue, &args->work);

		atomic_inc(&config->live_connections);
		wake_up(&config->conn_wait);
		return 0;
	}
	sockfd_put(sock);
	kfree(args);
	return -ENOSPC;
}

static void nbd_bdev_reset(struct block_device *bdev)
{
	if (bdev->bd_openers > 1)
		return;
	bd_set_size(bdev, 0);
}

static void nbd_parse_flags(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	if (config->flags & NBD_FLAG_READ_ONLY)
		set_disk_ro(nbd->disk, true);
	else
		set_disk_ro(nbd->disk, false);
	if (config->flags & NBD_FLAG_SEND_TRIM)
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (config->flags & NBD_FLAG_SEND_FLUSH) {
		if (config->flags & NBD_FLAG_SEND_FUA)
			blk_queue_write_cache(nbd->disk->queue, true, true);
		else
			blk_queue_write_cache(nbd->disk->queue, true, false);
	}
	else
		blk_queue_write_cache(nbd->disk->queue, false, false);
}

static void send_disconnects(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	struct nbd_request request = {
		.magic = htonl(NBD_REQUEST_MAGIC),
		.type = htonl(NBD_CMD_DISC),
	};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	int i, ret;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];

		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
		mutex_lock(&nsock->tx_lock);
		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
		mutex_unlock(&nsock->tx_lock);
	}
}

static int nbd_disconnect(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;

	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
	set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
	send_disconnects(nbd);
	return 0;
}

static void nbd_clear_sock(struct nbd_device *nbd)
{
	sock_shutdown(nbd);
	nbd_clear_que(nbd);
	nbd->task_setup = NULL;
}

static void nbd_config_put(struct nbd_device *nbd)
{
	if (refcount_dec_and_mutex_lock(&nbd->config_refs,
					&nbd->config_lock)) {
		struct nbd_config *config = nbd->config;
		nbd_dev_dbg_close(nbd);
		nbd_size_clear(nbd);
		if (test_and_clear_bit(NBD_HAS_PID_FILE,
				       &config->runtime_flags))
			device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
		nbd->task_recv = NULL;
		nbd_clear_sock(nbd);
		if (config->num_connections) {
			int i;
			for (i = 0; i < config->num_connections; i++) {
				sockfd_put(config->socks[i]->sock);
				kfree(config->socks[i]);
			}
			kfree(config->socks);
		}
		kfree(nbd->config);
		nbd->config = NULL;

		nbd->tag_set.timeout = 0;
		nbd->disk->queue->limits.discard_granularity = 0;
		nbd->disk->queue->limits.discard_alignment = 0;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);

		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		module_put(THIS_MODULE);
	}
}

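/*
 * Bring the device up: one hardware queue per connection, a sysfs pid file,
 * debugfs entries and a receive worker per socket.
 */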
static int nbd_start_device(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int num_connections = config->num_connections;
	int error = 0, i;

	if (nbd->task_recv)
		return -EBUSY;
	if (!config->socks)
		return -EINVAL;
	if (num_connections > 1 &&
	    !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
		return -EINVAL;
	}

	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
	nbd->task_recv = current;

	nbd_parse_flags(nbd);

	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (error) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		return error;
	}
	set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);

	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
		struct recv_thread_args *args;

		args = kzalloc(sizeof(*args), GFP_KERNEL);
		if (!args) {
			sock_shutdown(nbd);
			return -ENOMEM;
		}
		sk_set_memalloc(config->socks[i]->sock->sk);
		if (nbd->tag_set.timeout)
			config->socks[i]->sock->sk->sk_sndtimeo =
				nbd->tag_set.timeout;
		atomic_inc(&config->recv_threads);
		refcount_inc(&nbd->config_refs);
		INIT_WORK(&args->work, recv_work);
		args->nbd = nbd;
		args->index = i;
		queue_work(recv_workqueue, &args->work);
	}
	nbd_size_update(nbd);
	return error;
}

static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
{
	struct nbd_config *config = nbd->config;
	int ret;

	ret = nbd_start_device(nbd);
	if (ret)
		return ret;

	if (max_part)
		bdev->bd_invalidated = 1;
	mutex_unlock(&nbd->config_lock);
	ret = wait_event_interruptible(config->recv_wq,
				       atomic_read(&config->recv_threads) == 0);
	if (ret)
		sock_shutdown(nbd);
	mutex_lock(&nbd->config_lock);
	nbd_bdev_reset(bdev);
	/* user requested, ignore socket errors */
	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
		ret = 0;
	if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
		ret = -ETIMEDOUT;
	return ret;
}

static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
				 struct block_device *bdev)
{
	sock_shutdown(nbd);
	kill_bdev(bdev);
	nbd_bdev_reset(bdev);
	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
}

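/*
 * Legacy ioctl configuration path.  A typical nbd-client style sequence
 * (illustrative only) is NBD_SET_SOCK with a connected socket, optionally
 * NBD_SET_BLKSIZE/NBD_SET_SIZE(_BLOCKS), NBD_SET_TIMEOUT and NBD_SET_FLAGS,
 * then NBD_DO_IT, which blocks until the device is torn down.
 */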
/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
		       unsigned int cmd, unsigned long arg)
{
	struct nbd_config *config = nbd->config;

	switch (cmd) {
	case NBD_DISCONNECT:
		return nbd_disconnect(nbd);
	case NBD_CLEAR_SOCK:
		nbd_clear_sock_ioctl(nbd, bdev);
		return 0;
	case NBD_SET_SOCK:
		return nbd_add_socket(nbd, arg, false);
	case NBD_SET_BLKSIZE:
		if (!arg || !is_power_of_2(arg) || arg < 512 ||
		    arg > PAGE_SIZE)
			return -EINVAL;
		nbd_size_set(nbd, arg,
			     div_s64(config->bytesize, arg));
		return 0;
	case NBD_SET_SIZE:
		nbd_size_set(nbd, config->blksize,
			     div_s64(arg, config->blksize));
		return 0;
	case NBD_SET_SIZE_BLOCKS:
		nbd_size_set(nbd, config->blksize, arg);
		return 0;
	case NBD_SET_TIMEOUT:
		if (arg) {
			nbd->tag_set.timeout = arg * HZ;
			blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
		}
		return 0;

	case NBD_SET_FLAGS:
		config->flags = arg;
		return 0;
	case NBD_DO_IT:
		return nbd_start_device_ioctl(nbd, bdev);
	case NBD_CLEAR_QUE:
		/*
		 * This is for compatibility only. The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
		return 0;
	case NBD_PRINT_DEBUG:
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
		return 0;
	}
	return -ENOTTY;
}

static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	struct nbd_config *config = nbd->config;
	int error = -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* The block layer will pass back some non-nbd ioctls in case we have
	 * special handling for them, but we don't so just return an error.
	 */
	if (_IOC_TYPE(cmd) != 0xab)
		return -EINVAL;

	mutex_lock(&nbd->config_lock);

	/* Don't allow ioctl operations on a nbd device that was created with
	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
	 */
	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
	    (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
		error = __nbd_ioctl(bdev, nbd, cmd, arg);
	else
		dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
	mutex_unlock(&nbd->config_lock);
	return error;
}

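/* Allocate a zeroed device config; blksize starts at 1024 bytes until set. */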
static struct nbd_config *nbd_alloc_config(void)
{
	struct nbd_config *config;

	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
	if (!config)
		return NULL;
	atomic_set(&config->recv_threads, 0);
	init_waitqueue_head(&config->recv_wq);
	init_waitqueue_head(&config->conn_wait);
	config->blksize = 1024;
	atomic_set(&config->live_connections, 0);
	try_module_get(THIS_MODULE);
	return config;
}

static int nbd_open(struct block_device *bdev, fmode_t mode)
{
	struct nbd_device *nbd;
	int ret = 0;

	mutex_lock(&nbd_index_mutex);
	nbd = bdev->bd_disk->private_data;
	if (!nbd) {
		ret = -ENXIO;
		goto out;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		ret = -ENXIO;
		goto out;
	}
	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		struct nbd_config *config;

		mutex_lock(&nbd->config_lock);
		if (refcount_inc_not_zero(&nbd->config_refs)) {
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
		config = nbd->config = nbd_alloc_config();
		if (!config) {
			ret = -ENOMEM;
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
		refcount_set(&nbd->config_refs, 1);
		refcount_inc(&nbd->refs);
		mutex_unlock(&nbd->config_lock);
		bdev->bd_invalidated = 1;
	} else if (nbd_disconnected(nbd->config)) {
		bdev->bd_invalidated = 1;
	}
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_release(struct gendisk *disk, fmode_t mode)
{
	struct nbd_device *nbd = disk->private_data;
	struct block_device *bdev = bdget_disk(disk, 0);

	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
	    bdev->bd_openers == 0)
		nbd_disconnect_and_put(nbd);

	nbd_config_put(nbd);
	nbd_put(nbd);
}

static const struct block_device_operations nbd_fops =
{
	.owner =	THIS_MODULE,
	.open =		nbd_open,
	.release =	nbd_release,
	.ioctl =	nbd_ioctl,
	.compat_ioctl =	nbd_ioctl,
};

#if IS_ENABLED(CONFIG_DEBUG_FS)

static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;

	if (nbd->task_recv)
		seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));

	return 0;
}

static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}

static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->config->flags;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	if (flags & NBD_FLAG_HAS_FLAGS)
		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
	if (flags & NBD_FLAG_READ_ONLY)
		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
	if (flags & NBD_FLAG_SEND_FLUSH)
		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
	if (flags & NBD_FLAG_SEND_FUA)
		seq_puts(s, "NBD_FLAG_SEND_FUA\n");
	if (flags & NBD_FLAG_SEND_TRIM)
		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

	return 0;
}

static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_flags_show, inode->i_private);
}

static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;
	struct nbd_config *config = nbd->config;

	if (!nbd_dbg_dir)
		return -EIO;

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
	}
	config->dbg_dir = dir;

	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
	debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);

	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	debugfs_remove_recursive(nbd->config->dbg_dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
	if (!dbg_dir)
		return -EIO;

	nbd_dbg_dir = dbg_dir;

	return 0;
}

static void nbd_dbg_close(void)
{
	debugfs_remove_recursive(nbd_dbg_dir);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}

#endif

static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
			    unsigned int hctx_idx, unsigned int numa_node)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
	cmd->nbd = set->driver_data;
	cmd->flags = 0;
	mutex_init(&cmd->lock);
	return 0;
}

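/*
 * Create one nbd device: allocate the gendisk and blk-mq tag set, claim an
 * index in the IDR (a specific one if requested) and register the disk.
 */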
static const struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.complete	= nbd_complete_rq,
	.init_request	= nbd_init_request,
	.timeout	= nbd_xmit_timeout,
};

static int nbd_dev_add(int index)
{
	struct nbd_device *nbd;
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
	if (!nbd)
		goto out;

	disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_nbd;

	if (index >= 0) {
		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
				GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
	if (err < 0)
		goto out_free_disk;

	nbd->index = index;
	nbd->disk = disk;
	nbd->tag_set.ops = &nbd_mq_ops;
	nbd->tag_set.nr_hw_queues = 1;
	nbd->tag_set.queue_depth = 128;
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;

	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
		goto out_free_idr;

	q = blk_mq_init_queue(&nbd->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	disk->queue->limits.discard_granularity = 0;
	disk->queue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(disk->queue, 0);
	blk_queue_max_segment_size(disk->queue, UINT_MAX);
	blk_queue_max_segments(disk->queue, USHRT_MAX);
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	mutex_init(&nbd->config_lock);
	refcount_set(&nbd->config_refs, 0);
	refcount_set(&nbd->refs, 1);
	INIT_LIST_HEAD(&nbd->list);
	disk->major = NBD_MAJOR;
	disk->first_minor = index << part_shift;
	disk->fops = &nbd_fops;
	disk->private_data = nbd;
	sprintf(disk->disk_name, "nbd%d", index);
	add_disk(disk);
	nbd_total_devices++;
	return index;

out_free_tags:
	blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
	idr_remove(&nbd_index_idr, index);
out_free_disk:
	put_disk(disk);
out_free_nbd:
	kfree(nbd);
out:
	return err;
}

static int find_free_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	struct nbd_device **found = data;

	if (!refcount_read(&nbd->config_refs)) {
		*found = nbd;
		return 1;
	}
	return 0;
}

/* Netlink interface. */
static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
	[NBD_ATTR_INDEX]		= { .type = NLA_U32 },
	[NBD_ATTR_SIZE_BYTES]		= { .type = NLA_U64 },
	[NBD_ATTR_BLOCK_SIZE_BYTES]	= { .type = NLA_U64 },
	[NBD_ATTR_TIMEOUT]		= { .type = NLA_U64 },
	[NBD_ATTR_SERVER_FLAGS]		= { .type = NLA_U64 },
	[NBD_ATTR_CLIENT_FLAGS]		= { .type = NLA_U64 },
	[NBD_ATTR_SOCKETS]		= { .type = NLA_NESTED},
	[NBD_ATTR_DEAD_CONN_TIMEOUT]	= { .type = NLA_U64 },
	[NBD_ATTR_DEVICE_LIST]		= { .type = NLA_NESTED},
};

static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
	[NBD_SOCK_FD]			= { .type = NLA_U32 },
};

/* We don't use this right now since we don't parse the incoming list, but we
 * still want it here so userspace knows what to expect.
 */
static const struct nla_policy __attribute__((unused))
nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
	[NBD_DEVICE_INDEX]		= { .type = NLA_U32 },
	[NBD_DEVICE_CONNECTED]		= { .type = NLA_U8 },
};

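/*
 * NBD_CMD_CONNECT: find or create the requested device, build its config
 * from the netlink attributes, attach the supplied sockets and start it.
 */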
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd = NULL;
	struct nbd_config *config;
	int index = -1;
	int ret;
	bool put_dev = false;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	if (!info->attrs[NBD_ATTR_SOCKETS]) {
		printk(KERN_ERR "nbd: must specify at least one socket\n");
		return -EINVAL;
	}
	if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
		printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
		return -EINVAL;
	}
again:
	mutex_lock(&nbd_index_mutex);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
		if (ret == 0) {
			int new_index;
			new_index = nbd_dev_add(-1);
			if (new_index < 0) {
				mutex_unlock(&nbd_index_mutex);
				printk(KERN_ERR "nbd: failed to add new device\n");
				return new_index;
			}
			nbd = idr_find(&nbd_index_idr, new_index);
		}
	} else {
		nbd = idr_find(&nbd_index_idr, index);
		if (!nbd) {
			ret = nbd_dev_add(index);
			if (ret < 0) {
				mutex_unlock(&nbd_index_mutex);
				printk(KERN_ERR "nbd: failed to add new device\n");
				return ret;
			}
			nbd = idr_find(&nbd_index_idr, index);
		}
	}
	if (!nbd) {
		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
		       index);
		mutex_unlock(&nbd_index_mutex);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		if (index == -1)
			goto again;
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);

	mutex_lock(&nbd->config_lock);
	if (refcount_read(&nbd->config_refs)) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		if (index == -1)
			goto again;
		printk(KERN_ERR "nbd: nbd%d already in use\n", index);
		return -EBUSY;
	}
	if (WARN_ON(nbd->config)) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		return -EINVAL;
	}
	config = nbd->config = nbd_alloc_config();
	if (!nbd->config) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		printk(KERN_ERR "nbd: couldn't allocate config\n");
		return -ENOMEM;
	}
	refcount_set(&nbd->config_refs, 1);
	set_bit(NBD_BOUND, &config->runtime_flags);

	if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
		u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
		nbd_size_set(nbd, config->blksize,
			     div64_u64(bytes, config->blksize));
	}
	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
		u64 bsize =
			nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
		nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
	}
	if (info->attrs[NBD_ATTR_TIMEOUT]) {
		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
		nbd->tag_set.timeout = timeout * HZ;
		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
	}
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
		config->dead_conn_timeout *= HZ;
	}
	if (info->attrs[NBD_ATTR_SERVER_FLAGS])
		config->flags =
			nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			set_bit(NBD_DESTROY_ON_DISCONNECT,
				&config->runtime_flags);
			put_dev = true;
		}
		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
			set_bit(NBD_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		}
	}

	if (info->attrs[NBD_ATTR_SOCKETS]) {
		struct nlattr *attr;
		int rem, fd;

		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
				    rem) {
			struct nlattr *socks[NBD_SOCK_MAX+1];

			if (nla_type(attr) != NBD_SOCK_ITEM) {
				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
				ret = -EINVAL;
				goto out;
			}
			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
					       nbd_sock_policy, info->extack);
			if (ret != 0) {
				printk(KERN_ERR "nbd: error processing sock list\n");
				ret = -EINVAL;
				goto out;
			}
			if (!socks[NBD_SOCK_FD])
				continue;
			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
			ret = nbd_add_socket(nbd, fd, true);
			if (ret)
				goto out;
		}
	}
	ret = nbd_start_device(nbd);
out:
	mutex_unlock(&nbd->config_lock);
	if (!ret) {
		set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
		refcount_inc(&nbd->config_refs);
		nbd_connect_reply(info, nbd->index);
	}
	nbd_config_put(nbd);
	if (put_dev)
		nbd_put(nbd);
	return ret;
}

static void nbd_disconnect_and_put(struct nbd_device *nbd)
{
	mutex_lock(&nbd->config_lock);
	nbd_disconnect(nbd);
	nbd_clear_sock(nbd);
	mutex_unlock(&nbd->config_lock);
	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
}

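/* NBD_CMD_DISCONNECT: look up the device by index and tear its connections down. */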
static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd;
	int index;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (!info->attrs[NBD_ATTR_INDEX]) {
		printk(KERN_ERR "nbd: must specify an index to disconnect\n");
		return -EINVAL;
	}
	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	mutex_lock(&nbd_index_mutex);
	nbd = idr_find(&nbd_index_idr, index);
	if (!nbd) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
		       index);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);
	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		nbd_put(nbd);
		return 0;
	}
	nbd_disconnect_and_put(nbd);
	nbd_config_put(nbd);
	nbd_put(nbd);
	return 0;
}

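/*
 * NBD_CMD_RECONFIGURE: adjust timeouts and client flags on a running device
 * and plug replacement sockets into dead connections.
 */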
static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd = NULL;
	struct nbd_config *config;
	int index;
	int ret = 0;
	bool put_dev = false;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (!info->attrs[NBD_ATTR_INDEX]) {
		printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
		return -EINVAL;
	}
	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	mutex_lock(&nbd_index_mutex);
	nbd = idr_find(&nbd_index_idr, index);
	if (!nbd) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
		       index);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		dev_err(nbd_to_dev(nbd),
			"not configured, cannot reconfigure\n");
		nbd_put(nbd);
		return -EINVAL;
	}

	mutex_lock(&nbd->config_lock);
	config = nbd->config;
	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
	    !nbd->task_recv) {
		dev_err(nbd_to_dev(nbd),
			"not configured, cannot reconfigure\n");
		ret = -EINVAL;
		goto out;
	}

	if (info->attrs[NBD_ATTR_TIMEOUT]) {
		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
		nbd->tag_set.timeout = timeout * HZ;
		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
	}
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
		config->dead_conn_timeout *= HZ;
	}
	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
					      &config->runtime_flags))
				put_dev = true;
		} else {
			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
					       &config->runtime_flags))
				refcount_inc(&nbd->refs);
		}

		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
			set_bit(NBD_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		} else {
			clear_bit(NBD_DISCONNECT_ON_CLOSE,
				  &config->runtime_flags);
		}
	}

	if (info->attrs[NBD_ATTR_SOCKETS]) {
		struct nlattr *attr;
		int rem, fd;

		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
				    rem) {
			struct nlattr *socks[NBD_SOCK_MAX+1];

			if (nla_type(attr) != NBD_SOCK_ITEM) {
				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
				ret = -EINVAL;
				goto out;
			}
			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
					       nbd_sock_policy, info->extack);
			if (ret != 0) {
				printk(KERN_ERR "nbd: error processing sock list\n");
				ret = -EINVAL;
				goto out;
			}
			if (!socks[NBD_SOCK_FD])
				continue;
			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
			ret = nbd_reconnect_socket(nbd, fd);
			if (ret) {
				if (ret == -ENOSPC)
					ret = 0;
				goto out;
			}
			dev_info(nbd_to_dev(nbd), "reconnected socket\n");
		}
	}
out:
	mutex_unlock(&nbd->config_lock);
	nbd_config_put(nbd);
	nbd_put(nbd);
	if (put_dev)
		nbd_put(nbd);
	return ret;
}

static const struct genl_ops nbd_connect_genl_ops[] = {
	{
		.cmd	= NBD_CMD_CONNECT,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_connect,
	},
	{
		.cmd	= NBD_CMD_DISCONNECT,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_disconnect,
	},
	{
		.cmd	= NBD_CMD_RECONFIGURE,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_reconfigure,
	},
	{
		.cmd	= NBD_CMD_STATUS,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_status,
	},
};

static const struct genl_multicast_group nbd_mcast_grps[] = {
	{ .name = NBD_GENL_MCAST_GROUP_NAME, },
};

static struct genl_family nbd_genl_family __ro_after_init = {
	.hdrsize	= 0,
	.name		= NBD_GENL_FAMILY_NAME,
	.version	= NBD_GENL_VERSION,
	.module		= THIS_MODULE,
	.ops		= nbd_connect_genl_ops,
	.n_ops		= ARRAY_SIZE(nbd_connect_genl_ops),
	.maxattr	= NBD_ATTR_MAX,
	.mcgrps		= nbd_mcast_grps,
	.n_mcgrps	= ARRAY_SIZE(nbd_mcast_grps),
};

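/* Emit one NBD_DEVICE_ITEM nest (index plus connected flag) into a status reply. */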
static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
{
	struct nlattr *dev_opt;
	u8 connected = 0;
	int ret;

	/* This is a little racy, but for status it's ok. The
	 * reason we don't take a ref here is because we can't
	 * take a ref in the index == -1 case as we would need
	 * to put under the nbd_index_mutex, which could
	 * deadlock if we are configured to remove ourselves
	 * once we're disconnected.
	 */
	if (refcount_read(&nbd->config_refs))
		connected = 1;
	dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
	if (!dev_opt)
		return -EMSGSIZE;
	ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
	if (ret)
		return -EMSGSIZE;
	ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
			 connected);
	if (ret)
		return -EMSGSIZE;
	nla_nest_end(reply, dev_opt);
	return 0;
}

static int status_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	return populate_nbd_status(nbd, (struct sk_buff *)data);
}

static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

	reply = genlmsg_new(msg_size, GFP_KERNEL);
	if (!reply)
		goto out;
	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
				       NBD_CMD_STATUS);
	if (!reply_head) {
		nlmsg_free(reply);
		goto out;
	}

	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
			nlmsg_free(reply);
			goto out;
		}
	} else {
		struct nbd_device *nbd;
		nbd = idr_find(&nbd_index_idr, index);
		if (nbd) {
			ret = populate_nbd_status(nbd, reply);
			if (ret) {
				nlmsg_free(reply);
				goto out;
			}
		}
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
	genlmsg_reply(reply, info);
	ret = 0;
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

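/*
 * Netlink notifications: reply to a connect with the allocated index, and
 * multicast NBD_CMD_LINK_DEAD when a link is marked dead.
 */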

static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

	reply = genlmsg_new(msg_size, GFP_KERNEL);
	if (!reply)
		goto out;
	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
				       NBD_CMD_STATUS);
	if (!reply_head) {
		nlmsg_free(reply);
		goto out;
	}

	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
			nlmsg_free(reply);
			goto out;
		}
	} else {
		struct nbd_device *nbd;
		nbd = idr_find(&nbd_index_idr, index);
		if (nbd) {
			ret = populate_nbd_status(nbd, reply);
			if (ret) {
				nlmsg_free(reply);
				goto out;
			}
		}
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
	genlmsg_reply(reply, info);
	ret = 0;
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_connect_reply(struct genl_info *info, int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
				     NBD_CMD_CONNECT);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_reply(skb, info);
}

static void nbd_mcast_index(int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
			       NBD_CMD_LINK_DEAD);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

static void nbd_dead_link_work(struct work_struct *work)
{
	struct link_dead_args *args = container_of(work, struct link_dead_args,
						   work);
	nbd_mcast_index(args->index);
	kfree(args);
}
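
/*
 * Example: the NBD_CMD_LINK_DEAD multicasts sent above can be consumed
 * from userspace by joining the family's multicast group, so a
 * supervisor can react with NBD_CMD_RECONFIGURE.  A minimal sketch,
 * not part of this driver, assuming libnl-3 (same headers as the
 * reconfigure sketch earlier, plus <stdio.h>); error handling is
 * omitted and the names watch_dead_links/link_dead_cb are made up for
 * the example.
 *
 *	static int link_dead_cb(struct nl_msg *msg, void *arg)
 *	{
 *		struct nlattr *attrs[NBD_ATTR_MAX + 1];
 *
 *		if (genlmsg_parse(nlmsg_hdr(msg), 0, attrs, NBD_ATTR_MAX,
 *				  NULL) == 0 && attrs[NBD_ATTR_INDEX])
 *			printf("nbd%u lost a link\n",
 *			       nla_get_u32(attrs[NBD_ATTR_INDEX]));
 *		return NL_OK;
 *	}
 *
 *	void watch_dead_links(void)
 *	{
 *		struct nl_sock *sk = nl_socket_alloc();
 *		int grp;
 *
 *		genl_connect(sk);
 *		grp = genl_ctrl_resolve_grp(sk, NBD_GENL_FAMILY_NAME,
 *					    NBD_GENL_MCAST_GROUP_NAME);
 *		nl_socket_add_membership(sk, grp);
 *		nl_socket_disable_seq_check(sk);
 *		nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM,
 *				    link_dead_cb, NULL);
 *		for (;;)
 *			nl_recvmsgs_default(sk);
 *	}
 */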

static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that users can know the max number of
		 * partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;
	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
					 WQ_UNBOUND, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	if (genl_register_family(&nbd_genl_family)) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		destroy_workqueue(recv_workqueue);
		return -EINVAL;
	}
	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	list_add_tail(&nbd->list, list);
	return 0;
}

static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	idr_destroy(&nbd_index_idr);
	genl_unregister_family(&nbd_genl_family);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, uint, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
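
/*
 * Worked example for the parameters above: with the default max_part=16,
 * nbd_init() computes part_shift = fls(16) = 5 and clamps max_part to
 * (1 << 5) - 1 = 31 usable partitions, so each device consumes 32 minor
 * numbers and at most 1 << (MINORBITS - part_shift) = 32768 devices fit
 * in the minor number space.
 */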