/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
	int cookie;
};

struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

struct link_dead_args {
	struct work_struct work;
	int index;
};

#define NBD_TIMEDOUT			0
#define NBD_DISCONNECT_REQUESTED	1
#define NBD_DISCONNECTED		2
#define NBD_HAS_PID_FILE		3
#define NBD_HAS_CONFIG_REF		4
#define NBD_BOUND			5
#define NBD_DESTROY_ON_DISCONNECT	6
#define NBD_DISCONNECT_ON_CLOSE		7

struct nbd_config {
	u32 flags;
	unsigned long runtime_flags;
	u64 dead_conn_timeout;

	struct nbd_sock **socks;
	int num_connections;
	atomic_t live_connections;
	wait_queue_head_t conn_wait;

	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	loff_t blksize;
	loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};

struct nbd_device {
	struct blk_mq_tag_set tag_set;

	int index;
	refcount_t config_refs;
	refcount_t refs;
	struct nbd_config *config;
	struct mutex config_lock;
	struct gendisk *disk;

	struct list_head list;
	struct task_struct *task_recv;
	struct task_struct *task_setup;
};

#define NBD_CMD_REQUEUED	1

struct nbd_cmd {
	struct nbd_device *nbd;
	struct mutex lock;
	int index;
	int cookie;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

static unsigned int nbds_max = 16;
static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
		blk_mq_requeue_request(req, true);
}

#define NBD_COOKIE_BITS 32

static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

static u32 nbd_handle_to_cookie(u64 handle)
{
	return (u32)(handle >> NBD_COOKIE_BITS);
}

static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}

static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static const struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = 0444},
	.show = pid_show,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *disk = nbd->disk;
	struct request_queue *q;

	if (disk) {
		q = disk->queue;
		del_gendisk(disk);
		blk_cleanup_queue(q);
		blk_mq_free_tag_set(&nbd->tag_set);
		disk->private_data = NULL;
		put_disk(disk);
	}
	kfree(nbd);
}

static void nbd_put(struct nbd_device *nbd)
{
	if (refcount_dec_and_mutex_lock(&nbd->refs,
					&nbd_index_mutex)) {
		idr_remove(&nbd_index_idr, nbd->index);
		mutex_unlock(&nbd_index_mutex);
		nbd_dev_remove(nbd);
	}
}

static int nbd_disconnected(struct nbd_config *config)
{
	return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
		test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
}

static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
				int notify)
{
	if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
		struct link_dead_args *args;
		args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
		if (args) {
			INIT_WORK(&args->work, nbd_dead_link_work);
			args->index = nbd->index;
			queue_work(system_wq, &args->work);
		}
	}
	if (!nsock->dead) {
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
					       &nbd->config->runtime_flags)) {
				set_bit(NBD_DISCONNECTED,
					&nbd->config->runtime_flags);
				dev_info(nbd_to_dev(nbd),
					"Disconnected due to user request.\n");
			}
		}
	}
	nsock->dead = true;
	nsock->pending = NULL;
	nsock->sent = 0;
}

static void nbd_size_clear(struct nbd_device *nbd)
{
	if (nbd->config->bytesize) {
		set_capacity(nbd->disk, 0);
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	}
}

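/*
 * Rough worked example of the size bookkeeping below (not part of the
 * original source): the configured size is tracked in bytes, but
 * set_capacity() takes 512-byte sectors, hence the ">> 9".  With
 * blksize = 4096 and nr_blocks = 262144:
 *
 *	bytesize = 4096 * 262144 = 1073741824 bytes (1 GiB)
 *	capacity = bytesize >> 9 = 2097152 sectors
 */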
static void nbd_size_update(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	struct block_device *bdev = bdget_disk(nbd->disk, 0);

	if (config->flags & NBD_FLAG_SEND_TRIM) {
		nbd->disk->queue->limits.discard_granularity = config->blksize;
		nbd->disk->queue->limits.discard_alignment = config->blksize;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
	}
	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
	set_capacity(nbd->disk, config->bytesize >> 9);
	if (bdev) {
		if (bdev->bd_disk)
			bd_set_size(bdev, config->bytesize);
		else
			bdev->bd_invalidated = 1;
		bdput(bdev);
	}
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
			 loff_t nr_blocks)
{
	struct nbd_config *config = nbd->config;
	config->blksize = blocksize;
	config->bytesize = blocksize * nr_blocks;
	if (nbd->task_recv != NULL)
		nbd_size_update(nbd);
}

static void nbd_complete_rq(struct request *req)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
		cmd->status ? "failed" : "done");

	blk_mq_end_request(req, cmd->status);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int i;

	if (config->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];
		mutex_lock(&nsock->tx_lock);
		nbd_mark_nsock_dead(nbd, nsock, 0);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		cmd->status = BLK_STS_TIMEOUT;
		goto done;
	}
	config = nbd->config;

	if (!mutex_trylock(&cmd->lock))
		return BLK_EH_RESET_TIMER;

	if (config->num_connections > 1) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying (%d/%d alive)\n",
				    atomic_read(&config->live_connections),
				    config->num_connections);
		/*
		 * Hooray we have more connections, requeue this IO, the submit
		 * path will put it on a real connection.
		 */
		if (config->socks && config->num_connections > 1) {
			if (cmd->index < config->num_connections) {
				struct nbd_sock *nsock =
					config->socks[cmd->index];
				mutex_lock(&nsock->tx_lock);
				/* We can have multiple outstanding requests, so
				 * we don't want to mark the nsock dead if we've
				 * already reconnected with a new socket, so
				 * only mark it dead if it's the same socket we
				 * were sent out on.
				 */
				if (cmd->cookie == nsock->cookie)
					nbd_mark_nsock_dead(nbd, nsock, 1);
				mutex_unlock(&nsock->tx_lock);
			}
			mutex_unlock(&cmd->lock);
			nbd_requeue_cmd(cmd);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
	} else {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out\n");
	}
	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
	sock_shutdown(nbd);
	nbd_config_put(nbd);
done:
	blk_mq_complete_request(req);
	return BLK_EH_DONE;
}

/*
 * Send or receive packet.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags, int *sent)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock = config->socks[index]->sock;
	int result;
	struct msghdr msg;
	unsigned int noreclaim_flag;

	if (unlikely(!sock)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
		return -EINVAL;
	}

	msg.msg_iter = *iter;

	noreclaim_flag = memalloc_noreclaim_save();
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = sock_sendmsg(sock, &msg);
		else
			result = sock_recvmsg(sock, &msg, msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		if (sent)
			*sent += result;
	} while (msg_data_left(&msg));

	memalloc_noreclaim_restore(noreclaim_flag);

	return result;
}

/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, because reasons?
 */
static inline int was_interrupted(int result)
{
	return result == -ERESTARTSYS || result == -EINTR;
}

/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_config *config = nbd->config;
	struct nbd_sock *nsock = config->socks[index];
	int result;
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	unsigned long size = blk_rq_bytes(req);
	struct bio *bio;
	u64 handle;
	u32 type;
	u32 nbd_cmd_flags = 0;
	int sent = nsock->sent, skip = 0;

	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));

	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		type = NBD_CMD_TRIM;
		break;
	case REQ_OP_FLUSH:
		type = NBD_CMD_FLUSH;
		break;
	case REQ_OP_WRITE:
		type = NBD_CMD_WRITE;
		break;
	case REQ_OP_READ:
		type = NBD_CMD_READ;
		break;
	default:
		return -EIO;
	}

	if (rq_data_dir(req) == WRITE &&
	    (config->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

	if (req->cmd_flags & REQ_FUA)
		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;

	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);
			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	} else {
		cmd->cmd_cookie++;
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
			return BLK_STS_RESOURCE;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				req, bvec.bv_len);
			iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result <= 0) {
				if (was_interrupted(result)) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
					return BLK_STS_RESOURCE;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
		}
		bio = next;
	}
out:
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
}

/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int result;
	struct nbd_reply reply;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u64 handle;
	u16 hwq;
	u32 tag;
	struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
	struct iov_iter to;
	int ret = 0;

	reply.magic = 0;
	iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result <= 0) {
		if (!nbd_disconnected(config))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
		return ERR_PTR(result);
	}

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
				(unsigned long)ntohl(reply.magic));
		return ERR_PTR(-EPROTO);
	}

	memcpy(&handle, reply.handle, sizeof(handle));
	tag = nbd_handle_to_tag(handle);
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	cmd = blk_mq_rq_to_pdu(req);

	mutex_lock(&cmd->lock);
	if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
		dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
			req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
		ret = -ENOENT;
		goto out;
	}
	if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
		dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (ntohl(reply.error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply.error));
		cmd->status = BLK_STS_IOERR;
		goto out;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, req, iter) {
			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
				 * If we've disconnected or we only have 1
				 * connection then we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
				if (nbd_disconnected(config) ||
				    config->num_connections <= 1) {
					cmd->status = BLK_STS_IOERR;
					goto out;
				}
				ret = -EIO;
				goto out;
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				req, bvec.bv_len);
		}
	}
out:
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
}

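/*
 * Informal sketch of how replies are matched back to requests (added for
 * clarity): the 64-bit handle built by nbd_cmd_handle() packs the blk-mq
 * unique tag in the low 32 bits and a per-command cookie in the high 32
 * bits, roughly:
 *
 *	handle = ((u64)cmd->cmd_cookie << NBD_COOKIE_BITS) |
 *		 blk_mq_unique_tag(req);
 *
 * nbd_read_stat() above drops replies whose cookie no longer matches the
 * command, so a stale reply for a request that timed out and was requeued
 * cannot complete the wrong I/O.
 */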
static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct nbd_config *config = nbd->config;
	struct nbd_cmd *cmd;

	while (1) {
		cmd = nbd_read_stat(nbd, args->index);
		if (IS_ERR(cmd)) {
			struct nbd_sock *nsock = config->socks[args->index];

			mutex_lock(&nsock->tx_lock);
			nbd_mark_nsock_dead(nbd, nsock, 1);
			mutex_unlock(&nsock->tx_lock);
			break;
		}

		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
	}
	atomic_dec(&config->recv_threads);
	wake_up(&config->recv_wq);
	nbd_config_put(nbd);
	kfree(args);
}

static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	cmd->status = BLK_STS_IOERR;
	blk_mq_complete_request(req);
}

static void nbd_clear_que(struct nbd_device *nbd)
{
	blk_mq_quiesce_queue(nbd->disk->queue);
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	blk_mq_unquiesce_queue(nbd->disk->queue);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

static int find_fallback(struct nbd_device *nbd, int index)
{
	struct nbd_config *config = nbd->config;
	int new_index = -1;
	struct nbd_sock *nsock = config->socks[index];
	int fallback = nsock->fallback_index;

	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return new_index;

	if (config->num_connections <= 1) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		return new_index;
	}

	if (fallback >= 0 && fallback < config->num_connections &&
	    !config->socks[fallback]->dead)
		return fallback;

	if (nsock->fallback_index < 0 ||
	    nsock->fallback_index >= config->num_connections ||
	    config->socks[nsock->fallback_index]->dead) {
		int i;
		for (i = 0; i < config->num_connections; i++) {
			if (i == index)
				continue;
			if (!config->socks[i]->dead) {
				new_index = i;
				break;
			}
		}
		nsock->fallback_index = new_index;
		if (new_index < 0) {
			dev_err_ratelimited(disk_to_dev(nbd->disk),
					    "Dead connection, failed to find a fallback\n");
			return new_index;
		}
	}
	new_index = nsock->fallback_index;
	return new_index;
}

static int wait_for_reconnect(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	if (!config->dead_conn_timeout)
		return 0;
	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
		return 0;
	return wait_event_timeout(config->conn_wait,
				  atomic_read(&config->live_connections) > 0,
				  config->dead_conn_timeout) > 0;
}

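/*
 * Informal note on the retry path (added for clarity): when the socket a
 * command was headed for is dead, nbd_handle_cmd() below retries via
 * find_fallback(); if nothing is alive it may block in
 * wait_for_reconnect() for up to dead_conn_timeout jiffies
 * (NBD_ATTR_DEAD_CONN_TIMEOUT is given in seconds and multiplied by HZ in
 * the netlink handlers) before failing the request with -EIO.
 */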
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;
	struct nbd_sock *nsock;
	int ret;

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Socks array is empty\n");
		blk_mq_start_request(req);
		return -EINVAL;
	}
	config = nbd->config;

	if (index >= config->num_connections) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		nbd_config_put(nbd);
		blk_mq_start_request(req);
		return -EINVAL;
	}
	cmd->status = BLK_STS_OK;
again:
	nsock = config->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (nsock->dead) {
		int old_index = index;
		index = find_fallback(nbd, index);
		mutex_unlock(&nsock->tx_lock);
		if (index < 0) {
			if (wait_for_reconnect(nbd)) {
				index = old_index;
				goto again;
			}
			/* All the sockets should already be down at this point,
			 * we just want to make sure that DISCONNECTED is set so
			 * any requests that come in that were queued waiting
			 * for the reconnect timer don't trigger the timer again
			 * and instead just error out.
			 */
			sock_shutdown(nbd);
			nbd_config_put(nbd);
			blk_mq_start_request(req);
			return -EIO;
		}
		goto again;
	}

	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * dispatch list.
	 */
	blk_mq_start_request(req);
	if (unlikely(nsock->pending && nsock->pending != req)) {
		nbd_requeue_cmd(cmd);
		ret = 0;
		goto out;
	}
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
	ret = nbd_send_cmd(nbd, cmd, index);
	if (ret == -EAGAIN) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed, requeueing\n");
		nbd_mark_nsock_dead(nbd, nsock, 1);
		nbd_requeue_cmd(cmd);
		ret = 0;
	}
out:
	mutex_unlock(&nsock->tx_lock);
	nbd_config_put(nbd);
	return ret;
}

static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	int ret;

	/*
	 * Since we look at the bio's to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	mutex_lock(&cmd->lock);
	clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
	 */
	ret = nbd_handle_cmd(cmd, hctx->queue_num);
	if (ret < 0)
		ret = BLK_STS_IOERR;
	else if (!ret)
		ret = BLK_STS_OK;
	mutex_unlock(&cmd->lock);

	return ret;
}

static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
			  bool netlink)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock;
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;

	if (!netlink && !nbd->task_setup &&
	    !test_bit(NBD_BOUND, &config->runtime_flags))
		nbd->task_setup = current;

	if (!netlink &&
	    (nbd->task_setup != current ||
	     test_bit(NBD_BOUND, &config->runtime_flags))) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		sockfd_put(sock);
		return -EBUSY;
	}

	socks = krealloc(config->socks, (config->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks) {
		sockfd_put(sock);
		return -ENOMEM;
	}
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	config->socks = socks;

	nsock->fallback_index = -1;
	nsock->dead = false;
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	nsock->pending = NULL;
	nsock->sent = 0;
	nsock->cookie = 0;
	socks[config->num_connections++] = nsock;
	atomic_inc(&config->live_connections);

	return 0;
}

static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock, *old;
	struct recv_thread_args *args;
	int i;
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;

	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];

		if (!nsock->dead)
			continue;

		mutex_lock(&nsock->tx_lock);
		if (!nsock->dead) {
			mutex_unlock(&nsock->tx_lock);
			continue;
		}
		sk_set_memalloc(sock->sk);
		if (nbd->tag_set.timeout)
			sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
		atomic_inc(&config->recv_threads);
		refcount_inc(&nbd->config_refs);
		old = nsock->sock;
		nsock->fallback_index = -1;
		nsock->sock = sock;
		nsock->dead = false;
		INIT_WORK(&args->work, recv_work);
		args->index = i;
		args->nbd = nbd;
		nsock->cookie++;
		mutex_unlock(&nsock->tx_lock);
		sockfd_put(old);

		clear_bit(NBD_DISCONNECTED, &config->runtime_flags);

		/* We take the tx_mutex in an error path in the recv_work, so we
		 * need to queue_work outside of the tx_mutex.
		 */
		queue_work(recv_workqueue, &args->work);

		atomic_inc(&config->live_connections);
		wake_up(&config->conn_wait);
		return 0;
	}
	sockfd_put(sock);
	kfree(args);
	return -ENOSPC;
}

static void nbd_bdev_reset(struct block_device *bdev)
{
	if (bdev->bd_openers > 1)
		return;
	bd_set_size(bdev, 0);
}

static void nbd_parse_flags(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	if (config->flags & NBD_FLAG_READ_ONLY)
		set_disk_ro(nbd->disk, true);
	else
		set_disk_ro(nbd->disk, false);
	if (config->flags & NBD_FLAG_SEND_TRIM)
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (config->flags & NBD_FLAG_SEND_FLUSH) {
		if (config->flags & NBD_FLAG_SEND_FUA)
			blk_queue_write_cache(nbd->disk->queue, true, true);
		else
			blk_queue_write_cache(nbd->disk->queue, true, false);
	}
	else
		blk_queue_write_cache(nbd->disk->queue, false, false);
}

static void send_disconnects(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	struct nbd_request request = {
		.magic = htonl(NBD_REQUEST_MAGIC),
		.type = htonl(NBD_CMD_DISC),
	};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	int i, ret;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];

		iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
		mutex_lock(&nsock->tx_lock);
		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
		mutex_unlock(&nsock->tx_lock);
	}
}

static int nbd_disconnect(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;

	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
	set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
	send_disconnects(nbd);
	return 0;
}

static void nbd_clear_sock(struct nbd_device *nbd)
{
	sock_shutdown(nbd);
	nbd_clear_que(nbd);
	nbd->task_setup = NULL;
}

static void nbd_config_put(struct nbd_device *nbd)
{
	if (refcount_dec_and_mutex_lock(&nbd->config_refs,
					&nbd->config_lock)) {
		struct nbd_config *config = nbd->config;
		nbd_dev_dbg_close(nbd);
		nbd_size_clear(nbd);
		if (test_and_clear_bit(NBD_HAS_PID_FILE,
				       &config->runtime_flags))
			device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
		nbd->task_recv = NULL;
		nbd_clear_sock(nbd);
		if (config->num_connections) {
			int i;
			for (i = 0; i < config->num_connections; i++) {
				sockfd_put(config->socks[i]->sock);
				kfree(config->socks[i]);
			}
			kfree(config->socks);
		}
		kfree(nbd->config);
		nbd->config = NULL;

		nbd->tag_set.timeout = 0;
		nbd->disk->queue->limits.discard_granularity = 0;
		nbd->disk->queue->limits.discard_alignment = 0;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);

		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		module_put(THIS_MODULE);
	}
}

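/*
 * Informal sketch of the refcounting used here (added for clarity):
 * nbd->refs pins the nbd_device itself (the gendisk is torn down via
 * nbd_put() when it drops to zero), while nbd->config_refs pins the
 * active nbd_config.  Paths that touch nbd->config without holding
 * config_lock take a config reference first, e.g.:
 *
 *	if (refcount_inc_not_zero(&nbd->config_refs)) {
 *		... use nbd->config ...
 *		nbd_config_put(nbd);
 *	}
 *
 * nbd_config_put() above releases the sockets, the sysfs pid file and the
 * debugfs entries once the last config reference is gone.
 */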
static int nbd_start_device(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int num_connections = config->num_connections;
	int error = 0, i;

	if (nbd->task_recv)
		return -EBUSY;
	if (!config->socks)
		return -EINVAL;
	if (num_connections > 1 &&
	    !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
		return -EINVAL;
	}

	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
	nbd->task_recv = current;

	nbd_parse_flags(nbd);

	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (error) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		return error;
	}
	set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);

	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
		struct recv_thread_args *args;

		args = kzalloc(sizeof(*args), GFP_KERNEL);
		if (!args) {
			sock_shutdown(nbd);
			return -ENOMEM;
		}
		sk_set_memalloc(config->socks[i]->sock->sk);
		if (nbd->tag_set.timeout)
			config->socks[i]->sock->sk->sk_sndtimeo =
				nbd->tag_set.timeout;
		atomic_inc(&config->recv_threads);
		refcount_inc(&nbd->config_refs);
		INIT_WORK(&args->work, recv_work);
		args->nbd = nbd;
		args->index = i;
		queue_work(recv_workqueue, &args->work);
	}
	nbd_size_update(nbd);
	return error;
}

static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
{
	struct nbd_config *config = nbd->config;
	int ret;

	ret = nbd_start_device(nbd);
	if (ret)
		return ret;

	if (max_part)
		bdev->bd_invalidated = 1;
	mutex_unlock(&nbd->config_lock);
	ret = wait_event_interruptible(config->recv_wq,
				       atomic_read(&config->recv_threads) == 0);
	if (ret)
		sock_shutdown(nbd);
	mutex_lock(&nbd->config_lock);
	nbd_bdev_reset(bdev);
	/* user requested, ignore socket errors */
	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
		ret = 0;
	if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
		ret = -ETIMEDOUT;
	return ret;
}

static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
				 struct block_device *bdev)
{
	sock_shutdown(nbd);
	kill_bdev(bdev);
	nbd_bdev_reset(bdev);
	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
}

/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
		       unsigned int cmd, unsigned long arg)
{
	struct nbd_config *config = nbd->config;

	switch (cmd) {
	case NBD_DISCONNECT:
		return nbd_disconnect(nbd);
	case NBD_CLEAR_SOCK:
		nbd_clear_sock_ioctl(nbd, bdev);
		return 0;
	case NBD_SET_SOCK:
		return nbd_add_socket(nbd, arg, false);
	case NBD_SET_BLKSIZE:
		if (!arg || !is_power_of_2(arg) || arg < 512 ||
		    arg > PAGE_SIZE)
			return -EINVAL;
		nbd_size_set(nbd, arg,
			     div_s64(config->bytesize, arg));
		return 0;
	case NBD_SET_SIZE:
		nbd_size_set(nbd, config->blksize,
			     div_s64(arg, config->blksize));
		return 0;
	case NBD_SET_SIZE_BLOCKS:
		nbd_size_set(nbd, config->blksize, arg);
		return 0;
	case NBD_SET_TIMEOUT:
		if (arg) {
			nbd->tag_set.timeout = arg * HZ;
			blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
		}
		return 0;

	case NBD_SET_FLAGS:
		config->flags = arg;
		return 0;
	case NBD_DO_IT:
		return nbd_start_device_ioctl(nbd, bdev);
	case NBD_CLEAR_QUE:
		/*
		 * This is for compatibility only. The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
		return 0;
	case NBD_PRINT_DEBUG:
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
		return 0;
	}
	return -ENOTTY;
}

static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	struct nbd_config *config = nbd->config;
	int error = -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* The block layer will pass back some non-nbd ioctls in case we have
	 * special handling for them, but we don't so just return an error.
	 */
	if (_IOC_TYPE(cmd) != 0xab)
		return -EINVAL;

	mutex_lock(&nbd->config_lock);

	/* Don't allow ioctl operations on a nbd device that was created with
	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
	 */
	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
	    (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
		error = __nbd_ioctl(bdev, nbd, cmd, arg);
	else
		dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
	mutex_unlock(&nbd->config_lock);
	return error;
}

static struct nbd_config *nbd_alloc_config(void)
{
	struct nbd_config *config;

	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
	if (!config)
		return NULL;
	atomic_set(&config->recv_threads, 0);
	init_waitqueue_head(&config->recv_wq);
	init_waitqueue_head(&config->conn_wait);
	config->blksize = 1024;
	atomic_set(&config->live_connections, 0);
	try_module_get(THIS_MODULE);
	return config;
}

static int nbd_open(struct block_device *bdev, fmode_t mode)
{
	struct nbd_device *nbd;
	int ret = 0;

	mutex_lock(&nbd_index_mutex);
	nbd = bdev->bd_disk->private_data;
	if (!nbd) {
		ret = -ENXIO;
		goto out;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		ret = -ENXIO;
		goto out;
	}
	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		struct nbd_config *config;

		mutex_lock(&nbd->config_lock);
		if (refcount_inc_not_zero(&nbd->config_refs)) {
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
		config = nbd->config = nbd_alloc_config();
		if (!config) {
			ret = -ENOMEM;
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
		refcount_set(&nbd->config_refs, 1);
		refcount_inc(&nbd->refs);
		mutex_unlock(&nbd->config_lock);
		bdev->bd_invalidated = 1;
	} else if (nbd_disconnected(nbd->config)) {
		bdev->bd_invalidated = 1;
	}
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_release(struct gendisk *disk, fmode_t mode)
{
	struct nbd_device *nbd = disk->private_data;
	struct block_device *bdev = bdget_disk(disk, 0);

	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
			bdev->bd_openers == 0)
		nbd_disconnect_and_put(nbd);

	nbd_config_put(nbd);
	nbd_put(nbd);
}

static const struct block_device_operations nbd_fops =
{
	.owner =	THIS_MODULE,
	.open =		nbd_open,
	.release =	nbd_release,
	.ioctl =	nbd_ioctl,
	.compat_ioctl =	nbd_ioctl,
};

#if IS_ENABLED(CONFIG_DEBUG_FS)

static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;

	if (nbd->task_recv)
		seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));

	return 0;
}

static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}

static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->config->flags;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	if (flags & NBD_FLAG_HAS_FLAGS)
		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
	if (flags & NBD_FLAG_READ_ONLY)
		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
	if (flags & NBD_FLAG_SEND_FLUSH)
		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
	if (flags & NBD_FLAG_SEND_FUA)
		seq_puts(s, "NBD_FLAG_SEND_FUA\n");
	if (flags & NBD_FLAG_SEND_TRIM)
		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

	return 0;
}

static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_flags_show, inode->i_private);
}

static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;
	struct nbd_config *config = nbd->config;

	if (!nbd_dbg_dir)
		return -EIO;

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
	}
	config->dbg_dir = dir;

	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
	debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);

	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	debugfs_remove_recursive(nbd->config->dbg_dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
	if (!dbg_dir)
		return -EIO;

	nbd_dbg_dir = dbg_dir;

	return 0;
}

static void nbd_dbg_close(void)
{
	debugfs_remove_recursive(nbd_dbg_dir);
}

#else /* IS_ENABLED(CONFIG_DEBUG_FS) */

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}

#endif

static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
			    unsigned int hctx_idx, unsigned int numa_node)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
	cmd->nbd = set->driver_data;
	cmd->flags = 0;
	mutex_init(&cmd->lock);
	return 0;
}

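/*
 * Informal note (added for clarity): blk-mq hardware queues map 1:1 onto
 * NBD connections.  nbd_dev_add() starts with nr_hw_queues = 1 and
 * nbd_start_device() bumps it to the number of configured sockets, so
 * nbd_queue_rq() can use hctx->queue_num directly as the socket index it
 * hands to nbd_handle_cmd().
 */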
static const struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.complete	= nbd_complete_rq,
	.init_request	= nbd_init_request,
	.timeout	= nbd_xmit_timeout,
};

static int nbd_dev_add(int index)
{
	struct nbd_device *nbd;
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
	if (!nbd)
		goto out;

	disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_nbd;

	if (index >= 0) {
		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
				GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
	if (err < 0)
		goto out_free_disk;

	nbd->index = index;
	nbd->disk = disk;
	nbd->tag_set.ops = &nbd_mq_ops;
	nbd->tag_set.nr_hw_queues = 1;
	nbd->tag_set.queue_depth = 128;
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;

	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
		goto out_free_idr;

	q = blk_mq_init_queue(&nbd->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	disk->queue->limits.discard_granularity = 0;
	disk->queue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(disk->queue, 0);
	blk_queue_max_segment_size(disk->queue, UINT_MAX);
	blk_queue_max_segments(disk->queue, USHRT_MAX);
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	mutex_init(&nbd->config_lock);
	refcount_set(&nbd->config_refs, 0);
	refcount_set(&nbd->refs, 1);
	INIT_LIST_HEAD(&nbd->list);
	disk->major = NBD_MAJOR;
	disk->first_minor = index << part_shift;
	disk->fops = &nbd_fops;
	disk->private_data = nbd;
	sprintf(disk->disk_name, "nbd%d", index);
	add_disk(disk);
	nbd_total_devices++;
	return index;

out_free_tags:
	blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
	idr_remove(&nbd_index_idr, index);
out_free_disk:
	put_disk(disk);
out_free_nbd:
	kfree(nbd);
out:
	return err;
}

static int find_free_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	struct nbd_device **found = data;

	if (!refcount_read(&nbd->config_refs)) {
		*found = nbd;
		return 1;
	}
	return 0;
}

/* Netlink interface. */
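/*
 * Informal summary of the expected message layout, derived from the
 * policies and handlers below: an NBD_CMD_CONNECT request carries
 * NBD_ATTR_SIZE_BYTES plus an NBD_ATTR_SOCKETS nested list whose entries
 * are NBD_SOCK_ITEM attributes, each holding an NBD_SOCK_FD for an
 * already-connected socket.  NBD_ATTR_INDEX is optional; without it the
 * first unconfigured device is used (or a new one is allocated).  The
 * same socket layout is accepted by NBD_CMD_RECONFIGURE when re-adding
 * connections to a running device.
 */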
static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
	[NBD_ATTR_INDEX]		= { .type = NLA_U32 },
	[NBD_ATTR_SIZE_BYTES]		= { .type = NLA_U64 },
	[NBD_ATTR_BLOCK_SIZE_BYTES]	= { .type = NLA_U64 },
	[NBD_ATTR_TIMEOUT]		= { .type = NLA_U64 },
	[NBD_ATTR_SERVER_FLAGS]		= { .type = NLA_U64 },
	[NBD_ATTR_CLIENT_FLAGS]		= { .type = NLA_U64 },
	[NBD_ATTR_SOCKETS]		= { .type = NLA_NESTED},
	[NBD_ATTR_DEAD_CONN_TIMEOUT]	= { .type = NLA_U64 },
	[NBD_ATTR_DEVICE_LIST]		= { .type = NLA_NESTED},
};

static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
	[NBD_SOCK_FD]			= { .type = NLA_U32 },
};

/* We don't use this right now since we don't parse the incoming list, but we
 * still want it here so userspace knows what to expect.
 */
static const struct nla_policy __attribute__((unused))
nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
	[NBD_DEVICE_INDEX]		= { .type = NLA_U32 },
	[NBD_DEVICE_CONNECTED]		= { .type = NLA_U8 },
};

static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd = NULL;
	struct nbd_config *config;
	int index = -1;
	int ret;
	bool put_dev = false;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	if (!info->attrs[NBD_ATTR_SOCKETS]) {
		printk(KERN_ERR "nbd: must specify at least one socket\n");
		return -EINVAL;
	}
	if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
		printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
		return -EINVAL;
	}
again:
	mutex_lock(&nbd_index_mutex);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
		if (ret == 0) {
			int new_index;
			new_index = nbd_dev_add(-1);
			if (new_index < 0) {
				mutex_unlock(&nbd_index_mutex);
				printk(KERN_ERR "nbd: failed to add new device\n");
				return new_index;
			}
			nbd = idr_find(&nbd_index_idr, new_index);
		}
	} else {
		nbd = idr_find(&nbd_index_idr, index);
		if (!nbd) {
			ret = nbd_dev_add(index);
			if (ret < 0) {
				mutex_unlock(&nbd_index_mutex);
				printk(KERN_ERR "nbd: failed to add new device\n");
				return ret;
			}
			nbd = idr_find(&nbd_index_idr, index);
		}
	}
	if (!nbd) {
		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
		       index);
		mutex_unlock(&nbd_index_mutex);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		if (index == -1)
			goto again;
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);

	mutex_lock(&nbd->config_lock);
	if (refcount_read(&nbd->config_refs)) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		if (index == -1)
			goto again;
		printk(KERN_ERR "nbd: nbd%d already in use\n", index);
		return -EBUSY;
	}
	if (WARN_ON(nbd->config)) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		return -EINVAL;
	}
	config = nbd->config = nbd_alloc_config();
	if (!nbd->config) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		printk(KERN_ERR "nbd: couldn't allocate config\n");
		return -ENOMEM;
	}
	refcount_set(&nbd->config_refs, 1);
	set_bit(NBD_BOUND, &config->runtime_flags);

	if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
		u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
		nbd_size_set(nbd, config->blksize,
			     div64_u64(bytes, config->blksize));
	}
	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
		u64 bsize =
			nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
		nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
	}
	if (info->attrs[NBD_ATTR_TIMEOUT]) {
		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
		nbd->tag_set.timeout = timeout * HZ;
		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
	}
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
		config->dead_conn_timeout *= HZ;
	}
	if (info->attrs[NBD_ATTR_SERVER_FLAGS])
		config->flags =
			nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			set_bit(NBD_DESTROY_ON_DISCONNECT,
				&config->runtime_flags);
			put_dev = true;
		}
		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
			set_bit(NBD_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		}
	}

	if (info->attrs[NBD_ATTR_SOCKETS]) {
		struct nlattr *attr;
		int rem, fd;

		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
				    rem) {
			struct nlattr *socks[NBD_SOCK_MAX+1];

			if (nla_type(attr) != NBD_SOCK_ITEM) {
				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
				ret = -EINVAL;
				goto out;
			}
			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
					       nbd_sock_policy, info->extack);
			if (ret != 0) {
				printk(KERN_ERR "nbd: error processing sock list\n");
				ret = -EINVAL;
				goto out;
			}
			if (!socks[NBD_SOCK_FD])
				continue;
			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
			ret = nbd_add_socket(nbd, fd, true);
			if (ret)
				goto out;
		}
	}
	ret = nbd_start_device(nbd);
out:
	mutex_unlock(&nbd->config_lock);
	if (!ret) {
		set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
		refcount_inc(&nbd->config_refs);
		nbd_connect_reply(info, nbd->index);
	}
	nbd_config_put(nbd);
	if (put_dev)
		nbd_put(nbd);
	return ret;
}

static void nbd_disconnect_and_put(struct nbd_device *nbd)
{
	mutex_lock(&nbd->config_lock);
	nbd_disconnect(nbd);
	nbd_clear_sock(nbd);
	mutex_unlock(&nbd->config_lock);
	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
}

static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd;
	int index;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (!info->attrs[NBD_ATTR_INDEX]) {
		printk(KERN_ERR "nbd: must specify an index to disconnect\n");
		return -EINVAL;
	}
	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	mutex_lock(&nbd_index_mutex);
	nbd = idr_find(&nbd_index_idr, index);
	if (!nbd) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
		       index);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);
	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		nbd_put(nbd);
		return 0;
	}
	nbd_disconnect_and_put(nbd);
	nbd_config_put(nbd);
	nbd_put(nbd);
	return 0;
}

static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
{
	struct nbd_device *nbd = NULL;
	struct nbd_config *config;
	int index;
	int ret = 0;
	bool put_dev = false;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (!info->attrs[NBD_ATTR_INDEX]) {
		printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
		return -EINVAL;
	}
	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
	mutex_lock(&nbd_index_mutex);
	nbd = idr_find(&nbd_index_idr, index);
	if (!nbd) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
		       index);
		return -EINVAL;
	}
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		printk(KERN_ERR "nbd: device at index %d is going down\n",
		       index);
		return -EINVAL;
	}
	mutex_unlock(&nbd_index_mutex);

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		dev_err(nbd_to_dev(nbd),
			"not configured, cannot reconfigure\n");
		nbd_put(nbd);
		return -EINVAL;
	}

	mutex_lock(&nbd->config_lock);
	config = nbd->config;
	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
	    !nbd->task_recv) {
		dev_err(nbd_to_dev(nbd),
			"not configured, cannot reconfigure\n");
		ret = -EINVAL;
		goto out;
	}

	if (info->attrs[NBD_ATTR_TIMEOUT]) {
		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
		nbd->tag_set.timeout = timeout * HZ;
		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
	}
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
		config->dead_conn_timeout *= HZ;
	}
	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
					      &config->runtime_flags))
				put_dev = true;
		} else {
			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
					       &config->runtime_flags))
				refcount_inc(&nbd->refs);
		}

		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
			set_bit(NBD_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		} else {
			clear_bit(NBD_DISCONNECT_ON_CLOSE,
				  &config->runtime_flags);
		}
	}

	if (info->attrs[NBD_ATTR_SOCKETS]) {
		struct nlattr *attr;
		int rem, fd;

		nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
				    rem) {
			struct nlattr *socks[NBD_SOCK_MAX+1];

			if (nla_type(attr) != NBD_SOCK_ITEM) {
				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
				ret = -EINVAL;
				goto out;
			}
			ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
					       nbd_sock_policy, info->extack);
			if (ret != 0) {
				printk(KERN_ERR "nbd: error processing sock list\n");
				ret = -EINVAL;
				goto out;
			}
			if (!socks[NBD_SOCK_FD])
				continue;
			fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
			ret = nbd_reconnect_socket(nbd, fd);
			if (ret) {
				if (ret == -ENOSPC)
					ret = 0;
				goto out;
			}
			dev_info(nbd_to_dev(nbd), "reconnected socket\n");
		}
	}
out:
	mutex_unlock(&nbd->config_lock);
	nbd_config_put(nbd);
	nbd_put(nbd);
	if (put_dev)
		nbd_put(nbd);
	return ret;
}

static const struct genl_ops nbd_connect_genl_ops[] = {
	{
		.cmd	= NBD_CMD_CONNECT,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_connect,
	},
	{
		.cmd	= NBD_CMD_DISCONNECT,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_disconnect,
	},
	{
		.cmd	= NBD_CMD_RECONFIGURE,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_reconfigure,
	},
	{
		.cmd	= NBD_CMD_STATUS,
		.policy	= nbd_attr_policy,
		.doit	= nbd_genl_status,
	},
};

static const struct genl_multicast_group nbd_mcast_grps[] = {
	{ .name = NBD_GENL_MCAST_GROUP_NAME, },
};

static struct genl_family nbd_genl_family __ro_after_init = {
	.hdrsize	= 0,
	.name		= NBD_GENL_FAMILY_NAME,
	.version	= NBD_GENL_VERSION,
	.module		= THIS_MODULE,
	.ops		= nbd_connect_genl_ops,
	.n_ops		= ARRAY_SIZE(nbd_connect_genl_ops),
	.maxattr	= NBD_ATTR_MAX,
	.mcgrps		= nbd_mcast_grps,
	.n_mcgrps	= ARRAY_SIZE(nbd_mcast_grps),
};

static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
{
	struct nlattr *dev_opt;
	u8 connected = 0;
	int ret;

	/* This is a little racy, but for status it's ok. The
	 * reason we don't take a ref here is because we can't
	 * take a ref in the index == -1 case as we would need
	 * to put under the nbd_index_mutex, which could
	 * deadlock if we are configured to remove ourselves
	 * once we're disconnected.
	 */
	if (refcount_read(&nbd->config_refs))
		connected = 1;
	dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
	if (!dev_opt)
		return -EMSGSIZE;
	ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
	if (ret)
		return -EMSGSIZE;
	ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
			 connected);
	if (ret)
		return -EMSGSIZE;
	nla_nest_end(reply, dev_opt);
	return 0;
}

static int status_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	return populate_nbd_status(nbd, (struct sk_buff *)data);
}

static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

nbd_total_devices : 1; 2088 2089 reply = genlmsg_new(msg_size, GFP_KERNEL); 2090 if (!reply) 2091 goto out; 2092 reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, 2093 NBD_CMD_STATUS); 2094 if (!reply_head) { 2095 nlmsg_free(reply); 2096 goto out; 2097 } 2098 2099 dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); 2100 if (index == -1) { 2101 ret = idr_for_each(&nbd_index_idr, &status_cb, reply); 2102 if (ret) { 2103 nlmsg_free(reply); 2104 goto out; 2105 } 2106 } else { 2107 struct nbd_device *nbd; 2108 nbd = idr_find(&nbd_index_idr, index); 2109 if (nbd) { 2110 ret = populate_nbd_status(nbd, reply); 2111 if (ret) { 2112 nlmsg_free(reply); 2113 goto out; 2114 } 2115 } 2116 } 2117 nla_nest_end(reply, dev_list); 2118 genlmsg_end(reply, reply_head); 2119 genlmsg_reply(reply, info); 2120 ret = 0; 2121 out: 2122 mutex_unlock(&nbd_index_mutex); 2123 return ret; 2124 } 2125 2126 static void nbd_connect_reply(struct genl_info *info, int index) 2127 { 2128 struct sk_buff *skb; 2129 void *msg_head; 2130 int ret; 2131 2132 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2133 if (!skb) 2134 return; 2135 msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, 2136 NBD_CMD_CONNECT); 2137 if (!msg_head) { 2138 nlmsg_free(skb); 2139 return; 2140 } 2141 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2142 if (ret) { 2143 nlmsg_free(skb); 2144 return; 2145 } 2146 genlmsg_end(skb, msg_head); 2147 genlmsg_reply(skb, info); 2148 } 2149 2150 static void nbd_mcast_index(int index) 2151 { 2152 struct sk_buff *skb; 2153 void *msg_head; 2154 int ret; 2155 2156 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2157 if (!skb) 2158 return; 2159 msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, 2160 NBD_CMD_LINK_DEAD); 2161 if (!msg_head) { 2162 nlmsg_free(skb); 2163 return; 2164 } 2165 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2166 if (ret) { 2167 nlmsg_free(skb); 2168 return; 2169 } 2170 genlmsg_end(skb, msg_head); 2171 genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); 2172 } 2173 2174 static void nbd_dead_link_work(struct work_struct *work) 2175 { 2176 struct link_dead_args *args = container_of(work, struct link_dead_args, 2177 work); 2178 nbd_mcast_index(args->index); 2179 kfree(args); 2180 } 2181 2182 static int __init nbd_init(void) 2183 { 2184 int i; 2185 2186 BUILD_BUG_ON(sizeof(struct nbd_request) != 28); 2187 2188 if (max_part < 0) { 2189 printk(KERN_ERR "nbd: max_part must be >= 0\n"); 2190 return -EINVAL; 2191 } 2192 2193 part_shift = 0; 2194 if (max_part > 0) { 2195 part_shift = fls(max_part); 2196 2197 /* 2198 * Adjust max_part according to part_shift as it is exported 2199 * to user space so that user can know the max number of 2200 * partition kernel should be able to manage. 2201 * 2202 * Note that -1 is required because partition 0 is reserved 2203 * for the whole disk. 
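		 *
		 * Worked example (illustrative, not part of the original
		 * comment): with the default max_part = 16, fls(16) = 5,
		 * so part_shift = 5 and max_part becomes (1UL << 5) - 1 = 31;
		 * each device then occupies 1 << part_shift = 32 minor
		 * numbers, one for the whole disk plus up to 31 partitions.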
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;
	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
					 WQ_UNBOUND, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	if (genl_register_family(&nbd_genl_family)) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		destroy_workqueue(recv_workqueue);
		return -EINVAL;
	}
	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	list_add_tail(&nbd->list, list);
	return 0;
}

static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	idr_destroy(&nbd_index_idr);
	genl_unregister_family(&nbd_genl_family);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
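
/*
 * Example module load (the parameter values below are illustrative only):
 *
 *	modprobe nbd nbds_max=4 max_part=8
 *
 * This creates /dev/nbd0 through /dev/nbd3; a userspace client (for
 * example nbd-client, or the generic netlink interface above) must still
 * hand the driver a connected socket before a device carries any data.
 */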