1 /* 2 * Network block device - make block devices work over TCP 3 * 4 * Note that you can not swap over this thing, yet. Seems to work but 5 * deadlocks sometimes - you can not swap over TCP in general. 6 * 7 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz> 8 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com> 9 * 10 * This file is released under GPLv2 or later. 11 * 12 * (part of code stolen from loop.c) 13 */ 14 15 #include <linux/major.h> 16 17 #include <linux/blkdev.h> 18 #include <linux/module.h> 19 #include <linux/init.h> 20 #include <linux/sched.h> 21 #include <linux/sched/mm.h> 22 #include <linux/fs.h> 23 #include <linux/bio.h> 24 #include <linux/stat.h> 25 #include <linux/errno.h> 26 #include <linux/file.h> 27 #include <linux/ioctl.h> 28 #include <linux/mutex.h> 29 #include <linux/compiler.h> 30 #include <linux/err.h> 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <net/sock.h> 34 #include <linux/net.h> 35 #include <linux/kthread.h> 36 #include <linux/types.h> 37 #include <linux/debugfs.h> 38 #include <linux/blk-mq.h> 39 40 #include <linux/uaccess.h> 41 #include <asm/types.h> 42 43 #include <linux/nbd.h> 44 #include <linux/nbd-netlink.h> 45 #include <net/genetlink.h> 46 47 static DEFINE_IDR(nbd_index_idr); 48 static DEFINE_MUTEX(nbd_index_mutex); 49 static int nbd_total_devices = 0; 50 51 struct nbd_sock { 52 struct socket *sock; 53 struct mutex tx_lock; 54 struct request *pending; 55 int sent; 56 bool dead; 57 int fallback_index; 58 int cookie; 59 }; 60 61 struct recv_thread_args { 62 struct work_struct work; 63 struct nbd_device *nbd; 64 int index; 65 }; 66 67 struct link_dead_args { 68 struct work_struct work; 69 int index; 70 }; 71 72 #define NBD_TIMEDOUT 0 73 #define NBD_DISCONNECT_REQUESTED 1 74 #define NBD_DISCONNECTED 2 75 #define NBD_HAS_PID_FILE 3 76 #define NBD_HAS_CONFIG_REF 4 77 #define NBD_BOUND 5 78 #define NBD_DESTROY_ON_DISCONNECT 6 79 #define NBD_DISCONNECT_ON_CLOSE 7 80 81 struct nbd_config { 82 u32 flags; 83 unsigned long runtime_flags; 84 u64 dead_conn_timeout; 85 86 struct nbd_sock **socks; 87 int num_connections; 88 atomic_t live_connections; 89 wait_queue_head_t conn_wait; 90 91 atomic_t recv_threads; 92 wait_queue_head_t recv_wq; 93 loff_t blksize; 94 loff_t bytesize; 95 #if IS_ENABLED(CONFIG_DEBUG_FS) 96 struct dentry *dbg_dir; 97 #endif 98 }; 99 100 struct nbd_device { 101 struct blk_mq_tag_set tag_set; 102 103 int index; 104 refcount_t config_refs; 105 refcount_t refs; 106 struct nbd_config *config; 107 struct mutex config_lock; 108 struct gendisk *disk; 109 110 struct list_head list; 111 struct task_struct *task_recv; 112 struct task_struct *task_setup; 113 }; 114 115 #define NBD_CMD_REQUEUED 1 116 117 struct nbd_cmd { 118 struct nbd_device *nbd; 119 struct mutex lock; 120 int index; 121 int cookie; 122 blk_status_t status; 123 unsigned long flags; 124 u32 cmd_cookie; 125 }; 126 127 #if IS_ENABLED(CONFIG_DEBUG_FS) 128 static struct dentry *nbd_dbg_dir; 129 #endif 130 131 #define nbd_name(nbd) ((nbd)->disk->disk_name) 132 133 #define NBD_MAGIC 0x68797548 134 135 static unsigned int nbds_max = 16; 136 static int max_part = 16; 137 static struct workqueue_struct *recv_workqueue; 138 static int part_shift; 139 140 static int nbd_dev_dbg_init(struct nbd_device *nbd); 141 static void nbd_dev_dbg_close(struct nbd_device *nbd); 142 static void nbd_config_put(struct nbd_device *nbd); 143 static void nbd_connect_reply(struct genl_info *info, int index); 144 static int nbd_genl_status(struct sk_buff *skb, struct genl_info 
*info); 145 static void nbd_dead_link_work(struct work_struct *work); 146 static void nbd_disconnect_and_put(struct nbd_device *nbd); 147 148 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 149 { 150 return disk_to_dev(nbd->disk); 151 } 152 153 static void nbd_requeue_cmd(struct nbd_cmd *cmd) 154 { 155 struct request *req = blk_mq_rq_from_pdu(cmd); 156 157 if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) 158 blk_mq_requeue_request(req, true); 159 } 160 161 #define NBD_COOKIE_BITS 32 162 163 static u64 nbd_cmd_handle(struct nbd_cmd *cmd) 164 { 165 struct request *req = blk_mq_rq_from_pdu(cmd); 166 u32 tag = blk_mq_unique_tag(req); 167 u64 cookie = cmd->cmd_cookie; 168 169 return (cookie << NBD_COOKIE_BITS) | tag; 170 } 171 172 static u32 nbd_handle_to_tag(u64 handle) 173 { 174 return (u32)handle; 175 } 176 177 static u32 nbd_handle_to_cookie(u64 handle) 178 { 179 return (u32)(handle >> NBD_COOKIE_BITS); 180 } 181 182 static const char *nbdcmd_to_ascii(int cmd) 183 { 184 switch (cmd) { 185 case NBD_CMD_READ: return "read"; 186 case NBD_CMD_WRITE: return "write"; 187 case NBD_CMD_DISC: return "disconnect"; 188 case NBD_CMD_FLUSH: return "flush"; 189 case NBD_CMD_TRIM: return "trim/discard"; 190 } 191 return "invalid"; 192 } 193 194 static ssize_t pid_show(struct device *dev, 195 struct device_attribute *attr, char *buf) 196 { 197 struct gendisk *disk = dev_to_disk(dev); 198 struct nbd_device *nbd = (struct nbd_device *)disk->private_data; 199 200 return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); 201 } 202 203 static const struct device_attribute pid_attr = { 204 .attr = { .name = "pid", .mode = 0444}, 205 .show = pid_show, 206 }; 207 208 static void nbd_dev_remove(struct nbd_device *nbd) 209 { 210 struct gendisk *disk = nbd->disk; 211 struct request_queue *q; 212 213 if (disk) { 214 q = disk->queue; 215 del_gendisk(disk); 216 blk_cleanup_queue(q); 217 blk_mq_free_tag_set(&nbd->tag_set); 218 disk->private_data = NULL; 219 put_disk(disk); 220 } 221 kfree(nbd); 222 } 223 224 static void nbd_put(struct nbd_device *nbd) 225 { 226 if (refcount_dec_and_mutex_lock(&nbd->refs, 227 &nbd_index_mutex)) { 228 idr_remove(&nbd_index_idr, nbd->index); 229 mutex_unlock(&nbd_index_mutex); 230 nbd_dev_remove(nbd); 231 } 232 } 233 234 static int nbd_disconnected(struct nbd_config *config) 235 { 236 return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || 237 test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 238 } 239 240 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, 241 int notify) 242 { 243 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { 244 struct link_dead_args *args; 245 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); 246 if (args) { 247 INIT_WORK(&args->work, nbd_dead_link_work); 248 args->index = nbd->index; 249 queue_work(system_wq, &args->work); 250 } 251 } 252 if (!nsock->dead) { 253 kernel_sock_shutdown(nsock->sock, SHUT_RDWR); 254 if (atomic_dec_return(&nbd->config->live_connections) == 0) { 255 if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, 256 &nbd->config->runtime_flags)) { 257 set_bit(NBD_DISCONNECTED, 258 &nbd->config->runtime_flags); 259 dev_info(nbd_to_dev(nbd), 260 "Disconnected due to user request.\n"); 261 } 262 } 263 } 264 nsock->dead = true; 265 nsock->pending = NULL; 266 nsock->sent = 0; 267 } 268 269 static void nbd_size_clear(struct nbd_device *nbd) 270 { 271 if (nbd->config->bytesize) { 272 set_capacity(nbd->disk, 0); 273 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 274 } 275 
} 276 277 static void nbd_size_update(struct nbd_device *nbd) 278 { 279 struct nbd_config *config = nbd->config; 280 struct block_device *bdev = bdget_disk(nbd->disk, 0); 281 282 if (config->flags & NBD_FLAG_SEND_TRIM) { 283 nbd->disk->queue->limits.discard_granularity = config->blksize; 284 nbd->disk->queue->limits.discard_alignment = config->blksize; 285 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 286 } 287 blk_queue_logical_block_size(nbd->disk->queue, config->blksize); 288 blk_queue_physical_block_size(nbd->disk->queue, config->blksize); 289 set_capacity(nbd->disk, config->bytesize >> 9); 290 if (bdev) { 291 if (bdev->bd_disk) 292 bd_set_size(bdev, config->bytesize); 293 else 294 bdev->bd_invalidated = 1; 295 bdput(bdev); 296 } 297 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 298 } 299 300 static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, 301 loff_t nr_blocks) 302 { 303 struct nbd_config *config = nbd->config; 304 config->blksize = blocksize; 305 config->bytesize = blocksize * nr_blocks; 306 if (nbd->task_recv != NULL) 307 nbd_size_update(nbd); 308 } 309 310 static void nbd_complete_rq(struct request *req) 311 { 312 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 313 314 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req, 315 cmd->status ? "failed" : "done"); 316 317 blk_mq_end_request(req, cmd->status); 318 } 319 320 /* 321 * Forcibly shutdown the socket causing all listeners to error 322 */ 323 static void sock_shutdown(struct nbd_device *nbd) 324 { 325 struct nbd_config *config = nbd->config; 326 int i; 327 328 if (config->num_connections == 0) 329 return; 330 if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) 331 return; 332 333 for (i = 0; i < config->num_connections; i++) { 334 struct nbd_sock *nsock = config->socks[i]; 335 mutex_lock(&nsock->tx_lock); 336 nbd_mark_nsock_dead(nbd, nsock, 0); 337 mutex_unlock(&nsock->tx_lock); 338 } 339 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); 340 } 341 342 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, 343 bool reserved) 344 { 345 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 346 struct nbd_device *nbd = cmd->nbd; 347 struct nbd_config *config; 348 349 if (!refcount_inc_not_zero(&nbd->config_refs)) { 350 cmd->status = BLK_STS_TIMEOUT; 351 goto done; 352 } 353 config = nbd->config; 354 355 if (!mutex_trylock(&cmd->lock)) 356 return BLK_EH_RESET_TIMER; 357 358 if (config->num_connections > 1) { 359 dev_err_ratelimited(nbd_to_dev(nbd), 360 "Connection timed out, retrying (%d/%d alive)\n", 361 atomic_read(&config->live_connections), 362 config->num_connections); 363 /* 364 * Hooray we have more connections, requeue this IO, the submit 365 * path will put it on a real connection. 366 */ 367 if (config->socks && config->num_connections > 1) { 368 if (cmd->index < config->num_connections) { 369 struct nbd_sock *nsock = 370 config->socks[cmd->index]; 371 mutex_lock(&nsock->tx_lock); 372 /* We can have multiple outstanding requests, so 373 * we don't want to mark the nsock dead if we've 374 * already reconnected with a new socket, so 375 * only mark it dead if its the same socket we 376 * were sent out on. 
377 */ 378 if (cmd->cookie == nsock->cookie) 379 nbd_mark_nsock_dead(nbd, nsock, 1); 380 mutex_unlock(&nsock->tx_lock); 381 } 382 mutex_unlock(&cmd->lock); 383 nbd_requeue_cmd(cmd); 384 nbd_config_put(nbd); 385 return BLK_EH_DONE; 386 } 387 } else { 388 dev_err_ratelimited(nbd_to_dev(nbd), 389 "Connection timed out\n"); 390 } 391 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 392 cmd->status = BLK_STS_IOERR; 393 mutex_unlock(&cmd->lock); 394 sock_shutdown(nbd); 395 nbd_config_put(nbd); 396 done: 397 blk_mq_complete_request(req); 398 return BLK_EH_DONE; 399 } 400 401 /* 402 * Send or receive packet. 403 */ 404 static int sock_xmit(struct nbd_device *nbd, int index, int send, 405 struct iov_iter *iter, int msg_flags, int *sent) 406 { 407 struct nbd_config *config = nbd->config; 408 struct socket *sock = config->socks[index]->sock; 409 int result; 410 struct msghdr msg; 411 unsigned int noreclaim_flag; 412 413 if (unlikely(!sock)) { 414 dev_err_ratelimited(disk_to_dev(nbd->disk), 415 "Attempted %s on closed socket in sock_xmit\n", 416 (send ? "send" : "recv")); 417 return -EINVAL; 418 } 419 420 msg.msg_iter = *iter; 421 422 noreclaim_flag = memalloc_noreclaim_save(); 423 do { 424 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; 425 msg.msg_name = NULL; 426 msg.msg_namelen = 0; 427 msg.msg_control = NULL; 428 msg.msg_controllen = 0; 429 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 430 431 if (send) 432 result = sock_sendmsg(sock, &msg); 433 else 434 result = sock_recvmsg(sock, &msg, msg.msg_flags); 435 436 if (result <= 0) { 437 if (result == 0) 438 result = -EPIPE; /* short read */ 439 break; 440 } 441 if (sent) 442 *sent += result; 443 } while (msg_data_left(&msg)); 444 445 memalloc_noreclaim_restore(noreclaim_flag); 446 447 return result; 448 } 449 450 /* 451 * Different settings for sk->sk_sndtimeo can result in different return values 452 * if there is a signal pending when we enter sendmsg, because reasons? 453 */ 454 static inline int was_interrupted(int result) 455 { 456 return result == -ERESTARTSYS || result == -EINTR; 457 } 458 459 /* always call with the tx_lock held */ 460 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) 461 { 462 struct request *req = blk_mq_rq_from_pdu(cmd); 463 struct nbd_config *config = nbd->config; 464 struct nbd_sock *nsock = config->socks[index]; 465 int result; 466 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; 467 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 468 struct iov_iter from; 469 unsigned long size = blk_rq_bytes(req); 470 struct bio *bio; 471 u64 handle; 472 u32 type; 473 u32 nbd_cmd_flags = 0; 474 int sent = nsock->sent, skip = 0; 475 476 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); 477 478 switch (req_op(req)) { 479 case REQ_OP_DISCARD: 480 type = NBD_CMD_TRIM; 481 break; 482 case REQ_OP_FLUSH: 483 type = NBD_CMD_FLUSH; 484 break; 485 case REQ_OP_WRITE: 486 type = NBD_CMD_WRITE; 487 break; 488 case REQ_OP_READ: 489 type = NBD_CMD_READ; 490 break; 491 default: 492 return -EIO; 493 } 494 495 if (rq_data_dir(req) == WRITE && 496 (config->flags & NBD_FLAG_READ_ONLY)) { 497 dev_err_ratelimited(disk_to_dev(nbd->disk), 498 "Write on read-only\n"); 499 return -EIO; 500 } 501 502 if (req->cmd_flags & REQ_FUA) 503 nbd_cmd_flags |= NBD_CMD_FLAG_FUA; 504 505 /* We did a partial send previously, and we at least sent the whole 506 * request struct, so just go and send the rest of the pages in the 507 * request. 
508 */ 509 if (sent) { 510 if (sent >= sizeof(request)) { 511 skip = sent - sizeof(request); 512 goto send_pages; 513 } 514 iov_iter_advance(&from, sent); 515 } else { 516 cmd->cmd_cookie++; 517 } 518 cmd->index = index; 519 cmd->cookie = nsock->cookie; 520 request.type = htonl(type | nbd_cmd_flags); 521 if (type != NBD_CMD_FLUSH) { 522 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 523 request.len = htonl(size); 524 } 525 handle = nbd_cmd_handle(cmd); 526 memcpy(request.handle, &handle, sizeof(handle)); 527 528 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 529 req, nbdcmd_to_ascii(type), 530 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); 531 result = sock_xmit(nbd, index, 1, &from, 532 (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); 533 if (result <= 0) { 534 if (was_interrupted(result)) { 535 /* If we havne't sent anything we can just return BUSY, 536 * however if we have sent something we need to make 537 * sure we only allow this req to be sent until we are 538 * completely done. 539 */ 540 if (sent) { 541 nsock->pending = req; 542 nsock->sent = sent; 543 } 544 set_bit(NBD_CMD_REQUEUED, &cmd->flags); 545 return BLK_STS_RESOURCE; 546 } 547 dev_err_ratelimited(disk_to_dev(nbd->disk), 548 "Send control failed (result %d)\n", result); 549 return -EAGAIN; 550 } 551 send_pages: 552 if (type != NBD_CMD_WRITE) 553 goto out; 554 555 bio = req->bio; 556 while (bio) { 557 struct bio *next = bio->bi_next; 558 struct bvec_iter iter; 559 struct bio_vec bvec; 560 561 bio_for_each_segment(bvec, bio, iter) { 562 bool is_last = !next && bio_iter_last(bvec, iter); 563 int flags = is_last ? 0 : MSG_MORE; 564 565 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", 566 req, bvec.bv_len); 567 iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len); 568 if (skip) { 569 if (skip >= iov_iter_count(&from)) { 570 skip -= iov_iter_count(&from); 571 continue; 572 } 573 iov_iter_advance(&from, skip); 574 skip = 0; 575 } 576 result = sock_xmit(nbd, index, 1, &from, flags, &sent); 577 if (result <= 0) { 578 if (was_interrupted(result)) { 579 /* We've already sent the header, we 580 * have no choice but to set pending and 581 * return BUSY. 582 */ 583 nsock->pending = req; 584 nsock->sent = sent; 585 set_bit(NBD_CMD_REQUEUED, &cmd->flags); 586 return BLK_STS_RESOURCE; 587 } 588 dev_err(disk_to_dev(nbd->disk), 589 "Send data failed (result %d)\n", 590 result); 591 return -EAGAIN; 592 } 593 /* 594 * The completion might already have come in, 595 * so break for the last one instead of letting 596 * the iterator do it. This prevents use-after-free 597 * of the bio. 
598 */ 599 if (is_last) 600 break; 601 } 602 bio = next; 603 } 604 out: 605 nsock->pending = NULL; 606 nsock->sent = 0; 607 return 0; 608 } 609 610 /* NULL returned = something went wrong, inform userspace */ 611 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) 612 { 613 struct nbd_config *config = nbd->config; 614 int result; 615 struct nbd_reply reply; 616 struct nbd_cmd *cmd; 617 struct request *req = NULL; 618 u64 handle; 619 u16 hwq; 620 u32 tag; 621 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; 622 struct iov_iter to; 623 int ret = 0; 624 625 reply.magic = 0; 626 iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); 627 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 628 if (result <= 0) { 629 if (!nbd_disconnected(config)) 630 dev_err(disk_to_dev(nbd->disk), 631 "Receive control failed (result %d)\n", result); 632 return ERR_PTR(result); 633 } 634 635 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 636 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", 637 (unsigned long)ntohl(reply.magic)); 638 return ERR_PTR(-EPROTO); 639 } 640 641 memcpy(&handle, reply.handle, sizeof(handle)); 642 tag = nbd_handle_to_tag(handle); 643 hwq = blk_mq_unique_tag_to_hwq(tag); 644 if (hwq < nbd->tag_set.nr_hw_queues) 645 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], 646 blk_mq_unique_tag_to_tag(tag)); 647 if (!req || !blk_mq_request_started(req)) { 648 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n", 649 tag, req); 650 return ERR_PTR(-ENOENT); 651 } 652 cmd = blk_mq_rq_to_pdu(req); 653 654 mutex_lock(&cmd->lock); 655 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { 656 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", 657 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); 658 ret = -ENOENT; 659 goto out; 660 } 661 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { 662 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", 663 req); 664 ret = -ENOENT; 665 goto out; 666 } 667 if (ntohl(reply.error)) { 668 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 669 ntohl(reply.error)); 670 cmd->status = BLK_STS_IOERR; 671 goto out; 672 } 673 674 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 675 if (rq_data_dir(req) != WRITE) { 676 struct req_iterator iter; 677 struct bio_vec bvec; 678 679 rq_for_each_segment(bvec, req, iter) { 680 iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); 681 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 682 if (result <= 0) { 683 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 684 result); 685 /* 686 * If we've disconnected or we only have 1 687 * connection then we need to make sure we 688 * complete this request, otherwise error out 689 * and let the timeout stuff handle resubmitting 690 * this request onto another connection. 691 */ 692 if (nbd_disconnected(config) || 693 config->num_connections <= 1) { 694 cmd->status = BLK_STS_IOERR; 695 goto out; 696 } 697 ret = -EIO; 698 goto out; 699 } 700 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 701 req, bvec.bv_len); 702 } 703 } 704 out: 705 mutex_unlock(&cmd->lock); 706 return ret ? 
ERR_PTR(ret) : cmd; 707 } 708 709 static void recv_work(struct work_struct *work) 710 { 711 struct recv_thread_args *args = container_of(work, 712 struct recv_thread_args, 713 work); 714 struct nbd_device *nbd = args->nbd; 715 struct nbd_config *config = nbd->config; 716 struct nbd_cmd *cmd; 717 718 while (1) { 719 cmd = nbd_read_stat(nbd, args->index); 720 if (IS_ERR(cmd)) { 721 struct nbd_sock *nsock = config->socks[args->index]; 722 723 mutex_lock(&nsock->tx_lock); 724 nbd_mark_nsock_dead(nbd, nsock, 1); 725 mutex_unlock(&nsock->tx_lock); 726 break; 727 } 728 729 blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); 730 } 731 atomic_dec(&config->recv_threads); 732 wake_up(&config->recv_wq); 733 nbd_config_put(nbd); 734 kfree(args); 735 } 736 737 static bool nbd_clear_req(struct request *req, void *data, bool reserved) 738 { 739 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 740 741 cmd->status = BLK_STS_IOERR; 742 blk_mq_complete_request(req); 743 return true; 744 } 745 746 static void nbd_clear_que(struct nbd_device *nbd) 747 { 748 blk_mq_quiesce_queue(nbd->disk->queue); 749 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); 750 blk_mq_unquiesce_queue(nbd->disk->queue); 751 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); 752 } 753 754 static int find_fallback(struct nbd_device *nbd, int index) 755 { 756 struct nbd_config *config = nbd->config; 757 int new_index = -1; 758 struct nbd_sock *nsock = config->socks[index]; 759 int fallback = nsock->fallback_index; 760 761 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 762 return new_index; 763 764 if (config->num_connections <= 1) { 765 dev_err_ratelimited(disk_to_dev(nbd->disk), 766 "Attempted send on invalid socket\n"); 767 return new_index; 768 } 769 770 if (fallback >= 0 && fallback < config->num_connections && 771 !config->socks[fallback]->dead) 772 return fallback; 773 774 if (nsock->fallback_index < 0 || 775 nsock->fallback_index >= config->num_connections || 776 config->socks[nsock->fallback_index]->dead) { 777 int i; 778 for (i = 0; i < config->num_connections; i++) { 779 if (i == index) 780 continue; 781 if (!config->socks[i]->dead) { 782 new_index = i; 783 break; 784 } 785 } 786 nsock->fallback_index = new_index; 787 if (new_index < 0) { 788 dev_err_ratelimited(disk_to_dev(nbd->disk), 789 "Dead connection, failed to find a fallback\n"); 790 return new_index; 791 } 792 } 793 new_index = nsock->fallback_index; 794 return new_index; 795 } 796 797 static int wait_for_reconnect(struct nbd_device *nbd) 798 { 799 struct nbd_config *config = nbd->config; 800 if (!config->dead_conn_timeout) 801 return 0; 802 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 803 return 0; 804 return wait_event_timeout(config->conn_wait, 805 atomic_read(&config->live_connections) > 0, 806 config->dead_conn_timeout) > 0; 807 } 808 809 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 810 { 811 struct request *req = blk_mq_rq_from_pdu(cmd); 812 struct nbd_device *nbd = cmd->nbd; 813 struct nbd_config *config; 814 struct nbd_sock *nsock; 815 int ret; 816 817 if (!refcount_inc_not_zero(&nbd->config_refs)) { 818 dev_err_ratelimited(disk_to_dev(nbd->disk), 819 "Socks array is empty\n"); 820 blk_mq_start_request(req); 821 return -EINVAL; 822 } 823 config = nbd->config; 824 825 if (index >= config->num_connections) { 826 dev_err_ratelimited(disk_to_dev(nbd->disk), 827 "Attempted send on invalid socket\n"); 828 nbd_config_put(nbd); 829 blk_mq_start_request(req); 830 return -EINVAL; 831 } 832 cmd->status = BLK_STS_OK; 833 again: 834 
nsock = config->socks[index]; 835 mutex_lock(&nsock->tx_lock); 836 if (nsock->dead) { 837 int old_index = index; 838 index = find_fallback(nbd, index); 839 mutex_unlock(&nsock->tx_lock); 840 if (index < 0) { 841 if (wait_for_reconnect(nbd)) { 842 index = old_index; 843 goto again; 844 } 845 /* All the sockets should already be down at this point, 846 * we just want to make sure that DISCONNECTED is set so 847 * any requests that come in that were queue'ed waiting 848 * for the reconnect timer don't trigger the timer again 849 * and instead just error out. 850 */ 851 sock_shutdown(nbd); 852 nbd_config_put(nbd); 853 blk_mq_start_request(req); 854 return -EIO; 855 } 856 goto again; 857 } 858 859 /* Handle the case that we have a pending request that was partially 860 * transmitted that _has_ to be serviced first. We need to call requeue 861 * here so that it gets put _after_ the request that is already on the 862 * dispatch list. 863 */ 864 blk_mq_start_request(req); 865 if (unlikely(nsock->pending && nsock->pending != req)) { 866 nbd_requeue_cmd(cmd); 867 ret = 0; 868 goto out; 869 } 870 /* 871 * Some failures are related to the link going down, so anything that 872 * returns EAGAIN can be retried on a different socket. 873 */ 874 ret = nbd_send_cmd(nbd, cmd, index); 875 if (ret == -EAGAIN) { 876 dev_err_ratelimited(disk_to_dev(nbd->disk), 877 "Request send failed, requeueing\n"); 878 nbd_mark_nsock_dead(nbd, nsock, 1); 879 nbd_requeue_cmd(cmd); 880 ret = 0; 881 } 882 out: 883 mutex_unlock(&nsock->tx_lock); 884 nbd_config_put(nbd); 885 return ret; 886 } 887 888 static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, 889 const struct blk_mq_queue_data *bd) 890 { 891 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 892 int ret; 893 894 /* 895 * Since we look at the bio's to send the request over the network we 896 * need to make sure the completion work doesn't mark this request done 897 * before we are done doing our send. This keeps us from dereferencing 898 * freed data if we have particularly fast completions (ie we get the 899 * completion before we exit sock_xmit on the last bvec) or in the case 900 * that the server is misbehaving (or there was an error) before we're 901 * done sending everything over the wire. 902 */ 903 mutex_lock(&cmd->lock); 904 clear_bit(NBD_CMD_REQUEUED, &cmd->flags); 905 906 /* We can be called directly from the user space process, which means we 907 * could possibly have signals pending so our sendmsg will fail. In 908 * this case we need to return that we are busy, otherwise error out as 909 * appropriate. 
910 */ 911 ret = nbd_handle_cmd(cmd, hctx->queue_num); 912 if (ret < 0) 913 ret = BLK_STS_IOERR; 914 else if (!ret) 915 ret = BLK_STS_OK; 916 mutex_unlock(&cmd->lock); 917 918 return ret; 919 } 920 921 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 922 bool netlink) 923 { 924 struct nbd_config *config = nbd->config; 925 struct socket *sock; 926 struct nbd_sock **socks; 927 struct nbd_sock *nsock; 928 int err; 929 930 sock = sockfd_lookup(arg, &err); 931 if (!sock) 932 return err; 933 934 if (!netlink && !nbd->task_setup && 935 !test_bit(NBD_BOUND, &config->runtime_flags)) 936 nbd->task_setup = current; 937 938 if (!netlink && 939 (nbd->task_setup != current || 940 test_bit(NBD_BOUND, &config->runtime_flags))) { 941 dev_err(disk_to_dev(nbd->disk), 942 "Device being setup by another task"); 943 sockfd_put(sock); 944 return -EBUSY; 945 } 946 947 socks = krealloc(config->socks, (config->num_connections + 1) * 948 sizeof(struct nbd_sock *), GFP_KERNEL); 949 if (!socks) { 950 sockfd_put(sock); 951 return -ENOMEM; 952 } 953 nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); 954 if (!nsock) { 955 sockfd_put(sock); 956 return -ENOMEM; 957 } 958 959 config->socks = socks; 960 961 nsock->fallback_index = -1; 962 nsock->dead = false; 963 mutex_init(&nsock->tx_lock); 964 nsock->sock = sock; 965 nsock->pending = NULL; 966 nsock->sent = 0; 967 nsock->cookie = 0; 968 socks[config->num_connections++] = nsock; 969 atomic_inc(&config->live_connections); 970 971 return 0; 972 } 973 974 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) 975 { 976 struct nbd_config *config = nbd->config; 977 struct socket *sock, *old; 978 struct recv_thread_args *args; 979 int i; 980 int err; 981 982 sock = sockfd_lookup(arg, &err); 983 if (!sock) 984 return err; 985 986 args = kzalloc(sizeof(*args), GFP_KERNEL); 987 if (!args) { 988 sockfd_put(sock); 989 return -ENOMEM; 990 } 991 992 for (i = 0; i < config->num_connections; i++) { 993 struct nbd_sock *nsock = config->socks[i]; 994 995 if (!nsock->dead) 996 continue; 997 998 mutex_lock(&nsock->tx_lock); 999 if (!nsock->dead) { 1000 mutex_unlock(&nsock->tx_lock); 1001 continue; 1002 } 1003 sk_set_memalloc(sock->sk); 1004 if (nbd->tag_set.timeout) 1005 sock->sk->sk_sndtimeo = nbd->tag_set.timeout; 1006 atomic_inc(&config->recv_threads); 1007 refcount_inc(&nbd->config_refs); 1008 old = nsock->sock; 1009 nsock->fallback_index = -1; 1010 nsock->sock = sock; 1011 nsock->dead = false; 1012 INIT_WORK(&args->work, recv_work); 1013 args->index = i; 1014 args->nbd = nbd; 1015 nsock->cookie++; 1016 mutex_unlock(&nsock->tx_lock); 1017 sockfd_put(old); 1018 1019 clear_bit(NBD_DISCONNECTED, &config->runtime_flags); 1020 1021 /* We take the tx_mutex in an error path in the recv_work, so we 1022 * need to queue_work outside of the tx_mutex. 
1023 */ 1024 queue_work(recv_workqueue, &args->work); 1025 1026 atomic_inc(&config->live_connections); 1027 wake_up(&config->conn_wait); 1028 return 0; 1029 } 1030 sockfd_put(sock); 1031 kfree(args); 1032 return -ENOSPC; 1033 } 1034 1035 static void nbd_bdev_reset(struct block_device *bdev) 1036 { 1037 if (bdev->bd_openers > 1) 1038 return; 1039 bd_set_size(bdev, 0); 1040 } 1041 1042 static void nbd_parse_flags(struct nbd_device *nbd) 1043 { 1044 struct nbd_config *config = nbd->config; 1045 if (config->flags & NBD_FLAG_READ_ONLY) 1046 set_disk_ro(nbd->disk, true); 1047 else 1048 set_disk_ro(nbd->disk, false); 1049 if (config->flags & NBD_FLAG_SEND_TRIM) 1050 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1051 if (config->flags & NBD_FLAG_SEND_FLUSH) { 1052 if (config->flags & NBD_FLAG_SEND_FUA) 1053 blk_queue_write_cache(nbd->disk->queue, true, true); 1054 else 1055 blk_queue_write_cache(nbd->disk->queue, true, false); 1056 } 1057 else 1058 blk_queue_write_cache(nbd->disk->queue, false, false); 1059 } 1060 1061 static void send_disconnects(struct nbd_device *nbd) 1062 { 1063 struct nbd_config *config = nbd->config; 1064 struct nbd_request request = { 1065 .magic = htonl(NBD_REQUEST_MAGIC), 1066 .type = htonl(NBD_CMD_DISC), 1067 }; 1068 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 1069 struct iov_iter from; 1070 int i, ret; 1071 1072 for (i = 0; i < config->num_connections; i++) { 1073 struct nbd_sock *nsock = config->socks[i]; 1074 1075 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); 1076 mutex_lock(&nsock->tx_lock); 1077 ret = sock_xmit(nbd, i, 1, &from, 0, NULL); 1078 if (ret <= 0) 1079 dev_err(disk_to_dev(nbd->disk), 1080 "Send disconnect failed %d\n", ret); 1081 mutex_unlock(&nsock->tx_lock); 1082 } 1083 } 1084 1085 static int nbd_disconnect(struct nbd_device *nbd) 1086 { 1087 struct nbd_config *config = nbd->config; 1088 1089 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); 1090 set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 1091 send_disconnects(nbd); 1092 return 0; 1093 } 1094 1095 static void nbd_clear_sock(struct nbd_device *nbd) 1096 { 1097 sock_shutdown(nbd); 1098 nbd_clear_que(nbd); 1099 nbd->task_setup = NULL; 1100 } 1101 1102 static void nbd_config_put(struct nbd_device *nbd) 1103 { 1104 if (refcount_dec_and_mutex_lock(&nbd->config_refs, 1105 &nbd->config_lock)) { 1106 struct nbd_config *config = nbd->config; 1107 nbd_dev_dbg_close(nbd); 1108 nbd_size_clear(nbd); 1109 if (test_and_clear_bit(NBD_HAS_PID_FILE, 1110 &config->runtime_flags)) 1111 device_remove_file(disk_to_dev(nbd->disk), &pid_attr); 1112 nbd->task_recv = NULL; 1113 nbd_clear_sock(nbd); 1114 if (config->num_connections) { 1115 int i; 1116 for (i = 0; i < config->num_connections; i++) { 1117 sockfd_put(config->socks[i]->sock); 1118 kfree(config->socks[i]); 1119 } 1120 kfree(config->socks); 1121 } 1122 kfree(nbd->config); 1123 nbd->config = NULL; 1124 1125 nbd->tag_set.timeout = 0; 1126 nbd->disk->queue->limits.discard_granularity = 0; 1127 nbd->disk->queue->limits.discard_alignment = 0; 1128 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 1129 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1130 1131 mutex_unlock(&nbd->config_lock); 1132 nbd_put(nbd); 1133 module_put(THIS_MODULE); 1134 } 1135 } 1136 1137 static int nbd_start_device(struct nbd_device *nbd) 1138 { 1139 struct nbd_config *config = nbd->config; 1140 int num_connections = config->num_connections; 1141 int error = 0, i; 1142 1143 if (nbd->task_recv) 1144 return -EBUSY; 
1145 if (!config->socks) 1146 return -EINVAL; 1147 if (num_connections > 1 && 1148 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { 1149 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); 1150 return -EINVAL; 1151 } 1152 1153 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); 1154 nbd->task_recv = current; 1155 1156 nbd_parse_flags(nbd); 1157 1158 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); 1159 if (error) { 1160 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); 1161 return error; 1162 } 1163 set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); 1164 1165 nbd_dev_dbg_init(nbd); 1166 for (i = 0; i < num_connections; i++) { 1167 struct recv_thread_args *args; 1168 1169 args = kzalloc(sizeof(*args), GFP_KERNEL); 1170 if (!args) { 1171 sock_shutdown(nbd); 1172 return -ENOMEM; 1173 } 1174 sk_set_memalloc(config->socks[i]->sock->sk); 1175 if (nbd->tag_set.timeout) 1176 config->socks[i]->sock->sk->sk_sndtimeo = 1177 nbd->tag_set.timeout; 1178 atomic_inc(&config->recv_threads); 1179 refcount_inc(&nbd->config_refs); 1180 INIT_WORK(&args->work, recv_work); 1181 args->nbd = nbd; 1182 args->index = i; 1183 queue_work(recv_workqueue, &args->work); 1184 } 1185 nbd_size_update(nbd); 1186 return error; 1187 } 1188 1189 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) 1190 { 1191 struct nbd_config *config = nbd->config; 1192 int ret; 1193 1194 ret = nbd_start_device(nbd); 1195 if (ret) 1196 return ret; 1197 1198 if (max_part) 1199 bdev->bd_invalidated = 1; 1200 mutex_unlock(&nbd->config_lock); 1201 ret = wait_event_interruptible(config->recv_wq, 1202 atomic_read(&config->recv_threads) == 0); 1203 if (ret) 1204 sock_shutdown(nbd); 1205 mutex_lock(&nbd->config_lock); 1206 nbd_bdev_reset(bdev); 1207 /* user requested, ignore socket errors */ 1208 if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) 1209 ret = 0; 1210 if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) 1211 ret = -ETIMEDOUT; 1212 return ret; 1213 } 1214 1215 static void nbd_clear_sock_ioctl(struct nbd_device *nbd, 1216 struct block_device *bdev) 1217 { 1218 sock_shutdown(nbd); 1219 kill_bdev(bdev); 1220 nbd_bdev_reset(bdev); 1221 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1222 &nbd->config->runtime_flags)) 1223 nbd_config_put(nbd); 1224 } 1225 1226 /* Must be called with config_lock held */ 1227 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, 1228 unsigned int cmd, unsigned long arg) 1229 { 1230 struct nbd_config *config = nbd->config; 1231 1232 switch (cmd) { 1233 case NBD_DISCONNECT: 1234 return nbd_disconnect(nbd); 1235 case NBD_CLEAR_SOCK: 1236 nbd_clear_sock_ioctl(nbd, bdev); 1237 return 0; 1238 case NBD_SET_SOCK: 1239 return nbd_add_socket(nbd, arg, false); 1240 case NBD_SET_BLKSIZE: 1241 if (!arg || !is_power_of_2(arg) || arg < 512 || 1242 arg > PAGE_SIZE) 1243 return -EINVAL; 1244 nbd_size_set(nbd, arg, 1245 div_s64(config->bytesize, arg)); 1246 return 0; 1247 case NBD_SET_SIZE: 1248 nbd_size_set(nbd, config->blksize, 1249 div_s64(arg, config->blksize)); 1250 return 0; 1251 case NBD_SET_SIZE_BLOCKS: 1252 nbd_size_set(nbd, config->blksize, arg); 1253 return 0; 1254 case NBD_SET_TIMEOUT: 1255 if (arg) { 1256 nbd->tag_set.timeout = arg * HZ; 1257 blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); 1258 } 1259 return 0; 1260 1261 case NBD_SET_FLAGS: 1262 config->flags = arg; 1263 return 0; 1264 case NBD_DO_IT: 1265 return nbd_start_device_ioctl(nbd, bdev); 1266 case NBD_CLEAR_QUE: 
1267 /* 1268 * This is for compatibility only. The queue is always cleared 1269 * by NBD_DO_IT or NBD_CLEAR_SOCK. 1270 */ 1271 return 0; 1272 case NBD_PRINT_DEBUG: 1273 /* 1274 * For compatibility only, we no longer keep a list of 1275 * outstanding requests. 1276 */ 1277 return 0; 1278 } 1279 return -ENOTTY; 1280 } 1281 1282 static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 1283 unsigned int cmd, unsigned long arg) 1284 { 1285 struct nbd_device *nbd = bdev->bd_disk->private_data; 1286 struct nbd_config *config = nbd->config; 1287 int error = -EINVAL; 1288 1289 if (!capable(CAP_SYS_ADMIN)) 1290 return -EPERM; 1291 1292 /* The block layer will pass back some non-nbd ioctls in case we have 1293 * special handling for them, but we don't so just return an error. 1294 */ 1295 if (_IOC_TYPE(cmd) != 0xab) 1296 return -EINVAL; 1297 1298 mutex_lock(&nbd->config_lock); 1299 1300 /* Don't allow ioctl operations on a nbd device that was created with 1301 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. 1302 */ 1303 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1304 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) 1305 error = __nbd_ioctl(bdev, nbd, cmd, arg); 1306 else 1307 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); 1308 mutex_unlock(&nbd->config_lock); 1309 return error; 1310 } 1311 1312 static struct nbd_config *nbd_alloc_config(void) 1313 { 1314 struct nbd_config *config; 1315 1316 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); 1317 if (!config) 1318 return NULL; 1319 atomic_set(&config->recv_threads, 0); 1320 init_waitqueue_head(&config->recv_wq); 1321 init_waitqueue_head(&config->conn_wait); 1322 config->blksize = 1024; 1323 atomic_set(&config->live_connections, 0); 1324 try_module_get(THIS_MODULE); 1325 return config; 1326 } 1327 1328 static int nbd_open(struct block_device *bdev, fmode_t mode) 1329 { 1330 struct nbd_device *nbd; 1331 int ret = 0; 1332 1333 mutex_lock(&nbd_index_mutex); 1334 nbd = bdev->bd_disk->private_data; 1335 if (!nbd) { 1336 ret = -ENXIO; 1337 goto out; 1338 } 1339 if (!refcount_inc_not_zero(&nbd->refs)) { 1340 ret = -ENXIO; 1341 goto out; 1342 } 1343 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1344 struct nbd_config *config; 1345 1346 mutex_lock(&nbd->config_lock); 1347 if (refcount_inc_not_zero(&nbd->config_refs)) { 1348 mutex_unlock(&nbd->config_lock); 1349 goto out; 1350 } 1351 config = nbd->config = nbd_alloc_config(); 1352 if (!config) { 1353 ret = -ENOMEM; 1354 mutex_unlock(&nbd->config_lock); 1355 goto out; 1356 } 1357 refcount_set(&nbd->config_refs, 1); 1358 refcount_inc(&nbd->refs); 1359 mutex_unlock(&nbd->config_lock); 1360 bdev->bd_invalidated = 1; 1361 } else if (nbd_disconnected(nbd->config)) { 1362 bdev->bd_invalidated = 1; 1363 } 1364 out: 1365 mutex_unlock(&nbd_index_mutex); 1366 return ret; 1367 } 1368 1369 static void nbd_release(struct gendisk *disk, fmode_t mode) 1370 { 1371 struct nbd_device *nbd = disk->private_data; 1372 struct block_device *bdev = bdget_disk(disk, 0); 1373 1374 if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1375 bdev->bd_openers == 0) 1376 nbd_disconnect_and_put(nbd); 1377 1378 nbd_config_put(nbd); 1379 nbd_put(nbd); 1380 } 1381 1382 static const struct block_device_operations nbd_fops = 1383 { 1384 .owner = THIS_MODULE, 1385 .open = nbd_open, 1386 .release = nbd_release, 1387 .ioctl = nbd_ioctl, 1388 .compat_ioctl = nbd_ioctl, 1389 }; 1390 1391 #if IS_ENABLED(CONFIG_DEBUG_FS) 1392 1393 static int 
nbd_dbg_tasks_show(struct seq_file *s, void *unused) 1394 { 1395 struct nbd_device *nbd = s->private; 1396 1397 if (nbd->task_recv) 1398 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); 1399 1400 return 0; 1401 } 1402 1403 static int nbd_dbg_tasks_open(struct inode *inode, struct file *file) 1404 { 1405 return single_open(file, nbd_dbg_tasks_show, inode->i_private); 1406 } 1407 1408 static const struct file_operations nbd_dbg_tasks_ops = { 1409 .open = nbd_dbg_tasks_open, 1410 .read = seq_read, 1411 .llseek = seq_lseek, 1412 .release = single_release, 1413 }; 1414 1415 static int nbd_dbg_flags_show(struct seq_file *s, void *unused) 1416 { 1417 struct nbd_device *nbd = s->private; 1418 u32 flags = nbd->config->flags; 1419 1420 seq_printf(s, "Hex: 0x%08x\n\n", flags); 1421 1422 seq_puts(s, "Known flags:\n"); 1423 1424 if (flags & NBD_FLAG_HAS_FLAGS) 1425 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n"); 1426 if (flags & NBD_FLAG_READ_ONLY) 1427 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1428 if (flags & NBD_FLAG_SEND_FLUSH) 1429 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1430 if (flags & NBD_FLAG_SEND_FUA) 1431 seq_puts(s, "NBD_FLAG_SEND_FUA\n"); 1432 if (flags & NBD_FLAG_SEND_TRIM) 1433 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1434 1435 return 0; 1436 } 1437 1438 static int nbd_dbg_flags_open(struct inode *inode, struct file *file) 1439 { 1440 return single_open(file, nbd_dbg_flags_show, inode->i_private); 1441 } 1442 1443 static const struct file_operations nbd_dbg_flags_ops = { 1444 .open = nbd_dbg_flags_open, 1445 .read = seq_read, 1446 .llseek = seq_lseek, 1447 .release = single_release, 1448 }; 1449 1450 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1451 { 1452 struct dentry *dir; 1453 struct nbd_config *config = nbd->config; 1454 1455 if (!nbd_dbg_dir) 1456 return -EIO; 1457 1458 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir); 1459 if (!dir) { 1460 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n", 1461 nbd_name(nbd)); 1462 return -EIO; 1463 } 1464 config->dbg_dir = dir; 1465 1466 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); 1467 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); 1468 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); 1469 debugfs_create_u64("blocksize", 0444, dir, &config->blksize); 1470 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); 1471 1472 return 0; 1473 } 1474 1475 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1476 { 1477 debugfs_remove_recursive(nbd->config->dbg_dir); 1478 } 1479 1480 static int nbd_dbg_init(void) 1481 { 1482 struct dentry *dbg_dir; 1483 1484 dbg_dir = debugfs_create_dir("nbd", NULL); 1485 if (!dbg_dir) 1486 return -EIO; 1487 1488 nbd_dbg_dir = dbg_dir; 1489 1490 return 0; 1491 } 1492 1493 static void nbd_dbg_close(void) 1494 { 1495 debugfs_remove_recursive(nbd_dbg_dir); 1496 } 1497 1498 #else /* IS_ENABLED(CONFIG_DEBUG_FS) */ 1499 1500 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1501 { 1502 return 0; 1503 } 1504 1505 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1506 { 1507 } 1508 1509 static int nbd_dbg_init(void) 1510 { 1511 return 0; 1512 } 1513 1514 static void nbd_dbg_close(void) 1515 { 1516 } 1517 1518 #endif 1519 1520 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, 1521 unsigned int hctx_idx, unsigned int numa_node) 1522 { 1523 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); 1524 cmd->nbd = set->driver_data; 1525 cmd->flags = 0; 1526 mutex_init(&cmd->lock); 1527 return 0; 1528 } 1529 1530 static const struct 
blk_mq_ops nbd_mq_ops = { 1531 .queue_rq = nbd_queue_rq, 1532 .complete = nbd_complete_rq, 1533 .init_request = nbd_init_request, 1534 .timeout = nbd_xmit_timeout, 1535 }; 1536 1537 static int nbd_dev_add(int index) 1538 { 1539 struct nbd_device *nbd; 1540 struct gendisk *disk; 1541 struct request_queue *q; 1542 int err = -ENOMEM; 1543 1544 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); 1545 if (!nbd) 1546 goto out; 1547 1548 disk = alloc_disk(1 << part_shift); 1549 if (!disk) 1550 goto out_free_nbd; 1551 1552 if (index >= 0) { 1553 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, 1554 GFP_KERNEL); 1555 if (err == -ENOSPC) 1556 err = -EEXIST; 1557 } else { 1558 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); 1559 if (err >= 0) 1560 index = err; 1561 } 1562 if (err < 0) 1563 goto out_free_disk; 1564 1565 nbd->index = index; 1566 nbd->disk = disk; 1567 nbd->tag_set.ops = &nbd_mq_ops; 1568 nbd->tag_set.nr_hw_queues = 1; 1569 nbd->tag_set.queue_depth = 128; 1570 nbd->tag_set.numa_node = NUMA_NO_NODE; 1571 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); 1572 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | 1573 BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; 1574 nbd->tag_set.driver_data = nbd; 1575 1576 err = blk_mq_alloc_tag_set(&nbd->tag_set); 1577 if (err) 1578 goto out_free_idr; 1579 1580 q = blk_mq_init_queue(&nbd->tag_set); 1581 if (IS_ERR(q)) { 1582 err = PTR_ERR(q); 1583 goto out_free_tags; 1584 } 1585 disk->queue = q; 1586 1587 /* 1588 * Tell the block layer that we are not a rotational device 1589 */ 1590 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); 1591 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1592 disk->queue->limits.discard_granularity = 0; 1593 disk->queue->limits.discard_alignment = 0; 1594 blk_queue_max_discard_sectors(disk->queue, 0); 1595 blk_queue_max_segment_size(disk->queue, UINT_MAX); 1596 blk_queue_max_segments(disk->queue, USHRT_MAX); 1597 blk_queue_max_hw_sectors(disk->queue, 65536); 1598 disk->queue->limits.max_sectors = 256; 1599 1600 mutex_init(&nbd->config_lock); 1601 refcount_set(&nbd->config_refs, 0); 1602 refcount_set(&nbd->refs, 1); 1603 INIT_LIST_HEAD(&nbd->list); 1604 disk->major = NBD_MAJOR; 1605 disk->first_minor = index << part_shift; 1606 disk->fops = &nbd_fops; 1607 disk->private_data = nbd; 1608 sprintf(disk->disk_name, "nbd%d", index); 1609 add_disk(disk); 1610 nbd_total_devices++; 1611 return index; 1612 1613 out_free_tags: 1614 blk_mq_free_tag_set(&nbd->tag_set); 1615 out_free_idr: 1616 idr_remove(&nbd_index_idr, index); 1617 out_free_disk: 1618 put_disk(disk); 1619 out_free_nbd: 1620 kfree(nbd); 1621 out: 1622 return err; 1623 } 1624 1625 static int find_free_cb(int id, void *ptr, void *data) 1626 { 1627 struct nbd_device *nbd = ptr; 1628 struct nbd_device **found = data; 1629 1630 if (!refcount_read(&nbd->config_refs)) { 1631 *found = nbd; 1632 return 1; 1633 } 1634 return 0; 1635 } 1636 1637 /* Netlink interface. 
*/ 1638 static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { 1639 [NBD_ATTR_INDEX] = { .type = NLA_U32 }, 1640 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, 1641 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, 1642 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, 1643 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, 1644 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, 1645 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, 1646 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, 1647 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, 1648 }; 1649 1650 static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { 1651 [NBD_SOCK_FD] = { .type = NLA_U32 }, 1652 }; 1653 1654 /* We don't use this right now since we don't parse the incoming list, but we 1655 * still want it here so userspace knows what to expect. 1656 */ 1657 static const struct nla_policy __attribute__((unused)) 1658 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { 1659 [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, 1660 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, 1661 }; 1662 1663 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) 1664 { 1665 struct nbd_device *nbd = NULL; 1666 struct nbd_config *config; 1667 int index = -1; 1668 int ret; 1669 bool put_dev = false; 1670 1671 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1672 return -EPERM; 1673 1674 if (info->attrs[NBD_ATTR_INDEX]) 1675 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1676 if (!info->attrs[NBD_ATTR_SOCKETS]) { 1677 printk(KERN_ERR "nbd: must specify at least one socket\n"); 1678 return -EINVAL; 1679 } 1680 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { 1681 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); 1682 return -EINVAL; 1683 } 1684 again: 1685 mutex_lock(&nbd_index_mutex); 1686 if (index == -1) { 1687 ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); 1688 if (ret == 0) { 1689 int new_index; 1690 new_index = nbd_dev_add(-1); 1691 if (new_index < 0) { 1692 mutex_unlock(&nbd_index_mutex); 1693 printk(KERN_ERR "nbd: failed to add new device\n"); 1694 return new_index; 1695 } 1696 nbd = idr_find(&nbd_index_idr, new_index); 1697 } 1698 } else { 1699 nbd = idr_find(&nbd_index_idr, index); 1700 if (!nbd) { 1701 ret = nbd_dev_add(index); 1702 if (ret < 0) { 1703 mutex_unlock(&nbd_index_mutex); 1704 printk(KERN_ERR "nbd: failed to add new device\n"); 1705 return ret; 1706 } 1707 nbd = idr_find(&nbd_index_idr, index); 1708 } 1709 } 1710 if (!nbd) { 1711 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1712 index); 1713 mutex_unlock(&nbd_index_mutex); 1714 return -EINVAL; 1715 } 1716 if (!refcount_inc_not_zero(&nbd->refs)) { 1717 mutex_unlock(&nbd_index_mutex); 1718 if (index == -1) 1719 goto again; 1720 printk(KERN_ERR "nbd: device at index %d is going down\n", 1721 index); 1722 return -EINVAL; 1723 } 1724 mutex_unlock(&nbd_index_mutex); 1725 1726 mutex_lock(&nbd->config_lock); 1727 if (refcount_read(&nbd->config_refs)) { 1728 mutex_unlock(&nbd->config_lock); 1729 nbd_put(nbd); 1730 if (index == -1) 1731 goto again; 1732 printk(KERN_ERR "nbd: nbd%d already in use\n", index); 1733 return -EBUSY; 1734 } 1735 if (WARN_ON(nbd->config)) { 1736 mutex_unlock(&nbd->config_lock); 1737 nbd_put(nbd); 1738 return -EINVAL; 1739 } 1740 config = nbd->config = nbd_alloc_config(); 1741 if (!nbd->config) { 1742 mutex_unlock(&nbd->config_lock); 1743 nbd_put(nbd); 1744 printk(KERN_ERR "nbd: couldn't allocate config\n"); 1745 return -ENOMEM; 1746 } 1747 refcount_set(&nbd->config_refs, 1); 1748 set_bit(NBD_BOUND, 
&config->runtime_flags); 1749 1750 if (info->attrs[NBD_ATTR_SIZE_BYTES]) { 1751 u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1752 nbd_size_set(nbd, config->blksize, 1753 div64_u64(bytes, config->blksize)); 1754 } 1755 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { 1756 u64 bsize = 1757 nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1758 nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); 1759 } 1760 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1761 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1762 nbd->tag_set.timeout = timeout * HZ; 1763 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1764 } 1765 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1766 config->dead_conn_timeout = 1767 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1768 config->dead_conn_timeout *= HZ; 1769 } 1770 if (info->attrs[NBD_ATTR_SERVER_FLAGS]) 1771 config->flags = 1772 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); 1773 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1774 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1775 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1776 set_bit(NBD_DESTROY_ON_DISCONNECT, 1777 &config->runtime_flags); 1778 put_dev = true; 1779 } 1780 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1781 set_bit(NBD_DISCONNECT_ON_CLOSE, 1782 &config->runtime_flags); 1783 } 1784 } 1785 1786 if (info->attrs[NBD_ATTR_SOCKETS]) { 1787 struct nlattr *attr; 1788 int rem, fd; 1789 1790 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1791 rem) { 1792 struct nlattr *socks[NBD_SOCK_MAX+1]; 1793 1794 if (nla_type(attr) != NBD_SOCK_ITEM) { 1795 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1796 ret = -EINVAL; 1797 goto out; 1798 } 1799 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1800 nbd_sock_policy, info->extack); 1801 if (ret != 0) { 1802 printk(KERN_ERR "nbd: error processing sock list\n"); 1803 ret = -EINVAL; 1804 goto out; 1805 } 1806 if (!socks[NBD_SOCK_FD]) 1807 continue; 1808 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1809 ret = nbd_add_socket(nbd, fd, true); 1810 if (ret) 1811 goto out; 1812 } 1813 } 1814 ret = nbd_start_device(nbd); 1815 out: 1816 mutex_unlock(&nbd->config_lock); 1817 if (!ret) { 1818 set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); 1819 refcount_inc(&nbd->config_refs); 1820 nbd_connect_reply(info, nbd->index); 1821 } 1822 nbd_config_put(nbd); 1823 if (put_dev) 1824 nbd_put(nbd); 1825 return ret; 1826 } 1827 1828 static void nbd_disconnect_and_put(struct nbd_device *nbd) 1829 { 1830 mutex_lock(&nbd->config_lock); 1831 nbd_disconnect(nbd); 1832 nbd_clear_sock(nbd); 1833 mutex_unlock(&nbd->config_lock); 1834 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1835 &nbd->config->runtime_flags)) 1836 nbd_config_put(nbd); 1837 } 1838 1839 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1840 { 1841 struct nbd_device *nbd; 1842 int index; 1843 1844 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1845 return -EPERM; 1846 1847 if (!info->attrs[NBD_ATTR_INDEX]) { 1848 printk(KERN_ERR "nbd: must specify an index to disconnect\n"); 1849 return -EINVAL; 1850 } 1851 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1852 mutex_lock(&nbd_index_mutex); 1853 nbd = idr_find(&nbd_index_idr, index); 1854 if (!nbd) { 1855 mutex_unlock(&nbd_index_mutex); 1856 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1857 index); 1858 return -EINVAL; 1859 } 1860 if (!refcount_inc_not_zero(&nbd->refs)) { 1861 mutex_unlock(&nbd_index_mutex); 1862 printk(KERN_ERR "nbd: device at index %d is going 
down\n", 1863 index); 1864 return -EINVAL; 1865 } 1866 mutex_unlock(&nbd_index_mutex); 1867 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1868 nbd_put(nbd); 1869 return 0; 1870 } 1871 nbd_disconnect_and_put(nbd); 1872 nbd_config_put(nbd); 1873 nbd_put(nbd); 1874 return 0; 1875 } 1876 1877 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) 1878 { 1879 struct nbd_device *nbd = NULL; 1880 struct nbd_config *config; 1881 int index; 1882 int ret = 0; 1883 bool put_dev = false; 1884 1885 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1886 return -EPERM; 1887 1888 if (!info->attrs[NBD_ATTR_INDEX]) { 1889 printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); 1890 return -EINVAL; 1891 } 1892 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1893 mutex_lock(&nbd_index_mutex); 1894 nbd = idr_find(&nbd_index_idr, index); 1895 if (!nbd) { 1896 mutex_unlock(&nbd_index_mutex); 1897 printk(KERN_ERR "nbd: couldn't find a device at index %d\n", 1898 index); 1899 return -EINVAL; 1900 } 1901 if (!refcount_inc_not_zero(&nbd->refs)) { 1902 mutex_unlock(&nbd_index_mutex); 1903 printk(KERN_ERR "nbd: device at index %d is going down\n", 1904 index); 1905 return -EINVAL; 1906 } 1907 mutex_unlock(&nbd_index_mutex); 1908 1909 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1910 dev_err(nbd_to_dev(nbd), 1911 "not configured, cannot reconfigure\n"); 1912 nbd_put(nbd); 1913 return -EINVAL; 1914 } 1915 1916 mutex_lock(&nbd->config_lock); 1917 config = nbd->config; 1918 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1919 !nbd->task_recv) { 1920 dev_err(nbd_to_dev(nbd), 1921 "not configured, cannot reconfigure\n"); 1922 ret = -EINVAL; 1923 goto out; 1924 } 1925 1926 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1927 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1928 nbd->tag_set.timeout = timeout * HZ; 1929 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1930 } 1931 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1932 config->dead_conn_timeout = 1933 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1934 config->dead_conn_timeout *= HZ; 1935 } 1936 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1937 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1938 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1939 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, 1940 &config->runtime_flags)) 1941 put_dev = true; 1942 } else { 1943 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1944 &config->runtime_flags)) 1945 refcount_inc(&nbd->refs); 1946 } 1947 1948 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1949 set_bit(NBD_DISCONNECT_ON_CLOSE, 1950 &config->runtime_flags); 1951 } else { 1952 clear_bit(NBD_DISCONNECT_ON_CLOSE, 1953 &config->runtime_flags); 1954 } 1955 } 1956 1957 if (info->attrs[NBD_ATTR_SOCKETS]) { 1958 struct nlattr *attr; 1959 int rem, fd; 1960 1961 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1962 rem) { 1963 struct nlattr *socks[NBD_SOCK_MAX+1]; 1964 1965 if (nla_type(attr) != NBD_SOCK_ITEM) { 1966 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1967 ret = -EINVAL; 1968 goto out; 1969 } 1970 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1971 nbd_sock_policy, info->extack); 1972 if (ret != 0) { 1973 printk(KERN_ERR "nbd: error processing sock list\n"); 1974 ret = -EINVAL; 1975 goto out; 1976 } 1977 if (!socks[NBD_SOCK_FD]) 1978 continue; 1979 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1980 ret = nbd_reconnect_socket(nbd, fd); 1981 if (ret) { 1982 if (ret == -ENOSPC) 1983 ret = 0; 1984 goto out; 1985 } 1986 
dev_info(nbd_to_dev(nbd), "reconnected socket\n"); 1987 } 1988 } 1989 out: 1990 mutex_unlock(&nbd->config_lock); 1991 nbd_config_put(nbd); 1992 nbd_put(nbd); 1993 if (put_dev) 1994 nbd_put(nbd); 1995 return ret; 1996 } 1997 1998 static const struct genl_ops nbd_connect_genl_ops[] = { 1999 { 2000 .cmd = NBD_CMD_CONNECT, 2001 .policy = nbd_attr_policy, 2002 .doit = nbd_genl_connect, 2003 }, 2004 { 2005 .cmd = NBD_CMD_DISCONNECT, 2006 .policy = nbd_attr_policy, 2007 .doit = nbd_genl_disconnect, 2008 }, 2009 { 2010 .cmd = NBD_CMD_RECONFIGURE, 2011 .policy = nbd_attr_policy, 2012 .doit = nbd_genl_reconfigure, 2013 }, 2014 { 2015 .cmd = NBD_CMD_STATUS, 2016 .policy = nbd_attr_policy, 2017 .doit = nbd_genl_status, 2018 }, 2019 }; 2020 2021 static const struct genl_multicast_group nbd_mcast_grps[] = { 2022 { .name = NBD_GENL_MCAST_GROUP_NAME, }, 2023 }; 2024 2025 static struct genl_family nbd_genl_family __ro_after_init = { 2026 .hdrsize = 0, 2027 .name = NBD_GENL_FAMILY_NAME, 2028 .version = NBD_GENL_VERSION, 2029 .module = THIS_MODULE, 2030 .ops = nbd_connect_genl_ops, 2031 .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), 2032 .maxattr = NBD_ATTR_MAX, 2033 .mcgrps = nbd_mcast_grps, 2034 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), 2035 }; 2036 2037 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) 2038 { 2039 struct nlattr *dev_opt; 2040 u8 connected = 0; 2041 int ret; 2042 2043 /* This is a little racey, but for status it's ok. The 2044 * reason we don't take a ref here is because we can't 2045 * take a ref in the index == -1 case as we would need 2046 * to put under the nbd_index_mutex, which could 2047 * deadlock if we are configured to remove ourselves 2048 * once we're disconnected. 2049 */ 2050 if (refcount_read(&nbd->config_refs)) 2051 connected = 1; 2052 dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); 2053 if (!dev_opt) 2054 return -EMSGSIZE; 2055 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); 2056 if (ret) 2057 return -EMSGSIZE; 2058 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, 2059 connected); 2060 if (ret) 2061 return -EMSGSIZE; 2062 nla_nest_end(reply, dev_opt); 2063 return 0; 2064 } 2065 2066 static int status_cb(int id, void *ptr, void *data) 2067 { 2068 struct nbd_device *nbd = ptr; 2069 return populate_nbd_status(nbd, (struct sk_buff *)data); 2070 } 2071 2072 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) 2073 { 2074 struct nlattr *dev_list; 2075 struct sk_buff *reply; 2076 void *reply_head; 2077 size_t msg_size; 2078 int index = -1; 2079 int ret = -ENOMEM; 2080 2081 if (info->attrs[NBD_ATTR_INDEX]) 2082 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 2083 2084 mutex_lock(&nbd_index_mutex); 2085 2086 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + 2087 nla_attr_size(sizeof(u8))); 2088 msg_size *= (index == -1) ? 
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

	reply = genlmsg_new(msg_size, GFP_KERNEL);
	if (!reply)
		goto out;
	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
				       NBD_CMD_STATUS);
	if (!reply_head) {
		nlmsg_free(reply);
		goto out;
	}

	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
			nlmsg_free(reply);
			goto out;
		}
	} else {
		struct nbd_device *nbd;
		nbd = idr_find(&nbd_index_idr, index);
		if (nbd) {
			ret = populate_nbd_status(nbd, reply);
			if (ret) {
				nlmsg_free(reply);
				goto out;
			}
		}
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
	genlmsg_reply(reply, info);
	ret = 0;
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_connect_reply(struct genl_info *info, int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
				     NBD_CMD_CONNECT);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_reply(skb, info);
}

static void nbd_mcast_index(int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
			       NBD_CMD_LINK_DEAD);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

static void nbd_dead_link_work(struct work_struct *work)
{
	struct link_dead_args *args = container_of(work, struct link_dead_args,
						   work);
	nbd_mcast_index(args->index);
	kfree(args);
}

static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space, so that user space can know the maximum
		 * number of partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
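		/*
		 * Illustrative worked example (not from the original
		 * source): with the default max_part = 16, fls(16) = 5,
		 * so part_shift = 5 and max_part below becomes 31, i.e.
		 * 32 minors per device: the whole disk plus up to 31
		 * partitions.
		 */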
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;
	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
					 WQ_UNBOUND, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	if (genl_register_family(&nbd_genl_family)) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		destroy_workqueue(recv_workqueue);
		return -EINVAL;
	}
	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	list_add_tail(&nbd->list, list);
	return 0;
}

static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	idr_destroy(&nbd_index_idr);
	genl_unregister_family(&nbd_genl_family);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
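/*
 * Illustrative usage (not part of the original source): both parameters are
 * read-only at runtime (mode 0444), so they are set at module load time,
 * for example:
 *
 *	modprobe nbd nbds_max=4 max_part=8
 *
 * and can be read back from /sys/module/nbd/parameters/ afterwards.
 */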