/*
 * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
 * Copyright (C) 2014 Red Hat, Inc.
 * Copyright (C) 2015 Arrikto, Inc.
 * Copyright (C) 2017 Chinamobile, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/parser.h>
#include <linux/vmalloc.h>
#include <linux/uio_driver.h>
#include <linux/radix-tree.h>
#include <linux/stringify.h>
#include <linux/bitops.h>
#include <linux/highmem.h>
#include <linux/configfs.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_proto.h>
#include <target/target_core_base.h>
#include <target/target_core_fabric.h>
#include <target/target_core_backend.h>

#include <linux/target_core_user.h>

/*
 * Define a shared-memory interface for LIO to pass SCSI commands and
 * data to userspace for processing. This is to allow backends that
 * are too complex for in-kernel support to be possible.
 *
 * It uses the UIO framework to do a lot of the device-creation and
 * introspection work for us.
 *
 * See the .h file for how the ring is laid out. Note that while the
 * command ring is defined, the particulars of the data area are
 * not. Offset values in the command entry point to other locations
 * internal to the mmap()ed area. There is separate space outside the
 * command ring for data buffers. This leaves maximum flexibility for
 * moving buffer allocations, or even page flipping or other
 * allocation techniques, without altering the command ring layout.
 *
 * SECURITY:
 * The user process must be assumed to be malicious. There's no way to
 * prevent it breaking the command ring protocol if it wants, but in
 * order to prevent other issues we must only ever read *data* from
 * the shared memory area, not offsets or sizes. This applies to
 * command ring entries as well as the mailbox. Extra code needed for
 * this may have a 'UAM' comment.
 */

#define TCMU_TIME_OUT	(30 * MSEC_PER_SEC)

/* For the cmd area, the size is fixed at 8MB */
#define CMDR_SIZE	(8 * 1024 * 1024)

/*
 * For the data area, the block size is PAGE_SIZE and
 * the total size is 256K * PAGE_SIZE.
 */
#define DATA_BLOCK_SIZE		PAGE_SIZE
#define DATA_BLOCK_SHIFT	PAGE_SHIFT
#define DATA_BLOCK_BITS_DEF	(256 * 1024)
#define DATA_SIZE		(DATA_BLOCK_BITS * DATA_BLOCK_SIZE)

#define TCMU_MBS_TO_BLOCKS(_mbs) (_mbs << (20 - DATA_BLOCK_SHIFT))
#define TCMU_BLOCKS_TO_MBS(_blocks) (_blocks >> (20 - DATA_BLOCK_SHIFT))
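/*
 * For example, with 4 KiB pages (DATA_BLOCK_SHIFT == 12),
 * TCMU_MBS_TO_BLOCKS(1) == 1 << (20 - 12) == 256 blocks per MB, and the
 * DATA_BLOCK_BITS_DEF default of 256K blocks corresponds to a 1 GiB data
 * area per device.
 */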
/* The total size of the ring is 8M + 256K * PAGE_SIZE */
#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)

/*
 * Default number of global data blocks (512K * PAGE_SIZE)
 * when the unmap thread will be started.
 */
#define TCMU_GLOBAL_MAX_BLOCKS_DEF (512 * 1024)

static u8 tcmu_kern_cmd_reply_supported;

static struct device *tcmu_root_device;

struct tcmu_hba {
	u32 host_id;
};

#define TCMU_CONFIG_LEN 256

struct tcmu_nl_cmd {
	/* wake up thread waiting for reply */
	struct completion complete;
	int cmd;
	int status;
};

struct tcmu_dev {
	struct list_head node;
	struct kref kref;

	struct se_device se_dev;

	char *name;
	struct se_hba *hba;

#define TCMU_DEV_BIT_OPEN 0
#define TCMU_DEV_BIT_BROKEN 1
#define TCMU_DEV_BIT_BLOCKED 2
	unsigned long flags;

	struct uio_info uio_info;

	struct inode *inode;

	struct tcmu_mailbox *mb_addr;
	size_t dev_size;
	u32 cmdr_size;
	u32 cmdr_last_cleaned;
	/* Offset of data area from start of mb */
	/* Must add data_off and mb_addr to get the address */
	size_t data_off;
	size_t data_size;
	uint32_t max_blocks;
	size_t ring_size;

	struct mutex cmdr_lock;
	struct list_head cmdr_queue;

	uint32_t dbi_max;
	uint32_t dbi_thresh;
	unsigned long *data_bitmap;
	struct radix_tree_root data_blocks;

	struct idr commands;

	struct timer_list cmd_timer;
	unsigned int cmd_time_out;

	struct timer_list qfull_timer;
	int qfull_time_out;

	struct list_head timedout_entry;

	spinlock_t nl_cmd_lock;
	struct tcmu_nl_cmd curr_nl_cmd;
	/* wake up threads waiting on curr_nl_cmd */
	wait_queue_head_t nl_cmd_wq;

	char dev_config[TCMU_CONFIG_LEN];

	int nl_reply_supported;
};

#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)

#define CMDR_OFF sizeof(struct tcmu_mailbox)

struct tcmu_cmd {
	struct se_cmd *se_cmd;
	struct tcmu_dev *tcmu_dev;
	struct list_head cmdr_queue_entry;

	uint16_t cmd_id;

	/* Can't use se_cmd when cleaning up expired cmds, because if
	   cmd has been completed then accessing se_cmd is off limits */
	uint32_t dbi_cnt;
	uint32_t dbi_cur;
	uint32_t *dbi;

	unsigned long deadline;

#define TCMU_CMD_BIT_EXPIRED 0
	unsigned long flags;
};
/*
 * To avoid deadlock the mutex lock order should always be:
 *
 * mutex_lock(&root_udev_mutex);
 * ...
 * mutex_lock(&tcmu_dev->cmdr_lock);
 * mutex_unlock(&tcmu_dev->cmdr_lock);
 * ...
 * mutex_unlock(&root_udev_mutex);
 */
static DEFINE_MUTEX(root_udev_mutex);
static LIST_HEAD(root_udev);

static DEFINE_SPINLOCK(timed_out_udevs_lock);
static LIST_HEAD(timed_out_udevs);

static struct kmem_cache *tcmu_cmd_cache;

static atomic_t global_db_count = ATOMIC_INIT(0);
static struct delayed_work tcmu_unmap_work;
static int tcmu_global_max_blocks = TCMU_GLOBAL_MAX_BLOCKS_DEF;

static int tcmu_set_global_max_data_area(const char *str,
					 const struct kernel_param *kp)
{
	int ret, max_area_mb;

	ret = kstrtoint(str, 10, &max_area_mb);
	if (ret)
		return -EINVAL;

	if (max_area_mb <= 0) {
		pr_err("global_max_data_area must be larger than 0.\n");
		return -EINVAL;
	}

	tcmu_global_max_blocks = TCMU_MBS_TO_BLOCKS(max_area_mb);
	if (atomic_read(&global_db_count) > tcmu_global_max_blocks)
		schedule_delayed_work(&tcmu_unmap_work, 0);
	else
		cancel_delayed_work_sync(&tcmu_unmap_work);

	return 0;
}

static int tcmu_get_global_max_data_area(char *buffer,
					 const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", TCMU_BLOCKS_TO_MBS(tcmu_global_max_blocks));
}

static const struct kernel_param_ops tcmu_global_max_data_area_op = {
	.set = tcmu_set_global_max_data_area,
	.get = tcmu_get_global_max_data_area,
};

module_param_cb(global_max_data_area_mb, &tcmu_global_max_data_area_op, NULL,
		S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(global_max_data_area_mb,
		 "Max MBs allowed to be allocated to all the tcmu device's "
		 "data areas.");
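/*
 * The permissions above make this a runtime-writable module parameter;
 * assuming the module is built as target_core_user, it can be tuned with
 * something like:
 *
 *   echo 2048 > /sys/module/target_core_user/parameters/global_max_data_area_mb
 */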
/* multicast group */
enum tcmu_multicast_groups {
	TCMU_MCGRP_CONFIG,
};

static const struct genl_multicast_group tcmu_mcgrps[] = {
	[TCMU_MCGRP_CONFIG] = { .name = "config", },
};

static struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = {
	[TCMU_ATTR_DEVICE] = { .type = NLA_STRING },
	[TCMU_ATTR_MINOR] = { .type = NLA_U32 },
	[TCMU_ATTR_CMD_STATUS] = { .type = NLA_S32 },
	[TCMU_ATTR_DEVICE_ID] = { .type = NLA_U32 },
	[TCMU_ATTR_SUPP_KERN_CMD_REPLY] = { .type = NLA_U8 },
};

static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd)
{
	struct se_device *dev;
	struct tcmu_dev *udev;
	struct tcmu_nl_cmd *nl_cmd;
	int dev_id, rc, ret = 0;
	bool is_removed = (completed_cmd == TCMU_CMD_REMOVED_DEVICE);

	if (!info->attrs[TCMU_ATTR_CMD_STATUS] ||
	    !info->attrs[TCMU_ATTR_DEVICE_ID]) {
		printk(KERN_ERR "TCMU_ATTR_CMD_STATUS or TCMU_ATTR_DEVICE_ID not set, doing nothing\n");
		return -EINVAL;
	}

	dev_id = nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]);
	rc = nla_get_s32(info->attrs[TCMU_ATTR_CMD_STATUS]);

	dev = target_find_device(dev_id, !is_removed);
	if (!dev) {
		printk(KERN_ERR "tcmu nl cmd %u/%u completion could not find device with dev id %u.\n",
		       completed_cmd, rc, dev_id);
		return -ENODEV;
	}
	udev = TCMU_DEV(dev);

	spin_lock(&udev->nl_cmd_lock);
	nl_cmd = &udev->curr_nl_cmd;

	pr_debug("genl cmd done got id %d curr %d done %d rc %d\n", dev_id,
		 nl_cmd->cmd, completed_cmd, rc);

	if (nl_cmd->cmd != completed_cmd) {
		printk(KERN_ERR "Mismatched commands (Expecting reply for %d. Current %d).\n",
		       completed_cmd, nl_cmd->cmd);
		ret = -EINVAL;
	} else {
		nl_cmd->status = rc;
	}

	spin_unlock(&udev->nl_cmd_lock);
	if (!is_removed)
		target_undepend_item(&dev->dev_group.cg_item);
	if (!ret)
		complete(&nl_cmd->complete);
	return ret;
}

static int tcmu_genl_rm_dev_done(struct sk_buff *skb, struct genl_info *info)
{
	return tcmu_genl_cmd_done(info, TCMU_CMD_REMOVED_DEVICE);
}

static int tcmu_genl_add_dev_done(struct sk_buff *skb, struct genl_info *info)
{
	return tcmu_genl_cmd_done(info, TCMU_CMD_ADDED_DEVICE);
}

static int tcmu_genl_reconfig_dev_done(struct sk_buff *skb,
				       struct genl_info *info)
{
	return tcmu_genl_cmd_done(info, TCMU_CMD_RECONFIG_DEVICE);
}

static int tcmu_genl_set_features(struct sk_buff *skb, struct genl_info *info)
{
	if (info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]) {
		tcmu_kern_cmd_reply_supported =
			nla_get_u8(info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]);
		printk(KERN_INFO "tcmu daemon: command reply support %u.\n",
		       tcmu_kern_cmd_reply_supported);
	}

	return 0;
}

static const struct genl_ops tcmu_genl_ops[] = {
	{
		.cmd = TCMU_CMD_SET_FEATURES,
		.flags = GENL_ADMIN_PERM,
		.policy = tcmu_attr_policy,
		.doit = tcmu_genl_set_features,
	},
	{
		.cmd = TCMU_CMD_ADDED_DEVICE_DONE,
		.flags = GENL_ADMIN_PERM,
		.policy = tcmu_attr_policy,
		.doit = tcmu_genl_add_dev_done,
	},
	{
		.cmd = TCMU_CMD_REMOVED_DEVICE_DONE,
		.flags = GENL_ADMIN_PERM,
		.policy = tcmu_attr_policy,
		.doit = tcmu_genl_rm_dev_done,
	},
	{
		.cmd = TCMU_CMD_RECONFIG_DEVICE_DONE,
		.flags = GENL_ADMIN_PERM,
		.policy = tcmu_attr_policy,
		.doit = tcmu_genl_reconfig_dev_done,
	},
};

/* Our generic netlink family */
static struct genl_family tcmu_genl_family __ro_after_init = {
	.module = THIS_MODULE,
	.hdrsize = 0,
	.name = "TCM-USER",
	.version = 2,
	.maxattr = TCMU_ATTR_MAX,
	.mcgrps = tcmu_mcgrps,
	.n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
	.netnsok = true,
	.ops = tcmu_genl_ops,
	.n_ops = ARRAY_SIZE(tcmu_genl_ops),
};

#define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
#define tcmu_cmd_reset_dbi_cur(cmd) tcmu_cmd_set_dbi_cur(cmd, 0)
#define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index))
#define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++])

static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd, uint32_t len)
{
	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
	uint32_t i;

	for (i = 0; i < len; i++)
		clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap);
}

static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
					struct tcmu_cmd *tcmu_cmd)
{
	struct page *page;
	int ret, dbi;

	dbi = find_first_zero_bit(udev->data_bitmap, udev->dbi_thresh);
	if (dbi == udev->dbi_thresh)
		return false;

	page = radix_tree_lookup(&udev->data_blocks, dbi);
	if (!page) {
		if (atomic_add_return(1, &global_db_count) >
				      tcmu_global_max_blocks)
			schedule_delayed_work(&tcmu_unmap_work, 0);

		/* try to get new page from the mm */
		page = alloc_page(GFP_KERNEL);
		if (!page)
			goto err_alloc;

		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
		if (ret)
			goto err_insert;
	}

	if (dbi > udev->dbi_max)
		udev->dbi_max = dbi;

	set_bit(dbi, udev->data_bitmap);
	tcmu_cmd_set_dbi(tcmu_cmd, dbi);

	return true;
err_insert:
	__free_page(page);
err_alloc:
	atomic_dec(&global_db_count);
	return false;
}

static bool tcmu_get_empty_blocks(struct tcmu_dev *udev,
				  struct tcmu_cmd *tcmu_cmd)
{
	int i;

	for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) {
		if (!tcmu_get_empty_block(udev, tcmu_cmd))
			return false;
	}
	return true;
}

static inline struct page *
tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
{
	return radix_tree_lookup(&udev->data_blocks, dbi);
}

static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd)
{
	kfree(tcmu_cmd->dbi);
	kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
}

static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
{
	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);

	if (se_cmd->se_cmd_flags & SCF_BIDI) {
		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
		data_length += round_up(se_cmd->t_bidi_data_sg->length,
				DATA_BLOCK_SIZE);
	}

	return data_length;
}

static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
{
	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);

	return data_length / DATA_BLOCK_SIZE;
}

static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
{
	struct se_device *se_dev = se_cmd->se_dev;
	struct tcmu_dev *udev = TCMU_DEV(se_dev);
	struct tcmu_cmd *tcmu_cmd;

	tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL);
	if (!tcmu_cmd)
		return NULL;

	INIT_LIST_HEAD(&tcmu_cmd->cmdr_queue_entry);
	tcmu_cmd->se_cmd = se_cmd;
	tcmu_cmd->tcmu_dev = udev;

	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
	tcmu_cmd->dbi_cnt = tcmu_cmd_get_block_cnt(tcmu_cmd);
	tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
				GFP_KERNEL);
	if (!tcmu_cmd->dbi) {
		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
		return NULL;
	}

	return tcmu_cmd;
}

static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
{
	unsigned long offset = offset_in_page(vaddr);
	void *start = vaddr - offset;

	size = round_up(size+offset, PAGE_SIZE);

	while (size) {
		flush_dcache_page(virt_to_page(start));
		start += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}

/*
 * Some ring helper functions. We don't assume size is a power of 2 so
 * we can't use circ_buf.h.
 */
static inline size_t spc_used(size_t head, size_t tail, size_t size)
{
	int diff = head - tail;

	if (diff >= 0)
		return diff;
	else
		return size + diff;
}

static inline size_t spc_free(size_t head, size_t tail, size_t size)
{
	/* Keep 1 byte unused or we can't tell full from empty */
	return (size - spc_used(head, tail, size) - 1);
}
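/*
 * For instance, with size == 8, head == 2 and tail == 6, spc_used() returns
 * 8 + (2 - 6) == 4 and spc_free() returns 8 - 4 - 1 == 3: one slot is always
 * left unused so that head == tail unambiguously means "empty".
 */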
static inline size_t head_to_end(size_t head, size_t size)
{
	return size - head;
}

static inline void new_iov(struct iovec **iov, int *iov_cnt)
{
	struct iovec *iovec;

	if (*iov_cnt != 0)
		(*iov)++;
	(*iov_cnt)++;

	iovec = *iov;
	memset(iovec, 0, sizeof(struct iovec));
}

#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)

/* offset is relative to mb_addr */
static inline size_t get_block_offset_user(struct tcmu_dev *dev,
		int dbi, int remaining)
{
	return dev->data_off + dbi * DATA_BLOCK_SIZE +
		DATA_BLOCK_SIZE - remaining;
}

static inline size_t iov_tail(struct iovec *iov)
{
	return (size_t)iov->iov_base + iov->iov_len;
}

static void scatter_data_area(struct tcmu_dev *udev,
	struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg,
	unsigned int data_nents, struct iovec **iov,
	int *iov_cnt, bool copy_data)
{
	int i, dbi;
	int block_remaining = 0;
	void *from, *to = NULL;
	size_t copy_bytes, to_offset, offset;
	struct scatterlist *sg;
	struct page *page;

	for_each_sg(data_sg, sg, data_nents, i) {
		int sg_remaining = sg->length;
		from = kmap_atomic(sg_page(sg)) + sg->offset;
		while (sg_remaining > 0) {
			if (block_remaining == 0) {
				if (to)
					kunmap_atomic(to);

				block_remaining = DATA_BLOCK_SIZE;
				dbi = tcmu_cmd_get_dbi(tcmu_cmd);
				page = tcmu_get_block_page(udev, dbi);
				to = kmap_atomic(page);
			}

			/*
			 * Convert to virtual offset of the ring data area.
			 */
			to_offset = get_block_offset_user(udev, dbi,
					block_remaining);

			/*
			 * The following code will gather and map the blocks
			 * to the same iovec when the blocks are all next to
			 * each other.
			 */
			copy_bytes = min_t(size_t, sg_remaining,
					block_remaining);
			if (*iov_cnt != 0 &&
			    to_offset == iov_tail(*iov)) {
				/*
				 * Will append to the current iovec, because
				 * the current block page is next to the
				 * previous one.
				 */
				(*iov)->iov_len += copy_bytes;
			} else {
				/*
				 * Will allocate a new iovec because this is
				 * the first time here or the current block
				 * page is not next to the previous one.
				 */
				new_iov(iov, iov_cnt);
				(*iov)->iov_base = (void __user *)to_offset;
				(*iov)->iov_len = copy_bytes;
			}

			if (copy_data) {
				offset = DATA_BLOCK_SIZE - block_remaining;
				memcpy(to + offset,
				       from + sg->length - sg_remaining,
				       copy_bytes);
				tcmu_flush_dcache_range(to, copy_bytes);
			}

			sg_remaining -= copy_bytes;
			block_remaining -= copy_bytes;
		}
		kunmap_atomic(from - sg->offset);
	}

	if (to)
		kunmap_atomic(to);
}

static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
			     bool bidi)
{
	struct se_cmd *se_cmd = cmd->se_cmd;
	int i, dbi;
	int block_remaining = 0;
	void *from = NULL, *to;
	size_t copy_bytes, offset;
	struct scatterlist *sg, *data_sg;
	struct page *page;
	unsigned int data_nents;
	uint32_t count = 0;

	if (!bidi) {
		data_sg = se_cmd->t_data_sg;
		data_nents = se_cmd->t_data_nents;
	} else {

		/*
		 * For bidi case, the first count blocks are for Data-Out
		 * buffer blocks, and before gathering the Data-In buffer
		 * the Data-Out buffer blocks should be discarded.
		 */
		count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);

		data_sg = se_cmd->t_bidi_data_sg;
		data_nents = se_cmd->t_bidi_data_nents;
	}

	tcmu_cmd_set_dbi_cur(cmd, count);

	for_each_sg(data_sg, sg, data_nents, i) {
		int sg_remaining = sg->length;
		to = kmap_atomic(sg_page(sg)) + sg->offset;
		while (sg_remaining > 0) {
			if (block_remaining == 0) {
				if (from)
					kunmap_atomic(from);

				block_remaining = DATA_BLOCK_SIZE;
				dbi = tcmu_cmd_get_dbi(cmd);
				page = tcmu_get_block_page(udev, dbi);
				from = kmap_atomic(page);
			}
			copy_bytes = min_t(size_t, sg_remaining,
					block_remaining);
			offset = DATA_BLOCK_SIZE - block_remaining;
			tcmu_flush_dcache_range(from, copy_bytes);
			memcpy(to + sg->length - sg_remaining, from + offset,
					copy_bytes);

			sg_remaining -= copy_bytes;
			block_remaining -= copy_bytes;
		}
		kunmap_atomic(to - sg->offset);
	}
	if (from)
		kunmap_atomic(from);
}

static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh)
{
	return thresh - bitmap_weight(bitmap, thresh);
}

/*
 * We can't queue a command until we have space available on the cmd ring *and*
 * space available on the data area.
 *
 * Called with ring lock held.
 */
static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
		size_t cmd_size, size_t data_needed)
{
	struct tcmu_mailbox *mb = udev->mb_addr;
	uint32_t blocks_needed = (data_needed + DATA_BLOCK_SIZE - 1)
				/ DATA_BLOCK_SIZE;
	size_t space, cmd_needed;
	u32 cmd_head;

	tcmu_flush_dcache_range(mb, sizeof(*mb));

	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */

	/*
	 * If cmd end-of-ring space is too small then we need space for a NOP plus
	 * original cmd - cmds are internally contiguous.
	 */
	if (head_to_end(cmd_head, udev->cmdr_size) >= cmd_size)
		cmd_needed = cmd_size;
	else
		cmd_needed = cmd_size + head_to_end(cmd_head, udev->cmdr_size);

	space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
	if (space < cmd_needed) {
		pr_debug("no cmd space: %u %u %u\n", cmd_head,
		       udev->cmdr_last_cleaned, udev->cmdr_size);
		return false;
	}

	/* try to check and get the data blocks as needed */
	space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh);
	if ((space * DATA_BLOCK_SIZE) < data_needed) {
		unsigned long blocks_left =
				(udev->max_blocks - udev->dbi_thresh) + space;

		if (blocks_left < blocks_needed) {
			pr_debug("no data space: only %lu available, but ask for %zu\n",
					blocks_left * DATA_BLOCK_SIZE,
					data_needed);
			return false;
		}

		udev->dbi_thresh += blocks_needed;
		if (udev->dbi_thresh > udev->max_blocks)
			udev->dbi_thresh = udev->max_blocks;
	}

	return tcmu_get_empty_blocks(udev, cmd);
}
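/*
 * A ring entry consists of the fixed tcmu_cmd_entry header, a variable-length
 * iovec array, and then the CDB, whose length is rounded up to
 * TCMU_OP_ALIGN_SIZE; the two helpers below compute those sizes for
 * queue_cmd_ring().
 */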
static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
{
	return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]),
			sizeof(struct tcmu_cmd_entry));
}

static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
					   size_t base_command_size)
{
	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
	size_t command_size;

	command_size = base_command_size +
		round_up(scsi_command_size(se_cmd->t_task_cdb),
				TCMU_OP_ALIGN_SIZE);

	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));

	return command_size;
}

static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo,
				struct timer_list *timer)
{
	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
	int cmd_id;

	if (tcmu_cmd->cmd_id)
		goto setup_timer;

	cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 1, USHRT_MAX, GFP_NOWAIT);
	if (cmd_id < 0) {
		pr_err("tcmu: Could not allocate cmd id.\n");
		return cmd_id;
	}
	tcmu_cmd->cmd_id = cmd_id;

	pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id,
		 udev->name, tmo / MSEC_PER_SEC);

setup_timer:
	if (!tmo)
		return 0;

	tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo));
	mod_timer(timer, tcmu_cmd->deadline);
	return 0;
}

static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd)
{
	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
	unsigned int tmo;
	int ret;

	/*
	 * For backwards compat if qfull_time_out is not set use
	 * cmd_time_out and if that's not set use the default time out.
	 */
	if (!udev->qfull_time_out)
		return -ETIMEDOUT;
	else if (udev->qfull_time_out > 0)
		tmo = udev->qfull_time_out;
	else if (udev->cmd_time_out)
		tmo = udev->cmd_time_out;
	else
		tmo = TCMU_TIME_OUT;

	ret = tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer);
	if (ret)
		return ret;

	list_add_tail(&tcmu_cmd->cmdr_queue_entry, &udev->cmdr_queue);
	pr_debug("adding cmd %u on dev %s to ring space wait queue\n",
		 tcmu_cmd->cmd_id, udev->name);
	return 0;
}

/**
 * queue_cmd_ring - queue cmd to ring or internally
 * @tcmu_cmd: cmd to queue
 * @scsi_err: TCM error code if failure (-1) returned.
 *
 * Returns:
 * -1 we cannot queue internally or to the ring.
 *  0 success
 *  1 internally queued to wait for ring memory to free.
 */
static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err)
{
	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
	size_t base_command_size, command_size;
	struct tcmu_mailbox *mb;
	struct tcmu_cmd_entry *entry;
	struct iovec *iov;
	int iov_cnt, ret;
	uint32_t cmd_head;
	uint64_t cdb_off;
	bool copy_to_data_area;
	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);

	*scsi_err = TCM_NO_SENSE;

	if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags)) {
		*scsi_err = TCM_LUN_BUSY;
		return -1;
	}

	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
		*scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
		return -1;
	}

	/*
	 * Must be a certain minimum size for response sense info, but
	 * also may be larger if the iov array is large.
	 *
	 * We prepare as many iovs as possible for potential uses here,
	 * because it's expensive to tell how many regions are freed in
	 * the bitmap & global data pool, as the size calculated here
	 * will only be used to do the checks.
	 *
	 * The size will be recalculated later as actually needed to save
	 * cmd area memory.
	 */
	base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt);
	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);

	if (!list_empty(&udev->cmdr_queue))
		goto queue;

	mb = udev->mb_addr;
	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
	if ((command_size > (udev->cmdr_size / 2)) ||
	    data_length > udev->data_size) {
		pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
			"cmd ring/data area\n", command_size, data_length,
			udev->cmdr_size, udev->data_size);
		*scsi_err = TCM_INVALID_CDB_FIELD;
		return -1;
	}

	if (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) {
		/*
		 * Don't leave commands partially setup because the unmap
		 * thread might need the blocks to make forward progress.
		 */
		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur);
		tcmu_cmd_reset_dbi_cur(tcmu_cmd);
		goto queue;
	}

	/* Insert a PAD if end-of-ring space is too small */
	if (head_to_end(cmd_head, udev->cmdr_size) < command_size) {
		size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);

		entry = (void *) mb + CMDR_OFF + cmd_head;
		tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
		tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
		entry->hdr.cmd_id = 0; /* not used for PAD */
		entry->hdr.kflags = 0;
		entry->hdr.uflags = 0;
		tcmu_flush_dcache_range(entry, sizeof(*entry));

		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
		tcmu_flush_dcache_range(mb, sizeof(*mb));

		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
		WARN_ON(cmd_head != 0);
	}

	entry = (void *) mb + CMDR_OFF + cmd_head;
	memset(entry, 0, command_size);
	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);

	/* Handle allocating space from the data area */
	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
	iov = &entry->req.iov[0];
	iov_cnt = 0;
	copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
		|| se_cmd->se_cmd_flags & SCF_BIDI);
	scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg,
			  se_cmd->t_data_nents, &iov, &iov_cnt,
			  copy_to_data_area);
	entry->req.iov_cnt = iov_cnt;

	/* Handle BIDI commands */
	iov_cnt = 0;
	if (se_cmd->se_cmd_flags & SCF_BIDI) {
		iov++;
		scatter_data_area(udev, tcmu_cmd, se_cmd->t_bidi_data_sg,
				  se_cmd->t_bidi_data_nents, &iov, &iov_cnt,
				  false);
	}
	entry->req.iov_bidi_cnt = iov_cnt;

	ret = tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out,
				   &udev->cmd_timer);
	if (ret) {
		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
		mutex_unlock(&udev->cmdr_lock);

		*scsi_err = TCM_OUT_OF_RESOURCES;
		return -1;
	}
	entry->hdr.cmd_id = tcmu_cmd->cmd_id;

	/*
	 * Recalculate the command's base size and size according
	 * to the actual needs
	 */
	base_command_size = tcmu_cmd_get_base_cmd_size(entry->req.iov_cnt +
						       entry->req.iov_bidi_cnt);
	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);

	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);

	/* All offsets relative to mb_addr, not start of entry! */
	cdb_off = CMDR_OFF + cmd_head + base_command_size;
	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
	entry->req.cdb_off = cdb_off;
	tcmu_flush_dcache_range(entry, sizeof(*entry));

	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
	tcmu_flush_dcache_range(mb, sizeof(*mb));

	/* TODO: only if FLUSH and FUA? */
	uio_event_notify(&udev->uio_info);

	return 0;

queue:
	if (add_to_cmdr_queue(tcmu_cmd)) {
		*scsi_err = TCM_OUT_OF_RESOURCES;
		return -1;
	}

	return 1;
}

static sense_reason_t
tcmu_queue_cmd(struct se_cmd *se_cmd)
{
	struct se_device *se_dev = se_cmd->se_dev;
	struct tcmu_dev *udev = TCMU_DEV(se_dev);
	struct tcmu_cmd *tcmu_cmd;
	sense_reason_t scsi_ret;
	int ret;

	tcmu_cmd = tcmu_alloc_cmd(se_cmd);
	if (!tcmu_cmd)
		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

	mutex_lock(&udev->cmdr_lock);
	ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
	mutex_unlock(&udev->cmdr_lock);
	if (ret < 0)
		tcmu_free_cmd(tcmu_cmd);
	return scsi_ret;
}

static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
{
	struct se_cmd *se_cmd = cmd->se_cmd;
	struct tcmu_dev *udev = cmd->tcmu_dev;

	/*
	 * cmd has been completed already from timeout, just reclaim
	 * data area space and free cmd
	 */
	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
		goto out;

	tcmu_cmd_reset_dbi_cur(cmd);

	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
			cmd->se_cmd);
		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
		transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer);
	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
		/* Get Data-In buffer before clean up */
		gather_data_area(udev, cmd, true);
	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
		gather_data_area(udev, cmd, false);
	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
		/* TODO: */
	} else if (se_cmd->data_direction != DMA_NONE) {
		pr_warn("TCMU: data direction was %d!\n",
			se_cmd->data_direction);
	}

	target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);

out:
	cmd->se_cmd = NULL;
	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
	tcmu_free_cmd(cmd);
}

static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
{
	struct tcmu_mailbox *mb;
	int handled = 0;

	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
		pr_err("ring broken, not handling completions\n");
		return 0;
	}

	mb = udev->mb_addr;
	tcmu_flush_dcache_range(mb, sizeof(*mb));

	while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) {

		struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned;
		struct tcmu_cmd *cmd;

		tcmu_flush_dcache_range(entry, sizeof(*entry));

		if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) {
			UPDATE_HEAD(udev->cmdr_last_cleaned,
				    tcmu_hdr_get_len(entry->hdr.len_op),
				    udev->cmdr_size);
			continue;
		}
		WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);

		cmd = idr_remove(&udev->commands, entry->hdr.cmd_id);
		if (!cmd) {
			pr_err("cmd_id %u not found, ring is broken\n",
			       entry->hdr.cmd_id);
			set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
			break;
		}

		tcmu_handle_completion(cmd, entry);

		UPDATE_HEAD(udev->cmdr_last_cleaned,
			    tcmu_hdr_get_len(entry->hdr.len_op),
			    udev->cmdr_size);

		handled++;
	}

	if (mb->cmd_tail == mb->cmd_head) {
		/* no more pending commands */
		del_timer(&udev->cmd_timer);

		if (list_empty(&udev->cmdr_queue)) {
			/*
			 * no more pending or waiting commands so try to
			 * reclaim blocks if needed.
			 */
			if (atomic_read(&global_db_count) >
			    tcmu_global_max_blocks)
				schedule_delayed_work(&tcmu_unmap_work, 0);
		}
	}

	return handled;
}
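/*
 * idr_for_each() callback for command timeout handling: a command still
 * sitting on cmdr_queue (waiting for ring space) is removed and failed back
 * with TASK SET FULL, while one already handed to userspace is only marked
 * expired and completed with CHECK CONDITION, so its data blocks stay
 * reserved until userspace eventually responds.
 */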
static int tcmu_check_expired_cmd(int id, void *p, void *data)
{
	struct tcmu_cmd *cmd = p;
	struct tcmu_dev *udev = cmd->tcmu_dev;
	u8 scsi_status;
	struct se_cmd *se_cmd;
	bool is_running;

	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
		return 0;

	if (!time_after(jiffies, cmd->deadline))
		return 0;

	is_running = list_empty(&cmd->cmdr_queue_entry);
	se_cmd = cmd->se_cmd;

	if (is_running) {
		/*
		 * If cmd_time_out is disabled but qfull is set deadline
		 * will only reflect the qfull timeout. Ignore it.
		 */
		if (!udev->cmd_time_out)
			return 0;

		set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
		/*
		 * target_complete_cmd will translate this to LUN COMM FAILURE
		 */
		scsi_status = SAM_STAT_CHECK_CONDITION;
	} else {
		list_del_init(&cmd->cmdr_queue_entry);

		idr_remove(&udev->commands, id);
		tcmu_free_cmd(cmd);
		scsi_status = SAM_STAT_TASK_SET_FULL;
	}

	pr_debug("Timing out cmd %u on dev %s that is %s.\n",
		 id, udev->name, is_running ? "inflight" : "queued");

	target_complete_cmd(se_cmd, scsi_status);
	return 0;
}

static void tcmu_device_timedout(struct tcmu_dev *udev)
{
	spin_lock(&timed_out_udevs_lock);
	if (list_empty(&udev->timedout_entry))
		list_add_tail(&udev->timedout_entry, &timed_out_udevs);
	spin_unlock(&timed_out_udevs_lock);

	schedule_delayed_work(&tcmu_unmap_work, 0);
}

static void tcmu_cmd_timedout(struct timer_list *t)
{
	struct tcmu_dev *udev = from_timer(udev, t, cmd_timer);

	pr_debug("%s cmd timeout has expired\n", udev->name);
	tcmu_device_timedout(udev);
}

static void tcmu_qfull_timedout(struct timer_list *t)
{
	struct tcmu_dev *udev = from_timer(udev, t, qfull_timer);

	pr_debug("%s qfull timeout has expired\n", udev->name);
	tcmu_device_timedout(udev);
}

static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
{
	struct tcmu_hba *tcmu_hba;

	tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL);
	if (!tcmu_hba)
		return -ENOMEM;

	tcmu_hba->host_id = host_id;
	hba->hba_ptr = tcmu_hba;

	return 0;
}

static void tcmu_detach_hba(struct se_hba *hba)
{
	kfree(hba->hba_ptr);
	hba->hba_ptr = NULL;
}

static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
{
	struct tcmu_dev *udev;

	udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
	if (!udev)
		return NULL;
	kref_init(&udev->kref);

	udev->name = kstrdup(name, GFP_KERNEL);
	if (!udev->name) {
		kfree(udev);
		return NULL;
	}

	udev->hba = hba;
	udev->cmd_time_out = TCMU_TIME_OUT;
	udev->qfull_time_out = -1;

	udev->max_blocks = DATA_BLOCK_BITS_DEF;
	mutex_init(&udev->cmdr_lock);

	INIT_LIST_HEAD(&udev->timedout_entry);
	INIT_LIST_HEAD(&udev->cmdr_queue);
	idr_init(&udev->commands);

	timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0);
	timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0);

	init_waitqueue_head(&udev->nl_cmd_wq);
	spin_lock_init(&udev->nl_cmd_lock);

	INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL);

	return &udev->se_dev;
}

static bool run_cmdr_queue(struct tcmu_dev *udev, bool fail)
{
	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
	LIST_HEAD(cmds);
	bool drained = true;
	sense_reason_t scsi_ret;
	int ret;

	if (list_empty(&udev->cmdr_queue))
		return true;

	pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail);

	list_splice_init(&udev->cmdr_queue, &cmds);

	list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, cmdr_queue_entry) {
		list_del_init(&tcmu_cmd->cmdr_queue_entry);

		pr_debug("removing cmd %u on dev %s from queue\n",
			 tcmu_cmd->cmd_id, udev->name);

		if (fail) {
			idr_remove(&udev->commands, tcmu_cmd->cmd_id);
			/*
			 * We were not able to even start the command, so
			 * fail with busy to allow a retry in case runner
			 * was only temporarily down. If the device is being
			 * removed then LIO core will do the right thing and
			 * fail the retry.
			 */
			target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY);
			tcmu_free_cmd(tcmu_cmd);
			continue;
		}

		ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
		if (ret < 0) {
			pr_debug("cmd %u on dev %s failed with %u\n",
				 tcmu_cmd->cmd_id, udev->name, scsi_ret);

			idr_remove(&udev->commands, tcmu_cmd->cmd_id);
			/*
			 * Ignore scsi_ret for now. target_complete_cmd
			 * drops it.
			 */
			target_complete_cmd(tcmu_cmd->se_cmd,
					    SAM_STAT_CHECK_CONDITION);
			tcmu_free_cmd(tcmu_cmd);
		} else if (ret > 0) {
			pr_debug("ran out of space during cmdr queue run\n");
			/*
			 * cmd was requeued, so just put all cmds back in
			 * the queue
			 */
			list_splice_tail(&cmds, &udev->cmdr_queue);
			drained = false;
			goto done;
		}
	}
	if (list_empty(&udev->cmdr_queue))
		del_timer(&udev->qfull_timer);
done:
	return drained;
}

static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
{
	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);

	mutex_lock(&udev->cmdr_lock);
	tcmu_handle_completions(udev);
	run_cmdr_queue(udev, false);
	mutex_unlock(&udev->cmdr_lock);

	return 0;
}
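/*
 * tcmu_irqcontrol() above is wired up as uio_info->irqcontrol in
 * tcmu_configure_device(), so it runs when userspace writes a 4-byte value
 * to the uio character device; that write is effectively the "kick" telling
 * the kernel to reap completions and retry commands waiting for ring space.
 */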
/*
 * mmap code from uio.c. Copied here because we want to hook mmap()
 * and this stuff must come along.
 */
static int tcmu_find_mem_index(struct vm_area_struct *vma)
{
	struct tcmu_dev *udev = vma->vm_private_data;
	struct uio_info *info = &udev->uio_info;

	if (vma->vm_pgoff < MAX_UIO_MAPS) {
		if (info->mem[vma->vm_pgoff].size == 0)
			return -1;
		return (int)vma->vm_pgoff;
	}
	return -1;
}

static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
{
	struct page *page;

	mutex_lock(&udev->cmdr_lock);
	page = tcmu_get_block_page(udev, dbi);
	if (likely(page)) {
		mutex_unlock(&udev->cmdr_lock);
		return page;
	}

	/*
	 * Userspace messed up and passed in an address not in the
	 * data iov passed to it.
	 */
	pr_err("Invalid addr to data block mapping (dbi %u) on device %s\n",
	       dbi, udev->name);
	page = NULL;
	mutex_unlock(&udev->cmdr_lock);

	return page;
}

static int tcmu_vma_fault(struct vm_fault *vmf)
{
	struct tcmu_dev *udev = vmf->vma->vm_private_data;
	struct uio_info *info = &udev->uio_info;
	struct page *page;
	unsigned long offset;
	void *addr;

	int mi = tcmu_find_mem_index(vmf->vma);
	if (mi < 0)
		return VM_FAULT_SIGBUS;

	/*
	 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
	 * to use mem[N].
	 */
	offset = (vmf->pgoff - mi) << PAGE_SHIFT;

	if (offset < udev->data_off) {
		/* For the vmalloc()ed cmd area pages */
		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
		page = vmalloc_to_page(addr);
	} else {
		uint32_t dbi;

		/* For the dynamically growing data area pages */
		dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE;
		page = tcmu_try_get_block_page(udev, dbi);
		if (!page)
			return VM_FAULT_SIGBUS;
	}

	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct tcmu_vm_ops = {
	.fault = tcmu_vma_fault,
};

static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
{
	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);

	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &tcmu_vm_ops;

	vma->vm_private_data = udev;

	/* Ensure the mmap is exactly the right size */
	if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT))
		return -EINVAL;

	return 0;
}

static int tcmu_open(struct uio_info *info, struct inode *inode)
{
	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);

	/* O_EXCL not supported for char devs, so fake it? */
	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
		return -EBUSY;

	udev->inode = inode;
	kref_get(&udev->kref);

	pr_debug("open\n");

	return 0;
}

static void tcmu_dev_call_rcu(struct rcu_head *p)
{
	struct se_device *dev = container_of(p, struct se_device, rcu_head);
	struct tcmu_dev *udev = TCMU_DEV(dev);

	kfree(udev->uio_info.name);
	kfree(udev->name);
	kfree(udev);
}

static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
{
	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
		kmem_cache_free(tcmu_cmd_cache, cmd);
		return 0;
	}
	return -EINVAL;
}

static void tcmu_blocks_release(struct radix_tree_root *blocks,
				int start, int end)
{
	int i;
	struct page *page;

	for (i = start; i < end; i++) {
		page = radix_tree_delete(blocks, i);
		if (page) {
			__free_page(page);
			atomic_dec(&global_db_count);
		}
	}
}

static void tcmu_dev_kref_release(struct kref *kref)
{
	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
	struct se_device *dev = &udev->se_dev;
	struct tcmu_cmd *cmd;
	bool all_expired = true;
	int i;

	vfree(udev->mb_addr);
	udev->mb_addr = NULL;

	spin_lock_bh(&timed_out_udevs_lock);
	if (!list_empty(&udev->timedout_entry))
		list_del(&udev->timedout_entry);
	spin_unlock_bh(&timed_out_udevs_lock);

	/* Upper layer should drain all requests before calling this */
	mutex_lock(&udev->cmdr_lock);
	idr_for_each_entry(&udev->commands, cmd, i) {
		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
			all_expired = false;
	}
	idr_destroy(&udev->commands);
	WARN_ON(!all_expired);

	tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
	kfree(udev->data_bitmap);
	mutex_unlock(&udev->cmdr_lock);

	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
}

static int tcmu_release(struct uio_info *info, struct inode *inode)
{
	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);

	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);

	pr_debug("close\n");
	/* release ref from open */
	kref_put(&udev->kref, tcmu_dev_kref_release);
	return 0;
}
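/*
 * Netlink command/reply handshake: when the userspace daemon has advertised
 * TCMU_ATTR_SUPP_KERN_CMD_REPLY (and the device has not opted out via
 * nl_reply_supported), tcmu_netlink_event() records the outstanding command
 * in curr_nl_cmd and then blocks in tcmu_wait_genl_cmd_reply() until the
 * matching *_DONE message is delivered through tcmu_genl_cmd_done().
 */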
static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
{
	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;

	if (!tcmu_kern_cmd_reply_supported)
		return;

	if (udev->nl_reply_supported <= 0)
		return;

relock:
	spin_lock(&udev->nl_cmd_lock);

	if (nl_cmd->cmd != TCMU_CMD_UNSPEC) {
		spin_unlock(&udev->nl_cmd_lock);
		pr_debug("sleeping for open nl cmd\n");
		wait_event(udev->nl_cmd_wq, (nl_cmd->cmd == TCMU_CMD_UNSPEC));
		goto relock;
	}

	memset(nl_cmd, 0, sizeof(*nl_cmd));
	nl_cmd->cmd = cmd;
	init_completion(&nl_cmd->complete);

	spin_unlock(&udev->nl_cmd_lock);
}

static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
{
	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
	int ret;
	DEFINE_WAIT(__wait);

	if (!tcmu_kern_cmd_reply_supported)
		return 0;

	if (udev->nl_reply_supported <= 0)
		return 0;

	pr_debug("sleeping for nl reply\n");
	wait_for_completion(&nl_cmd->complete);

	spin_lock(&udev->nl_cmd_lock);
	nl_cmd->cmd = TCMU_CMD_UNSPEC;
	ret = nl_cmd->status;
	nl_cmd->status = 0;
	spin_unlock(&udev->nl_cmd_lock);

	wake_up_all(&udev->nl_cmd_wq);

	return ret;
}

static int tcmu_netlink_event(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd,
			      int reconfig_attr, const void *reconfig_data)
{
	struct sk_buff *skb;
	void *msg_header;
	int ret = -ENOMEM;

	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return ret;

	msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd);
	if (!msg_header)
		goto free_skb;

	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, udev->uio_info.name);
	if (ret < 0)
		goto free_skb;

	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, udev->uio_info.uio_dev->minor);
	if (ret < 0)
		goto free_skb;

	ret = nla_put_u32(skb, TCMU_ATTR_DEVICE_ID, udev->se_dev.dev_index);
	if (ret < 0)
		goto free_skb;

	if (cmd == TCMU_CMD_RECONFIG_DEVICE) {
		switch (reconfig_attr) {
		case TCMU_ATTR_DEV_CFG:
			ret = nla_put_string(skb, reconfig_attr, reconfig_data);
			break;
		case TCMU_ATTR_DEV_SIZE:
			ret = nla_put_u64_64bit(skb, reconfig_attr,
						*((u64 *)reconfig_data),
						TCMU_ATTR_PAD);
			break;
		case TCMU_ATTR_WRITECACHE:
			ret = nla_put_u8(skb, reconfig_attr,
					 *((u8 *)reconfig_data));
			break;
		default:
			BUG();
		}

		if (ret < 0)
			goto free_skb;
	}

	genlmsg_end(skb, msg_header);

	tcmu_init_genl_cmd_reply(udev, cmd);

	ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
				TCMU_MCGRP_CONFIG, GFP_KERNEL);
	/* We don't care if no one is listening */
	if (ret == -ESRCH)
		ret = 0;
	if (!ret)
		ret = tcmu_wait_genl_cmd_reply(udev);

	return ret;
free_skb:
	nlmsg_free(skb);
	return ret;
}

static int tcmu_update_uio_info(struct tcmu_dev *udev)
{
	struct tcmu_hba *hba = udev->hba->hba_ptr;
	struct uio_info *info;
	size_t size, used;
	char *str;

	info = &udev->uio_info;
	size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
			udev->dev_config);
	size += 1; /* for \0 */
	str = kmalloc(size, GFP_KERNEL);
	if (!str)
		return -ENOMEM;

	used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
	if (udev->dev_config[0])
		snprintf(str + used, size - used, "/%s", udev->dev_config);

	/* If the old string exists, free it */
	kfree(info->name);
	info->name = str;

	return 0;
}

static int tcmu_configure_device(struct se_device *dev)
{
	struct tcmu_dev *udev = TCMU_DEV(dev);
	struct uio_info *info;
	struct tcmu_mailbox *mb;
	int ret = 0;

	ret = tcmu_update_uio_info(udev);
	if (ret)
		return ret;

	info = &udev->uio_info;

	udev->data_bitmap = kzalloc(BITS_TO_LONGS(udev->max_blocks) *
				    sizeof(unsigned long), GFP_KERNEL);
	if (!udev->data_bitmap) {
		ret = -ENOMEM;
		goto err_bitmap_alloc;
	}

	udev->mb_addr = vzalloc(CMDR_SIZE);
	if (!udev->mb_addr) {
		ret = -ENOMEM;
		goto err_vzalloc;
	}

	/* mailbox fits in first part of CMDR space */
	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
	udev->data_off = CMDR_SIZE;
	udev->data_size = udev->max_blocks * DATA_BLOCK_SIZE;
	udev->dbi_thresh = 0; /* Default in Idle state */

	/* Initialise the mailbox of the ring buffer */
	mb = udev->mb_addr;
	mb->version = TCMU_MAILBOX_VERSION;
	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC;
	mb->cmdr_off = CMDR_OFF;
	mb->cmdr_size = udev->cmdr_size;

	WARN_ON(!PAGE_ALIGNED(udev->data_off));
	WARN_ON(udev->data_size % PAGE_SIZE);
	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);

	info->version = __stringify(TCMU_MAILBOX_VERSION);

	info->mem[0].name = "tcm-user command & data buffer";
	info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
	info->mem[0].size = udev->ring_size = udev->data_size + CMDR_SIZE;
	info->mem[0].memtype = UIO_MEM_NONE;

	info->irqcontrol = tcmu_irqcontrol;
	info->irq = UIO_IRQ_CUSTOM;

	info->mmap = tcmu_mmap;
	info->open = tcmu_open;
	info->release = tcmu_release;

	ret = uio_register_device(tcmu_root_device, info);
	if (ret)
		goto err_register;

	/* User can set hw_block_size before enabling the device */
	if (dev->dev_attrib.hw_block_size == 0)
		dev->dev_attrib.hw_block_size = 512;
	/* Other attributes can be configured in userspace */
	if (!dev->dev_attrib.hw_max_sectors)
		dev->dev_attrib.hw_max_sectors = 128;
	if (!dev->dev_attrib.emulate_write_cache)
		dev->dev_attrib.emulate_write_cache = 0;
	dev->dev_attrib.hw_queue_depth = 128;

	/* If user didn't explicitly disable netlink reply support, use
	 * module scope setting.
	 */
	if (udev->nl_reply_supported >= 0)
		udev->nl_reply_supported = tcmu_kern_cmd_reply_supported;

	/*
	 * Get a ref in case userspace does a close on the uio device before
	 * LIO has initiated tcmu_free_device.
	 */
	kref_get(&udev->kref);

	ret = tcmu_netlink_event(udev, TCMU_CMD_ADDED_DEVICE, 0, NULL);
	if (ret)
		goto err_netlink;

	mutex_lock(&root_udev_mutex);
	list_add(&udev->node, &root_udev);
	mutex_unlock(&root_udev_mutex);

	return 0;

err_netlink:
	kref_put(&udev->kref, tcmu_dev_kref_release);
	uio_unregister_device(&udev->uio_info);
err_register:
	vfree(udev->mb_addr);
	udev->mb_addr = NULL;
err_vzalloc:
	kfree(udev->data_bitmap);
	udev->data_bitmap = NULL;
err_bitmap_alloc:
	kfree(info->name);
	info->name = NULL;

	return ret;
}

static bool tcmu_dev_configured(struct tcmu_dev *udev)
{
	return udev->uio_info.uio_dev ? true : false;
}
static void tcmu_free_device(struct se_device *dev)
{
	struct tcmu_dev *udev = TCMU_DEV(dev);

	/* release ref from init */
	kref_put(&udev->kref, tcmu_dev_kref_release);
}

static void tcmu_destroy_device(struct se_device *dev)
{
	struct tcmu_dev *udev = TCMU_DEV(dev);

	del_timer_sync(&udev->cmd_timer);
	del_timer_sync(&udev->qfull_timer);

	mutex_lock(&root_udev_mutex);
	list_del(&udev->node);
	mutex_unlock(&root_udev_mutex);

	tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);

	uio_unregister_device(&udev->uio_info);

	/* release ref from configure */
	kref_put(&udev->kref, tcmu_dev_kref_release);
}

static void tcmu_unblock_dev(struct tcmu_dev *udev)
{
	mutex_lock(&udev->cmdr_lock);
	clear_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags);
	mutex_unlock(&udev->cmdr_lock);
}

static void tcmu_block_dev(struct tcmu_dev *udev)
{
	mutex_lock(&udev->cmdr_lock);

	if (test_and_set_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags))
		goto unlock;

	/* complete IO that has executed successfully */
	tcmu_handle_completions(udev);
	/* fail IO waiting to be queued */
	run_cmdr_queue(udev, true);

unlock:
	mutex_unlock(&udev->cmdr_lock);
}

static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level)
{
	struct tcmu_mailbox *mb;
	struct tcmu_cmd *cmd;
	int i;

	mutex_lock(&udev->cmdr_lock);

	idr_for_each_entry(&udev->commands, cmd, i) {
		if (!list_empty(&cmd->cmdr_queue_entry))
			continue;

		pr_debug("removing cmd %u on dev %s from ring (is expired %d)\n",
			  cmd->cmd_id, udev->name,
			  test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags));

		idr_remove(&udev->commands, i);
		if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
			if (err_level == 1) {
				/*
				 * Userspace was not able to start the
				 * command or it is retryable.
				 */
				target_complete_cmd(cmd->se_cmd, SAM_STAT_BUSY);
			} else {
				/* hard failure */
				target_complete_cmd(cmd->se_cmd,
						    SAM_STAT_CHECK_CONDITION);
			}
		}
		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
		tcmu_free_cmd(cmd);
	}

	mb = udev->mb_addr;
	tcmu_flush_dcache_range(mb, sizeof(*mb));
	pr_debug("mb last %u head %u tail %u\n", udev->cmdr_last_cleaned,
		 mb->cmd_tail, mb->cmd_head);

	udev->cmdr_last_cleaned = 0;
	mb->cmd_tail = 0;
	mb->cmd_head = 0;
	tcmu_flush_dcache_range(mb, sizeof(*mb));

	del_timer(&udev->cmd_timer);

	mutex_unlock(&udev->cmdr_lock);
}

enum {
	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
	Opt_nl_reply_supported, Opt_max_data_area_mb, Opt_err,
};

static match_table_t tokens = {
	{Opt_dev_config, "dev_config=%s"},
	{Opt_dev_size, "dev_size=%u"},
	{Opt_hw_block_size, "hw_block_size=%u"},
	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
	{Opt_nl_reply_supported, "nl_reply_supported=%d"},
	{Opt_max_data_area_mb, "max_data_area_mb=%u"},
	{Opt_err, NULL}
};

static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
{
	unsigned long tmp_ul;
	char *arg_p;
	int ret;

	arg_p = match_strdup(arg);
	if (!arg_p)
		return -ENOMEM;

	ret = kstrtoul(arg_p, 0, &tmp_ul);
	kfree(arg_p);
	if (ret < 0) {
		pr_err("kstrtoul() failed for dev attrib\n");
		return ret;
	}
	if (!tmp_ul) {
		pr_err("dev attrib must be nonzero\n");
		return -EINVAL;
	}
	*dev_attrib = tmp_ul;
	return 0;
}
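/*
 * The configfs control string parsed below is a comma- or newline-separated
 * list of the tokens above, e.g. (illustrative values only):
 *
 *   dev_config=myhandler/dev1,dev_size=1073741824,max_data_area_mb=128
 */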
static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
{
	struct tcmu_dev *udev = TCMU_DEV(dev);
	ssize_t bl = 0;

	bl = sprintf(b + bl, "Config: %s ",
		     udev->dev_config[0] ? udev->dev_config : "NULL");
	bl += sprintf(b + bl, "Size: %zu ", udev->dev_size);
	bl += sprintf(b + bl, "MaxDataAreaMB: %u\n",
		      TCMU_BLOCKS_TO_MBS(udev->max_blocks));

	return bl;
}

static sector_t tcmu_get_blocks(struct se_device *dev)
{
	struct tcmu_dev *udev = TCMU_DEV(dev);

	return div_u64(udev->dev_size - dev->dev_attrib.block_size,
		       dev->dev_attrib.block_size);
}
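
/*
 * Worked example, illustrative only: the value returned by the backend's
 * get_blocks() callback is used by the core's READ CAPACITY emulation as
 * the last addressable LBA rather than the raw block count, which is why
 * one block size is subtracted first. For dev_size = 1 GiB and
 * block_size = 512:
 *
 *   (1073741824 - 512) / 512 = 2097151
 *
 * i.e. 2097152 blocks addressed as LBAs 0..2097151.
 */
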
static sense_reason_t
tcmu_parse_cdb(struct se_cmd *cmd)
{
	return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
}

static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
					struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
}

static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
				       size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
					struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = container_of(da->da_dev,
					struct tcmu_dev, se_dev);
	u32 val;
	int ret;

	if (da->da_dev->export_count) {
		pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
		return -EINVAL;
	}

	ret = kstrtou32(page, 0, &val);
	if (ret < 0)
		return ret;

	udev->cmd_time_out = val * MSEC_PER_SEC;
	return count;
}
CONFIGFS_ATTR(tcmu_, cmd_time_out);

static ssize_t tcmu_qfull_time_out_show(struct config_item *item, char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%ld\n", udev->qfull_time_out <= 0 ?
			udev->qfull_time_out :
			udev->qfull_time_out / MSEC_PER_SEC);
}

static ssize_t tcmu_qfull_time_out_store(struct config_item *item,
					 const char *page, size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
	s32 val;
	int ret;

	ret = kstrtos32(page, 0, &val);
	if (ret < 0)
		return ret;

	if (val >= 0) {
		udev->qfull_time_out = val * MSEC_PER_SEC;
	} else {
		pr_err("Invalid qfull timeout value %d\n", val);
		return -EINVAL;
	}
	return count;
}
CONFIGFS_ATTR(tcmu_, qfull_time_out);

static ssize_t tcmu_max_data_area_mb_show(struct config_item *item, char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%u\n",
			TCMU_BLOCKS_TO_MBS(udev->max_blocks));
}
CONFIGFS_ATTR_RO(tcmu_, max_data_area_mb);

static ssize_t tcmu_dev_config_show(struct config_item *item, char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%s\n", udev->dev_config);
}

static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
				     size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
	int ret, len;

	len = strlen(page);
	if (!len || len > TCMU_CONFIG_LEN - 1)
		return -EINVAL;

	/* Check if device has been configured before */
	if (tcmu_dev_configured(udev)) {
		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
					 TCMU_ATTR_DEV_CFG, page);
		if (ret) {
			pr_err("Unable to reconfigure device\n");
			return ret;
		}
		strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);

		ret = tcmu_update_uio_info(udev);
		if (ret)
			return ret;
		return count;
	}
	strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);

	return count;
}
CONFIGFS_ATTR(tcmu_, dev_config);

static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%zu\n", udev->dev_size);
}

static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
				   size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
	u64 val;
	int ret;

	ret = kstrtou64(page, 0, &val);
	if (ret < 0)
		return ret;

	/* Check if device has been configured before */
	if (tcmu_dev_configured(udev)) {
		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
					 TCMU_ATTR_DEV_SIZE, &val);
		if (ret) {
			pr_err("Unable to reconfigure device\n");
			return ret;
		}
	}
	udev->dev_size = val;
	return count;
}
CONFIGFS_ATTR(tcmu_, dev_size);
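
/*
 * Illustrative sketch, not from the original source: once a device is
 * configured, writes to dev_config, dev_size and emulate_write_cache are
 * forwarded to the userspace handler as a TCMU_CMD_RECONFIG_DEVICE
 * netlink event before the kernel-side value is updated. The paths below
 * assume example HBA/device names:
 *
 *   echo 2147483648 > /sys/kernel/config/target/core/user_1/test/attrib/dev_size
 *   echo 60         > /sys/kernel/config/target/core/user_1/test/attrib/cmd_time_out
 *
 * cmd_time_out and qfull_time_out are written in seconds and stored
 * internally in milliseconds (multiplied by MSEC_PER_SEC).
 */
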
static ssize_t tcmu_nl_reply_supported_show(struct config_item *item,
					    char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);

	return snprintf(page, PAGE_SIZE, "%d\n", udev->nl_reply_supported);
}

static ssize_t tcmu_nl_reply_supported_store(struct config_item *item,
					     const char *page, size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
						struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
	s8 val;
	int ret;

	ret = kstrtos8(page, 0, &val);
	if (ret < 0)
		return ret;

	udev->nl_reply_supported = val;
	return count;
}
CONFIGFS_ATTR(tcmu_, nl_reply_supported);

static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
					     char *page)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
					struct se_dev_attrib, da_group);

	return snprintf(page, PAGE_SIZE, "%i\n", da->emulate_write_cache);
}

static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
					      const char *page, size_t count)
{
	struct se_dev_attrib *da = container_of(to_config_group(item),
					struct se_dev_attrib, da_group);
	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
	u8 val;
	int ret;

	ret = kstrtou8(page, 0, &val);
	if (ret < 0)
		return ret;

	/* Check if device has been configured before */
	if (tcmu_dev_configured(udev)) {
		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
					 TCMU_ATTR_WRITECACHE, &val);
		if (ret) {
			pr_err("Unable to reconfigure device\n");
			return ret;
		}
	}

	da->emulate_write_cache = val;
	return count;
}
CONFIGFS_ATTR(tcmu_, emulate_write_cache);

static ssize_t tcmu_block_dev_show(struct config_item *item, char *page)
{
	struct se_device *se_dev = container_of(to_config_group(item),
						struct se_device,
						dev_action_group);
	struct tcmu_dev *udev = TCMU_DEV(se_dev);

	if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags))
		return snprintf(page, PAGE_SIZE, "%s\n", "blocked");
	else
		return snprintf(page, PAGE_SIZE, "%s\n", "unblocked");
}

static ssize_t tcmu_block_dev_store(struct config_item *item, const char *page,
				    size_t count)
{
	struct se_device *se_dev = container_of(to_config_group(item),
						struct se_device,
						dev_action_group);
	struct tcmu_dev *udev = TCMU_DEV(se_dev);
	u8 val;
	int ret;

	ret = kstrtou8(page, 0, &val);
	if (ret < 0)
		return ret;

	if (val > 1) {
		pr_err("Invalid block value %d\n", val);
		return -EINVAL;
	}

	if (!val)
		tcmu_unblock_dev(udev);
	else
		tcmu_block_dev(udev);
	return count;
}
CONFIGFS_ATTR(tcmu_, block_dev);

static ssize_t tcmu_reset_ring_store(struct config_item *item, const char *page,
				     size_t count)
{
	struct se_device *se_dev = container_of(to_config_group(item),
						struct se_device,
						dev_action_group);
	struct tcmu_dev *udev = TCMU_DEV(se_dev);
	u8 val;
	int ret;

	ret = kstrtou8(page, 0, &val);
	if (ret < 0)
		return ret;

	if (val != 1 && val != 2) {
		pr_err("Invalid reset ring value %d\n", val);
		return -EINVAL;
	}

	tcmu_reset_ring(udev, val);
	return count;
}
CONFIGFS_ATTR_WO(tcmu_, reset_ring);
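
/*
 * Illustrative usage, not from the original source: block_dev and
 * reset_ring live in the device's action group and are typically used
 * together to recover from a crashed userspace handler. Paths assume
 * example HBA/device names:
 *
 *   echo 1 > /sys/kernel/config/target/core/user_1/test/action/block_dev
 *   echo 2 > /sys/kernel/config/target/core/user_1/test/action/reset_ring
 *   echo 0 > /sys/kernel/config/target/core/user_1/test/action/block_dev
 *
 * block_dev accepts 0 (unblock) or 1 (block); reset_ring accepts 1
 * (complete outstanding commands with BUSY) or 2 (CHECK CONDITION).
 */
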
static struct configfs_attribute *tcmu_attrib_attrs[] = {
	&tcmu_attr_cmd_time_out,
	&tcmu_attr_qfull_time_out,
	&tcmu_attr_max_data_area_mb,
	&tcmu_attr_dev_config,
	&tcmu_attr_dev_size,
	&tcmu_attr_emulate_write_cache,
	&tcmu_attr_nl_reply_supported,
	NULL,
};

static struct configfs_attribute **tcmu_attrs;

static struct configfs_attribute *tcmu_action_attrs[] = {
	&tcmu_attr_block_dev,
	&tcmu_attr_reset_ring,
	NULL,
};

static struct target_backend_ops tcmu_ops = {
	.name			= "user",
	.owner			= THIS_MODULE,
	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH,
	.attach_hba		= tcmu_attach_hba,
	.detach_hba		= tcmu_detach_hba,
	.alloc_device		= tcmu_alloc_device,
	.configure_device	= tcmu_configure_device,
	.destroy_device		= tcmu_destroy_device,
	.free_device		= tcmu_free_device,
	.parse_cdb		= tcmu_parse_cdb,
	.set_configfs_dev_params = tcmu_set_configfs_dev_params,
	.show_configfs_dev_params = tcmu_show_configfs_dev_params,
	.get_device_type	= sbc_get_device_type,
	.get_blocks		= tcmu_get_blocks,
	.tb_dev_action_attrs	= tcmu_action_attrs,
};
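
/*
 * Illustrative sketch, not from the original source: registering tcmu_ops
 * with .name = "user" is what makes this backend appear in configfs as
 * user_<hba_index>, so a device created through it ends up with roughly
 * this layout (names are examples and details vary by kernel version):
 *
 *   /sys/kernel/config/target/core/user_1/test/
 *       control     <- parsed by tcmu_set_configfs_dev_params()
 *       info        <- includes tcmu_show_configfs_dev_params() output
 *       attrib/     <- tcmu_attrs (passthrough attrs + tcmu_attrib_attrs)
 *       action/     <- tcmu_action_attrs (block_dev, reset_ring)
 */
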
static void find_free_blocks(void)
{
	struct tcmu_dev *udev;
	loff_t off;
	u32 start, end, block, total_freed = 0;

	if (atomic_read(&global_db_count) <= tcmu_global_max_blocks)
		return;

	mutex_lock(&root_udev_mutex);
	list_for_each_entry(udev, &root_udev, node) {
		mutex_lock(&udev->cmdr_lock);

		/* Try to complete the finished commands first */
		tcmu_handle_completions(udev);

		/* Skip devices that are currently idle */
		if (!udev->dbi_thresh) {
			mutex_unlock(&udev->cmdr_lock);
			continue;
		}

		end = udev->dbi_max + 1;
		block = find_last_bit(udev->data_bitmap, end);
		if (block == udev->dbi_max) {
			/*
			 * The last bit is dbi_max, so it is not possible
			 * to reclaim any blocks.
			 */
			mutex_unlock(&udev->cmdr_lock);
			continue;
		} else if (block == end) {
			/* No blocks are in use, so this udev goes idle */
			udev->dbi_thresh = start = 0;
			udev->dbi_max = 0;
		} else {
			udev->dbi_thresh = start = block + 1;
			udev->dbi_max = block;
		}

		/* Truncate the data area mapping from offset 'off' to its end */
		off = udev->data_off + start * DATA_BLOCK_SIZE;
		unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);

		/* Release the block pages */
		tcmu_blocks_release(&udev->data_blocks, start, end);
		mutex_unlock(&udev->cmdr_lock);

		total_freed += end - start;
		pr_debug("Freed %u blocks (total %u) from %s.\n", end - start,
			 total_freed, udev->name);
	}
	mutex_unlock(&root_udev_mutex);

	if (atomic_read(&global_db_count) > tcmu_global_max_blocks)
		schedule_delayed_work(&tcmu_unmap_work, msecs_to_jiffies(5000));
}
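
/*
 * Worked example, illustrative only: suppose a device has dbi_max = 99
 * and find_last_bit() reports that the highest block still in use is 59.
 * Then end = 100, start = dbi_thresh = 60 and dbi_max becomes 59; the
 * mmap()ed data area is unmapped from data_off + 60 * DATA_BLOCK_SIZE to
 * the end of the region (a holelen of 0 means "to the end"), the pages
 * backing blocks 60..99 are returned via tcmu_blocks_release(), and
 * total_freed grows by end - start = 40 blocks.
 */
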
static void check_timedout_devices(void)
{
	struct tcmu_dev *udev, *tmp_dev;
	LIST_HEAD(devs);

	spin_lock_bh(&timed_out_udevs_lock);
	list_splice_init(&timed_out_udevs, &devs);

	list_for_each_entry_safe(udev, tmp_dev, &devs, timedout_entry) {
		list_del_init(&udev->timedout_entry);
		spin_unlock_bh(&timed_out_udevs_lock);

		mutex_lock(&udev->cmdr_lock);
		idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
		mutex_unlock(&udev->cmdr_lock);

		spin_lock_bh(&timed_out_udevs_lock);
	}

	spin_unlock_bh(&timed_out_udevs_lock);
}

static void tcmu_unmap_work_fn(struct work_struct *work)
{
	check_timedout_devices();
	find_free_blocks();
}

static int __init tcmu_module_init(void)
{
	int ret, i, k, len = 0;

	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);

	INIT_DELAYED_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn);

	tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
				sizeof(struct tcmu_cmd),
				__alignof__(struct tcmu_cmd),
				0, NULL);
	if (!tcmu_cmd_cache)
		return -ENOMEM;

	tcmu_root_device = root_device_register("tcm_user");
	if (IS_ERR(tcmu_root_device)) {
		ret = PTR_ERR(tcmu_root_device);
		goto out_free_cache;
	}

	ret = genl_register_family(&tcmu_genl_family);
	if (ret < 0)
		goto out_unreg_device;

	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++)
		len += sizeof(struct configfs_attribute *);
	for (i = 0; tcmu_attrib_attrs[i] != NULL; i++)
		len += sizeof(struct configfs_attribute *);
	/* one extra slot for the NULL terminator of the merged array */
	len += sizeof(struct configfs_attribute *);

	tcmu_attrs = kzalloc(len, GFP_KERNEL);
	if (!tcmu_attrs) {
		ret = -ENOMEM;
		goto out_unreg_genl;
	}

	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++)
		tcmu_attrs[i] = passthrough_attrib_attrs[i];
	for (k = 0; tcmu_attrib_attrs[k] != NULL; k++) {
		tcmu_attrs[i] = tcmu_attrib_attrs[k];
		i++;
	}
	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;

	ret = transport_backend_register(&tcmu_ops);
	if (ret)
		goto out_attrs;

	return 0;

out_attrs:
	kfree(tcmu_attrs);
out_unreg_genl:
	genl_unregister_family(&tcmu_genl_family);
out_unreg_device:
	root_device_unregister(tcmu_root_device);
out_free_cache:
	kmem_cache_destroy(tcmu_cmd_cache);

	return ret;
}

static void __exit tcmu_module_exit(void)
{
	cancel_delayed_work_sync(&tcmu_unmap_work);
	target_backend_unregister(&tcmu_ops);
	kfree(tcmu_attrs);
	genl_unregister_family(&tcmu_genl_family);
	root_device_unregister(tcmu_root_device);
	kmem_cache_destroy(tcmu_cmd_cache);
}

MODULE_DESCRIPTION("TCM USER subsystem plugin");
MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
MODULE_LICENSE("GPL");

module_init(tcmu_module_init);
module_exit(tcmu_module_exit);
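
/*
 * Illustrative end-to-end flow, not from the original source; exact paths,
 * handler names and dev_config strings depend on the userspace handler and
 * tooling in use:
 *
 *   modprobe target_core_user
 *   mkdir -p /sys/kernel/config/target/core/user_1/test
 *   echo -n "dev_config=test/cfg,dev_size=1073741824" \
 *       > /sys/kernel/config/target/core/user_1/test/control
 *   echo 1 > /sys/kernel/config/target/core/user_1/test/enable
 *
 * On enable, tcmu_configure_device() registers a UIO device; the handler
 * then mmap()s the matching /dev/uioN to reach the command ring and data
 * area and starts servicing SCSI commands passed through this backend.
 */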