1 /* 2 * Copyright (c) 2009, Microsoft Corporation. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 * 13 * You should have received a copy of the GNU General Public License along with 14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 15 * Place - Suite 330, Boston, MA 02111-1307 USA. 16 * 17 * Authors: 18 * Haiyang Zhang <haiyangz@microsoft.com> 19 * Hank Janssen <hjanssen@microsoft.com> 20 * K. Y. Srinivasan <kys@microsoft.com> 21 */ 22 23 #include <linux/kernel.h> 24 #include <linux/wait.h> 25 #include <linux/sched.h> 26 #include <linux/completion.h> 27 #include <linux/string.h> 28 #include <linux/mm.h> 29 #include <linux/delay.h> 30 #include <linux/init.h> 31 #include <linux/slab.h> 32 #include <linux/module.h> 33 #include <linux/device.h> 34 #include <linux/hyperv.h> 35 #include <linux/blkdev.h> 36 #include <scsi/scsi.h> 37 #include <scsi/scsi_cmnd.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_device.h> 40 #include <scsi/scsi_tcq.h> 41 #include <scsi/scsi_eh.h> 42 #include <scsi/scsi_devinfo.h> 43 #include <scsi/scsi_dbg.h> 44 45 /* 46 * All wire protocol details (storage protocol between the guest and the host) 47 * are consolidated here. 48 * 49 * Begin protocol definitions. 50 */ 51 52 /* 53 * Version history: 54 * V1 Beta: 0.1 55 * V1 RC < 2008/1/31: 1.0 56 * V1 RC > 2008/1/31: 2.0 57 * Win7: 4.2 58 * Win8: 5.1 59 */ 60 61 62 #define VMSTOR_WIN7_MAJOR 4 63 #define VMSTOR_WIN7_MINOR 2 64 65 #define VMSTOR_WIN8_MAJOR 5 66 #define VMSTOR_WIN8_MINOR 1 67 68 69 /* Packet structure describing virtual storage requests. */ 70 enum vstor_packet_operation { 71 VSTOR_OPERATION_COMPLETE_IO = 1, 72 VSTOR_OPERATION_REMOVE_DEVICE = 2, 73 VSTOR_OPERATION_EXECUTE_SRB = 3, 74 VSTOR_OPERATION_RESET_LUN = 4, 75 VSTOR_OPERATION_RESET_ADAPTER = 5, 76 VSTOR_OPERATION_RESET_BUS = 6, 77 VSTOR_OPERATION_BEGIN_INITIALIZATION = 7, 78 VSTOR_OPERATION_END_INITIALIZATION = 8, 79 VSTOR_OPERATION_QUERY_PROTOCOL_VERSION = 9, 80 VSTOR_OPERATION_QUERY_PROPERTIES = 10, 81 VSTOR_OPERATION_ENUMERATE_BUS = 11, 82 VSTOR_OPERATION_FCHBA_DATA = 12, 83 VSTOR_OPERATION_CREATE_SUB_CHANNELS = 13, 84 VSTOR_OPERATION_MAXIMUM = 13 85 }; 86 87 /* 88 * WWN packet for Fibre Channel HBA 89 */ 90 91 struct hv_fc_wwn_packet { 92 bool primary_active; 93 u8 reserved1; 94 u8 reserved2; 95 u8 primary_port_wwn[8]; 96 u8 primary_node_wwn[8]; 97 u8 secondary_port_wwn[8]; 98 u8 secondary_node_wwn[8]; 99 }; 100 101 102 103 /* 104 * SRB Flag Bits 105 */ 106 107 #define SRB_FLAGS_QUEUE_ACTION_ENABLE 0x00000002 108 #define SRB_FLAGS_DISABLE_DISCONNECT 0x00000004 109 #define SRB_FLAGS_DISABLE_SYNCH_TRANSFER 0x00000008 110 #define SRB_FLAGS_BYPASS_FROZEN_QUEUE 0x00000010 111 #define SRB_FLAGS_DISABLE_AUTOSENSE 0x00000020 112 #define SRB_FLAGS_DATA_IN 0x00000040 113 #define SRB_FLAGS_DATA_OUT 0x00000080 114 #define SRB_FLAGS_NO_DATA_TRANSFER 0x00000000 115 #define SRB_FLAGS_UNSPECIFIED_DIRECTION (SRB_FLAGS_DATA_IN | SRB_FLAGS_DATA_OUT) 116 #define SRB_FLAGS_NO_QUEUE_FREEZE 0x00000100 117 #define SRB_FLAGS_ADAPTER_CACHE_ENABLE 0x00000200 118 #define SRB_FLAGS_FREE_SENSE_BUFFER 0x00000400 119 120 /* 121 * This flag indicates the request is part of the workflow for processing a D3. 122 */ 123 #define SRB_FLAGS_D3_PROCESSING 0x00000800 124 #define SRB_FLAGS_IS_ACTIVE 0x00010000 125 #define SRB_FLAGS_ALLOCATED_FROM_ZONE 0x00020000 126 #define SRB_FLAGS_SGLIST_FROM_POOL 0x00040000 127 #define SRB_FLAGS_BYPASS_LOCKED_QUEUE 0x00080000 128 #define SRB_FLAGS_NO_KEEP_AWAKE 0x00100000 129 #define SRB_FLAGS_PORT_DRIVER_ALLOCSENSE 0x00200000 130 #define SRB_FLAGS_PORT_DRIVER_SENSEHASPORT 0x00400000 131 #define SRB_FLAGS_DONT_START_NEXT_PACKET 0x00800000 132 #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F000000 133 #define SRB_FLAGS_CLASS_DRIVER_RESERVED 0xF0000000 134 135 136 /* 137 * Platform neutral description of a scsi request - 138 * this remains the same across the write regardless of 32/64 bit 139 * note: it's patterned off the SCSI_PASS_THROUGH structure 140 */ 141 #define STORVSC_MAX_CMD_LEN 0x10 142 143 #define POST_WIN7_STORVSC_SENSE_BUFFER_SIZE 0x14 144 #define PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE 0x12 145 146 #define STORVSC_SENSE_BUFFER_SIZE 0x14 147 #define STORVSC_MAX_BUF_LEN_WITH_PADDING 0x14 148 149 /* 150 * Sense buffer size changed in win8; have a run-time 151 * variable to track the size we should use. 152 */ 153 static int sense_buffer_size; 154 155 /* 156 * The size of the vmscsi_request has changed in win8. The 157 * additional size is because of new elements added to the 158 * structure. These elements are valid only when we are talking 159 * to a win8 host. 160 * Track the correction to size we need to apply. 161 */ 162 163 static int vmscsi_size_delta; 164 static int vmstor_current_major; 165 static int vmstor_current_minor; 166 167 struct vmscsi_win8_extension { 168 /* 169 * The following were added in Windows 8 170 */ 171 u16 reserve; 172 u8 queue_tag; 173 u8 queue_action; 174 u32 srb_flags; 175 u32 time_out_value; 176 u32 queue_sort_ey; 177 } __packed; 178 179 struct vmscsi_request { 180 u16 length; 181 u8 srb_status; 182 u8 scsi_status; 183 184 u8 port_number; 185 u8 path_id; 186 u8 target_id; 187 u8 lun; 188 189 u8 cdb_length; 190 u8 sense_info_length; 191 u8 data_in; 192 u8 reserved; 193 194 u32 data_transfer_length; 195 196 union { 197 u8 cdb[STORVSC_MAX_CMD_LEN]; 198 u8 sense_data[STORVSC_SENSE_BUFFER_SIZE]; 199 u8 reserved_array[STORVSC_MAX_BUF_LEN_WITH_PADDING]; 200 }; 201 /* 202 * The following was added in win8. 203 */ 204 struct vmscsi_win8_extension win8_extension; 205 206 } __attribute((packed)); 207 208 209 /* 210 * This structure is sent during the intialization phase to get the different 211 * properties of the channel. 212 */ 213 214 #define STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL 0x1 215 216 struct vmstorage_channel_properties { 217 u32 reserved; 218 u16 max_channel_cnt; 219 u16 reserved1; 220 221 u32 flags; 222 u32 max_transfer_bytes; 223 224 u64 reserved2; 225 } __packed; 226 227 /* This structure is sent during the storage protocol negotiations. */ 228 struct vmstorage_protocol_version { 229 /* Major (MSW) and minor (LSW) version numbers. */ 230 u16 major_minor; 231 232 /* 233 * Revision number is auto-incremented whenever this file is changed 234 * (See FILL_VMSTOR_REVISION macro above). Mismatch does not 235 * definitely indicate incompatibility--but it does indicate mismatched 236 * builds. 237 * This is only used on the windows side. Just set it to 0. 238 */ 239 u16 revision; 240 } __packed; 241 242 /* Channel Property Flags */ 243 #define STORAGE_CHANNEL_REMOVABLE_FLAG 0x1 244 #define STORAGE_CHANNEL_EMULATED_IDE_FLAG 0x2 245 246 struct vstor_packet { 247 /* Requested operation type */ 248 enum vstor_packet_operation operation; 249 250 /* Flags - see below for values */ 251 u32 flags; 252 253 /* Status of the request returned from the server side. */ 254 u32 status; 255 256 /* Data payload area */ 257 union { 258 /* 259 * Structure used to forward SCSI commands from the 260 * client to the server. 261 */ 262 struct vmscsi_request vm_srb; 263 264 /* Structure used to query channel properties. */ 265 struct vmstorage_channel_properties storage_channel_properties; 266 267 /* Used during version negotiations. */ 268 struct vmstorage_protocol_version version; 269 270 /* Fibre channel address packet */ 271 struct hv_fc_wwn_packet wwn_packet; 272 273 /* Number of sub-channels to create */ 274 u16 sub_channel_count; 275 276 /* This will be the maximum of the union members */ 277 u8 buffer[0x34]; 278 }; 279 } __packed; 280 281 /* 282 * Packet Flags: 283 * 284 * This flag indicates that the server should send back a completion for this 285 * packet. 286 */ 287 288 #define REQUEST_COMPLETION_FLAG 0x1 289 290 /* Matches Windows-end */ 291 enum storvsc_request_type { 292 WRITE_TYPE = 0, 293 READ_TYPE, 294 UNKNOWN_TYPE, 295 }; 296 297 /* 298 * SRB status codes and masks; a subset of the codes used here. 299 */ 300 301 #define SRB_STATUS_AUTOSENSE_VALID 0x80 302 #define SRB_STATUS_INVALID_LUN 0x20 303 #define SRB_STATUS_SUCCESS 0x01 304 #define SRB_STATUS_ABORTED 0x02 305 #define SRB_STATUS_ERROR 0x04 306 307 /* 308 * This is the end of Protocol specific defines. 309 */ 310 311 static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); 312 static u32 max_outstanding_req_per_channel; 313 314 static int storvsc_vcpus_per_sub_channel = 4; 315 316 module_param(storvsc_ringbuffer_size, int, S_IRUGO); 317 MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)"); 318 319 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO); 320 MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels"); 321 /* 322 * Timeout in seconds for all devices managed by this driver. 323 */ 324 static int storvsc_timeout = 180; 325 326 static int msft_blist_flags = BLIST_TRY_VPD_PAGES; 327 328 329 static void storvsc_on_channel_callback(void *context); 330 331 #define STORVSC_MAX_LUNS_PER_TARGET 255 332 #define STORVSC_MAX_TARGETS 2 333 #define STORVSC_MAX_CHANNELS 8 334 335 #define STORVSC_FC_MAX_LUNS_PER_TARGET 255 336 #define STORVSC_FC_MAX_TARGETS 128 337 #define STORVSC_FC_MAX_CHANNELS 8 338 339 #define STORVSC_IDE_MAX_LUNS_PER_TARGET 64 340 #define STORVSC_IDE_MAX_TARGETS 1 341 #define STORVSC_IDE_MAX_CHANNELS 1 342 343 struct storvsc_cmd_request { 344 struct scsi_cmnd *cmd; 345 346 unsigned int bounce_sgl_count; 347 struct scatterlist *bounce_sgl; 348 349 struct hv_device *device; 350 351 /* Synchronize the request/response if needed */ 352 struct completion wait_event; 353 354 struct vmbus_channel_packet_multipage_buffer mpb; 355 struct vmbus_packet_mpb_array *payload; 356 u32 payload_sz; 357 358 struct vstor_packet vstor_packet; 359 }; 360 361 362 /* A storvsc device is a device object that contains a vmbus channel */ 363 struct storvsc_device { 364 struct hv_device *device; 365 366 bool destroy; 367 bool drain_notify; 368 bool open_sub_channel; 369 atomic_t num_outstanding_req; 370 struct Scsi_Host *host; 371 372 wait_queue_head_t waiting_to_drain; 373 374 /* 375 * Each unique Port/Path/Target represents 1 channel ie scsi 376 * controller. In reality, the pathid, targetid is always 0 377 * and the port is set by us 378 */ 379 unsigned int port_number; 380 unsigned char path_id; 381 unsigned char target_id; 382 383 /* 384 * Max I/O, the device can support. 385 */ 386 u32 max_transfer_bytes; 387 /* Used for vsc/vsp channel reset process */ 388 struct storvsc_cmd_request init_request; 389 struct storvsc_cmd_request reset_request; 390 }; 391 392 struct hv_host_device { 393 struct hv_device *dev; 394 unsigned int port; 395 unsigned char path; 396 unsigned char target; 397 }; 398 399 struct storvsc_scan_work { 400 struct work_struct work; 401 struct Scsi_Host *host; 402 uint lun; 403 }; 404 405 static void storvsc_device_scan(struct work_struct *work) 406 { 407 struct storvsc_scan_work *wrk; 408 uint lun; 409 struct scsi_device *sdev; 410 411 wrk = container_of(work, struct storvsc_scan_work, work); 412 lun = wrk->lun; 413 414 sdev = scsi_device_lookup(wrk->host, 0, 0, lun); 415 if (!sdev) 416 goto done; 417 scsi_rescan_device(&sdev->sdev_gendev); 418 scsi_device_put(sdev); 419 420 done: 421 kfree(wrk); 422 } 423 424 static void storvsc_host_scan(struct work_struct *work) 425 { 426 struct storvsc_scan_work *wrk; 427 struct Scsi_Host *host; 428 struct scsi_device *sdev; 429 unsigned long flags; 430 431 wrk = container_of(work, struct storvsc_scan_work, work); 432 host = wrk->host; 433 434 /* 435 * Before scanning the host, first check to see if any of the 436 * currrently known devices have been hot removed. We issue a 437 * "unit ready" command against all currently known devices. 438 * This I/O will result in an error for devices that have been 439 * removed. As part of handling the I/O error, we remove the device. 440 * 441 * When a LUN is added or removed, the host sends us a signal to 442 * scan the host. Thus we are forced to discover the LUNs that 443 * may have been removed this way. 444 */ 445 mutex_lock(&host->scan_mutex); 446 spin_lock_irqsave(host->host_lock, flags); 447 list_for_each_entry(sdev, &host->__devices, siblings) { 448 spin_unlock_irqrestore(host->host_lock, flags); 449 scsi_test_unit_ready(sdev, 1, 1, NULL); 450 spin_lock_irqsave(host->host_lock, flags); 451 continue; 452 } 453 spin_unlock_irqrestore(host->host_lock, flags); 454 mutex_unlock(&host->scan_mutex); 455 /* 456 * Now scan the host to discover LUNs that may have been added. 457 */ 458 scsi_scan_host(host); 459 460 kfree(wrk); 461 } 462 463 static void storvsc_remove_lun(struct work_struct *work) 464 { 465 struct storvsc_scan_work *wrk; 466 struct scsi_device *sdev; 467 468 wrk = container_of(work, struct storvsc_scan_work, work); 469 if (!scsi_host_get(wrk->host)) 470 goto done; 471 472 sdev = scsi_device_lookup(wrk->host, 0, 0, wrk->lun); 473 474 if (sdev) { 475 scsi_remove_device(sdev); 476 scsi_device_put(sdev); 477 } 478 scsi_host_put(wrk->host); 479 480 done: 481 kfree(wrk); 482 } 483 484 /* 485 * Major/minor macros. Minor version is in LSB, meaning that earlier flat 486 * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1). 487 */ 488 489 static inline u16 storvsc_get_version(u8 major, u8 minor) 490 { 491 u16 version; 492 493 version = ((major << 8) | minor); 494 return version; 495 } 496 497 /* 498 * We can get incoming messages from the host that are not in response to 499 * messages that we have sent out. An example of this would be messages 500 * received by the guest to notify dynamic addition/removal of LUNs. To 501 * deal with potential race conditions where the driver may be in the 502 * midst of being unloaded when we might receive an unsolicited message 503 * from the host, we have implemented a mechanism to gurantee sequential 504 * consistency: 505 * 506 * 1) Once the device is marked as being destroyed, we will fail all 507 * outgoing messages. 508 * 2) We permit incoming messages when the device is being destroyed, 509 * only to properly account for messages already sent out. 510 */ 511 512 static inline struct storvsc_device *get_out_stor_device( 513 struct hv_device *device) 514 { 515 struct storvsc_device *stor_device; 516 517 stor_device = hv_get_drvdata(device); 518 519 if (stor_device && stor_device->destroy) 520 stor_device = NULL; 521 522 return stor_device; 523 } 524 525 526 static inline void storvsc_wait_to_drain(struct storvsc_device *dev) 527 { 528 dev->drain_notify = true; 529 wait_event(dev->waiting_to_drain, 530 atomic_read(&dev->num_outstanding_req) == 0); 531 dev->drain_notify = false; 532 } 533 534 static inline struct storvsc_device *get_in_stor_device( 535 struct hv_device *device) 536 { 537 struct storvsc_device *stor_device; 538 539 stor_device = hv_get_drvdata(device); 540 541 if (!stor_device) 542 goto get_in_err; 543 544 /* 545 * If the device is being destroyed; allow incoming 546 * traffic only to cleanup outstanding requests. 547 */ 548 549 if (stor_device->destroy && 550 (atomic_read(&stor_device->num_outstanding_req) == 0)) 551 stor_device = NULL; 552 553 get_in_err: 554 return stor_device; 555 556 } 557 558 static void destroy_bounce_buffer(struct scatterlist *sgl, 559 unsigned int sg_count) 560 { 561 int i; 562 struct page *page_buf; 563 564 for (i = 0; i < sg_count; i++) { 565 page_buf = sg_page((&sgl[i])); 566 if (page_buf != NULL) 567 __free_page(page_buf); 568 } 569 570 kfree(sgl); 571 } 572 573 static int do_bounce_buffer(struct scatterlist *sgl, unsigned int sg_count) 574 { 575 int i; 576 577 /* No need to check */ 578 if (sg_count < 2) 579 return -1; 580 581 /* We have at least 2 sg entries */ 582 for (i = 0; i < sg_count; i++) { 583 if (i == 0) { 584 /* make sure 1st one does not have hole */ 585 if (sgl[i].offset + sgl[i].length != PAGE_SIZE) 586 return i; 587 } else if (i == sg_count - 1) { 588 /* make sure last one does not have hole */ 589 if (sgl[i].offset != 0) 590 return i; 591 } else { 592 /* make sure no hole in the middle */ 593 if (sgl[i].length != PAGE_SIZE || sgl[i].offset != 0) 594 return i; 595 } 596 } 597 return -1; 598 } 599 600 static struct scatterlist *create_bounce_buffer(struct scatterlist *sgl, 601 unsigned int sg_count, 602 unsigned int len, 603 int write) 604 { 605 int i; 606 int num_pages; 607 struct scatterlist *bounce_sgl; 608 struct page *page_buf; 609 unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE); 610 611 num_pages = ALIGN(len, PAGE_SIZE) >> PAGE_SHIFT; 612 613 bounce_sgl = kcalloc(num_pages, sizeof(struct scatterlist), GFP_ATOMIC); 614 if (!bounce_sgl) 615 return NULL; 616 617 sg_init_table(bounce_sgl, num_pages); 618 for (i = 0; i < num_pages; i++) { 619 page_buf = alloc_page(GFP_ATOMIC); 620 if (!page_buf) 621 goto cleanup; 622 sg_set_page(&bounce_sgl[i], page_buf, buf_len, 0); 623 } 624 625 return bounce_sgl; 626 627 cleanup: 628 destroy_bounce_buffer(bounce_sgl, num_pages); 629 return NULL; 630 } 631 632 /* Assume the original sgl has enough room */ 633 static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl, 634 struct scatterlist *bounce_sgl, 635 unsigned int orig_sgl_count, 636 unsigned int bounce_sgl_count) 637 { 638 int i; 639 int j = 0; 640 unsigned long src, dest; 641 unsigned int srclen, destlen, copylen; 642 unsigned int total_copied = 0; 643 unsigned long bounce_addr = 0; 644 unsigned long dest_addr = 0; 645 unsigned long flags; 646 struct scatterlist *cur_dest_sgl; 647 struct scatterlist *cur_src_sgl; 648 649 local_irq_save(flags); 650 cur_dest_sgl = orig_sgl; 651 cur_src_sgl = bounce_sgl; 652 for (i = 0; i < orig_sgl_count; i++) { 653 dest_addr = (unsigned long) 654 kmap_atomic(sg_page(cur_dest_sgl)) + 655 cur_dest_sgl->offset; 656 dest = dest_addr; 657 destlen = cur_dest_sgl->length; 658 659 if (bounce_addr == 0) 660 bounce_addr = (unsigned long)kmap_atomic( 661 sg_page(cur_src_sgl)); 662 663 while (destlen) { 664 src = bounce_addr + cur_src_sgl->offset; 665 srclen = cur_src_sgl->length - cur_src_sgl->offset; 666 667 copylen = min(srclen, destlen); 668 memcpy((void *)dest, (void *)src, copylen); 669 670 total_copied += copylen; 671 cur_src_sgl->offset += copylen; 672 destlen -= copylen; 673 dest += copylen; 674 675 if (cur_src_sgl->offset == cur_src_sgl->length) { 676 /* full */ 677 kunmap_atomic((void *)bounce_addr); 678 j++; 679 680 /* 681 * It is possible that the number of elements 682 * in the bounce buffer may not be equal to 683 * the number of elements in the original 684 * scatter list. Handle this correctly. 685 */ 686 687 if (j == bounce_sgl_count) { 688 /* 689 * We are done; cleanup and return. 690 */ 691 kunmap_atomic((void *)(dest_addr - 692 cur_dest_sgl->offset)); 693 local_irq_restore(flags); 694 return total_copied; 695 } 696 697 /* if we need to use another bounce buffer */ 698 if (destlen || i != orig_sgl_count - 1) { 699 cur_src_sgl = sg_next(cur_src_sgl); 700 bounce_addr = (unsigned long) 701 kmap_atomic( 702 sg_page(cur_src_sgl)); 703 } 704 } else if (destlen == 0 && i == orig_sgl_count - 1) { 705 /* unmap the last bounce that is < PAGE_SIZE */ 706 kunmap_atomic((void *)bounce_addr); 707 } 708 } 709 710 kunmap_atomic((void *)(dest_addr - cur_dest_sgl->offset)); 711 cur_dest_sgl = sg_next(cur_dest_sgl); 712 } 713 714 local_irq_restore(flags); 715 716 return total_copied; 717 } 718 719 /* Assume the bounce_sgl has enough room ie using the create_bounce_buffer() */ 720 static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl, 721 struct scatterlist *bounce_sgl, 722 unsigned int orig_sgl_count) 723 { 724 int i; 725 int j = 0; 726 unsigned long src, dest; 727 unsigned int srclen, destlen, copylen; 728 unsigned int total_copied = 0; 729 unsigned long bounce_addr = 0; 730 unsigned long src_addr = 0; 731 unsigned long flags; 732 struct scatterlist *cur_src_sgl; 733 struct scatterlist *cur_dest_sgl; 734 735 local_irq_save(flags); 736 737 cur_src_sgl = orig_sgl; 738 cur_dest_sgl = bounce_sgl; 739 740 for (i = 0; i < orig_sgl_count; i++) { 741 src_addr = (unsigned long) 742 kmap_atomic(sg_page(cur_src_sgl)) + 743 cur_src_sgl->offset; 744 src = src_addr; 745 srclen = cur_src_sgl->length; 746 747 if (bounce_addr == 0) 748 bounce_addr = (unsigned long) 749 kmap_atomic(sg_page(cur_dest_sgl)); 750 751 while (srclen) { 752 /* assume bounce offset always == 0 */ 753 dest = bounce_addr + cur_dest_sgl->length; 754 destlen = PAGE_SIZE - cur_dest_sgl->length; 755 756 copylen = min(srclen, destlen); 757 memcpy((void *)dest, (void *)src, copylen); 758 759 total_copied += copylen; 760 cur_dest_sgl->length += copylen; 761 srclen -= copylen; 762 src += copylen; 763 764 if (cur_dest_sgl->length == PAGE_SIZE) { 765 /* full..move to next entry */ 766 kunmap_atomic((void *)bounce_addr); 767 bounce_addr = 0; 768 j++; 769 } 770 771 /* if we need to use another bounce buffer */ 772 if (srclen && bounce_addr == 0) { 773 cur_dest_sgl = sg_next(cur_dest_sgl); 774 bounce_addr = (unsigned long) 775 kmap_atomic( 776 sg_page(cur_dest_sgl)); 777 } 778 779 } 780 781 kunmap_atomic((void *)(src_addr - cur_src_sgl->offset)); 782 cur_src_sgl = sg_next(cur_src_sgl); 783 } 784 785 if (bounce_addr) 786 kunmap_atomic((void *)bounce_addr); 787 788 local_irq_restore(flags); 789 790 return total_copied; 791 } 792 793 static void handle_sc_creation(struct vmbus_channel *new_sc) 794 { 795 struct hv_device *device = new_sc->primary_channel->device_obj; 796 struct storvsc_device *stor_device; 797 struct vmstorage_channel_properties props; 798 799 stor_device = get_out_stor_device(device); 800 if (!stor_device) 801 return; 802 803 if (stor_device->open_sub_channel == false) 804 return; 805 806 memset(&props, 0, sizeof(struct vmstorage_channel_properties)); 807 808 vmbus_open(new_sc, 809 storvsc_ringbuffer_size, 810 storvsc_ringbuffer_size, 811 (void *)&props, 812 sizeof(struct vmstorage_channel_properties), 813 storvsc_on_channel_callback, new_sc); 814 } 815 816 static void handle_multichannel_storage(struct hv_device *device, int max_chns) 817 { 818 struct storvsc_device *stor_device; 819 int num_cpus = num_online_cpus(); 820 int num_sc; 821 struct storvsc_cmd_request *request; 822 struct vstor_packet *vstor_packet; 823 int ret, t; 824 825 num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); 826 stor_device = get_out_stor_device(device); 827 if (!stor_device) 828 return; 829 830 request = &stor_device->init_request; 831 vstor_packet = &request->vstor_packet; 832 833 stor_device->open_sub_channel = true; 834 /* 835 * Establish a handler for dealing with subchannels. 836 */ 837 vmbus_set_sc_create_callback(device->channel, handle_sc_creation); 838 839 /* 840 * Check to see if sub-channels have already been created. This 841 * can happen when this driver is re-loaded after unloading. 842 */ 843 844 if (vmbus_are_subchannels_present(device->channel)) 845 return; 846 847 stor_device->open_sub_channel = false; 848 /* 849 * Request the host to create sub-channels. 850 */ 851 memset(request, 0, sizeof(struct storvsc_cmd_request)); 852 init_completion(&request->wait_event); 853 vstor_packet->operation = VSTOR_OPERATION_CREATE_SUB_CHANNELS; 854 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 855 vstor_packet->sub_channel_count = num_sc; 856 857 ret = vmbus_sendpacket(device->channel, vstor_packet, 858 (sizeof(struct vstor_packet) - 859 vmscsi_size_delta), 860 (unsigned long)request, 861 VM_PKT_DATA_INBAND, 862 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 863 864 if (ret != 0) 865 return; 866 867 t = wait_for_completion_timeout(&request->wait_event, 10*HZ); 868 if (t == 0) 869 return; 870 871 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || 872 vstor_packet->status != 0) 873 return; 874 875 /* 876 * Now that we created the sub-channels, invoke the check; this 877 * may trigger the callback. 878 */ 879 stor_device->open_sub_channel = true; 880 vmbus_are_subchannels_present(device->channel); 881 } 882 883 static int storvsc_channel_init(struct hv_device *device) 884 { 885 struct storvsc_device *stor_device; 886 struct storvsc_cmd_request *request; 887 struct vstor_packet *vstor_packet; 888 int ret, t; 889 int max_chns; 890 bool process_sub_channels = false; 891 892 stor_device = get_out_stor_device(device); 893 if (!stor_device) 894 return -ENODEV; 895 896 request = &stor_device->init_request; 897 vstor_packet = &request->vstor_packet; 898 899 /* 900 * Now, initiate the vsc/vsp initialization protocol on the open 901 * channel 902 */ 903 memset(request, 0, sizeof(struct storvsc_cmd_request)); 904 init_completion(&request->wait_event); 905 vstor_packet->operation = VSTOR_OPERATION_BEGIN_INITIALIZATION; 906 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 907 908 ret = vmbus_sendpacket(device->channel, vstor_packet, 909 (sizeof(struct vstor_packet) - 910 vmscsi_size_delta), 911 (unsigned long)request, 912 VM_PKT_DATA_INBAND, 913 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 914 if (ret != 0) 915 goto cleanup; 916 917 t = wait_for_completion_timeout(&request->wait_event, 5*HZ); 918 if (t == 0) { 919 ret = -ETIMEDOUT; 920 goto cleanup; 921 } 922 923 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || 924 vstor_packet->status != 0) 925 goto cleanup; 926 927 928 /* reuse the packet for version range supported */ 929 memset(vstor_packet, 0, sizeof(struct vstor_packet)); 930 vstor_packet->operation = VSTOR_OPERATION_QUERY_PROTOCOL_VERSION; 931 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 932 933 vstor_packet->version.major_minor = 934 storvsc_get_version(vmstor_current_major, vmstor_current_minor); 935 936 /* 937 * The revision number is only used in Windows; set it to 0. 938 */ 939 vstor_packet->version.revision = 0; 940 941 ret = vmbus_sendpacket(device->channel, vstor_packet, 942 (sizeof(struct vstor_packet) - 943 vmscsi_size_delta), 944 (unsigned long)request, 945 VM_PKT_DATA_INBAND, 946 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 947 if (ret != 0) 948 goto cleanup; 949 950 t = wait_for_completion_timeout(&request->wait_event, 5*HZ); 951 if (t == 0) { 952 ret = -ETIMEDOUT; 953 goto cleanup; 954 } 955 956 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || 957 vstor_packet->status != 0) 958 goto cleanup; 959 960 961 memset(vstor_packet, 0, sizeof(struct vstor_packet)); 962 vstor_packet->operation = VSTOR_OPERATION_QUERY_PROPERTIES; 963 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 964 965 ret = vmbus_sendpacket(device->channel, vstor_packet, 966 (sizeof(struct vstor_packet) - 967 vmscsi_size_delta), 968 (unsigned long)request, 969 VM_PKT_DATA_INBAND, 970 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 971 972 if (ret != 0) 973 goto cleanup; 974 975 t = wait_for_completion_timeout(&request->wait_event, 5*HZ); 976 if (t == 0) { 977 ret = -ETIMEDOUT; 978 goto cleanup; 979 } 980 981 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || 982 vstor_packet->status != 0) 983 goto cleanup; 984 985 /* 986 * Check to see if multi-channel support is there. 987 * Hosts that implement protocol version of 5.1 and above 988 * support multi-channel. 989 */ 990 max_chns = vstor_packet->storage_channel_properties.max_channel_cnt; 991 if ((vmbus_proto_version != VERSION_WIN7) && 992 (vmbus_proto_version != VERSION_WS2008)) { 993 if (vstor_packet->storage_channel_properties.flags & 994 STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL) 995 process_sub_channels = true; 996 } 997 stor_device->max_transfer_bytes = 998 vstor_packet->storage_channel_properties.max_transfer_bytes; 999 1000 memset(vstor_packet, 0, sizeof(struct vstor_packet)); 1001 vstor_packet->operation = VSTOR_OPERATION_END_INITIALIZATION; 1002 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 1003 1004 ret = vmbus_sendpacket(device->channel, vstor_packet, 1005 (sizeof(struct vstor_packet) - 1006 vmscsi_size_delta), 1007 (unsigned long)request, 1008 VM_PKT_DATA_INBAND, 1009 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 1010 1011 if (ret != 0) 1012 goto cleanup; 1013 1014 t = wait_for_completion_timeout(&request->wait_event, 5*HZ); 1015 if (t == 0) { 1016 ret = -ETIMEDOUT; 1017 goto cleanup; 1018 } 1019 1020 if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO || 1021 vstor_packet->status != 0) 1022 goto cleanup; 1023 1024 if (process_sub_channels) 1025 handle_multichannel_storage(device, max_chns); 1026 1027 1028 cleanup: 1029 return ret; 1030 } 1031 1032 static void storvsc_handle_error(struct vmscsi_request *vm_srb, 1033 struct scsi_cmnd *scmnd, 1034 struct Scsi_Host *host, 1035 u8 asc, u8 ascq) 1036 { 1037 struct storvsc_scan_work *wrk; 1038 void (*process_err_fn)(struct work_struct *work); 1039 bool do_work = false; 1040 1041 switch (vm_srb->srb_status) { 1042 case SRB_STATUS_ERROR: 1043 /* 1044 * If there is an error; offline the device since all 1045 * error recovery strategies would have already been 1046 * deployed on the host side. However, if the command 1047 * were a pass-through command deal with it appropriately. 1048 */ 1049 switch (scmnd->cmnd[0]) { 1050 case ATA_16: 1051 case ATA_12: 1052 set_host_byte(scmnd, DID_PASSTHROUGH); 1053 break; 1054 /* 1055 * On Some Windows hosts TEST_UNIT_READY command can return 1056 * SRB_STATUS_ERROR, let the upper level code deal with it 1057 * based on the sense information. 1058 */ 1059 case TEST_UNIT_READY: 1060 break; 1061 default: 1062 set_host_byte(scmnd, DID_TARGET_FAILURE); 1063 } 1064 break; 1065 case SRB_STATUS_INVALID_LUN: 1066 do_work = true; 1067 process_err_fn = storvsc_remove_lun; 1068 break; 1069 case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID): 1070 if ((asc == 0x2a) && (ascq == 0x9)) { 1071 do_work = true; 1072 process_err_fn = storvsc_device_scan; 1073 /* 1074 * Retry the I/O that trigerred this. 1075 */ 1076 set_host_byte(scmnd, DID_REQUEUE); 1077 } 1078 break; 1079 } 1080 1081 if (!do_work) 1082 return; 1083 1084 /* 1085 * We need to schedule work to process this error; schedule it. 1086 */ 1087 wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC); 1088 if (!wrk) { 1089 set_host_byte(scmnd, DID_TARGET_FAILURE); 1090 return; 1091 } 1092 1093 wrk->host = host; 1094 wrk->lun = vm_srb->lun; 1095 INIT_WORK(&wrk->work, process_err_fn); 1096 schedule_work(&wrk->work); 1097 } 1098 1099 1100 static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) 1101 { 1102 struct scsi_cmnd *scmnd = cmd_request->cmd; 1103 struct hv_host_device *host_dev = shost_priv(scmnd->device->host); 1104 struct scsi_sense_hdr sense_hdr; 1105 struct vmscsi_request *vm_srb; 1106 struct Scsi_Host *host; 1107 struct storvsc_device *stor_dev; 1108 struct hv_device *dev = host_dev->dev; 1109 u32 payload_sz = cmd_request->payload_sz; 1110 void *payload = cmd_request->payload; 1111 1112 stor_dev = get_in_stor_device(dev); 1113 host = stor_dev->host; 1114 1115 vm_srb = &cmd_request->vstor_packet.vm_srb; 1116 if (cmd_request->bounce_sgl_count) { 1117 if (vm_srb->data_in == READ_TYPE) 1118 copy_from_bounce_buffer(scsi_sglist(scmnd), 1119 cmd_request->bounce_sgl, 1120 scsi_sg_count(scmnd), 1121 cmd_request->bounce_sgl_count); 1122 destroy_bounce_buffer(cmd_request->bounce_sgl, 1123 cmd_request->bounce_sgl_count); 1124 } 1125 1126 scmnd->result = vm_srb->scsi_status; 1127 1128 if (scmnd->result) { 1129 if (scsi_normalize_sense(scmnd->sense_buffer, 1130 SCSI_SENSE_BUFFERSIZE, &sense_hdr)) 1131 scsi_print_sense_hdr(scmnd->device, "storvsc", 1132 &sense_hdr); 1133 } 1134 1135 if (vm_srb->srb_status != SRB_STATUS_SUCCESS) 1136 storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, 1137 sense_hdr.ascq); 1138 1139 scsi_set_resid(scmnd, 1140 cmd_request->payload->range.len - 1141 vm_srb->data_transfer_length); 1142 1143 scmnd->scsi_done(scmnd); 1144 1145 if (payload_sz > 1146 sizeof(struct vmbus_channel_packet_multipage_buffer)) 1147 kfree(payload); 1148 } 1149 1150 static void storvsc_on_io_completion(struct hv_device *device, 1151 struct vstor_packet *vstor_packet, 1152 struct storvsc_cmd_request *request) 1153 { 1154 struct storvsc_device *stor_device; 1155 struct vstor_packet *stor_pkt; 1156 1157 stor_device = hv_get_drvdata(device); 1158 stor_pkt = &request->vstor_packet; 1159 1160 /* 1161 * The current SCSI handling on the host side does 1162 * not correctly handle: 1163 * INQUIRY command with page code parameter set to 0x80 1164 * MODE_SENSE command with cmd[2] == 0x1c 1165 * 1166 * Setup srb and scsi status so this won't be fatal. 1167 * We do this so we can distinguish truly fatal failues 1168 * (srb status == 0x4) and off-line the device in that case. 1169 */ 1170 1171 if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) || 1172 (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) { 1173 vstor_packet->vm_srb.scsi_status = 0; 1174 vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS; 1175 } 1176 1177 1178 /* Copy over the status...etc */ 1179 stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status; 1180 stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status; 1181 stor_pkt->vm_srb.sense_info_length = 1182 vstor_packet->vm_srb.sense_info_length; 1183 1184 1185 if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) { 1186 /* CHECK_CONDITION */ 1187 if (vstor_packet->vm_srb.srb_status & 1188 SRB_STATUS_AUTOSENSE_VALID) { 1189 /* autosense data available */ 1190 1191 memcpy(request->cmd->sense_buffer, 1192 vstor_packet->vm_srb.sense_data, 1193 vstor_packet->vm_srb.sense_info_length); 1194 1195 } 1196 } 1197 1198 stor_pkt->vm_srb.data_transfer_length = 1199 vstor_packet->vm_srb.data_transfer_length; 1200 1201 storvsc_command_completion(request); 1202 1203 if (atomic_dec_and_test(&stor_device->num_outstanding_req) && 1204 stor_device->drain_notify) 1205 wake_up(&stor_device->waiting_to_drain); 1206 1207 1208 } 1209 1210 static void storvsc_on_receive(struct hv_device *device, 1211 struct vstor_packet *vstor_packet, 1212 struct storvsc_cmd_request *request) 1213 { 1214 struct storvsc_scan_work *work; 1215 struct storvsc_device *stor_device; 1216 1217 switch (vstor_packet->operation) { 1218 case VSTOR_OPERATION_COMPLETE_IO: 1219 storvsc_on_io_completion(device, vstor_packet, request); 1220 break; 1221 1222 case VSTOR_OPERATION_REMOVE_DEVICE: 1223 case VSTOR_OPERATION_ENUMERATE_BUS: 1224 stor_device = get_in_stor_device(device); 1225 work = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC); 1226 if (!work) 1227 return; 1228 1229 INIT_WORK(&work->work, storvsc_host_scan); 1230 work->host = stor_device->host; 1231 schedule_work(&work->work); 1232 break; 1233 1234 default: 1235 break; 1236 } 1237 } 1238 1239 static void storvsc_on_channel_callback(void *context) 1240 { 1241 struct vmbus_channel *channel = (struct vmbus_channel *)context; 1242 struct hv_device *device; 1243 struct storvsc_device *stor_device; 1244 u32 bytes_recvd; 1245 u64 request_id; 1246 unsigned char packet[ALIGN(sizeof(struct vstor_packet), 8)]; 1247 struct storvsc_cmd_request *request; 1248 int ret; 1249 1250 if (channel->primary_channel != NULL) 1251 device = channel->primary_channel->device_obj; 1252 else 1253 device = channel->device_obj; 1254 1255 stor_device = get_in_stor_device(device); 1256 if (!stor_device) 1257 return; 1258 1259 do { 1260 ret = vmbus_recvpacket(channel, packet, 1261 ALIGN((sizeof(struct vstor_packet) - 1262 vmscsi_size_delta), 8), 1263 &bytes_recvd, &request_id); 1264 if (ret == 0 && bytes_recvd > 0) { 1265 1266 request = (struct storvsc_cmd_request *) 1267 (unsigned long)request_id; 1268 1269 if ((request == &stor_device->init_request) || 1270 (request == &stor_device->reset_request)) { 1271 1272 memcpy(&request->vstor_packet, packet, 1273 (sizeof(struct vstor_packet) - 1274 vmscsi_size_delta)); 1275 complete(&request->wait_event); 1276 } else { 1277 storvsc_on_receive(device, 1278 (struct vstor_packet *)packet, 1279 request); 1280 } 1281 } else { 1282 break; 1283 } 1284 } while (1); 1285 1286 return; 1287 } 1288 1289 static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size) 1290 { 1291 struct vmstorage_channel_properties props; 1292 int ret; 1293 1294 memset(&props, 0, sizeof(struct vmstorage_channel_properties)); 1295 1296 ret = vmbus_open(device->channel, 1297 ring_size, 1298 ring_size, 1299 (void *)&props, 1300 sizeof(struct vmstorage_channel_properties), 1301 storvsc_on_channel_callback, device->channel); 1302 1303 if (ret != 0) 1304 return ret; 1305 1306 ret = storvsc_channel_init(device); 1307 1308 return ret; 1309 } 1310 1311 static int storvsc_dev_remove(struct hv_device *device) 1312 { 1313 struct storvsc_device *stor_device; 1314 unsigned long flags; 1315 1316 stor_device = hv_get_drvdata(device); 1317 1318 spin_lock_irqsave(&device->channel->inbound_lock, flags); 1319 stor_device->destroy = true; 1320 spin_unlock_irqrestore(&device->channel->inbound_lock, flags); 1321 1322 /* 1323 * At this point, all outbound traffic should be disable. We 1324 * only allow inbound traffic (responses) to proceed so that 1325 * outstanding requests can be completed. 1326 */ 1327 1328 storvsc_wait_to_drain(stor_device); 1329 1330 /* 1331 * Since we have already drained, we don't need to busy wait 1332 * as was done in final_release_stor_device() 1333 * Note that we cannot set the ext pointer to NULL until 1334 * we have drained - to drain the outgoing packets, we need to 1335 * allow incoming packets. 1336 */ 1337 spin_lock_irqsave(&device->channel->inbound_lock, flags); 1338 hv_set_drvdata(device, NULL); 1339 spin_unlock_irqrestore(&device->channel->inbound_lock, flags); 1340 1341 /* Close the channel */ 1342 vmbus_close(device->channel); 1343 1344 kfree(stor_device); 1345 return 0; 1346 } 1347 1348 static int storvsc_do_io(struct hv_device *device, 1349 struct storvsc_cmd_request *request) 1350 { 1351 struct storvsc_device *stor_device; 1352 struct vstor_packet *vstor_packet; 1353 struct vmbus_channel *outgoing_channel; 1354 int ret = 0; 1355 1356 vstor_packet = &request->vstor_packet; 1357 stor_device = get_out_stor_device(device); 1358 1359 if (!stor_device) 1360 return -ENODEV; 1361 1362 1363 request->device = device; 1364 /* 1365 * Select an an appropriate channel to send the request out. 1366 */ 1367 1368 outgoing_channel = vmbus_get_outgoing_channel(device->channel); 1369 1370 1371 vstor_packet->flags |= REQUEST_COMPLETION_FLAG; 1372 1373 vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) - 1374 vmscsi_size_delta); 1375 1376 1377 vstor_packet->vm_srb.sense_info_length = sense_buffer_size; 1378 1379 1380 vstor_packet->vm_srb.data_transfer_length = 1381 request->payload->range.len; 1382 1383 vstor_packet->operation = VSTOR_OPERATION_EXECUTE_SRB; 1384 1385 if (request->payload->range.len) { 1386 1387 ret = vmbus_sendpacket_mpb_desc(outgoing_channel, 1388 request->payload, request->payload_sz, 1389 vstor_packet, 1390 (sizeof(struct vstor_packet) - 1391 vmscsi_size_delta), 1392 (unsigned long)request); 1393 } else { 1394 ret = vmbus_sendpacket(outgoing_channel, vstor_packet, 1395 (sizeof(struct vstor_packet) - 1396 vmscsi_size_delta), 1397 (unsigned long)request, 1398 VM_PKT_DATA_INBAND, 1399 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 1400 } 1401 1402 if (ret != 0) 1403 return ret; 1404 1405 atomic_inc(&stor_device->num_outstanding_req); 1406 1407 return ret; 1408 } 1409 1410 static int storvsc_device_configure(struct scsi_device *sdevice) 1411 { 1412 1413 blk_queue_max_segment_size(sdevice->request_queue, PAGE_SIZE); 1414 1415 blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY); 1416 1417 blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); 1418 1419 sdevice->no_write_same = 1; 1420 1421 /* 1422 * Add blist flags to permit the reading of the VPD pages even when 1423 * the target may claim SPC-2 compliance. MSFT targets currently 1424 * claim SPC-2 compliance while they implement post SPC-2 features. 1425 * With this patch we can correctly handle WRITE_SAME_16 issues. 1426 */ 1427 sdevice->sdev_bflags |= msft_blist_flags; 1428 1429 /* 1430 * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3 1431 * if the device is a MSFT virtual device. 1432 */ 1433 if (!strncmp(sdevice->vendor, "Msft", 4)) { 1434 switch (vmbus_proto_version) { 1435 case VERSION_WIN8: 1436 case VERSION_WIN8_1: 1437 sdevice->scsi_level = SCSI_SPC_3; 1438 break; 1439 } 1440 } 1441 1442 return 0; 1443 } 1444 1445 static int storvsc_get_chs(struct scsi_device *sdev, struct block_device * bdev, 1446 sector_t capacity, int *info) 1447 { 1448 sector_t nsect = capacity; 1449 sector_t cylinders = nsect; 1450 int heads, sectors_pt; 1451 1452 /* 1453 * We are making up these values; let us keep it simple. 1454 */ 1455 heads = 0xff; 1456 sectors_pt = 0x3f; /* Sectors per track */ 1457 sector_div(cylinders, heads * sectors_pt); 1458 if ((sector_t)(cylinders + 1) * heads * sectors_pt < nsect) 1459 cylinders = 0xffff; 1460 1461 info[0] = heads; 1462 info[1] = sectors_pt; 1463 info[2] = (int)cylinders; 1464 1465 return 0; 1466 } 1467 1468 static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) 1469 { 1470 struct hv_host_device *host_dev = shost_priv(scmnd->device->host); 1471 struct hv_device *device = host_dev->dev; 1472 1473 struct storvsc_device *stor_device; 1474 struct storvsc_cmd_request *request; 1475 struct vstor_packet *vstor_packet; 1476 int ret, t; 1477 1478 1479 stor_device = get_out_stor_device(device); 1480 if (!stor_device) 1481 return FAILED; 1482 1483 request = &stor_device->reset_request; 1484 vstor_packet = &request->vstor_packet; 1485 1486 init_completion(&request->wait_event); 1487 1488 vstor_packet->operation = VSTOR_OPERATION_RESET_BUS; 1489 vstor_packet->flags = REQUEST_COMPLETION_FLAG; 1490 vstor_packet->vm_srb.path_id = stor_device->path_id; 1491 1492 ret = vmbus_sendpacket(device->channel, vstor_packet, 1493 (sizeof(struct vstor_packet) - 1494 vmscsi_size_delta), 1495 (unsigned long)&stor_device->reset_request, 1496 VM_PKT_DATA_INBAND, 1497 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 1498 if (ret != 0) 1499 return FAILED; 1500 1501 t = wait_for_completion_timeout(&request->wait_event, 5*HZ); 1502 if (t == 0) 1503 return TIMEOUT_ERROR; 1504 1505 1506 /* 1507 * At this point, all outstanding requests in the adapter 1508 * should have been flushed out and return to us 1509 * There is a potential race here where the host may be in 1510 * the process of responding when we return from here. 1511 * Just wait for all in-transit packets to be accounted for 1512 * before we return from here. 1513 */ 1514 storvsc_wait_to_drain(stor_device); 1515 1516 return SUCCESS; 1517 } 1518 1519 /* 1520 * The host guarantees to respond to each command, although I/O latencies might 1521 * be unbounded on Azure. Reset the timer unconditionally to give the host a 1522 * chance to perform EH. 1523 */ 1524 static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd) 1525 { 1526 return BLK_EH_RESET_TIMER; 1527 } 1528 1529 static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) 1530 { 1531 bool allowed = true; 1532 u8 scsi_op = scmnd->cmnd[0]; 1533 1534 switch (scsi_op) { 1535 /* the host does not handle WRITE_SAME, log accident usage */ 1536 case WRITE_SAME: 1537 /* 1538 * smartd sends this command and the host does not handle 1539 * this. So, don't send it. 1540 */ 1541 case SET_WINDOW: 1542 scmnd->result = ILLEGAL_REQUEST << 16; 1543 allowed = false; 1544 break; 1545 default: 1546 break; 1547 } 1548 return allowed; 1549 } 1550 1551 static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) 1552 { 1553 int ret; 1554 struct hv_host_device *host_dev = shost_priv(host); 1555 struct hv_device *dev = host_dev->dev; 1556 struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd); 1557 int i; 1558 struct scatterlist *sgl; 1559 unsigned int sg_count = 0; 1560 struct vmscsi_request *vm_srb; 1561 struct scatterlist *cur_sgl; 1562 struct vmbus_packet_mpb_array *payload; 1563 u32 payload_sz; 1564 u32 length; 1565 1566 if (vmstor_current_major <= VMSTOR_WIN8_MAJOR) { 1567 /* 1568 * On legacy hosts filter unimplemented commands. 1569 * Future hosts are expected to correctly handle 1570 * unsupported commands. Furthermore, it is 1571 * possible that some of the currently 1572 * unsupported commands maybe supported in 1573 * future versions of the host. 1574 */ 1575 if (!storvsc_scsi_cmd_ok(scmnd)) { 1576 scmnd->scsi_done(scmnd); 1577 return 0; 1578 } 1579 } 1580 1581 /* Setup the cmd request */ 1582 cmd_request->cmd = scmnd; 1583 1584 vm_srb = &cmd_request->vstor_packet.vm_srb; 1585 vm_srb->win8_extension.time_out_value = 60; 1586 1587 vm_srb->win8_extension.srb_flags |= 1588 (SRB_FLAGS_QUEUE_ACTION_ENABLE | 1589 SRB_FLAGS_DISABLE_SYNCH_TRANSFER); 1590 1591 /* Build the SRB */ 1592 switch (scmnd->sc_data_direction) { 1593 case DMA_TO_DEVICE: 1594 vm_srb->data_in = WRITE_TYPE; 1595 vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_OUT; 1596 break; 1597 case DMA_FROM_DEVICE: 1598 vm_srb->data_in = READ_TYPE; 1599 vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_IN; 1600 break; 1601 default: 1602 vm_srb->data_in = UNKNOWN_TYPE; 1603 vm_srb->win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER; 1604 break; 1605 } 1606 1607 1608 vm_srb->port_number = host_dev->port; 1609 vm_srb->path_id = scmnd->device->channel; 1610 vm_srb->target_id = scmnd->device->id; 1611 vm_srb->lun = scmnd->device->lun; 1612 1613 vm_srb->cdb_length = scmnd->cmd_len; 1614 1615 memcpy(vm_srb->cdb, scmnd->cmnd, vm_srb->cdb_length); 1616 1617 sgl = (struct scatterlist *)scsi_sglist(scmnd); 1618 sg_count = scsi_sg_count(scmnd); 1619 1620 length = scsi_bufflen(scmnd); 1621 payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; 1622 payload_sz = sizeof(cmd_request->mpb); 1623 1624 if (sg_count) { 1625 /* check if we need to bounce the sgl */ 1626 if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) { 1627 cmd_request->bounce_sgl = 1628 create_bounce_buffer(sgl, sg_count, 1629 length, 1630 vm_srb->data_in); 1631 if (!cmd_request->bounce_sgl) 1632 return SCSI_MLQUEUE_HOST_BUSY; 1633 1634 cmd_request->bounce_sgl_count = 1635 ALIGN(length, PAGE_SIZE) >> PAGE_SHIFT; 1636 1637 if (vm_srb->data_in == WRITE_TYPE) 1638 copy_to_bounce_buffer(sgl, 1639 cmd_request->bounce_sgl, sg_count); 1640 1641 sgl = cmd_request->bounce_sgl; 1642 sg_count = cmd_request->bounce_sgl_count; 1643 } 1644 1645 1646 if (sg_count > MAX_PAGE_BUFFER_COUNT) { 1647 1648 payload_sz = (sg_count * sizeof(void *) + 1649 sizeof(struct vmbus_packet_mpb_array)); 1650 payload = kmalloc(payload_sz, GFP_ATOMIC); 1651 if (!payload) { 1652 if (cmd_request->bounce_sgl_count) 1653 destroy_bounce_buffer( 1654 cmd_request->bounce_sgl, 1655 cmd_request->bounce_sgl_count); 1656 1657 return SCSI_MLQUEUE_DEVICE_BUSY; 1658 } 1659 } 1660 1661 payload->range.len = length; 1662 payload->range.offset = sgl[0].offset; 1663 1664 cur_sgl = sgl; 1665 for (i = 0; i < sg_count; i++) { 1666 payload->range.pfn_array[i] = 1667 page_to_pfn(sg_page((cur_sgl))); 1668 cur_sgl = sg_next(cur_sgl); 1669 } 1670 1671 } else if (scsi_sglist(scmnd)) { 1672 payload->range.len = length; 1673 payload->range.offset = 1674 virt_to_phys(scsi_sglist(scmnd)) & (PAGE_SIZE-1); 1675 payload->range.pfn_array[0] = 1676 virt_to_phys(scsi_sglist(scmnd)) >> PAGE_SHIFT; 1677 } 1678 1679 cmd_request->payload = payload; 1680 cmd_request->payload_sz = payload_sz; 1681 1682 /* Invokes the vsc to start an IO */ 1683 ret = storvsc_do_io(dev, cmd_request); 1684 1685 if (ret == -EAGAIN) { 1686 /* no more space */ 1687 1688 if (cmd_request->bounce_sgl_count) 1689 destroy_bounce_buffer(cmd_request->bounce_sgl, 1690 cmd_request->bounce_sgl_count); 1691 1692 return SCSI_MLQUEUE_DEVICE_BUSY; 1693 } 1694 1695 return 0; 1696 } 1697 1698 static struct scsi_host_template scsi_driver = { 1699 .module = THIS_MODULE, 1700 .name = "storvsc_host_t", 1701 .cmd_size = sizeof(struct storvsc_cmd_request), 1702 .bios_param = storvsc_get_chs, 1703 .queuecommand = storvsc_queuecommand, 1704 .eh_host_reset_handler = storvsc_host_reset_handler, 1705 .proc_name = "storvsc_host", 1706 .eh_timed_out = storvsc_eh_timed_out, 1707 .slave_configure = storvsc_device_configure, 1708 .cmd_per_lun = 255, 1709 .this_id = -1, 1710 .use_clustering = ENABLE_CLUSTERING, 1711 /* Make sure we dont get a sg segment crosses a page boundary */ 1712 .dma_boundary = PAGE_SIZE-1, 1713 .no_write_same = 1, 1714 }; 1715 1716 enum { 1717 SCSI_GUID, 1718 IDE_GUID, 1719 SFC_GUID, 1720 }; 1721 1722 static const struct hv_vmbus_device_id id_table[] = { 1723 /* SCSI guid */ 1724 { HV_SCSI_GUID, 1725 .driver_data = SCSI_GUID 1726 }, 1727 /* IDE guid */ 1728 { HV_IDE_GUID, 1729 .driver_data = IDE_GUID 1730 }, 1731 /* Fibre Channel GUID */ 1732 { 1733 HV_SYNTHFC_GUID, 1734 .driver_data = SFC_GUID 1735 }, 1736 { }, 1737 }; 1738 1739 MODULE_DEVICE_TABLE(vmbus, id_table); 1740 1741 static int storvsc_probe(struct hv_device *device, 1742 const struct hv_vmbus_device_id *dev_id) 1743 { 1744 int ret; 1745 int num_cpus = num_online_cpus(); 1746 struct Scsi_Host *host; 1747 struct hv_host_device *host_dev; 1748 bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false); 1749 int target = 0; 1750 struct storvsc_device *stor_device; 1751 int max_luns_per_target; 1752 int max_targets; 1753 int max_channels; 1754 int max_sub_channels = 0; 1755 1756 /* 1757 * Based on the windows host we are running on, 1758 * set state to properly communicate with the host. 1759 */ 1760 1761 switch (vmbus_proto_version) { 1762 case VERSION_WS2008: 1763 case VERSION_WIN7: 1764 sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; 1765 vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); 1766 vmstor_current_major = VMSTOR_WIN7_MAJOR; 1767 vmstor_current_minor = VMSTOR_WIN7_MINOR; 1768 max_luns_per_target = STORVSC_IDE_MAX_LUNS_PER_TARGET; 1769 max_targets = STORVSC_IDE_MAX_TARGETS; 1770 max_channels = STORVSC_IDE_MAX_CHANNELS; 1771 break; 1772 default: 1773 sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE; 1774 vmscsi_size_delta = 0; 1775 vmstor_current_major = VMSTOR_WIN8_MAJOR; 1776 vmstor_current_minor = VMSTOR_WIN8_MINOR; 1777 max_luns_per_target = STORVSC_MAX_LUNS_PER_TARGET; 1778 max_targets = STORVSC_MAX_TARGETS; 1779 max_channels = STORVSC_MAX_CHANNELS; 1780 /* 1781 * On Windows8 and above, we support sub-channels for storage. 1782 * The number of sub-channels offerred is based on the number of 1783 * VCPUs in the guest. 1784 */ 1785 max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel); 1786 break; 1787 } 1788 1789 scsi_driver.can_queue = (max_outstanding_req_per_channel * 1790 (max_sub_channels + 1)); 1791 1792 host = scsi_host_alloc(&scsi_driver, 1793 sizeof(struct hv_host_device)); 1794 if (!host) 1795 return -ENOMEM; 1796 1797 host_dev = shost_priv(host); 1798 memset(host_dev, 0, sizeof(struct hv_host_device)); 1799 1800 host_dev->port = host->host_no; 1801 host_dev->dev = device; 1802 1803 1804 stor_device = kzalloc(sizeof(struct storvsc_device), GFP_KERNEL); 1805 if (!stor_device) { 1806 ret = -ENOMEM; 1807 goto err_out0; 1808 } 1809 1810 stor_device->destroy = false; 1811 stor_device->open_sub_channel = false; 1812 init_waitqueue_head(&stor_device->waiting_to_drain); 1813 stor_device->device = device; 1814 stor_device->host = host; 1815 hv_set_drvdata(device, stor_device); 1816 1817 stor_device->port_number = host->host_no; 1818 ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size); 1819 if (ret) 1820 goto err_out1; 1821 1822 host_dev->path = stor_device->path_id; 1823 host_dev->target = stor_device->target_id; 1824 1825 switch (dev_id->driver_data) { 1826 case SFC_GUID: 1827 host->max_lun = STORVSC_FC_MAX_LUNS_PER_TARGET; 1828 host->max_id = STORVSC_FC_MAX_TARGETS; 1829 host->max_channel = STORVSC_FC_MAX_CHANNELS - 1; 1830 break; 1831 1832 case SCSI_GUID: 1833 host->max_lun = max_luns_per_target; 1834 host->max_id = max_targets; 1835 host->max_channel = max_channels - 1; 1836 break; 1837 1838 default: 1839 host->max_lun = STORVSC_IDE_MAX_LUNS_PER_TARGET; 1840 host->max_id = STORVSC_IDE_MAX_TARGETS; 1841 host->max_channel = STORVSC_IDE_MAX_CHANNELS - 1; 1842 break; 1843 } 1844 /* max cmd length */ 1845 host->max_cmd_len = STORVSC_MAX_CMD_LEN; 1846 1847 /* 1848 * set the table size based on the info we got 1849 * from the host. 1850 */ 1851 host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); 1852 1853 /* Register the HBA and start the scsi bus scan */ 1854 ret = scsi_add_host(host, &device->device); 1855 if (ret != 0) 1856 goto err_out2; 1857 1858 if (!dev_is_ide) { 1859 scsi_scan_host(host); 1860 } else { 1861 target = (device->dev_instance.b[5] << 8 | 1862 device->dev_instance.b[4]); 1863 ret = scsi_add_device(host, 0, target, 0); 1864 if (ret) { 1865 scsi_remove_host(host); 1866 goto err_out2; 1867 } 1868 } 1869 return 0; 1870 1871 err_out2: 1872 /* 1873 * Once we have connected with the host, we would need to 1874 * to invoke storvsc_dev_remove() to rollback this state and 1875 * this call also frees up the stor_device; hence the jump around 1876 * err_out1 label. 1877 */ 1878 storvsc_dev_remove(device); 1879 goto err_out0; 1880 1881 err_out1: 1882 kfree(stor_device); 1883 1884 err_out0: 1885 scsi_host_put(host); 1886 return ret; 1887 } 1888 1889 static int storvsc_remove(struct hv_device *dev) 1890 { 1891 struct storvsc_device *stor_device = hv_get_drvdata(dev); 1892 struct Scsi_Host *host = stor_device->host; 1893 1894 scsi_remove_host(host); 1895 storvsc_dev_remove(dev); 1896 scsi_host_put(host); 1897 1898 return 0; 1899 } 1900 1901 static struct hv_driver storvsc_drv = { 1902 .name = KBUILD_MODNAME, 1903 .id_table = id_table, 1904 .probe = storvsc_probe, 1905 .remove = storvsc_remove, 1906 }; 1907 1908 static int __init storvsc_drv_init(void) 1909 { 1910 1911 /* 1912 * Divide the ring buffer data size (which is 1 page less 1913 * than the ring buffer size since that page is reserved for 1914 * the ring buffer indices) by the max request size (which is 1915 * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) 1916 */ 1917 max_outstanding_req_per_channel = 1918 ((storvsc_ringbuffer_size - PAGE_SIZE) / 1919 ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + 1920 sizeof(struct vstor_packet) + sizeof(u64) - 1921 vmscsi_size_delta, 1922 sizeof(u64))); 1923 1924 return vmbus_driver_register(&storvsc_drv); 1925 } 1926 1927 static void __exit storvsc_drv_exit(void) 1928 { 1929 vmbus_driver_unregister(&storvsc_drv); 1930 } 1931 1932 MODULE_LICENSE("GPL"); 1933 MODULE_DESCRIPTION("Microsoft Hyper-V virtual storage driver"); 1934 module_init(storvsc_drv_init); 1935 module_exit(storvsc_drv_exit); 1936