// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */

#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/hyperv.h>
#include <linux/blkdev.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_devinfo.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_transport_fc.h>
#include <scsi/scsi_transport.h>

/*
 * All wire protocol details (storage protocol between the guest and the host)
 * are consolidated here.
 *
 * Begin protocol definitions.
 */

/*
 * Version history:
 * V1 Beta: 0.1
 * V1 RC < 2008/1/31: 1.0
 * V1 RC > 2008/1/31: 2.0
 * Win7: 4.2
 * Win8: 5.1
 * Win8.1: 6.0
 * Win10: 6.2
 */

#define VMSTOR_PROTO_VERSION(MAJOR_, MINOR_)	((((MAJOR_) & 0xff) << 8) | \
						(((MINOR_) & 0xff)))

#define VMSTOR_PROTO_VERSION_WIN6	VMSTOR_PROTO_VERSION(2, 0)
#define VMSTOR_PROTO_VERSION_WIN7	VMSTOR_PROTO_VERSION(4, 2)
#define VMSTOR_PROTO_VERSION_WIN8	VMSTOR_PROTO_VERSION(5, 1)
#define VMSTOR_PROTO_VERSION_WIN8_1	VMSTOR_PROTO_VERSION(6, 0)
#define VMSTOR_PROTO_VERSION_WIN10	VMSTOR_PROTO_VERSION(6, 2)
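/*
 * For illustration (this comment is an annotation, not part of the
 * Hyper-V headers): the macro packs the major version into the high
 * byte and the minor version into the low byte, so
 * VMSTOR_PROTO_VERSION(6, 2) == 0x0602 and
 * VMSTOR_PROTO_VERSION_WIN8 == 0x0501.
 */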
/* Packet structure describing virtual storage requests. */
enum vstor_packet_operation {
	VSTOR_OPERATION_COMPLETE_IO		= 1,
	VSTOR_OPERATION_REMOVE_DEVICE		= 2,
	VSTOR_OPERATION_EXECUTE_SRB		= 3,
	VSTOR_OPERATION_RESET_LUN		= 4,
	VSTOR_OPERATION_RESET_ADAPTER		= 5,
	VSTOR_OPERATION_RESET_BUS		= 6,
	VSTOR_OPERATION_BEGIN_INITIALIZATION	= 7,
	VSTOR_OPERATION_END_INITIALIZATION	= 8,
	VSTOR_OPERATION_QUERY_PROTOCOL_VERSION	= 9,
	VSTOR_OPERATION_QUERY_PROPERTIES	= 10,
	VSTOR_OPERATION_ENUMERATE_BUS		= 11,
	VSTOR_OPERATION_FCHBA_DATA		= 12,
	VSTOR_OPERATION_CREATE_SUB_CHANNELS	= 13,
	VSTOR_OPERATION_MAXIMUM			= 13
};

/*
 * WWN packet for Fibre Channel HBA
 */

struct hv_fc_wwn_packet {
	u8	primary_active;
	u8	reserved1[3];
	u8	primary_port_wwn[8];
	u8	primary_node_wwn[8];
	u8	secondary_port_wwn[8];
	u8	secondary_node_wwn[8];
};



/*
 * SRB Flag Bits
 */

#define SRB_FLAGS_QUEUE_ACTION_ENABLE		0x00000002
#define SRB_FLAGS_DISABLE_DISCONNECT		0x00000004
#define SRB_FLAGS_DISABLE_SYNCH_TRANSFER	0x00000008
#define SRB_FLAGS_BYPASS_FROZEN_QUEUE		0x00000010
#define SRB_FLAGS_DISABLE_AUTOSENSE		0x00000020
#define SRB_FLAGS_DATA_IN			0x00000040
#define SRB_FLAGS_DATA_OUT			0x00000080
#define SRB_FLAGS_NO_DATA_TRANSFER		0x00000000
#define SRB_FLAGS_UNSPECIFIED_DIRECTION	(SRB_FLAGS_DATA_IN | SRB_FLAGS_DATA_OUT)
#define SRB_FLAGS_NO_QUEUE_FREEZE		0x00000100
#define SRB_FLAGS_ADAPTER_CACHE_ENABLE		0x00000200
#define SRB_FLAGS_FREE_SENSE_BUFFER		0x00000400

/*
 * This flag indicates the request is part of the workflow for processing a D3.
 */
#define SRB_FLAGS_D3_PROCESSING			0x00000800
#define SRB_FLAGS_IS_ACTIVE			0x00010000
#define SRB_FLAGS_ALLOCATED_FROM_ZONE		0x00020000
#define SRB_FLAGS_SGLIST_FROM_POOL		0x00040000
#define SRB_FLAGS_BYPASS_LOCKED_QUEUE		0x00080000
#define SRB_FLAGS_NO_KEEP_AWAKE			0x00100000
#define SRB_FLAGS_PORT_DRIVER_ALLOCSENSE	0x00200000
#define SRB_FLAGS_PORT_DRIVER_SENSEHASPORT	0x00400000
#define SRB_FLAGS_DONT_START_NEXT_PACKET	0x00800000
#define SRB_FLAGS_PORT_DRIVER_RESERVED		0x0F000000
#define SRB_FLAGS_CLASS_DRIVER_RESERVED		0xF0000000

#define SP_UNTAGGED			((unsigned char) ~0)
#define SRB_SIMPLE_TAG_REQUEST		0x20

/*
 * Platform neutral description of a scsi request -
 * this remains the same across the wire regardless of 32/64 bit
 * note: it's patterned off the SCSI_PASS_THROUGH structure
 */
#define STORVSC_MAX_CMD_LEN			0x10

#define POST_WIN7_STORVSC_SENSE_BUFFER_SIZE	0x14
#define PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE	0x12

#define STORVSC_SENSE_BUFFER_SIZE		0x14
#define STORVSC_MAX_BUF_LEN_WITH_PADDING	0x14

/*
 * Sense buffer size changed in win8; have a run-time
 * variable to track the size we should use. This value will
 * likely change during protocol negotiation but it is valid
 * to start by assuming pre-Win8.
 */
static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;

/*
 * The storage protocol version is determined during the
 * initial exchange with the host. It will indicate which
 * storage functionality is available in the host.
 */
static int vmstor_proto_version;

#define STORVSC_LOGGING_NONE	0
#define STORVSC_LOGGING_ERROR	1
#define STORVSC_LOGGING_WARN	2

static int logging_level = STORVSC_LOGGING_ERROR;
module_param(logging_level, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(logging_level,
	"Logging level, 0 - None, 1 - Error (default), 2 - Warning.");

static inline bool do_logging(int level)
{
	return logging_level >= level;
}

#define storvsc_log(dev, level, fmt, ...)			\
do {								\
	if (do_logging(level))					\
		dev_warn(&(dev)->device, fmt, ##__VA_ARGS__);	\
} while (0)

struct vmscsi_win8_extension {
	/*
	 * The following were added in Windows 8
	 */
	u16 reserve;
	u8  queue_tag;
	u8  queue_action;
	u32 srb_flags;
	u32 time_out_value;
	u32 queue_sort_ey;
} __packed;

struct vmscsi_request {
	u16 length;
	u8 srb_status;
	u8 scsi_status;

	u8  port_number;
	u8  path_id;
	u8  target_id;
	u8  lun;

	u8  cdb_length;
	u8  sense_info_length;
	u8  data_in;
	u8  reserved;

	u32 data_transfer_length;

	union {
		u8 cdb[STORVSC_MAX_CMD_LEN];
		u8 sense_data[STORVSC_SENSE_BUFFER_SIZE];
		u8 reserved_array[STORVSC_MAX_BUF_LEN_WITH_PADDING];
	};
	/*
	 * The following was added in win8.
	 */
	struct vmscsi_win8_extension win8_extension;

} __attribute((packed));
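/*
 * Annotation (not part of the original Hyper-V headers): pre-Win8 hosts
 * do not know about the win8_extension field above, so packets destined
 * for them are shortened by sizeof(struct vmscsi_win8_extension); the
 * vmscsi_size_delta entries in the table below carry exactly that
 * correction.
 */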
/*
 * The list of storage protocols in order of preference.
 */
struct vmstor_protocol {
	int protocol_version;
	int sense_buffer_size;
	int vmscsi_size_delta;
};


static const struct vmstor_protocol vmstor_protocols[] = {
	{
		VMSTOR_PROTO_VERSION_WIN10,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTO_VERSION_WIN8_1,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTO_VERSION_WIN8,
		POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
		0
	},
	{
		VMSTOR_PROTO_VERSION_WIN7,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	},
	{
		VMSTOR_PROTO_VERSION_WIN6,
		PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
		sizeof(struct vmscsi_win8_extension),
	}
};


/*
 * This structure is sent during the initialization phase to get the different
 * properties of the channel.
 */

#define STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL	0x1

struct vmstorage_channel_properties {
	u32 reserved;
	u16 max_channel_cnt;
	u16 reserved1;

	u32 flags;
	u32 max_transfer_bytes;

	u64 reserved2;
} __packed;

/* This structure is sent during the storage protocol negotiations. */
struct vmstorage_protocol_version {
	/* Major (MSW) and minor (LSW) version numbers. */
	u16 major_minor;

	/*
	 * Revision number is auto-incremented whenever this file is changed
	 * (See FILL_VMSTOR_REVISION macro above). Mismatch does not
	 * definitely indicate incompatibility--but it does indicate mismatched
	 * builds.
	 * This is only used on the windows side. Just set it to 0.
	 */
	u16 revision;
} __packed;

/* Channel Property Flags */
#define STORAGE_CHANNEL_REMOVABLE_FLAG		0x1
#define STORAGE_CHANNEL_EMULATED_IDE_FLAG	0x2

struct vstor_packet {
	/* Requested operation type */
	enum vstor_packet_operation operation;

	/* Flags - see below for values */
	u32 flags;

	/* Status of the request returned from the server side. */
	u32 status;

	/* Data payload area */
	union {
		/*
		 * Structure used to forward SCSI commands from the
		 * client to the server.
		 */
		struct vmscsi_request vm_srb;

		/* Structure used to query channel properties. */
		struct vmstorage_channel_properties storage_channel_properties;

		/* Used during version negotiations. */
		struct vmstorage_protocol_version version;

		/* Fibre channel address packet */
		struct hv_fc_wwn_packet wwn_packet;

		/* Number of sub-channels to create */
		u16 sub_channel_count;

		/* This will be the maximum of the union members */
		u8  buffer[0x34];
	};
} __packed;

/*
 * Packet Flags:
 *
 * This flag indicates that the server should send back a completion for this
 * packet.
 */

#define REQUEST_COMPLETION_FLAG	0x1

/* Matches Windows-end */
enum storvsc_request_type {
	WRITE_TYPE = 0,
	READ_TYPE,
	UNKNOWN_TYPE,
};

/*
 * SRB status codes and masks; a subset of the codes used here.
 */

#define SRB_STATUS_AUTOSENSE_VALID	0x80
#define SRB_STATUS_QUEUE_FROZEN		0x40
#define SRB_STATUS_INVALID_LUN		0x20
#define SRB_STATUS_SUCCESS		0x01
#define SRB_STATUS_ABORTED		0x02
#define SRB_STATUS_ERROR		0x04
#define SRB_STATUS_DATA_OVERRUN		0x12

#define SRB_STATUS(status) \
	(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
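/*
 * Worked example (annotation, not from the original source): a host may
 * complete a request with srb_status 0x84, i.e. SRB_STATUS_ERROR with
 * SRB_STATUS_AUTOSENSE_VALID set. SRB_STATUS(0x84) strips the flag bits
 * and yields SRB_STATUS_ERROR (0x04), which is the value that
 * storvsc_handle_error() switches on.
 */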
/*
 * This is the end of Protocol specific defines.
 */

static int storvsc_ringbuffer_size = (128 * 1024);
static u32 max_outstanding_req_per_channel;
static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);

static int storvsc_vcpus_per_sub_channel = 4;
static unsigned int storvsc_max_hw_queues;

module_param(storvsc_ringbuffer_size, int, S_IRUGO);
MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");

module_param(storvsc_max_hw_queues, uint, 0644);
MODULE_PARM_DESC(storvsc_max_hw_queues, "Maximum number of hardware queues");

module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");

static int ring_avail_percent_lowater = 10;
module_param(ring_avail_percent_lowater, int, S_IRUGO);
MODULE_PARM_DESC(ring_avail_percent_lowater,
		"Select a channel if available ring size > this in percent");
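/*
 * Annotation (behavioral summary, not from the original source): with
 * the default lowater of 10, storvsc_do_io() only picks an alternate
 * channel when more than 10% of its outbound ring buffer is free; when
 * no channel clears the threshold, the I/O is issued on the originally
 * selected channel anyway.
 */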
/*
 * Timeout in seconds for all devices managed by this driver.
 */
static int storvsc_timeout = 180;

#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct scsi_transport_template *fc_transport_template;
#endif

static struct scsi_host_template scsi_driver;
static void storvsc_on_channel_callback(void *context);

#define STORVSC_MAX_LUNS_PER_TARGET	255
#define STORVSC_MAX_TARGETS		2
#define STORVSC_MAX_CHANNELS		8

#define STORVSC_FC_MAX_LUNS_PER_TARGET	255
#define STORVSC_FC_MAX_TARGETS		128
#define STORVSC_FC_MAX_CHANNELS		8

#define STORVSC_IDE_MAX_LUNS_PER_TARGET	64
#define STORVSC_IDE_MAX_TARGETS		1
#define STORVSC_IDE_MAX_CHANNELS	1

struct storvsc_cmd_request {
	struct scsi_cmnd *cmd;

	struct hv_device *device;

	/* Synchronize the request/response if needed */
	struct completion wait_event;

	struct vmbus_channel_packet_multipage_buffer mpb;
	struct vmbus_packet_mpb_array *payload;
	u32 payload_sz;

	struct vstor_packet vstor_packet;
};


/* A storvsc device is a device object that contains a vmbus channel */
struct storvsc_device {
	struct hv_device *device;

	bool	 destroy;
	bool	 drain_notify;
	atomic_t num_outstanding_req;
	struct Scsi_Host *host;

	wait_queue_head_t waiting_to_drain;

	/*
	 * Each unique Port/Path/Target represents 1 channel, i.e. a SCSI
	 * controller. In reality, the pathid and targetid are always 0
	 * and the port is set by us.
	 */
	unsigned int port_number;
	unsigned char path_id;
	unsigned char target_id;

	/*
	 * The size of the vmscsi_request has changed in win8. The
	 * additional size is because of new elements added to the
	 * structure. These elements are valid only when we are talking
	 * to a win8 host.
	 * Track the correction to size we need to apply. This value
	 * will likely change during protocol negotiation but it is
	 * valid to start by assuming pre-Win8.
	 */
	int vmscsi_size_delta;

	/*
	 * Max I/O the device can support.
	 */
	u32 max_transfer_bytes;
	/*
	 * Number of sub-channels we will open.
	 */
	u16 num_sc;
	struct vmbus_channel **stor_chns;
	/*
	 * Mask of CPUs bound to subchannels.
	 */
	struct cpumask alloced_cpus;
	/*
	 * Serializes modifications of stor_chns[] from storvsc_do_io()
	 * and storvsc_change_target_cpu().
	 */
	spinlock_t lock;
	/* Used for vsc/vsp channel reset process */
	struct storvsc_cmd_request init_request;
	struct storvsc_cmd_request reset_request;
	/*
	 * Currently active port and node names for FC devices.
	 */
	u64 node_name;
	u64 port_name;
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	struct fc_rport *rport;
#endif
};

struct hv_host_device {
	struct hv_device *dev;
	unsigned int port;
	unsigned char path;
	unsigned char target;
	struct workqueue_struct *handle_error_wq;
	struct work_struct host_scan_work;
	struct Scsi_Host *host;
};

struct storvsc_scan_work {
	struct work_struct work;
	struct Scsi_Host *host;
	u8 lun;
	u8 tgt_id;
};

static void storvsc_device_scan(struct work_struct *work)
{
	struct storvsc_scan_work *wrk;
	struct scsi_device *sdev;

	wrk = container_of(work, struct storvsc_scan_work, work);

	sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
	if (!sdev)
		goto done;
	scsi_rescan_device(&sdev->sdev_gendev);
	scsi_device_put(sdev);

done:
	kfree(wrk);
}

static void storvsc_host_scan(struct work_struct *work)
{
	struct Scsi_Host *host;
	struct scsi_device *sdev;
	struct hv_host_device *host_device =
		container_of(work, struct hv_host_device, host_scan_work);

	host = host_device->host;
	/*
	 * Before scanning the host, first check to see if any of the
	 * currently known devices have been hot removed. We issue a
	 * "unit ready" command against all currently known devices.
	 * This I/O will result in an error for devices that have been
	 * removed. As part of handling the I/O error, we remove the device.
	 *
	 * When a LUN is added or removed, the host sends us a signal to
	 * scan the host. Thus we are forced to discover the LUNs that
	 * may have been removed this way.
	 */
	mutex_lock(&host->scan_mutex);
	shost_for_each_device(sdev, host)
		scsi_test_unit_ready(sdev, 1, 1, NULL);
	mutex_unlock(&host->scan_mutex);
	/*
	 * Now scan the host to discover LUNs that may have been added.
	 */
	scsi_scan_host(host);
}

static void storvsc_remove_lun(struct work_struct *work)
{
	struct storvsc_scan_work *wrk;
	struct scsi_device *sdev;

	wrk = container_of(work, struct storvsc_scan_work, work);
	if (!scsi_host_get(wrk->host))
		goto done;

	sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);

	if (sdev) {
		scsi_remove_device(sdev);
		scsi_device_put(sdev);
	}
	scsi_host_put(wrk->host);

done:
	kfree(wrk);
}


/*
 * We can get incoming messages from the host that are not in response to
 * messages that we have sent out. An example of this would be messages
 * received by the guest to notify dynamic addition/removal of LUNs. To
 * deal with potential race conditions where the driver may be in the
 * midst of being unloaded when we might receive an unsolicited message
 * from the host, we have implemented a mechanism to guarantee sequential
 * consistency:
 *
 * 1) Once the device is marked as being destroyed, we will fail all
 *    outgoing messages.
 * 2) We permit incoming messages when the device is being destroyed,
 *    only to properly account for messages already sent out.
 */
static inline struct storvsc_device *get_out_stor_device(
					struct hv_device *device)
{
	struct storvsc_device *stor_device;

	stor_device = hv_get_drvdata(device);

	if (stor_device && stor_device->destroy)
		stor_device = NULL;

	return stor_device;
}


static inline void storvsc_wait_to_drain(struct storvsc_device *dev)
{
	dev->drain_notify = true;
	wait_event(dev->waiting_to_drain,
		   atomic_read(&dev->num_outstanding_req) == 0);
	dev->drain_notify = false;
}

static inline struct storvsc_device *get_in_stor_device(
					struct hv_device *device)
{
	struct storvsc_device *stor_device;

	stor_device = hv_get_drvdata(device);

	if (!stor_device)
		goto get_in_err;

	/*
	 * If the device is being destroyed, allow incoming
	 * traffic only to cleanup outstanding requests.
	 */

	if (stor_device->destroy &&
	    (atomic_read(&stor_device->num_outstanding_req) == 0))
		stor_device = NULL;

get_in_err:
	return stor_device;

}

static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
				      u32 new)
{
	struct storvsc_device *stor_device;
	struct vmbus_channel *cur_chn;
	bool old_is_alloced = false;
	struct hv_device *device;
	unsigned long flags;
	int cpu;

	device = channel->primary_channel ?
			channel->primary_channel->device_obj
				: channel->device_obj;
	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return;

	/* See storvsc_do_io() -> get_og_chn(). */
	spin_lock_irqsave(&stor_device->lock, flags);

	/*
	 * Determines if the storvsc device has other channels assigned to
	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
	 * array.
	 */
	if (device->channel != channel && device->channel->target_cpu == old) {
		cur_chn = device->channel;
		old_is_alloced = true;
		goto old_is_alloced;
	}
	list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
		if (cur_chn == channel)
			continue;
		if (cur_chn->target_cpu == old) {
			old_is_alloced = true;
			goto old_is_alloced;
		}
	}

old_is_alloced:
	if (old_is_alloced)
		WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
	else
		cpumask_clear_cpu(old, &stor_device->alloced_cpus);

	/* "Flush" the stor_chns array. */
	for_each_possible_cpu(cpu) {
		if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
					cpu, &stor_device->alloced_cpus))
			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
	}

	WRITE_ONCE(stor_device->stor_chns[new], channel);
	cpumask_set_cpu(new, &stor_device->alloced_cpus);

	spin_unlock_irqrestore(&stor_device->lock, flags);
}
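/*
 * Example scenario (hypothetical CPU numbers): if the channel cached in
 * stor_chns[2] is retargeted from CPU 2 to CPU 6, the function above
 * either repoints stor_chns[2] at another channel still bound to CPU 2
 * or clears CPU 2 from alloced_cpus, and then installs the channel in
 * stor_chns[6].
 */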
static void handle_sc_creation(struct vmbus_channel *new_sc)
{
	struct hv_device *device = new_sc->primary_channel->device_obj;
	struct device *dev = &device->device;
	struct storvsc_device *stor_device;
	struct vmstorage_channel_properties props;
	int ret;

	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return;

	memset(&props, 0, sizeof(struct vmstorage_channel_properties));

	/*
	 * The size of vmbus_requestor is an upper bound on the number of requests
	 * that can be in-progress at any one time across all channels.
	 */
	new_sc->rqstor_size = scsi_driver.can_queue;

	ret = vmbus_open(new_sc,
			 storvsc_ringbuffer_size,
			 storvsc_ringbuffer_size,
			 (void *)&props,
			 sizeof(struct vmstorage_channel_properties),
			 storvsc_on_channel_callback, new_sc);

	/* In case vmbus_open() fails, we don't use the sub-channel. */
	if (ret != 0) {
		dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
		return;
	}

	new_sc->change_target_cpu_callback = storvsc_change_target_cpu;

	/* Add the sub-channel to the array of available channels. */
	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
}

static void handle_multichannel_storage(struct hv_device *device, int max_chns)
{
	struct device *dev = &device->device;
	struct storvsc_device *stor_device;
	int num_sc;
	struct storvsc_cmd_request *request;
	struct vstor_packet *vstor_packet;
	int ret, t;

	/*
	 * If the number of CPUs is artificially restricted, such as
	 * with maxcpus=1 on the kernel boot line, Hyper-V could offer
	 * sub-channels >= the number of CPUs. These sub-channels
	 * should not be created. The primary channel is already created
	 * and assigned to one CPU, so check against # CPUs - 1.
	 */
	num_sc = min((int)(num_online_cpus() - 1), max_chns);
	if (!num_sc)
		return;

	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return;

	stor_device->num_sc = num_sc;
	request = &stor_device->init_request;
	vstor_packet = &request->vstor_packet;

	/*
	 * Establish a handler for dealing with subchannels.
	 */
	vmbus_set_sc_create_callback(device->channel, handle_sc_creation);

	/*
	 * Request the host to create sub-channels.
	 */
	memset(request, 0, sizeof(struct storvsc_cmd_request));
	init_completion(&request->wait_event);
	vstor_packet->operation = VSTOR_OPERATION_CREATE_SUB_CHANNELS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
	vstor_packet->sub_channel_count = num_sc;

	ret = vmbus_sendpacket(device->channel, vstor_packet,
			       (sizeof(struct vstor_packet) -
			       stor_device->vmscsi_size_delta),
			       (unsigned long)request,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
		return;
	}

	t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
	if (t == 0) {
		dev_err(dev, "Failed to create sub-channel: timed out\n");
		return;
	}

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
	    vstor_packet->status != 0) {
		dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
			vstor_packet->operation, vstor_packet->status);
		return;
	}

	/*
	 * We need to do nothing here, because vmbus_process_offer()
	 * invokes channel->sc_creation_callback, which will open and use
	 * the sub-channel(s).
	 */
}
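/*
 * Worked example (hypothetical guest size): on a 16-VCPU guest with the
 * default storvsc_vcpus_per_sub_channel of 4, storvsc_probe() computes
 * max_sub_channels = (16 - 1) / 4 = 3; num_sc above is then further
 * capped at num_online_cpus() - 1 and at the max_chns value offered by
 * the host.
 */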
static void cache_wwn(struct storvsc_device *stor_device,
		      struct vstor_packet *vstor_packet)
{
	/*
	 * Cache the currently active port and node ww names.
	 */
	if (vstor_packet->wwn_packet.primary_active) {
		stor_device->node_name =
			wwn_to_u64(vstor_packet->wwn_packet.primary_node_wwn);
		stor_device->port_name =
			wwn_to_u64(vstor_packet->wwn_packet.primary_port_wwn);
	} else {
		stor_device->node_name =
			wwn_to_u64(vstor_packet->wwn_packet.secondary_node_wwn);
		stor_device->port_name =
			wwn_to_u64(vstor_packet->wwn_packet.secondary_port_wwn);
	}
}


static int storvsc_execute_vstor_op(struct hv_device *device,
				    struct storvsc_cmd_request *request,
				    bool status_check)
{
	struct storvsc_device *stor_device;
	struct vstor_packet *vstor_packet;
	int ret, t;

	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return -ENODEV;

	vstor_packet = &request->vstor_packet;

	init_completion(&request->wait_event);
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = vmbus_sendpacket(device->channel, vstor_packet,
			       (sizeof(struct vstor_packet) -
			       stor_device->vmscsi_size_delta),
			       (unsigned long)request,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0)
		return ret;

	t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
	if (t == 0)
		return -ETIMEDOUT;

	if (!status_check)
		return ret;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
	    vstor_packet->status != 0)
		return -EINVAL;

	return ret;
}
static int storvsc_channel_init(struct hv_device *device, bool is_fc)
{
	struct storvsc_device *stor_device;
	struct storvsc_cmd_request *request;
	struct vstor_packet *vstor_packet;
	int ret, i;
	int max_chns;
	bool process_sub_channels = false;

	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return -ENODEV;

	request = &stor_device->init_request;
	vstor_packet = &request->vstor_packet;

	/*
	 * Now, initiate the vsc/vsp initialization protocol on the open
	 * channel
	 */
	memset(request, 0, sizeof(struct storvsc_cmd_request));
	vstor_packet->operation = VSTOR_OPERATION_BEGIN_INITIALIZATION;
	ret = storvsc_execute_vstor_op(device, request, true);
	if (ret)
		return ret;
	/*
	 * Query host supported protocol version.
	 */

	for (i = 0; i < ARRAY_SIZE(vmstor_protocols); i++) {
		/* reuse the packet for version range supported */
		memset(vstor_packet, 0, sizeof(struct vstor_packet));
		vstor_packet->operation =
			VSTOR_OPERATION_QUERY_PROTOCOL_VERSION;

		vstor_packet->version.major_minor =
			vmstor_protocols[i].protocol_version;

		/*
		 * The revision number is only used in Windows; set it to 0.
		 */
		vstor_packet->version.revision = 0;
		ret = storvsc_execute_vstor_op(device, request, false);
		if (ret != 0)
			return ret;

		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO)
			return -EINVAL;

		if (vstor_packet->status == 0) {
			vmstor_proto_version =
				vmstor_protocols[i].protocol_version;

			sense_buffer_size =
				vmstor_protocols[i].sense_buffer_size;

			stor_device->vmscsi_size_delta =
				vmstor_protocols[i].vmscsi_size_delta;

			break;
		}
	}

	if (vstor_packet->status != 0)
		return -EINVAL;


	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERY_PROPERTIES;
	ret = storvsc_execute_vstor_op(device, request, true);
	if (ret != 0)
		return ret;

	/*
	 * Check to see if multi-channel support is there.
	 * Hosts that implement protocol version of 5.1 and above
	 * support multi-channel.
	 */
	max_chns = vstor_packet->storage_channel_properties.max_channel_cnt;

	/*
	 * Allocate state to manage the sub-channels.
	 * We allocate an array based on the numbers of possible CPUs
	 * (Hyper-V does not support cpu online/offline).
	 * This array will be sparsely populated with unique
	 * channels - primary + sub-channels.
	 * We will however populate all the slots to evenly distribute
	 * the load.
	 */
	stor_device->stor_chns = kcalloc(num_possible_cpus(), sizeof(void *),
					 GFP_KERNEL);
	if (stor_device->stor_chns == NULL)
		return -ENOMEM;

	device->channel->change_target_cpu_callback = storvsc_change_target_cpu;

	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
	cpumask_set_cpu(device->channel->target_cpu,
			&stor_device->alloced_cpus);

	if (vmstor_proto_version >= VMSTOR_PROTO_VERSION_WIN8) {
		if (vstor_packet->storage_channel_properties.flags &
		    STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL)
			process_sub_channels = true;
	}
	stor_device->max_transfer_bytes =
		vstor_packet->storage_channel_properties.max_transfer_bytes;

	if (!is_fc)
		goto done;

	/*
	 * For FC devices retrieve FC HBA data.
	 */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_FCHBA_DATA;
	ret = storvsc_execute_vstor_op(device, request, true);
	if (ret != 0)
		return ret;

	/*
	 * Cache the currently active port and node ww names.
	 */
	cache_wwn(stor_device, vstor_packet);

done:

	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_END_INITIALIZATION;
	ret = storvsc_execute_vstor_op(device, request, true);
	if (ret != 0)
		return ret;

	if (process_sub_channels)
		handle_multichannel_storage(device, max_chns);

	return ret;
}
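/*
 * Behavioral note (summary annotation, not from the host documentation):
 * the negotiation loop above offers each vmstor_protocols[] entry
 * newest-first and settles on the first version the host accepts
 * (status == 0), which also fixes sense_buffer_size and
 * vmscsi_size_delta for the lifetime of the device.
 */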
static void storvsc_handle_error(struct vmscsi_request *vm_srb,
				struct scsi_cmnd *scmnd,
				struct Scsi_Host *host,
				u8 asc, u8 ascq)
{
	struct storvsc_scan_work *wrk;
	void (*process_err_fn)(struct work_struct *work);
	struct hv_host_device *host_dev = shost_priv(host);
	bool do_work = false;

	switch (SRB_STATUS(vm_srb->srb_status)) {
	case SRB_STATUS_ERROR:
		/*
		 * Let the upper layer deal with the error when
		 * a sense message is present.
		 */

		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
			break;
		/*
		 * If there is an error, offline the device since all
		 * error recovery strategies would have already been
		 * deployed on the host side. However, if the command
		 * was a pass-through command, deal with it appropriately.
		 */
		switch (scmnd->cmnd[0]) {
		case ATA_16:
		case ATA_12:
			set_host_byte(scmnd, DID_PASSTHROUGH);
			break;
		/*
		 * On some Windows hosts the TEST_UNIT_READY command can
		 * return SRB_STATUS_ERROR; let the upper level code deal
		 * with it based on the sense information.
		 */
		case TEST_UNIT_READY:
			break;
		default:
			set_host_byte(scmnd, DID_ERROR);
		}
		break;
	case SRB_STATUS_INVALID_LUN:
		set_host_byte(scmnd, DID_NO_CONNECT);
		do_work = true;
		process_err_fn = storvsc_remove_lun;
		break;
	case SRB_STATUS_ABORTED:
		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
		    (asc == 0x2a) && (ascq == 0x9)) {
			do_work = true;
			process_err_fn = storvsc_device_scan;
			/*
			 * Retry the I/O that triggered this.
			 */
			set_host_byte(scmnd, DID_REQUEUE);
		}
		break;
	}

	if (!do_work)
		return;

	/*
	 * We need to schedule work to process this error; schedule it.
	 */
	wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC);
	if (!wrk) {
		set_host_byte(scmnd, DID_TARGET_FAILURE);
		return;
	}

	wrk->host = host;
	wrk->lun = vm_srb->lun;
	wrk->tgt_id = vm_srb->target_id;
	INIT_WORK(&wrk->work, process_err_fn);
	queue_work(host_dev->handle_error_wq, &wrk->work);
}


static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request,
				       struct storvsc_device *stor_dev)
{
	struct scsi_cmnd *scmnd = cmd_request->cmd;
	struct scsi_sense_hdr sense_hdr;
	struct vmscsi_request *vm_srb;
	u32 data_transfer_length;
	struct Scsi_Host *host;
	u32 payload_sz = cmd_request->payload_sz;
	void *payload = cmd_request->payload;

	host = stor_dev->host;

	vm_srb = &cmd_request->vstor_packet.vm_srb;
	data_transfer_length = vm_srb->data_transfer_length;

	scmnd->result = vm_srb->scsi_status;

	if (scmnd->result) {
		if (scsi_normalize_sense(scmnd->sense_buffer,
				SCSI_SENSE_BUFFERSIZE, &sense_hdr) &&
		    !(sense_hdr.sense_key == NOT_READY &&
				sense_hdr.asc == 0x3a) &&
		    do_logging(STORVSC_LOGGING_ERROR))
			scsi_print_sense_hdr(scmnd->device, "storvsc",
					     &sense_hdr);
	}

	if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
		storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc,
				     sense_hdr.ascq);
		/*
		 * The Windows driver sets data_transfer_length on
		 * SRB_STATUS_DATA_OVERRUN. On other errors, this value
		 * is untouched. In these cases we set it to 0.
		 */
		if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN)
			data_transfer_length = 0;
	}

	/* Validate data_transfer_length (from Hyper-V) */
	if (data_transfer_length > cmd_request->payload->range.len)
		data_transfer_length = cmd_request->payload->range.len;

	scsi_set_resid(scmnd,
		cmd_request->payload->range.len - data_transfer_length);

	scmnd->scsi_done(scmnd);

	if (payload_sz >
		sizeof(struct vmbus_channel_packet_multipage_buffer))
		kfree(payload);
}
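/*
 * Example (hypothetical sizes): for a 4096-byte read that the host
 * completes with data_transfer_length == 512, the residual reported to
 * the SCSI midlayer above is 4096 - 512 = 3584 bytes.
 */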
static void storvsc_on_io_completion(struct storvsc_device *stor_device,
				  struct vstor_packet *vstor_packet,
				  struct storvsc_cmd_request *request)
{
	struct vstor_packet *stor_pkt;
	struct hv_device *device = stor_device->device;

	stor_pkt = &request->vstor_packet;

	/*
	 * The current SCSI handling on the host side does
	 * not correctly handle:
	 * INQUIRY command with page code parameter set to 0x80
	 * MODE_SENSE command with cmd[2] == 0x1c
	 *
	 * Set up the srb and scsi status so this won't be fatal.
	 * We do this so we can distinguish truly fatal failures
	 * (srb status == 0x4) and off-line the device in that case.
	 */

	if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) ||
	   (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) {
		vstor_packet->vm_srb.scsi_status = 0;
		vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS;
	}


	/* Copy over the status...etc */
	stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status;
	stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status;

	/* Validate sense_info_length (from Hyper-V) */
	if (vstor_packet->vm_srb.sense_info_length > sense_buffer_size)
		vstor_packet->vm_srb.sense_info_length = sense_buffer_size;

	stor_pkt->vm_srb.sense_info_length =
		vstor_packet->vm_srb.sense_info_length;

	if (vstor_packet->vm_srb.scsi_status != 0 ||
	    vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)
		storvsc_log(device, STORVSC_LOGGING_WARN,
			"cmd 0x%x scsi status 0x%x srb status 0x%x\n",
			stor_pkt->vm_srb.cdb[0],
			vstor_packet->vm_srb.scsi_status,
			vstor_packet->vm_srb.srb_status);

	if ((vstor_packet->vm_srb.scsi_status & 0xFF) == 0x02) {
		/* CHECK_CONDITION */
		if (vstor_packet->vm_srb.srb_status &
			SRB_STATUS_AUTOSENSE_VALID) {
			/* autosense data available */

			storvsc_log(device, STORVSC_LOGGING_WARN,
				"stor pkt %p autosense data valid - len %d\n",
				request, vstor_packet->vm_srb.sense_info_length);

			memcpy(request->cmd->sense_buffer,
			       vstor_packet->vm_srb.sense_data,
			       vstor_packet->vm_srb.sense_info_length);

		}
	}

	stor_pkt->vm_srb.data_transfer_length =
		vstor_packet->vm_srb.data_transfer_length;

	storvsc_command_completion(request, stor_device);

	if (atomic_dec_and_test(&stor_device->num_outstanding_req) &&
	    stor_device->drain_notify)
		wake_up(&stor_device->waiting_to_drain);


}
static void storvsc_on_receive(struct storvsc_device *stor_device,
			     struct vstor_packet *vstor_packet,
			     struct storvsc_cmd_request *request)
{
	struct hv_host_device *host_dev;
	switch (vstor_packet->operation) {
	case VSTOR_OPERATION_COMPLETE_IO:
		storvsc_on_io_completion(stor_device, vstor_packet, request);
		break;

	case VSTOR_OPERATION_REMOVE_DEVICE:
	case VSTOR_OPERATION_ENUMERATE_BUS:
		host_dev = shost_priv(stor_device->host);
		queue_work(
			host_dev->handle_error_wq, &host_dev->host_scan_work);
		break;

	case VSTOR_OPERATION_FCHBA_DATA:
		cache_wwn(stor_device, vstor_packet);
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
		fc_host_node_name(stor_device->host) = stor_device->node_name;
		fc_host_port_name(stor_device->host) = stor_device->port_name;
#endif
		break;
	default:
		break;
	}
}

static void storvsc_on_channel_callback(void *context)
{
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	const struct vmpacket_descriptor *desc;
	struct hv_device *device;
	struct storvsc_device *stor_device;

	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

	stor_device = get_in_stor_device(device);
	if (!stor_device)
		return;

	foreach_vmbus_pkt(desc, channel) {
		void *packet = hv_pkt_data(desc);
		struct storvsc_cmd_request *request;
		u64 cmd_rqst;

		cmd_rqst = vmbus_request_addr(&channel->requestor,
					      desc->trans_id);
		if (cmd_rqst == VMBUS_RQST_ERROR) {
			dev_err(&device->device,
				"Incorrect transaction id\n");
			continue;
		}

		request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst;

		if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) -
				stor_device->vmscsi_size_delta) {
			dev_err(&device->device, "Invalid packet len\n");
			continue;
		}

		if (request == &stor_device->init_request ||
		    request == &stor_device->reset_request) {
			memcpy(&request->vstor_packet, packet,
			       (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
			complete(&request->wait_event);
		} else {
			storvsc_on_receive(stor_device, packet, request);
		}
	}
}

static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
				  bool is_fc)
{
	struct vmstorage_channel_properties props;
	int ret;

	memset(&props, 0, sizeof(struct vmstorage_channel_properties));

	/*
	 * The size of vmbus_requestor is an upper bound on the number of requests
	 * that can be in-progress at any one time across all channels.
	 */
	device->channel->rqstor_size = scsi_driver.can_queue;

	ret = vmbus_open(device->channel,
			 ring_size,
			 ring_size,
			 (void *)&props,
			 sizeof(struct vmstorage_channel_properties),
			 storvsc_on_channel_callback, device->channel);

	if (ret != 0)
		return ret;

	ret = storvsc_channel_init(device, is_fc);

	return ret;
}

static int storvsc_dev_remove(struct hv_device *device)
{
	struct storvsc_device *stor_device;

	stor_device = hv_get_drvdata(device);

	stor_device->destroy = true;

	/* Make sure flag is set before waiting */
	wmb();

	/*
	 * At this point, all outbound traffic should be disabled. We
	 * only allow inbound traffic (responses) to proceed so that
	 * outstanding requests can be completed.
	 */

	storvsc_wait_to_drain(stor_device);

	/*
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
	 */
	hv_set_drvdata(device, NULL);

	/* Close the channel */
	vmbus_close(device->channel);

	kfree(stor_device->stor_chns);
	kfree(stor_device);
	return 0;
}
static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
					u16 q_num)
{
	u16 slot = 0;
	u16 hash_qnum;
	const struct cpumask *node_mask;
	int num_channels, tgt_cpu;

	if (stor_device->num_sc == 0) {
		stor_device->stor_chns[q_num] = stor_device->device->channel;
		return stor_device->device->channel;
	}

	/*
	 * Our channel array is sparsely populated and we
	 * initiated I/O on a processor/hw-q that does not
	 * currently have a designated channel. Fix this.
	 * The strategy is simple:
	 * I. Ensure NUMA locality
	 * II. Distribute evenly (best effort)
	 */

	node_mask = cpumask_of_node(cpu_to_node(q_num));

	num_channels = 0;
	for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
		if (cpumask_test_cpu(tgt_cpu, node_mask))
			num_channels++;
	}
	if (num_channels == 0) {
		stor_device->stor_chns[q_num] = stor_device->device->channel;
		return stor_device->device->channel;
	}

	hash_qnum = q_num;
	while (hash_qnum >= num_channels)
		hash_qnum -= num_channels;

	for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
		if (!cpumask_test_cpu(tgt_cpu, node_mask))
			continue;
		if (slot == hash_qnum)
			break;
		slot++;
	}

	stor_device->stor_chns[q_num] = stor_device->stor_chns[tgt_cpu];

	return stor_device->stor_chns[q_num];
}
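/*
 * Worked example (hypothetical topology): if q_num is 5 and three
 * allocated CPUs sit on q_num's NUMA node, the loop above reduces
 * hash_qnum to 5 mod 3 = 2, so the third such CPU (slot 2) supplies
 * the channel that gets cached in stor_chns[5].
 */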
static int storvsc_do_io(struct hv_device *device,
			 struct storvsc_cmd_request *request, u16 q_num)
{
	struct storvsc_device *stor_device;
	struct vstor_packet *vstor_packet;
	struct vmbus_channel *outgoing_channel, *channel;
	unsigned long flags;
	int ret = 0;
	const struct cpumask *node_mask;
	int tgt_cpu;

	vstor_packet = &request->vstor_packet;
	stor_device = get_out_stor_device(device);

	if (!stor_device)
		return -ENODEV;


	request->device = device;
	/*
	 * Select an appropriate channel to send the request out.
	 */
	/* See storvsc_change_target_cpu(). */
	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
	if (outgoing_channel != NULL) {
		if (outgoing_channel->target_cpu == q_num) {
			/*
			 * Ideally, we want to pick a different channel if
			 * available on the same NUMA node.
			 */
			node_mask = cpumask_of_node(cpu_to_node(q_num));
			for_each_cpu_wrap(tgt_cpu,
				 &stor_device->alloced_cpus, q_num + 1) {
				if (!cpumask_test_cpu(tgt_cpu, node_mask))
					continue;
				if (tgt_cpu == q_num)
					continue;
				channel = READ_ONCE(
					stor_device->stor_chns[tgt_cpu]);
				if (channel == NULL)
					continue;
				if (hv_get_avail_to_write_percent(
							&channel->outbound)
						> ring_avail_percent_lowater) {
					outgoing_channel = channel;
					goto found_channel;
				}
			}

			/*
			 * All the other channels on the same NUMA node are
			 * busy. Try to use the channel on the current CPU
			 */
			if (hv_get_avail_to_write_percent(
						&outgoing_channel->outbound)
					> ring_avail_percent_lowater)
				goto found_channel;

			/*
			 * If we reach here, all the channels on the current
			 * NUMA node are busy. Try to find a channel in
			 * other NUMA nodes
			 */
			for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
				if (cpumask_test_cpu(tgt_cpu, node_mask))
					continue;
				channel = READ_ONCE(
					stor_device->stor_chns[tgt_cpu]);
				if (channel == NULL)
					continue;
				if (hv_get_avail_to_write_percent(
							&channel->outbound)
						> ring_avail_percent_lowater) {
					outgoing_channel = channel;
					goto found_channel;
				}
			}
		}
	} else {
		spin_lock_irqsave(&stor_device->lock, flags);
		outgoing_channel = stor_device->stor_chns[q_num];
		if (outgoing_channel != NULL) {
			spin_unlock_irqrestore(&stor_device->lock, flags);
			goto found_channel;
		}
		outgoing_channel = get_og_chn(stor_device, q_num);
		spin_unlock_irqrestore(&stor_device->lock, flags);
	}

found_channel:
	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;

	vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
				       stor_device->vmscsi_size_delta);


	vstor_packet->vm_srb.sense_info_length = sense_buffer_size;


	vstor_packet->vm_srb.data_transfer_length =
		request->payload->range.len;

	vstor_packet->operation = VSTOR_OPERATION_EXECUTE_SRB;

	if (request->payload->range.len) {

		ret = vmbus_sendpacket_mpb_desc(outgoing_channel,
				request->payload, request->payload_sz,
				vstor_packet,
				(sizeof(struct vstor_packet) -
				stor_device->vmscsi_size_delta),
				(unsigned long)request);
	} else {
		ret = vmbus_sendpacket(outgoing_channel, vstor_packet,
			       (sizeof(struct vstor_packet) -
				stor_device->vmscsi_size_delta),
			       (unsigned long)request,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	if (ret != 0)
		return ret;

	atomic_inc(&stor_device->num_outstanding_req);

	return ret;
}

static int storvsc_device_alloc(struct scsi_device *sdevice)
{
	/*
	 * Set blist flag to permit the reading of the VPD pages even when
	 * the target may claim SPC-2 compliance. MSFT targets currently
	 * claim SPC-2 compliance while they implement post SPC-2 features.
	 * With this flag we can correctly handle WRITE_SAME_16 issues.
	 *
	 * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
	 * still supports REPORT LUN.
	 */
	sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;

	return 0;
}

static int storvsc_device_configure(struct scsi_device *sdevice)
{
	blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));

	sdevice->no_write_same = 1;

	/*
	 * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
	 * if the device is a MSFT virtual device. If the host is
	 * WIN10 or newer, allow write_same.
	 */
	if (!strncmp(sdevice->vendor, "Msft", 4)) {
		switch (vmstor_proto_version) {
		case VMSTOR_PROTO_VERSION_WIN8:
		case VMSTOR_PROTO_VERSION_WIN8_1:
			sdevice->scsi_level = SCSI_SPC_3;
			break;
		}

		if (vmstor_proto_version >= VMSTOR_PROTO_VERSION_WIN10)
			sdevice->no_write_same = 0;
	}

	return 0;
}

static int storvsc_get_chs(struct scsi_device *sdev, struct block_device *bdev,
			   sector_t capacity, int *info)
{
	sector_t nsect = capacity;
	sector_t cylinders = nsect;
	int heads, sectors_pt;

	/*
	 * We are making up these values; let us keep it simple.
	 */
	heads = 0xff;
	sectors_pt = 0x3f;	/* Sectors per track */
	sector_div(cylinders, heads * sectors_pt);
	if ((sector_t)(cylinders + 1) * heads * sectors_pt < nsect)
		cylinders = 0xffff;

	info[0] = heads;
	info[1] = sectors_pt;
	info[2] = (int)cylinders;

	return 0;
}
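/*
 * Worked example (made-up capacity): a 1 GiB disk has 2097152 512-byte
 * sectors; with 255 heads and 63 sectors per track the geometry above
 * reports 2097152 / (255 * 63) = 130 cylinders (integer division).
 */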
static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
{
	struct hv_host_device *host_dev = shost_priv(scmnd->device->host);
	struct hv_device *device = host_dev->dev;

	struct storvsc_device *stor_device;
	struct storvsc_cmd_request *request;
	struct vstor_packet *vstor_packet;
	int ret, t;

	stor_device = get_out_stor_device(device);
	if (!stor_device)
		return FAILED;

	request = &stor_device->reset_request;
	vstor_packet = &request->vstor_packet;
	memset(vstor_packet, 0, sizeof(struct vstor_packet));

	init_completion(&request->wait_event);

	vstor_packet->operation = VSTOR_OPERATION_RESET_BUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
	vstor_packet->vm_srb.path_id = stor_device->path_id;

	ret = vmbus_sendpacket(device->channel, vstor_packet,
			       (sizeof(struct vstor_packet) -
				stor_device->vmscsi_size_delta),
			       (unsigned long)&stor_device->reset_request,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0)
		return FAILED;

	t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
	if (t == 0)
		return TIMEOUT_ERROR;


	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and returned to us.
	 * There is a potential race here where the host may be in
	 * the process of responding when we return from here.
	 * Just wait for all in-transit packets to be accounted for
	 * before we return from here.
	 */
	storvsc_wait_to_drain(stor_device);

	return SUCCESS;
}

/*
 * The host guarantees to respond to each command, although I/O latencies might
 * be unbounded on Azure. Reset the timer unconditionally to give the host a
 * chance to perform EH.
 */
static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
{
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	if (scmnd->device->host->transportt == fc_transport_template)
		return fc_eh_timed_out(scmnd);
#endif
	return BLK_EH_RESET_TIMER;
}
static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd)
{
	bool allowed = true;
	u8 scsi_op = scmnd->cmnd[0];

	switch (scsi_op) {
	/* the host does not handle WRITE_SAME, log accidental usage */
	case WRITE_SAME:
	/*
	 * smartd sends this command and the host does not handle
	 * this. So, don't send it.
	 */
	case SET_WINDOW:
		scmnd->result = DID_ERROR << 16;
		allowed = false;
		break;
	default:
		break;
	}
	return allowed;
}

static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
{
	int ret;
	struct hv_host_device *host_dev = shost_priv(host);
	struct hv_device *dev = host_dev->dev;
	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
	int i;
	struct scatterlist *sgl;
	unsigned int sg_count;
	struct vmscsi_request *vm_srb;
	struct vmbus_packet_mpb_array *payload;
	u32 payload_sz;
	u32 length;

	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
		/*
		 * On legacy hosts filter unimplemented commands.
		 * Future hosts are expected to correctly handle
		 * unsupported commands. Furthermore, it is
		 * possible that some of the currently
		 * unsupported commands may be supported in
		 * future versions of the host.
		 */
		if (!storvsc_scsi_cmd_ok(scmnd)) {
			scmnd->scsi_done(scmnd);
			return 0;
		}
	}

	/* Setup the cmd request */
	cmd_request->cmd = scmnd;

	memset(&cmd_request->vstor_packet, 0, sizeof(struct vstor_packet));
	vm_srb = &cmd_request->vstor_packet.vm_srb;
	vm_srb->win8_extension.time_out_value = 60;

	vm_srb->win8_extension.srb_flags |=
		SRB_FLAGS_DISABLE_SYNCH_TRANSFER;

	if (scmnd->device->tagged_supported) {
		vm_srb->win8_extension.srb_flags |=
		(SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE);
		vm_srb->win8_extension.queue_tag = SP_UNTAGGED;
		vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST;
	}

	/* Build the SRB */
	switch (scmnd->sc_data_direction) {
	case DMA_TO_DEVICE:
		vm_srb->data_in = WRITE_TYPE;
		vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_OUT;
		break;
	case DMA_FROM_DEVICE:
		vm_srb->data_in = READ_TYPE;
		vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_IN;
		break;
	case DMA_NONE:
		vm_srb->data_in = UNKNOWN_TYPE;
		vm_srb->win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER;
		break;
	default:
		/*
		 * This is DMA_BIDIRECTIONAL or something else we are never
		 * supposed to see here.
		 */
		WARN(1, "Unexpected data direction: %d\n",
		     scmnd->sc_data_direction);
		return -EINVAL;
	}


	vm_srb->port_number = host_dev->port;
	vm_srb->path_id = scmnd->device->channel;
	vm_srb->target_id = scmnd->device->id;
	vm_srb->lun = scmnd->device->lun;

	vm_srb->cdb_length = scmnd->cmd_len;

	memcpy(vm_srb->cdb, scmnd->cmnd, vm_srb->cdb_length);

	sgl = (struct scatterlist *)scsi_sglist(scmnd);
	sg_count = scsi_sg_count(scmnd);

	length = scsi_bufflen(scmnd);
	payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
	payload_sz = sizeof(cmd_request->mpb);
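	/*
	 * Illustrative arithmetic (offsets assumed): a 12 KiB buffer that
	 * starts 0x200 bytes into a Hyper-V page spans
	 * HVPFN_UP(0x200 + 12288) = 4 Hyper-V (4 KiB) pages, so hvpg_count
	 * below would be 4 and four PFNs land in the payload's pfn_array.
	 */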
	if (sg_count) {
		unsigned int hvpgoff, hvpfns_to_add;
		unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
		u64 hvpfn;

		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {

			payload_sz = (hvpg_count * sizeof(u64) +
				      sizeof(struct vmbus_packet_mpb_array));
			payload = kzalloc(payload_sz, GFP_ATOMIC);
			if (!payload)
				return SCSI_MLQUEUE_DEVICE_BUSY;
		}

		payload->range.len = length;
		payload->range.offset = offset_in_hvpg;


		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
			/*
			 * Init values for the current sgl entry. hvpgoff
			 * and hvpfns_to_add are in units of Hyper-V size
			 * pages. Handling the PAGE_SIZE != HV_HYP_PAGE_SIZE
			 * case also handles values of sgl->offset that are
			 * larger than PAGE_SIZE. Such offsets are handled
			 * even on other than the first sgl entry, provided
			 * they are a multiple of PAGE_SIZE.
			 */
			hvpgoff = HVPFN_DOWN(sgl->offset);
			hvpfn = page_to_hvpfn(sg_page(sgl)) + hvpgoff;
			hvpfns_to_add = HVPFN_UP(sgl->offset + sgl->length) -
						hvpgoff;

			/*
			 * Fill the next portion of the PFN array with
			 * sequential Hyper-V PFNs for the contiguous physical
			 * memory described by the sgl entry. The end of the
			 * last sgl should be reached at the same time that
			 * the PFN array is filled.
			 */
			while (hvpfns_to_add--)
				payload->range.pfn_array[i++] = hvpfn++;
		}
	}
	cmd_request->payload = payload;
	cmd_request->payload_sz = payload_sz;

	/* Invokes the vsc to start an IO */
	ret = storvsc_do_io(dev, cmd_request, get_cpu());
	put_cpu();

	if (ret == -EAGAIN) {
		if (payload_sz > sizeof(cmd_request->mpb))
			kfree(payload);
		/* no more space */
		return SCSI_MLQUEUE_DEVICE_BUSY;
	}

	return 0;
}

static struct scsi_host_template scsi_driver = {
	.module			= THIS_MODULE,
	.name			= "storvsc_host_t",
	.cmd_size		= sizeof(struct storvsc_cmd_request),
	.bios_param		= storvsc_get_chs,
	.queuecommand		= storvsc_queuecommand,
	.eh_host_reset_handler	= storvsc_host_reset_handler,
	.proc_name		= "storvsc_host",
	.eh_timed_out		= storvsc_eh_timed_out,
	.slave_alloc		= storvsc_device_alloc,
	.slave_configure	= storvsc_device_configure,
	.cmd_per_lun		= 2048,
	.this_id		= -1,
	/* Ensure there are no gaps in presented sgls */
	.virt_boundary_mask	= PAGE_SIZE-1,
	.no_write_same		= 1,
	.track_queue_depth	= 1,
	.change_queue_depth	= storvsc_change_queue_depth,
};

enum {
	SCSI_GUID,
	IDE_GUID,
	SFC_GUID,
};

static const struct hv_vmbus_device_id id_table[] = {
	/* SCSI guid */
	{ HV_SCSI_GUID,
	  .driver_data = SCSI_GUID
	},
	/* IDE guid */
	{ HV_IDE_GUID,
	  .driver_data = IDE_GUID
	},
	/* Fibre Channel GUID */
	{
	  HV_SYNTHFC_GUID,
	  .driver_data = SFC_GUID
	},
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);

static const struct { guid_t guid; } fc_guid = { HV_SYNTHFC_GUID };

static bool hv_dev_is_fc(struct hv_device *hv_dev)
{
	return guid_equal(&fc_guid.guid, &hv_dev->dev_type);
}

static int storvsc_probe(struct hv_device *device,
			const struct hv_vmbus_device_id *dev_id)
{
	int ret;
	int num_cpus = num_online_cpus();
	int num_present_cpus = num_present_cpus();
	struct Scsi_Host *host;
	struct hv_host_device *host_dev;
	bool dev_is_ide = ((dev_id->driver_data == IDE_GUID) ? true : false);
	bool is_fc = ((dev_id->driver_data == SFC_GUID) ? true : false);
	int target = 0;
	struct storvsc_device *stor_device;
	int max_luns_per_target;
	int max_targets;
	int max_channels;
	int max_sub_channels = 0;

	/*
	 * Based on the Windows host we are running on,
	 * set state to properly communicate with the host.
	 */

	if (vmbus_proto_version < VERSION_WIN8) {
		max_luns_per_target = STORVSC_IDE_MAX_LUNS_PER_TARGET;
		max_targets = STORVSC_IDE_MAX_TARGETS;
		max_channels = STORVSC_IDE_MAX_CHANNELS;
	} else {
		max_luns_per_target = STORVSC_MAX_LUNS_PER_TARGET;
		max_targets = STORVSC_MAX_TARGETS;
		max_channels = STORVSC_MAX_CHANNELS;
		/*
		 * On Windows8 and above, we support sub-channels for storage
		 * on SCSI and FC controllers.
		 * The number of sub-channels offered is based on the number of
		 * VCPUs in the guest.
		 */
		if (!dev_is_ide)
			max_sub_channels =
				(num_cpus - 1) / storvsc_vcpus_per_sub_channel;
	}

	scsi_driver.can_queue = max_outstanding_req_per_channel *
				(max_sub_channels + 1) *
				(100 - ring_avail_percent_lowater) / 100;
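	/*
	 * Worked example (all inputs hypothetical): with
	 * max_outstanding_req_per_channel == 200, three sub-channels and
	 * the default lowater of 10, can_queue becomes
	 * 200 * 4 * 90 / 100 = 720.
	 */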
		 */
		if (!dev_is_ide)
			max_sub_channels =
				(num_cpus - 1) / storvsc_vcpus_per_sub_channel;
	}

	scsi_driver.can_queue = max_outstanding_req_per_channel *
				(max_sub_channels + 1) *
				(100 - ring_avail_percent_lowater) / 100;

	host = scsi_host_alloc(&scsi_driver,
			       sizeof(struct hv_host_device));
	if (!host)
		return -ENOMEM;

	host_dev = shost_priv(host);
	memset(host_dev, 0, sizeof(struct hv_host_device));

	host_dev->port = host->host_no;
	host_dev->dev = device;
	host_dev->host = host;

	stor_device = kzalloc(sizeof(struct storvsc_device), GFP_KERNEL);
	if (!stor_device) {
		ret = -ENOMEM;
		goto err_out0;
	}

	stor_device->destroy = false;
	init_waitqueue_head(&stor_device->waiting_to_drain);
	stor_device->device = device;
	stor_device->host = host;
	stor_device->vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
	spin_lock_init(&stor_device->lock);
	hv_set_drvdata(device, stor_device);

	stor_device->port_number = host->host_no;
	ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
	if (ret)
		goto err_out1;

	host_dev->path = stor_device->path_id;
	host_dev->target = stor_device->target_id;

	switch (dev_id->driver_data) {
	case SFC_GUID:
		host->max_lun = STORVSC_FC_MAX_LUNS_PER_TARGET;
		host->max_id = STORVSC_FC_MAX_TARGETS;
		host->max_channel = STORVSC_FC_MAX_CHANNELS - 1;
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
		host->transportt = fc_transport_template;
#endif
		break;

	case SCSI_GUID:
		host->max_lun = max_luns_per_target;
		host->max_id = max_targets;
		host->max_channel = max_channels - 1;
		break;

	default:
		host->max_lun = STORVSC_IDE_MAX_LUNS_PER_TARGET;
		host->max_id = STORVSC_IDE_MAX_TARGETS;
		host->max_channel = STORVSC_IDE_MAX_CHANNELS - 1;
		break;
	}
	/* max cmd length */
	host->max_cmd_len = STORVSC_MAX_CMD_LEN;

	/*
	 * Set the scatter-gather table size based on the info we got
	 * from the host.
	 */
	host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
	/*
	 * For non-IDE disks, the host supports multiple channels.
	 * Set the number of HW queues we are supporting.
	 */
	if (!dev_is_ide) {
		if (storvsc_max_hw_queues > num_present_cpus) {
			storvsc_max_hw_queues = 0;
			storvsc_log(device, STORVSC_LOGGING_WARN,
				"Resetting invalid storvsc_max_hw_queues value to default.\n");
		}
		if (storvsc_max_hw_queues)
			host->nr_hw_queues = storvsc_max_hw_queues;
		else
			host->nr_hw_queues = num_present_cpus;
	}

	/*
	 * Set the error handler work queue.
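	 *
	 * Rationale, as we read the allocation below (added commentary,
	 * not original text): an ordered workqueue serializes the
	 * error-handling work items, and WQ_MEM_RECLAIM provides a
	 * rescuer thread so error recovery can make forward progress
	 * under memory pressure, since it sits on the storage I/O path.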
	 */
	host_dev->handle_error_wq =
			alloc_ordered_workqueue("storvsc_error_wq_%d",
						WQ_MEM_RECLAIM,
						host->host_no);
	if (!host_dev->handle_error_wq) {
		ret = -ENOMEM;
		goto err_out2;
	}
	INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan);
	/* Register the HBA and start the scsi bus scan */
	ret = scsi_add_host(host, &device->device);
	if (ret != 0)
		goto err_out3;

	if (!dev_is_ide) {
		scsi_scan_host(host);
	} else {
		target = (device->dev_instance.b[5] << 8 |
			  device->dev_instance.b[4]);
		ret = scsi_add_device(host, 0, target, 0);
		if (ret)
			goto err_out4;
	}
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	if (host->transportt == fc_transport_template) {
		struct fc_rport_identifiers ids = {
			.roles = FC_PORT_ROLE_FCP_DUMMY_INITIATOR,
		};

		fc_host_node_name(host) = stor_device->node_name;
		fc_host_port_name(host) = stor_device->port_name;
		stor_device->rport = fc_remote_port_add(host, 0, &ids);
		if (!stor_device->rport) {
			ret = -ENOMEM;
			goto err_out4;
		}
	}
#endif
	return 0;

err_out4:
	scsi_remove_host(host);

err_out3:
	destroy_workqueue(host_dev->handle_error_wq);

err_out2:
	/*
	 * Once we have connected with the host, we must invoke
	 * storvsc_dev_remove() to roll back this state; this call also
	 * frees up the stor_device, hence the jump around the err_out1
	 * label.
	 */
	storvsc_dev_remove(device);
	goto err_out0;

err_out1:
	kfree(stor_device->stor_chns);
	kfree(stor_device);

err_out0:
	scsi_host_put(host);
	return ret;
}

/* Change a scsi target's queue depth */
static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth)
{
	if (queue_depth > scsi_driver.can_queue)
		queue_depth = scsi_driver.can_queue;

	return scsi_change_queue_depth(sdev, queue_depth);
}

static int storvsc_remove(struct hv_device *dev)
{
	struct storvsc_device *stor_device = hv_get_drvdata(dev);
	struct Scsi_Host *host = stor_device->host;
	struct hv_host_device *host_dev = shost_priv(host);

#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	if (host->transportt == fc_transport_template) {
		fc_remote_port_delete(stor_device->rport);
		fc_remove_host(host);
	}
#endif
	destroy_workqueue(host_dev->handle_error_wq);
	scsi_remove_host(host);
	storvsc_dev_remove(dev);
	scsi_host_put(host);

	return 0;
}

static int storvsc_suspend(struct hv_device *hv_dev)
{
	struct storvsc_device *stor_device = hv_get_drvdata(hv_dev);
	struct Scsi_Host *host = stor_device->host;
	struct hv_host_device *host_dev = shost_priv(host);

	storvsc_wait_to_drain(stor_device);

	drain_workqueue(host_dev->handle_error_wq);

	vmbus_close(hv_dev->channel);

	kfree(stor_device->stor_chns);
	stor_device->stor_chns = NULL;

	cpumask_clear(&stor_device->alloced_cpus);

	return 0;
}

static int storvsc_resume(struct hv_device *hv_dev)
{
	int ret;

	ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
				     hv_dev_is_fc(hv_dev));
	return ret;
}
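/*
 * Added note (general property of the flag, not stated in the original
 * code): PROBE_PREFER_ASYNCHRONOUS below allows storvsc probing to run
 * asynchronously with respect to other drivers during boot, which can
 * shorten boot time for VMs with many synthetic disks.
 */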
static struct hv_driver storvsc_drv = {
	.name = KBUILD_MODNAME,
	.id_table = id_table,
	.probe = storvsc_probe,
	.remove = storvsc_remove,
	.suspend = storvsc_suspend,
	.resume = storvsc_resume,
	.driver = {
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};

#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct fc_function_template fc_transport_functions = {
	.show_host_node_name = 1,
	.show_host_port_name = 1,
};
#endif

static int __init storvsc_drv_init(void)
{
	int ret;

	/*
	 * Divide the ring buffer data size (which is 1 page less
	 * than the ring buffer size since that page is reserved for
	 * the ring buffer indices) by the max request size (which is
	 * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
	 *
	 * The computation underestimates max_outstanding_req_per_channel
	 * for Win7 and older hosts because it does not take into account
	 * the vmscsi_size_delta correction to the max request size.
	 */
	max_outstanding_req_per_channel =
		((storvsc_ringbuffer_size - PAGE_SIZE) /
		ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
		sizeof(struct vstor_packet) + sizeof(u64),
		sizeof(u64)));

#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	fc_transport_template = fc_attach_transport(&fc_transport_functions);
	if (!fc_transport_template)
		return -ENODEV;
#endif

	ret = vmbus_driver_register(&storvsc_drv);

#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	if (ret)
		fc_release_transport(fc_transport_template);
#endif

	return ret;
}

static void __exit storvsc_drv_exit(void)
{
	vmbus_driver_unregister(&storvsc_drv);
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
	fc_release_transport(fc_transport_template);
#endif
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V virtual storage driver");
module_init(storvsc_drv_init);
module_exit(storvsc_drv_exit);