// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) 2017, Microsoft Corporation.
 *
 *   Author(s): Long Li <longli@microsoft.com>
 */
#include <linux/module.h>
#include <linux/highmem.h>
#include "smbdirect.h"
#include "cifs_debug.h"
#include "cifsproto.h"
#include "smb2proto.h"

static struct smbd_response *get_empty_queue_buffer(
		struct smbd_connection *info);
static struct smbd_response *get_receive_buffer(
		struct smbd_connection *info);
static void put_receive_buffer(
		struct smbd_connection *info,
		struct smbd_response *response);
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
static void destroy_receive_buffers(struct smbd_connection *info);

static void put_empty_packet(
		struct smbd_connection *info, struct smbd_response *response);
static void enqueue_reassembly(
		struct smbd_connection *info,
		struct smbd_response *response, int data_length);
static struct smbd_response *_get_first_reassembly(
		struct smbd_connection *info);

static int smbd_post_recv(
		struct smbd_connection *info,
		struct smbd_response *response);

static int smbd_post_send_empty(struct smbd_connection *info);

static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);

struct smb_extract_to_rdma {
	struct ib_sge		*sge;
	unsigned int		nr_sge;
	unsigned int		max_sge;
	struct ib_device	*device;
	u32			local_dma_lkey;
	enum dma_data_direction direction;
};
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
					struct smb_extract_to_rdma *rdma);

/* SMBD version number */
#define SMBD_V1	0x0100

/* Port numbers for SMBD transport */
#define SMB_PORT	445
#define SMBD_PORT	5445

/* Address lookup and resolve timeout in ms */
#define RDMA_RESOLVE_TIMEOUT	5000

/* SMBD negotiation timeout in seconds */
#define SMBD_NEGOTIATE_TIMEOUT	120

/* SMBD minimum receive size and fragmented size, as defined in [MS-SMBD] */
#define SMBD_MIN_RECEIVE_SIZE		128
#define SMBD_MIN_FRAGMENTED_SIZE	131072

/*
 * Default maximum number of RDMA read/write outstanding on this connection
 * This value may be decreased during QP creation, based on hardware limits
 */
#define SMBD_CM_RESPONDER_RESOURCES	32

/* Maximum number of retries on data transfer operations */
#define SMBD_CM_RETRY			6
/* No need to retry on Receiver Not Ready since SMBD manages credits */
#define SMBD_CM_RNR_RETRY		0

/*
 * User configurable initial values per SMBD transport connection
 * as defined in [MS-SMBD] 3.1.1.1
 * These may change after an SMBD negotiation
 */
/* The local peer's maximum number of credits to grant to the peer */
int smbd_receive_credit_max = 255;

/* The remote peer's credit request of the local peer */
int smbd_send_credit_target = 255;

/* The maximum single message size that can be sent to the remote peer */
int smbd_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
int smbd_max_fragmented_recv_size = 1024 * 1024;

/* The maximum single-message size which can be received */
int smbd_max_receive_size = 1364;

/* The timeout to initiate send of a keepalive message on idle */
int smbd_keep_alive_interval = 120;

/*
 * User configurable initial values for RDMA transport
 * The actual values used may be lower and are limited to hardware capabilities
 */
/* Default maximum number of pages in a single RDMA write/read */
int smbd_max_frmr_depth = 2048;

/* If payload is less than this many bytes, use RDMA send/recv, not read/write */
int rdma_readwrite_threshold = 4096;

/* Transport logging functions
 * Logging is defined as classes. They can be OR'ed to define the actual
 * logging level via the module parameter smbd_logging_class,
 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
 * log_rdma_event()
 */
#define LOG_OUTGOING			0x1
#define LOG_INCOMING			0x2
#define LOG_READ			0x4
#define LOG_WRITE			0x8
#define LOG_RDMA_SEND			0x10
#define LOG_RDMA_RECV			0x20
#define LOG_KEEP_ALIVE			0x40
#define LOG_RDMA_EVENT			0x80
#define LOG_RDMA_MR			0x100
static unsigned int smbd_logging_class;
module_param(smbd_logging_class, uint, 0644);
MODULE_PARM_DESC(smbd_logging_class,
	"Logging class for SMBD transport 0x0 to 0x100");

#define ERR		0x0
#define INFO		0x1
static unsigned int smbd_logging_level = ERR;
module_param(smbd_logging_level, uint, 0644);
MODULE_PARM_DESC(smbd_logging_level,
	"Logging level for SMBD transport, 0 (default): error, 1: info");

#define log_rdma(level, class, fmt, args...)				\
do {									\
	if (level <= smbd_logging_level || class & smbd_logging_class) \
		cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
} while (0)

#define log_outgoing(level, fmt, args...) \
		log_rdma(level, LOG_OUTGOING, fmt, ##args)
#define log_incoming(level, fmt, args...) \
		log_rdma(level, LOG_INCOMING, fmt, ##args)
#define log_read(level, fmt, args...)	log_rdma(level, LOG_READ, fmt, ##args)
#define log_write(level, fmt, args...)	log_rdma(level, LOG_WRITE, fmt, ##args)
#define log_rdma_send(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
#define log_rdma_recv(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
#define log_keep_alive(level, fmt, args...) \
		log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
#define log_rdma_event(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
#define log_rdma_mr(level, fmt, args...) \
		log_rdma(level, LOG_RDMA_MR, fmt, ##args)

static void smbd_disconnect_rdma_work(struct work_struct *work)
{
	struct smbd_connection *info =
		container_of(work, struct smbd_connection, disconnect_work);

	if (info->transport_status == SMBD_CONNECTED) {
		info->transport_status = SMBD_DISCONNECTING;
		rdma_disconnect(info->id);
	}
}

static void smbd_disconnect_rdma_connection(struct smbd_connection *info)
{
	queue_work(info->workqueue, &info->disconnect_work);
}

/* Upcall from RDMA CM */
static int smbd_conn_upcall(
		struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct smbd_connection *info = id->context;

	log_rdma_event(INFO, "event=%d status=%d\n",
		event->event, event->status);

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		info->ri_rc = 0;
		complete(&info->ri_done);
		break;

	case RDMA_CM_EVENT_ADDR_ERROR:
		info->ri_rc = -EHOSTUNREACH;
		complete(&info->ri_done);
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
		info->ri_rc = -ENETUNREACH;
		complete(&info->ri_done);
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		log_rdma_event(INFO, "connected event=%d\n", event->event);
		info->transport_status = SMBD_CONNECTED;
		wake_up_interruptible(&info->conn_wait);
		break;

	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
		info->transport_status = SMBD_DISCONNECTED;
		wake_up_interruptible(&info->conn_wait);
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_DISCONNECTED:
		/* This happens when we fail the negotiation */
		if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
			info->transport_status = SMBD_DISCONNECTED;
			wake_up(&info->conn_wait);
			break;
		}

		info->transport_status = SMBD_DISCONNECTED;
		wake_up_interruptible(&info->disconn_wait);
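		/*
		 * Wake up anyone blocked on the reassembly or send queues so
		 * they can notice the transport is now disconnected and return.
		 */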
		wake_up_interruptible(&info->wait_reassembly_queue);
		wake_up_interruptible_all(&info->wait_send_queue);
		break;

	default:
		break;
	}

	return 0;
}

/* Upcall from RDMA QP */
static void
smbd_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct smbd_connection *info = context;

	log_rdma_event(ERR, "%s on device %s info %p\n",
		ib_event_msg(event->event), event->device->name, info);

	switch (event->event) {
	case IB_EVENT_CQ_ERR:
	case IB_EVENT_QP_FATAL:
		smbd_disconnect_rdma_connection(info);
		break;

	default:
		break;
	}
}

static inline void *smbd_request_payload(struct smbd_request *request)
{
	return (void *)request->packet;
}

static inline void *smbd_response_payload(struct smbd_response *response)
{
	return (void *)response->packet;
}

/* Called when an RDMA send is done */
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	int i;
	struct smbd_request *request =
		container_of(wc->wr_cqe, struct smbd_request, cqe);

	log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n",
		request, wc->status);

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
		log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
			wc->status, wc->opcode);
		smbd_disconnect_rdma_connection(request->info);
	}

	for (i = 0; i < request->num_sge; i++)
		ib_dma_unmap_single(request->info->id->device,
			request->sge[i].addr,
			request->sge[i].length,
			DMA_TO_DEVICE);

	if (atomic_dec_and_test(&request->info->send_pending))
		wake_up(&request->info->wait_send_pending);

	wake_up(&request->info->wait_post_send);

	mempool_free(request, request->info->request_mempool);
}

static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
{
	log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n",
		       resp->min_version, resp->max_version,
		       resp->negotiated_version, resp->credits_requested,
		       resp->credits_granted, resp->status,
		       resp->max_readwrite_size, resp->preferred_send_size,
		       resp->max_receive_size, resp->max_fragmented_size);
}

/*
 * Process a negotiation response message, according to [MS-SMBD] 3.1.5.7
 * response, packet_length: the negotiation response message
 * return value: true if negotiation is a success, false if failed
 */
static bool process_negotiation_response(
		struct smbd_response *response, int packet_length)
{
	struct smbd_connection *info = response->info;
	struct smbd_negotiate_resp *packet = smbd_response_payload(response);

	if (packet_length < sizeof(struct smbd_negotiate_resp)) {
		log_rdma_event(ERR,
			"error: packet_length=%d\n", packet_length);
		return false;
	}

	if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
		log_rdma_event(ERR, "error: negotiated_version=%x\n",
			le16_to_cpu(packet->negotiated_version));
		return false;
	}
	info->protocol = le16_to_cpu(packet->negotiated_version);

	if (packet->credits_requested == 0) {
		log_rdma_event(ERR, "error: credits_requested==0\n");
		return false;
	}
	info->receive_credit_target = le16_to_cpu(packet->credits_requested);

	if (packet->credits_granted == 0) {
		log_rdma_event(ERR, "error: credits_granted==0\n");
		return false;
	}
	atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));

	atomic_set(&info->receive_credits, 0);

	if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
		log_rdma_event(ERR, "error: preferred_send_size=%d\n",
			le32_to_cpu(packet->preferred_send_size));
		return false;
	}
	info->max_receive_size = le32_to_cpu(packet->preferred_send_size);

	if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
		log_rdma_event(ERR, "error: max_receive_size=%d\n",
			le32_to_cpu(packet->max_receive_size));
		return false;
	}
	info->max_send_size = min_t(int, info->max_send_size,
		le32_to_cpu(packet->max_receive_size));

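	/*
	 * Validate the peer's advertised fragmented receive size against the
	 * protocol minimum (SMBD_MIN_FRAGMENTED_SIZE) before accepting it.
	 */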
	if (le32_to_cpu(packet->max_fragmented_size) <
			SMBD_MIN_FRAGMENTED_SIZE) {
		log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
			le32_to_cpu(packet->max_fragmented_size));
		return false;
	}
	info->max_fragmented_send_size =
		le32_to_cpu(packet->max_fragmented_size);
	info->rdma_readwrite_threshold =
		rdma_readwrite_threshold > info->max_fragmented_send_size ?
		info->max_fragmented_send_size :
		rdma_readwrite_threshold;

	info->max_readwrite_size = min_t(u32,
			le32_to_cpu(packet->max_readwrite_size),
			info->max_frmr_depth * PAGE_SIZE);
	info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;

	return true;
}

static void smbd_post_send_credits(struct work_struct *work)
{
	int ret = 0;
	int use_receive_queue = 1;
	int rc;
	struct smbd_response *response;
	struct smbd_connection *info =
		container_of(work, struct smbd_connection,
			post_send_credits_work);

	if (info->transport_status != SMBD_CONNECTED) {
		wake_up(&info->wait_receive_queues);
		return;
	}

	if (info->receive_credit_target >
		atomic_read(&info->receive_credits)) {
		while (true) {
			if (use_receive_queue)
				response = get_receive_buffer(info);
			else
				response = get_empty_queue_buffer(info);
			if (!response) {
				/* now switch to empty packet queue */
				if (use_receive_queue) {
					use_receive_queue = 0;
					continue;
				} else
					break;
			}

			response->type = SMBD_TRANSFER_DATA;
			response->first_segment = false;
			rc = smbd_post_recv(info, response);
			if (rc) {
				log_rdma_recv(ERR,
					"post_recv failed rc=%d\n", rc);
				put_receive_buffer(info, response);
				break;
			}

			ret++;
		}
	}

	spin_lock(&info->lock_new_credits_offered);
	info->new_credits_offered += ret;
	spin_unlock(&info->lock_new_credits_offered);

	/* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
	info->send_immediate = true;
	if (atomic_read(&info->receive_credits) <
		info->receive_credit_target - 1) {
		if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
		    info->send_immediate) {
			log_keep_alive(INFO, "send an empty message\n");
			smbd_post_send_empty(info);
		}
	}
}

/* Called from softirq, when recv is done */
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smbd_data_transfer *data_transfer;
	struct smbd_response *response =
		container_of(wc->wr_cqe, struct smbd_response, cqe);
	struct smbd_connection *info = response->info;
	int data_length = 0;

	log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n",
		      response, response->type, wc->status, wc->opcode,
		      wc->byte_len, wc->pkey_index);

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
		log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
			wc->status, wc->opcode);
		smbd_disconnect_rdma_connection(info);
		goto error;
	}

	ib_dma_sync_single_for_cpu(
		wc->qp->device,
		response->sge.addr,
		response->sge.length,
		DMA_FROM_DEVICE);

	switch (response->type) {
	/* SMBD negotiation response */
	case SMBD_NEGOTIATE_RESP:
		dump_smbd_negotiate_resp(smbd_response_payload(response));
		info->full_packet_received = true;
		info->negotiate_done =
			process_negotiation_response(response, wc->byte_len);
		complete(&info->negotiate_completion);
		break;

	/* SMBD data transfer packet */
	case SMBD_TRANSFER_DATA:
		data_transfer = smbd_response_payload(response);
		data_length = le32_to_cpu(data_transfer->data_length);

		/*
		 * If this is a packet with a data payload, place the data in
		 * the reassembly queue and wake up the reading thread
		 */
		if (data_length) {
			if (info->full_packet_received)
				response->first_segment = true;

			if (le32_to_cpu(data_transfer->remaining_data_length))
				info->full_packet_received = false;
			else
				info->full_packet_received = true;

			enqueue_reassembly(
				info,
				response,
				data_length);
		} else
			put_empty_packet(info, response);

		if (data_length)
			wake_up_interruptible(&info->wait_reassembly_queue);

		atomic_dec(&info->receive_credits);
		info->receive_credit_target =
			le16_to_cpu(data_transfer->credits_requested);
		if (le16_to_cpu(data_transfer->credits_granted)) {
			atomic_add(le16_to_cpu(data_transfer->credits_granted),
				&info->send_credits);
			/*
			 * We have new send credits granted from remote peer
			 * If any sender is waiting for credits, unblock it
			 */
			wake_up_interruptible(&info->wait_send_queue);
		}

		log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n",
			     le16_to_cpu(data_transfer->flags),
			     le32_to_cpu(data_transfer->data_offset),
			     le32_to_cpu(data_transfer->data_length),
			     le32_to_cpu(data_transfer->remaining_data_length));

		/* Send a KEEP_ALIVE response right away if requested */
		info->keep_alive_requested = KEEP_ALIVE_NONE;
		if (le16_to_cpu(data_transfer->flags) &
				SMB_DIRECT_RESPONSE_REQUESTED) {
			info->keep_alive_requested = KEEP_ALIVE_PENDING;
		}

		return;

	default:
		log_rdma_recv(ERR,
			"unexpected response type=%d\n", response->type);
	}

error:
	put_receive_buffer(info, response);
}

static struct rdma_cm_id *smbd_create_id(
		struct smbd_connection *info,
		struct sockaddr *dstaddr, int port)
{
	struct rdma_cm_id *id;
	int rc;
	__be16 *sport;

	id = rdma_create_id(&init_net, smbd_conn_upcall, info,
		RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
		return id;
	}

	if (dstaddr->sa_family == AF_INET6)
		sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
	else
		sport = &((struct sockaddr_in *)dstaddr)->sin_port;

	*sport = htons(port);

	init_completion(&info->ri_done);
	info->ri_rc = -ETIMEDOUT;

	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
		RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
		goto out;
	}
	rc = wait_for_completion_interruptible_timeout(
		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
	/* e.g. if interrupted returns -ERESTARTSYS */
	if (rc < 0) {
		log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
		goto out;
	}
	rc = info->ri_rc;
	if (rc) {
		log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
		goto out;
	}

	info->ri_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
		goto out;
	}
	rc = wait_for_completion_interruptible_timeout(
		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
	/* e.g. if interrupted returns -ERESTARTSYS */
	if (rc < 0) {
		log_rdma_event(ERR, "rdma_resolve_route timeout rc: %i\n", rc);
		goto out;
	}
	rc = info->ri_rc;
	if (rc) {
		log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
		goto out;
	}

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Test if FRWR (Fast Registration Work Requests) is supported on the device
 * This implementation requires FRWR for RDMA read/write
 * return value: true if it is supported
 */
static bool frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

static int smbd_ia_open(
		struct smbd_connection *info,
		struct sockaddr *dstaddr, int port)
{
	int rc;

	info->id = smbd_create_id(info, dstaddr, port);
	if (IS_ERR(info->id)) {
		rc = PTR_ERR(info->id);
		goto out1;
	}

	if (!frwr_is_supported(&info->id->device->attrs)) {
		log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n");
		log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
			       info->id->device->attrs.device_cap_flags,
			       info->id->device->attrs.max_fast_reg_page_list_len);
		rc = -EPROTONOSUPPORT;
		goto out2;
	}
	info->max_frmr_depth = min_t(int,
		smbd_max_frmr_depth,
		info->id->device->attrs.max_fast_reg_page_list_len);
	info->mr_type = IB_MR_TYPE_MEM_REG;
	if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
		info->mr_type = IB_MR_TYPE_SG_GAPS;

	info->pd = ib_alloc_pd(info->id->device, 0);
	if (IS_ERR(info->pd)) {
		rc = PTR_ERR(info->pd);
		log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
		goto out2;
	}

	return 0;

out2:
	rdma_destroy_id(info->id);
	info->id = NULL;

out1:
	return rc;
}

/*
 * Send a negotiation request message to the peer
 * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
 * After negotiation, the transport is connected and ready to carry
 * the upper layer SMB payload
 */
static int smbd_post_send_negotiate_req(struct smbd_connection *info)
{
	struct ib_send_wr send_wr;
	int rc = -ENOMEM;
	struct smbd_request *request;
	struct smbd_negotiate_req *packet;

	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
	if (!request)
		return rc;

	request->info = info;

	packet = smbd_request_payload(request);
	packet->min_version = cpu_to_le16(SMBD_V1);
	packet->max_version = cpu_to_le16(SMBD_V1);
	packet->reserved = 0;
	packet->credits_requested = cpu_to_le16(info->send_credit_target);
	packet->preferred_send_size = cpu_to_le32(info->max_send_size);
	packet->max_receive_size = cpu_to_le32(info->max_receive_size);
	packet->max_fragmented_size =
		cpu_to_le32(info->max_fragmented_recv_size);

	request->num_sge = 1;
	request->sge[0].addr = ib_dma_map_single(
				info->id->device, (void *)packet,
				sizeof(*packet), DMA_TO_DEVICE);
	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
		rc = -EIO;
		goto dma_mapping_failed;
	}

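	/*
	 * The negotiate request is carried in a single SGE covering the whole
	 * packet; it is synced for the device and posted as one IB_WR_SEND.
	 */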
	request->sge[0].length = sizeof(*packet);
	request->sge[0].lkey = info->pd->local_dma_lkey;

	ib_dma_sync_single_for_device(
		info->id->device, request->sge[0].addr,
		request->sge[0].length, DMA_TO_DEVICE);

	request->cqe.done = send_done;

	send_wr.next = NULL;
	send_wr.wr_cqe = &request->cqe;
	send_wr.sg_list = request->sge;
	send_wr.num_sge = request->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n",
		request->sge[0].addr,
		request->sge[0].length, request->sge[0].lkey);

	atomic_inc(&info->send_pending);
	rc = ib_post_send(info->id->qp, &send_wr, NULL);
	if (!rc)
		return 0;

	/* if we reach here, post send failed */
	log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
	atomic_dec(&info->send_pending);
	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
		request->sge[0].length, DMA_TO_DEVICE);

	smbd_disconnect_rdma_connection(info);

dma_mapping_failed:
	mempool_free(request, info->request_mempool);
	return rc;
}

/*
 * Extend the credits to remote peer
 * This implements [MS-SMBD] 3.1.5.9
 * The idea is that we should extend credits to the remote peer as quickly
 * as allowed, to maintain data flow. We allocate as many receive buffers
 * as possible, and extend the receive credits to the remote peer
 * return value: the new credits being granted.
 */
static int manage_credits_prior_sending(struct smbd_connection *info)
{
	int new_credits;

	spin_lock(&info->lock_new_credits_offered);
	new_credits = info->new_credits_offered;
	info->new_credits_offered = 0;
	spin_unlock(&info->lock_new_credits_offered);

	return new_credits;
}

/*
 * Check if we need to send a KEEP_ALIVE message
 * The idle connection timer triggers a KEEP_ALIVE message when it expires
 * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have the peer
 * send back a response.
 * return value:
 * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
 * 0: otherwise
 */
static int manage_keep_alive_before_sending(struct smbd_connection *info)
{
	if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
		info->keep_alive_requested = KEEP_ALIVE_SENT;
		return 1;
	}
	return 0;
}

/* Post the send request */
static int smbd_post_send(struct smbd_connection *info,
		struct smbd_request *request)
{
	struct ib_send_wr send_wr;
	int rc, i;

	for (i = 0; i < request->num_sge; i++) {
		log_rdma_send(INFO,
			"rdma_request sge[%d] addr=0x%llx length=%u\n",
			i, request->sge[i].addr, request->sge[i].length);
		ib_dma_sync_single_for_device(
			info->id->device,
			request->sge[i].addr,
			request->sge[i].length,
			DMA_TO_DEVICE);
	}

	request->cqe.done = send_done;

	send_wr.next = NULL;
	send_wr.wr_cqe = &request->cqe;
	send_wr.sg_list = request->sge;
	send_wr.num_sge = request->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	rc = ib_post_send(info->id->qp, &send_wr, NULL);
	if (rc) {
		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
		smbd_disconnect_rdma_connection(info);
		rc = -EAGAIN;
	} else
		/* Reset timer for idle connection after packet is sent */
		mod_delayed_work(info->workqueue, &info->idle_timer_work,
			info->keep_alive_interval*HZ);

	return rc;
}

static int smbd_post_send_iter(struct smbd_connection *info,
			       struct iov_iter *iter,
			       int *_remaining_data_length)
{
	int i, rc;
	int header_length;
	int data_length;
	struct smbd_request *request;
	struct smbd_data_transfer *packet;
	int new_credits = 0;

wait_credit:
	/* Wait for send credits. An SMBD packet needs one credit */
	rc = wait_event_interruptible(info->wait_send_queue,
		atomic_read(&info->send_credits) > 0 ||
		info->transport_status != SMBD_CONNECTED);
	if (rc)
		goto err_wait_credit;

	if (info->transport_status != SMBD_CONNECTED) {
		log_outgoing(ERR, "disconnected not sending on wait_credit\n");
		rc = -EAGAIN;
		goto err_wait_credit;
	}
	if (unlikely(atomic_dec_return(&info->send_credits) < 0)) {
		atomic_inc(&info->send_credits);
		goto wait_credit;
	}

wait_send_queue:
	wait_event(info->wait_post_send,
		atomic_read(&info->send_pending) < info->send_credit_target ||
		info->transport_status != SMBD_CONNECTED);

	if (info->transport_status != SMBD_CONNECTED) {
		log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
		rc = -EAGAIN;
		goto err_wait_send_queue;
	}

	if (unlikely(atomic_inc_return(&info->send_pending) >
				info->send_credit_target)) {
		atomic_dec(&info->send_pending);
		goto wait_send_queue;
	}

	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
	if (!request) {
		rc = -ENOMEM;
		goto err_alloc;
	}

	request->info = info;
	memset(request->sge, 0, sizeof(request->sge));

	/* Fill in the data payload to find out how much data we can add */
	if (iter) {
		struct smb_extract_to_rdma extract = {
			.nr_sge		= 1,
			.max_sge	= SMBDIRECT_MAX_SEND_SGE,
			.sge		= request->sge,
			.device		= info->id->device,
			.local_dma_lkey	= info->pd->local_dma_lkey,
			.direction	= DMA_TO_DEVICE,
		};

		rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
					      &extract);
		if (rc < 0)
			goto err_dma;
		data_length = rc;
		request->num_sge = extract.nr_sge;
		*_remaining_data_length -= data_length;
	} else {
		data_length = 0;
		request->num_sge = 1;
	}

	/* Fill in the packet header */
	packet = smbd_request_payload(request);
	packet->credits_requested = cpu_to_le16(info->send_credit_target);

	new_credits = manage_credits_prior_sending(info);
	atomic_add(new_credits, &info->receive_credits);
	packet->credits_granted = cpu_to_le16(new_credits);

	info->send_immediate = false;

	packet->flags = 0;
	if (manage_keep_alive_before_sending(info))
		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);

	packet->reserved = 0;
	if (!data_length)
		packet->data_offset = 0;
	else
		packet->data_offset = cpu_to_le32(24);
	packet->data_length = cpu_to_le32(data_length);
	packet->remaining_data_length = cpu_to_le32(*_remaining_data_length);
	packet->padding = 0;

	log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
		     le16_to_cpu(packet->credits_requested),
		     le16_to_cpu(packet->credits_granted),
		     le32_to_cpu(packet->data_offset),
		     le32_to_cpu(packet->data_length),
		     le32_to_cpu(packet->remaining_data_length));

	/* Map the packet to DMA */
	header_length = sizeof(struct smbd_data_transfer);
	/* If this is a packet without payload, don't send padding */
	if (!data_length)
		header_length = offsetof(struct smbd_data_transfer, padding);

	request->sge[0].addr = ib_dma_map_single(info->id->device,
						 (void *)packet,
						 header_length,
						 DMA_TO_DEVICE);
	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
		rc = -EIO;
		request->sge[0].addr = 0;
		goto err_dma;
	}

	request->sge[0].length = header_length;
	request->sge[0].lkey = info->pd->local_dma_lkey;

	rc = smbd_post_send(info, request);
	if (!rc)
		return 0;

err_dma:
	for (i = 0; i < request->num_sge; i++)
		if (request->sge[i].addr)
			ib_dma_unmap_single(info->id->device,
					    request->sge[i].addr,
					    request->sge[i].length,
					    DMA_TO_DEVICE);
	mempool_free(request, info->request_mempool);

	/* roll back receive credits and credits to be offered */
	spin_lock(&info->lock_new_credits_offered);
	info->new_credits_offered += new_credits;
	spin_unlock(&info->lock_new_credits_offered);
	atomic_sub(new_credits, &info->receive_credits);

err_alloc:
	if (atomic_dec_and_test(&info->send_pending))
		wake_up(&info->wait_send_pending);

err_wait_send_queue:
	/* roll back send credits and pending */
	atomic_inc(&info->send_credits);

err_wait_credit:
	return rc;
}

/*
 * Send an empty message
 * An empty message is used to extend credits to the peer for keep-alive
 * while there is no upper layer payload to send at the time
 */
static int smbd_post_send_empty(struct smbd_connection *info)
{
	int remaining_data_length = 0;

	info->count_send_empty++;
	return smbd_post_send_iter(info, NULL, &remaining_data_length);
}

/*
 * Post a receive request to the transport
 * The remote peer can only send data when a receive request is posted
 * The interaction is controlled by the send/receive credit system
 */
static int smbd_post_recv(
		struct smbd_connection *info, struct smbd_response *response)
{
	struct ib_recv_wr recv_wr;
	int rc = -EIO;

	response->sge.addr = ib_dma_map_single(
				info->id->device, response->packet,
				info->max_receive_size, DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(info->id->device, response->sge.addr))
		return rc;

	response->sge.length = info->max_receive_size;
	response->sge.lkey = info->pd->local_dma_lkey;

	response->cqe.done = recv_done;

	recv_wr.wr_cqe = &response->cqe;
	recv_wr.next = NULL;
	recv_wr.sg_list = &response->sge;
	recv_wr.num_sge = 1;

	rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
	if (rc) {
		ib_dma_unmap_single(info->id->device, response->sge.addr,
				    response->sge.length, DMA_FROM_DEVICE);
		smbd_disconnect_rdma_connection(info);
		log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
	}

	return rc;
}

/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
static int smbd_negotiate(struct smbd_connection *info)
{
	int rc;
	struct smbd_response *response = get_receive_buffer(info);

	response->type = SMBD_NEGOTIATE_RESP;
	rc = smbd_post_recv(info, response);
	log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n",
		       rc, response->sge.addr,
		       response->sge.length, response->sge.lkey);
	if (rc)
		return rc;

	init_completion(&info->negotiate_completion);
	info->negotiate_done = false;
	rc = smbd_post_send_negotiate_req(info);
	if (rc)
		return rc;

	rc = wait_for_completion_interruptible_timeout(
		&info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
	log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);

	if (info->negotiate_done)
		return 0;

	if (rc == 0)
		rc = -ETIMEDOUT;
	else if (rc == -ERESTARTSYS)
		rc = -EINTR;
	else
		rc = -ENOTCONN;

	return rc;
}

static void put_empty_packet(
		struct smbd_connection *info, struct smbd_response *response)
{
	spin_lock(&info->empty_packet_queue_lock);
	list_add_tail(&response->list, &info->empty_packet_queue);
	info->count_empty_packet_queue++;
	spin_unlock(&info->empty_packet_queue_lock);

	queue_work(info->workqueue, &info->post_send_credits_work);
}

/*
 * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
 * This is a queue for reassembling upper layer payload and presenting it to
 * the upper layer. All incoming payload goes to the reassembly queue,
 * regardless of whether reassembly is required. The upper layer code reads
 * from the queue for all incoming payloads.
 * Put a received packet to the reassembly queue
 * response: the packet received
 * data_length: the size of payload in this packet
 */
static void enqueue_reassembly(
		struct smbd_connection *info,
		struct smbd_response *response,
		int data_length)
{
	spin_lock(&info->reassembly_queue_lock);
	list_add_tail(&response->list, &info->reassembly_queue);
	info->reassembly_queue_length++;
	/*
	 * Make sure reassembly_data_length is updated after list and
	 * reassembly_queue_length are updated. On the dequeue side
On the dequeue side 110938c8a9a5SSteve French * reassembly_data_length is checked without a lock to determine 111038c8a9a5SSteve French * if reassembly_queue_length and list is up to date 111138c8a9a5SSteve French */ 111238c8a9a5SSteve French virt_wmb(); 111338c8a9a5SSteve French info->reassembly_data_length += data_length; 111438c8a9a5SSteve French spin_unlock(&info->reassembly_queue_lock); 111538c8a9a5SSteve French info->count_reassembly_queue++; 111638c8a9a5SSteve French info->count_enqueue_reassembly_queue++; 111738c8a9a5SSteve French } 111838c8a9a5SSteve French 111938c8a9a5SSteve French /* 112038c8a9a5SSteve French * Get the first entry at the front of reassembly queue 112138c8a9a5SSteve French * Caller is responsible for locking 112238c8a9a5SSteve French * return value: the first entry if any, NULL if queue is empty 112338c8a9a5SSteve French */ 112438c8a9a5SSteve French static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) 112538c8a9a5SSteve French { 112638c8a9a5SSteve French struct smbd_response *ret = NULL; 112738c8a9a5SSteve French 112838c8a9a5SSteve French if (!list_empty(&info->reassembly_queue)) { 112938c8a9a5SSteve French ret = list_first_entry( 113038c8a9a5SSteve French &info->reassembly_queue, 113138c8a9a5SSteve French struct smbd_response, list); 113238c8a9a5SSteve French } 113338c8a9a5SSteve French return ret; 113438c8a9a5SSteve French } 113538c8a9a5SSteve French 113638c8a9a5SSteve French static struct smbd_response *get_empty_queue_buffer( 113738c8a9a5SSteve French struct smbd_connection *info) 113838c8a9a5SSteve French { 113938c8a9a5SSteve French struct smbd_response *ret = NULL; 114038c8a9a5SSteve French unsigned long flags; 114138c8a9a5SSteve French 114238c8a9a5SSteve French spin_lock_irqsave(&info->empty_packet_queue_lock, flags); 114338c8a9a5SSteve French if (!list_empty(&info->empty_packet_queue)) { 114438c8a9a5SSteve French ret = list_first_entry( 114538c8a9a5SSteve French &info->empty_packet_queue, 114638c8a9a5SSteve French struct smbd_response, list); 114738c8a9a5SSteve French list_del(&ret->list); 114838c8a9a5SSteve French info->count_empty_packet_queue--; 114938c8a9a5SSteve French } 115038c8a9a5SSteve French spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags); 115138c8a9a5SSteve French 115238c8a9a5SSteve French return ret; 115338c8a9a5SSteve French } 115438c8a9a5SSteve French 115538c8a9a5SSteve French /* 115638c8a9a5SSteve French * Get a receive buffer 115738c8a9a5SSteve French * For each remote send, we need to post a receive. The receive buffers are 115838c8a9a5SSteve French * pre-allocated in advance. 
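 *
 * Buffers move between three places: the receive_queue (free), the hardware
 * receive queue (posted via smbd_post_recv()), and the reassembly or
 * empty_packet queues once data arrives.  Returning a buffer through
 * put_receive_buffer() schedules post_send_credits_work so a fresh credit
 * can be offered to the peer.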
115938c8a9a5SSteve French * return value: the receive buffer, NULL if none is available 116038c8a9a5SSteve French */ 116138c8a9a5SSteve French static struct smbd_response *get_receive_buffer(struct smbd_connection *info) 116238c8a9a5SSteve French { 116338c8a9a5SSteve French struct smbd_response *ret = NULL; 116438c8a9a5SSteve French unsigned long flags; 116538c8a9a5SSteve French 116638c8a9a5SSteve French spin_lock_irqsave(&info->receive_queue_lock, flags); 116738c8a9a5SSteve French if (!list_empty(&info->receive_queue)) { 116838c8a9a5SSteve French ret = list_first_entry( 116938c8a9a5SSteve French &info->receive_queue, 117038c8a9a5SSteve French struct smbd_response, list); 117138c8a9a5SSteve French list_del(&ret->list); 117238c8a9a5SSteve French info->count_receive_queue--; 117338c8a9a5SSteve French info->count_get_receive_buffer++; 117438c8a9a5SSteve French } 117538c8a9a5SSteve French spin_unlock_irqrestore(&info->receive_queue_lock, flags); 117638c8a9a5SSteve French 117738c8a9a5SSteve French return ret; 117838c8a9a5SSteve French } 117938c8a9a5SSteve French 118038c8a9a5SSteve French /* 118138c8a9a5SSteve French * Return a receive buffer 118238c8a9a5SSteve French * Upon returning of a receive buffer, we can post new receive and extend 118338c8a9a5SSteve French * more receive credits to remote peer. This is done immediately after a 118438c8a9a5SSteve French * receive buffer is returned. 118538c8a9a5SSteve French */ 118638c8a9a5SSteve French static void put_receive_buffer( 118738c8a9a5SSteve French struct smbd_connection *info, struct smbd_response *response) 118838c8a9a5SSteve French { 118938c8a9a5SSteve French unsigned long flags; 119038c8a9a5SSteve French 119138c8a9a5SSteve French ib_dma_unmap_single(info->id->device, response->sge.addr, 119238c8a9a5SSteve French response->sge.length, DMA_FROM_DEVICE); 119338c8a9a5SSteve French 119438c8a9a5SSteve French spin_lock_irqsave(&info->receive_queue_lock, flags); 119538c8a9a5SSteve French list_add_tail(&response->list, &info->receive_queue); 119638c8a9a5SSteve French info->count_receive_queue++; 119738c8a9a5SSteve French info->count_put_receive_buffer++; 119838c8a9a5SSteve French spin_unlock_irqrestore(&info->receive_queue_lock, flags); 119938c8a9a5SSteve French 120038c8a9a5SSteve French queue_work(info->workqueue, &info->post_send_credits_work); 120138c8a9a5SSteve French } 120238c8a9a5SSteve French 120338c8a9a5SSteve French /* Preallocate all receive buffer on transport establishment */ 120438c8a9a5SSteve French static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) 120538c8a9a5SSteve French { 120638c8a9a5SSteve French int i; 120738c8a9a5SSteve French struct smbd_response *response; 120838c8a9a5SSteve French 120938c8a9a5SSteve French INIT_LIST_HEAD(&info->reassembly_queue); 121038c8a9a5SSteve French spin_lock_init(&info->reassembly_queue_lock); 121138c8a9a5SSteve French info->reassembly_data_length = 0; 121238c8a9a5SSteve French info->reassembly_queue_length = 0; 121338c8a9a5SSteve French 121438c8a9a5SSteve French INIT_LIST_HEAD(&info->receive_queue); 121538c8a9a5SSteve French spin_lock_init(&info->receive_queue_lock); 121638c8a9a5SSteve French info->count_receive_queue = 0; 121738c8a9a5SSteve French 121838c8a9a5SSteve French INIT_LIST_HEAD(&info->empty_packet_queue); 121938c8a9a5SSteve French spin_lock_init(&info->empty_packet_queue_lock); 122038c8a9a5SSteve French info->count_empty_packet_queue = 0; 122138c8a9a5SSteve French 122238c8a9a5SSteve French init_waitqueue_head(&info->wait_receive_queues); 122338c8a9a5SSteve 
French 122438c8a9a5SSteve French for (i = 0; i < num_buf; i++) { 122538c8a9a5SSteve French response = mempool_alloc(info->response_mempool, GFP_KERNEL); 122638c8a9a5SSteve French if (!response) 122738c8a9a5SSteve French goto allocate_failed; 122838c8a9a5SSteve French 122938c8a9a5SSteve French response->info = info; 123038c8a9a5SSteve French list_add_tail(&response->list, &info->receive_queue); 123138c8a9a5SSteve French info->count_receive_queue++; 123238c8a9a5SSteve French } 123338c8a9a5SSteve French 123438c8a9a5SSteve French return 0; 123538c8a9a5SSteve French 123638c8a9a5SSteve French allocate_failed: 123738c8a9a5SSteve French while (!list_empty(&info->receive_queue)) { 123838c8a9a5SSteve French response = list_first_entry( 123938c8a9a5SSteve French &info->receive_queue, 124038c8a9a5SSteve French struct smbd_response, list); 124138c8a9a5SSteve French list_del(&response->list); 124238c8a9a5SSteve French info->count_receive_queue--; 124338c8a9a5SSteve French 124438c8a9a5SSteve French mempool_free(response, info->response_mempool); 124538c8a9a5SSteve French } 124638c8a9a5SSteve French return -ENOMEM; 124738c8a9a5SSteve French } 124838c8a9a5SSteve French 124938c8a9a5SSteve French static void destroy_receive_buffers(struct smbd_connection *info) 125038c8a9a5SSteve French { 125138c8a9a5SSteve French struct smbd_response *response; 125238c8a9a5SSteve French 125338c8a9a5SSteve French while ((response = get_receive_buffer(info))) 125438c8a9a5SSteve French mempool_free(response, info->response_mempool); 125538c8a9a5SSteve French 125638c8a9a5SSteve French while ((response = get_empty_queue_buffer(info))) 125738c8a9a5SSteve French mempool_free(response, info->response_mempool); 125838c8a9a5SSteve French } 125938c8a9a5SSteve French 126038c8a9a5SSteve French /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ 126138c8a9a5SSteve French static void idle_connection_timer(struct work_struct *work) 126238c8a9a5SSteve French { 126338c8a9a5SSteve French struct smbd_connection *info = container_of( 126438c8a9a5SSteve French work, struct smbd_connection, 126538c8a9a5SSteve French idle_timer_work.work); 126638c8a9a5SSteve French 126738c8a9a5SSteve French if (info->keep_alive_requested != KEEP_ALIVE_NONE) { 126838c8a9a5SSteve French log_keep_alive(ERR, 126938c8a9a5SSteve French "error status info->keep_alive_requested=%d\n", 127038c8a9a5SSteve French info->keep_alive_requested); 127138c8a9a5SSteve French smbd_disconnect_rdma_connection(info); 127238c8a9a5SSteve French return; 127338c8a9a5SSteve French } 127438c8a9a5SSteve French 127538c8a9a5SSteve French log_keep_alive(INFO, "about to send an empty idle message\n"); 127638c8a9a5SSteve French smbd_post_send_empty(info); 127738c8a9a5SSteve French 127838c8a9a5SSteve French /* Setup the next idle timeout work */ 127938c8a9a5SSteve French queue_delayed_work(info->workqueue, &info->idle_timer_work, 128038c8a9a5SSteve French info->keep_alive_interval*HZ); 128138c8a9a5SSteve French } 128238c8a9a5SSteve French 128338c8a9a5SSteve French /* 128438c8a9a5SSteve French * Destroy the transport and related RDMA and memory resources 128538c8a9a5SSteve French * Need to go through all the pending counters and make sure on one is using 128638c8a9a5SSteve French * the transport while it is destroyed 128738c8a9a5SSteve French */ 128838c8a9a5SSteve French void smbd_destroy(struct TCP_Server_Info *server) 128938c8a9a5SSteve French { 129038c8a9a5SSteve French struct smbd_connection *info = server->smbd_conn; 129138c8a9a5SSteve French struct smbd_response *response; 
129238c8a9a5SSteve French unsigned long flags; 129338c8a9a5SSteve French 129438c8a9a5SSteve French if (!info) { 129538c8a9a5SSteve French log_rdma_event(INFO, "rdma session already destroyed\n"); 129638c8a9a5SSteve French return; 129738c8a9a5SSteve French } 129838c8a9a5SSteve French 129938c8a9a5SSteve French log_rdma_event(INFO, "destroying rdma session\n"); 130038c8a9a5SSteve French if (info->transport_status != SMBD_DISCONNECTED) { 130138c8a9a5SSteve French rdma_disconnect(server->smbd_conn->id); 130238c8a9a5SSteve French log_rdma_event(INFO, "wait for transport being disconnected\n"); 130338c8a9a5SSteve French wait_event_interruptible( 130438c8a9a5SSteve French info->disconn_wait, 130538c8a9a5SSteve French info->transport_status == SMBD_DISCONNECTED); 130638c8a9a5SSteve French } 130738c8a9a5SSteve French 130838c8a9a5SSteve French log_rdma_event(INFO, "destroying qp\n"); 130938c8a9a5SSteve French ib_drain_qp(info->id->qp); 131038c8a9a5SSteve French rdma_destroy_qp(info->id); 131138c8a9a5SSteve French 131238c8a9a5SSteve French log_rdma_event(INFO, "cancelling idle timer\n"); 131338c8a9a5SSteve French cancel_delayed_work_sync(&info->idle_timer_work); 131438c8a9a5SSteve French 131538c8a9a5SSteve French log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); 131638c8a9a5SSteve French wait_event(info->wait_send_pending, 131738c8a9a5SSteve French atomic_read(&info->send_pending) == 0); 131838c8a9a5SSteve French 131938c8a9a5SSteve French /* It's not possible for upper layer to get to reassembly */ 132038c8a9a5SSteve French log_rdma_event(INFO, "drain the reassembly queue\n"); 132138c8a9a5SSteve French do { 132238c8a9a5SSteve French spin_lock_irqsave(&info->reassembly_queue_lock, flags); 132338c8a9a5SSteve French response = _get_first_reassembly(info); 132438c8a9a5SSteve French if (response) { 132538c8a9a5SSteve French list_del(&response->list); 132638c8a9a5SSteve French spin_unlock_irqrestore( 132738c8a9a5SSteve French &info->reassembly_queue_lock, flags); 132838c8a9a5SSteve French put_receive_buffer(info, response); 132938c8a9a5SSteve French } else 133038c8a9a5SSteve French spin_unlock_irqrestore( 133138c8a9a5SSteve French &info->reassembly_queue_lock, flags); 133238c8a9a5SSteve French } while (response); 133338c8a9a5SSteve French info->reassembly_data_length = 0; 133438c8a9a5SSteve French 133538c8a9a5SSteve French log_rdma_event(INFO, "free receive buffers\n"); 133638c8a9a5SSteve French wait_event(info->wait_receive_queues, 133738c8a9a5SSteve French info->count_receive_queue + info->count_empty_packet_queue 133838c8a9a5SSteve French == info->receive_credit_max); 133938c8a9a5SSteve French destroy_receive_buffers(info); 134038c8a9a5SSteve French 134138c8a9a5SSteve French /* 134238c8a9a5SSteve French * For performance reasons, memory registration and deregistration 134338c8a9a5SSteve French * are not locked by srv_mutex. It is possible some processes are 134438c8a9a5SSteve French * blocked on transport srv_mutex while holding memory registration. 134538c8a9a5SSteve French * Release the transport srv_mutex to allow them to hit the failure 134638c8a9a5SSteve French * path when sending data, and then release memory registartions. 
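 *
 * A rough sketch of the ordering problem this avoids (hypothetical timeline,
 * not taken from a trace):
 *
 *	sender thread				smbd_destroy()
 *	-------------				--------------
 *	smbd_register_mr()			holds srv_mutex
 *	  mr_used_count++			waits for mr_used_count == 0
 *	blocks on srv_mutex			never makes progress
 *
 * Dropping srv_mutex inside the wait loop below lets the blocked sender fail
 * its send, deregister the MR and decrement mr_used_count, so the loop can
 * terminate.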
134738c8a9a5SSteve French */ 134838c8a9a5SSteve French log_rdma_event(INFO, "freeing mr list\n"); 134938c8a9a5SSteve French wake_up_interruptible_all(&info->wait_mr); 135038c8a9a5SSteve French while (atomic_read(&info->mr_used_count)) { 135138c8a9a5SSteve French cifs_server_unlock(server); 135238c8a9a5SSteve French msleep(1000); 135338c8a9a5SSteve French cifs_server_lock(server); 135438c8a9a5SSteve French } 135538c8a9a5SSteve French destroy_mr_list(info); 135638c8a9a5SSteve French 135738c8a9a5SSteve French ib_free_cq(info->send_cq); 135838c8a9a5SSteve French ib_free_cq(info->recv_cq); 135938c8a9a5SSteve French ib_dealloc_pd(info->pd); 136038c8a9a5SSteve French rdma_destroy_id(info->id); 136138c8a9a5SSteve French 136238c8a9a5SSteve French /* free mempools */ 136338c8a9a5SSteve French mempool_destroy(info->request_mempool); 136438c8a9a5SSteve French kmem_cache_destroy(info->request_cache); 136538c8a9a5SSteve French 136638c8a9a5SSteve French mempool_destroy(info->response_mempool); 136738c8a9a5SSteve French kmem_cache_destroy(info->response_cache); 136838c8a9a5SSteve French 136938c8a9a5SSteve French info->transport_status = SMBD_DESTROYED; 137038c8a9a5SSteve French 137138c8a9a5SSteve French destroy_workqueue(info->workqueue); 137238c8a9a5SSteve French log_rdma_event(INFO, "rdma session destroyed\n"); 137338c8a9a5SSteve French kfree(info); 137438c8a9a5SSteve French server->smbd_conn = NULL; 137538c8a9a5SSteve French } 137638c8a9a5SSteve French 137738c8a9a5SSteve French /* 137838c8a9a5SSteve French * Reconnect this SMBD connection, called from upper layer 137938c8a9a5SSteve French * return value: 0 on success, or actual error code 138038c8a9a5SSteve French */ 138138c8a9a5SSteve French int smbd_reconnect(struct TCP_Server_Info *server) 138238c8a9a5SSteve French { 138338c8a9a5SSteve French log_rdma_event(INFO, "reconnecting rdma session\n"); 138438c8a9a5SSteve French 138538c8a9a5SSteve French if (!server->smbd_conn) { 138638c8a9a5SSteve French log_rdma_event(INFO, "rdma session already destroyed\n"); 138738c8a9a5SSteve French goto create_conn; 138838c8a9a5SSteve French } 138938c8a9a5SSteve French 139038c8a9a5SSteve French /* 139138c8a9a5SSteve French * This is possible if transport is disconnected and we haven't received 139238c8a9a5SSteve French * notification from RDMA, but upper layer has detected timeout 139338c8a9a5SSteve French */ 139438c8a9a5SSteve French if (server->smbd_conn->transport_status == SMBD_CONNECTED) { 139538c8a9a5SSteve French log_rdma_event(INFO, "disconnecting transport\n"); 139638c8a9a5SSteve French smbd_destroy(server); 139738c8a9a5SSteve French } 139838c8a9a5SSteve French 139938c8a9a5SSteve French create_conn: 140038c8a9a5SSteve French log_rdma_event(INFO, "creating rdma session\n"); 140138c8a9a5SSteve French server->smbd_conn = smbd_get_connection( 140238c8a9a5SSteve French server, (struct sockaddr *) &server->dstaddr); 140338c8a9a5SSteve French 1404*e3603ccfSSteve French if (server->smbd_conn) { 140538c8a9a5SSteve French cifs_dbg(VFS, "RDMA transport re-established\n"); 1406*e3603ccfSSteve French trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr); 1407*e3603ccfSSteve French return 0; 1408*e3603ccfSSteve French } 1409*e3603ccfSSteve French trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr); 1410*e3603ccfSSteve French return -ENOENT; 141138c8a9a5SSteve French } 141238c8a9a5SSteve French 141338c8a9a5SSteve French static void destroy_caches_and_workqueue(struct smbd_connection *info) 141438c8a9a5SSteve French { 
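	/*
	 * For orientation: this helper undoes allocate_caches_and_workqueue()
	 * below, releasing the resources in roughly the reverse order of
	 * their creation:
	 *
	 *	allocate: request_cache -> request_mempool -> response_cache ->
	 *	          response_mempool -> workqueue -> receive buffers
	 *	destroy:  receive buffers -> workqueue -> response_mempool ->
	 *	          response_cache -> request_mempool -> request_cache
	 */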
141538c8a9a5SSteve French destroy_receive_buffers(info); 141638c8a9a5SSteve French destroy_workqueue(info->workqueue); 141738c8a9a5SSteve French mempool_destroy(info->response_mempool); 141838c8a9a5SSteve French kmem_cache_destroy(info->response_cache); 141938c8a9a5SSteve French mempool_destroy(info->request_mempool); 142038c8a9a5SSteve French kmem_cache_destroy(info->request_cache); 142138c8a9a5SSteve French } 142238c8a9a5SSteve French 142338c8a9a5SSteve French #define MAX_NAME_LEN 80 142438c8a9a5SSteve French static int allocate_caches_and_workqueue(struct smbd_connection *info) 142538c8a9a5SSteve French { 142638c8a9a5SSteve French char name[MAX_NAME_LEN]; 142738c8a9a5SSteve French int rc; 142838c8a9a5SSteve French 142938c8a9a5SSteve French scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); 143038c8a9a5SSteve French info->request_cache = 143138c8a9a5SSteve French kmem_cache_create( 143238c8a9a5SSteve French name, 143338c8a9a5SSteve French sizeof(struct smbd_request) + 143438c8a9a5SSteve French sizeof(struct smbd_data_transfer), 143538c8a9a5SSteve French 0, SLAB_HWCACHE_ALIGN, NULL); 143638c8a9a5SSteve French if (!info->request_cache) 143738c8a9a5SSteve French return -ENOMEM; 143838c8a9a5SSteve French 143938c8a9a5SSteve French info->request_mempool = 144038c8a9a5SSteve French mempool_create(info->send_credit_target, mempool_alloc_slab, 144138c8a9a5SSteve French mempool_free_slab, info->request_cache); 144238c8a9a5SSteve French if (!info->request_mempool) 144338c8a9a5SSteve French goto out1; 144438c8a9a5SSteve French 144538c8a9a5SSteve French scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); 144638c8a9a5SSteve French info->response_cache = 144738c8a9a5SSteve French kmem_cache_create( 144838c8a9a5SSteve French name, 144938c8a9a5SSteve French sizeof(struct smbd_response) + 145038c8a9a5SSteve French info->max_receive_size, 145138c8a9a5SSteve French 0, SLAB_HWCACHE_ALIGN, NULL); 145238c8a9a5SSteve French if (!info->response_cache) 145338c8a9a5SSteve French goto out2; 145438c8a9a5SSteve French 145538c8a9a5SSteve French info->response_mempool = 145638c8a9a5SSteve French mempool_create(info->receive_credit_max, mempool_alloc_slab, 145738c8a9a5SSteve French mempool_free_slab, info->response_cache); 145838c8a9a5SSteve French if (!info->response_mempool) 145938c8a9a5SSteve French goto out3; 146038c8a9a5SSteve French 146138c8a9a5SSteve French scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); 146238c8a9a5SSteve French info->workqueue = create_workqueue(name); 146338c8a9a5SSteve French if (!info->workqueue) 146438c8a9a5SSteve French goto out4; 146538c8a9a5SSteve French 146638c8a9a5SSteve French rc = allocate_receive_buffers(info, info->receive_credit_max); 146738c8a9a5SSteve French if (rc) { 146838c8a9a5SSteve French log_rdma_event(ERR, "failed to allocate receive buffers\n"); 146938c8a9a5SSteve French goto out5; 147038c8a9a5SSteve French } 147138c8a9a5SSteve French 147238c8a9a5SSteve French return 0; 147338c8a9a5SSteve French 147438c8a9a5SSteve French out5: 147538c8a9a5SSteve French destroy_workqueue(info->workqueue); 147638c8a9a5SSteve French out4: 147738c8a9a5SSteve French mempool_destroy(info->response_mempool); 147838c8a9a5SSteve French out3: 147938c8a9a5SSteve French kmem_cache_destroy(info->response_cache); 148038c8a9a5SSteve French out2: 148138c8a9a5SSteve French mempool_destroy(info->request_mempool); 148238c8a9a5SSteve French out1: 148338c8a9a5SSteve French kmem_cache_destroy(info->request_cache); 148438c8a9a5SSteve French return -ENOMEM; 148538c8a9a5SSteve French } 
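
/*
 * Sizing note with an illustrative check (the helper below is hypothetical,
 * not part of the original code): the response mempool and the preallocated
 * receive buffers are both sized to receive_credit_max, because every receive
 * credit granted to the peer has to be backed by a receive buffer.
 *
 *	static bool smbd_have_buffer_per_credit(struct smbd_connection *info)
 *	{
 *		return info->count_receive_queue +
 *			info->count_empty_packet_queue <=
 *			info->receive_credit_max;
 *	}
 *
 * smbd_destroy() relies on the same accounting when it waits for
 * count_receive_queue + count_empty_packet_queue to reach receive_credit_max
 * before freeing the buffers.
 */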
148638c8a9a5SSteve French 148738c8a9a5SSteve French /* Create a SMBD connection, called by upper layer */ 148838c8a9a5SSteve French static struct smbd_connection *_smbd_get_connection( 148938c8a9a5SSteve French struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) 149038c8a9a5SSteve French { 149138c8a9a5SSteve French int rc; 149238c8a9a5SSteve French struct smbd_connection *info; 149338c8a9a5SSteve French struct rdma_conn_param conn_param; 149438c8a9a5SSteve French struct ib_qp_init_attr qp_attr; 149538c8a9a5SSteve French struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; 149638c8a9a5SSteve French struct ib_port_immutable port_immutable; 149738c8a9a5SSteve French u32 ird_ord_hdr[2]; 149838c8a9a5SSteve French 149938c8a9a5SSteve French info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); 150038c8a9a5SSteve French if (!info) 150138c8a9a5SSteve French return NULL; 150238c8a9a5SSteve French 150338c8a9a5SSteve French info->transport_status = SMBD_CONNECTING; 150438c8a9a5SSteve French rc = smbd_ia_open(info, dstaddr, port); 150538c8a9a5SSteve French if (rc) { 150638c8a9a5SSteve French log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); 150738c8a9a5SSteve French goto create_id_failed; 150838c8a9a5SSteve French } 150938c8a9a5SSteve French 151038c8a9a5SSteve French if (smbd_send_credit_target > info->id->device->attrs.max_cqe || 151138c8a9a5SSteve French smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { 151238c8a9a5SSteve French log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 151338c8a9a5SSteve French smbd_send_credit_target, 151438c8a9a5SSteve French info->id->device->attrs.max_cqe, 151538c8a9a5SSteve French info->id->device->attrs.max_qp_wr); 151638c8a9a5SSteve French goto config_failed; 151738c8a9a5SSteve French } 151838c8a9a5SSteve French 151938c8a9a5SSteve French if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || 152038c8a9a5SSteve French smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { 152138c8a9a5SSteve French log_rdma_event(ERR, "consider lowering receive_credit_max = %d. 
Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 152238c8a9a5SSteve French smbd_receive_credit_max, 152338c8a9a5SSteve French info->id->device->attrs.max_cqe, 152438c8a9a5SSteve French info->id->device->attrs.max_qp_wr); 152538c8a9a5SSteve French goto config_failed; 152638c8a9a5SSteve French } 152738c8a9a5SSteve French 152838c8a9a5SSteve French info->receive_credit_max = smbd_receive_credit_max; 152938c8a9a5SSteve French info->send_credit_target = smbd_send_credit_target; 153038c8a9a5SSteve French info->max_send_size = smbd_max_send_size; 153138c8a9a5SSteve French info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; 153238c8a9a5SSteve French info->max_receive_size = smbd_max_receive_size; 153338c8a9a5SSteve French info->keep_alive_interval = smbd_keep_alive_interval; 153438c8a9a5SSteve French 153538c8a9a5SSteve French if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || 153638c8a9a5SSteve French info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { 153738c8a9a5SSteve French log_rdma_event(ERR, 153838c8a9a5SSteve French "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 153938c8a9a5SSteve French IB_DEVICE_NAME_MAX, 154038c8a9a5SSteve French info->id->device->name, 154138c8a9a5SSteve French info->id->device->attrs.max_send_sge, 154238c8a9a5SSteve French info->id->device->attrs.max_recv_sge); 154338c8a9a5SSteve French goto config_failed; 154438c8a9a5SSteve French } 154538c8a9a5SSteve French 154638c8a9a5SSteve French info->send_cq = NULL; 154738c8a9a5SSteve French info->recv_cq = NULL; 154838c8a9a5SSteve French info->send_cq = 154938c8a9a5SSteve French ib_alloc_cq_any(info->id->device, info, 155038c8a9a5SSteve French info->send_credit_target, IB_POLL_SOFTIRQ); 155138c8a9a5SSteve French if (IS_ERR(info->send_cq)) { 155238c8a9a5SSteve French info->send_cq = NULL; 155338c8a9a5SSteve French goto alloc_cq_failed; 155438c8a9a5SSteve French } 155538c8a9a5SSteve French 155638c8a9a5SSteve French info->recv_cq = 155738c8a9a5SSteve French ib_alloc_cq_any(info->id->device, info, 155838c8a9a5SSteve French info->receive_credit_max, IB_POLL_SOFTIRQ); 155938c8a9a5SSteve French if (IS_ERR(info->recv_cq)) { 156038c8a9a5SSteve French info->recv_cq = NULL; 156138c8a9a5SSteve French goto alloc_cq_failed; 156238c8a9a5SSteve French } 156338c8a9a5SSteve French 156438c8a9a5SSteve French memset(&qp_attr, 0, sizeof(qp_attr)); 156538c8a9a5SSteve French qp_attr.event_handler = smbd_qp_async_error_upcall; 156638c8a9a5SSteve French qp_attr.qp_context = info; 156738c8a9a5SSteve French qp_attr.cap.max_send_wr = info->send_credit_target; 156838c8a9a5SSteve French qp_attr.cap.max_recv_wr = info->receive_credit_max; 156938c8a9a5SSteve French qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; 157038c8a9a5SSteve French qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; 157138c8a9a5SSteve French qp_attr.cap.max_inline_data = 0; 157238c8a9a5SSteve French qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 157338c8a9a5SSteve French qp_attr.qp_type = IB_QPT_RC; 157438c8a9a5SSteve French qp_attr.send_cq = info->send_cq; 157538c8a9a5SSteve French qp_attr.recv_cq = info->recv_cq; 157638c8a9a5SSteve French qp_attr.port_num = ~0; 157738c8a9a5SSteve French 157838c8a9a5SSteve French rc = rdma_create_qp(info->id, info->pd, &qp_attr); 157938c8a9a5SSteve French if (rc) { 158038c8a9a5SSteve French log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); 158138c8a9a5SSteve French goto create_qp_failed; 158238c8a9a5SSteve French } 158338c8a9a5SSteve French 158438c8a9a5SSteve French 
memset(&conn_param, 0, sizeof(conn_param)); 158538c8a9a5SSteve French conn_param.initiator_depth = 0; 158638c8a9a5SSteve French 158738c8a9a5SSteve French conn_param.responder_resources = 158838c8a9a5SSteve French info->id->device->attrs.max_qp_rd_atom 158938c8a9a5SSteve French < SMBD_CM_RESPONDER_RESOURCES ? 159038c8a9a5SSteve French info->id->device->attrs.max_qp_rd_atom : 159138c8a9a5SSteve French SMBD_CM_RESPONDER_RESOURCES; 159238c8a9a5SSteve French info->responder_resources = conn_param.responder_resources; 159338c8a9a5SSteve French log_rdma_mr(INFO, "responder_resources=%d\n", 159438c8a9a5SSteve French info->responder_resources); 159538c8a9a5SSteve French 159638c8a9a5SSteve French /* Need to send IRD/ORD in private data for iWARP */ 159738c8a9a5SSteve French info->id->device->ops.get_port_immutable( 159838c8a9a5SSteve French info->id->device, info->id->port_num, &port_immutable); 159938c8a9a5SSteve French if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 160038c8a9a5SSteve French ird_ord_hdr[0] = info->responder_resources; 160138c8a9a5SSteve French ird_ord_hdr[1] = 1; 160238c8a9a5SSteve French conn_param.private_data = ird_ord_hdr; 160338c8a9a5SSteve French conn_param.private_data_len = sizeof(ird_ord_hdr); 160438c8a9a5SSteve French } else { 160538c8a9a5SSteve French conn_param.private_data = NULL; 160638c8a9a5SSteve French conn_param.private_data_len = 0; 160738c8a9a5SSteve French } 160838c8a9a5SSteve French 160938c8a9a5SSteve French conn_param.retry_count = SMBD_CM_RETRY; 161038c8a9a5SSteve French conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; 161138c8a9a5SSteve French conn_param.flow_control = 0; 161238c8a9a5SSteve French 161338c8a9a5SSteve French log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", 161438c8a9a5SSteve French &addr_in->sin_addr, port); 161538c8a9a5SSteve French 161638c8a9a5SSteve French init_waitqueue_head(&info->conn_wait); 161738c8a9a5SSteve French init_waitqueue_head(&info->disconn_wait); 161838c8a9a5SSteve French init_waitqueue_head(&info->wait_reassembly_queue); 161938c8a9a5SSteve French rc = rdma_connect(info->id, &conn_param); 162038c8a9a5SSteve French if (rc) { 162138c8a9a5SSteve French log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); 162238c8a9a5SSteve French goto rdma_connect_failed; 162338c8a9a5SSteve French } 162438c8a9a5SSteve French 162538c8a9a5SSteve French wait_event_interruptible( 162638c8a9a5SSteve French info->conn_wait, info->transport_status != SMBD_CONNECTING); 162738c8a9a5SSteve French 162838c8a9a5SSteve French if (info->transport_status != SMBD_CONNECTED) { 162938c8a9a5SSteve French log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); 163038c8a9a5SSteve French goto rdma_connect_failed; 163138c8a9a5SSteve French } 163238c8a9a5SSteve French 163338c8a9a5SSteve French log_rdma_event(INFO, "rdma_connect connected\n"); 163438c8a9a5SSteve French 163538c8a9a5SSteve French rc = allocate_caches_and_workqueue(info); 163638c8a9a5SSteve French if (rc) { 163738c8a9a5SSteve French log_rdma_event(ERR, "cache allocation failed\n"); 163838c8a9a5SSteve French goto allocate_cache_failed; 163938c8a9a5SSteve French } 164038c8a9a5SSteve French 164138c8a9a5SSteve French init_waitqueue_head(&info->wait_send_queue); 164238c8a9a5SSteve French INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); 164338c8a9a5SSteve French queue_delayed_work(info->workqueue, &info->idle_timer_work, 164438c8a9a5SSteve French info->keep_alive_interval*HZ); 164538c8a9a5SSteve French 164638c8a9a5SSteve French 
init_waitqueue_head(&info->wait_send_pending); 164738c8a9a5SSteve French atomic_set(&info->send_pending, 0); 164838c8a9a5SSteve French 164938c8a9a5SSteve French init_waitqueue_head(&info->wait_post_send); 165038c8a9a5SSteve French 165138c8a9a5SSteve French INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); 165238c8a9a5SSteve French INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); 165338c8a9a5SSteve French info->new_credits_offered = 0; 165438c8a9a5SSteve French spin_lock_init(&info->lock_new_credits_offered); 165538c8a9a5SSteve French 165638c8a9a5SSteve French rc = smbd_negotiate(info); 165738c8a9a5SSteve French if (rc) { 165838c8a9a5SSteve French log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); 165938c8a9a5SSteve French goto negotiation_failed; 166038c8a9a5SSteve French } 166138c8a9a5SSteve French 166238c8a9a5SSteve French rc = allocate_mr_list(info); 166338c8a9a5SSteve French if (rc) { 166438c8a9a5SSteve French log_rdma_mr(ERR, "memory registration allocation failed\n"); 166538c8a9a5SSteve French goto allocate_mr_failed; 166638c8a9a5SSteve French } 166738c8a9a5SSteve French 166838c8a9a5SSteve French return info; 166938c8a9a5SSteve French 167038c8a9a5SSteve French allocate_mr_failed: 167138c8a9a5SSteve French /* At this point, need to a full transport shutdown */ 167238c8a9a5SSteve French server->smbd_conn = info; 167338c8a9a5SSteve French smbd_destroy(server); 167438c8a9a5SSteve French return NULL; 167538c8a9a5SSteve French 167638c8a9a5SSteve French negotiation_failed: 167738c8a9a5SSteve French cancel_delayed_work_sync(&info->idle_timer_work); 167838c8a9a5SSteve French destroy_caches_and_workqueue(info); 167938c8a9a5SSteve French info->transport_status = SMBD_NEGOTIATE_FAILED; 168038c8a9a5SSteve French init_waitqueue_head(&info->conn_wait); 168138c8a9a5SSteve French rdma_disconnect(info->id); 168238c8a9a5SSteve French wait_event(info->conn_wait, 168338c8a9a5SSteve French info->transport_status == SMBD_DISCONNECTED); 168438c8a9a5SSteve French 168538c8a9a5SSteve French allocate_cache_failed: 168638c8a9a5SSteve French rdma_connect_failed: 168738c8a9a5SSteve French rdma_destroy_qp(info->id); 168838c8a9a5SSteve French 168938c8a9a5SSteve French create_qp_failed: 169038c8a9a5SSteve French alloc_cq_failed: 169138c8a9a5SSteve French if (info->send_cq) 169238c8a9a5SSteve French ib_free_cq(info->send_cq); 169338c8a9a5SSteve French if (info->recv_cq) 169438c8a9a5SSteve French ib_free_cq(info->recv_cq); 169538c8a9a5SSteve French 169638c8a9a5SSteve French config_failed: 169738c8a9a5SSteve French ib_dealloc_pd(info->pd); 169838c8a9a5SSteve French rdma_destroy_id(info->id); 169938c8a9a5SSteve French 170038c8a9a5SSteve French create_id_failed: 170138c8a9a5SSteve French kfree(info); 170238c8a9a5SSteve French return NULL; 170338c8a9a5SSteve French } 170438c8a9a5SSteve French 170538c8a9a5SSteve French struct smbd_connection *smbd_get_connection( 170638c8a9a5SSteve French struct TCP_Server_Info *server, struct sockaddr *dstaddr) 170738c8a9a5SSteve French { 170838c8a9a5SSteve French struct smbd_connection *ret; 170938c8a9a5SSteve French int port = SMBD_PORT; 171038c8a9a5SSteve French 171138c8a9a5SSteve French try_again: 171238c8a9a5SSteve French ret = _smbd_get_connection(server, dstaddr, port); 171338c8a9a5SSteve French 171438c8a9a5SSteve French /* Try SMB_PORT if SMBD_PORT doesn't work */ 171538c8a9a5SSteve French if (!ret && port == SMBD_PORT) { 171638c8a9a5SSteve French port = SMB_PORT; 171738c8a9a5SSteve French goto try_again; 171838c8a9a5SSteve French } 171938c8a9a5SSteve 
French return ret;
172038c8a9a5SSteve French }
172138c8a9a5SSteve French
172238c8a9a5SSteve French /*
172338c8a9a5SSteve French * Receive data from receive reassembly queue
172438c8a9a5SSteve French * All the incoming data packets are placed in the reassembly queue
172538c8a9a5SSteve French * buf: the buffer to read data into
172638c8a9a5SSteve French * size: the length of data to read
172738c8a9a5SSteve French * return value: actual data read
172838c8a9a5SSteve French * Note: this implementation copies the data from the reassembly queue to receive
172938c8a9a5SSteve French * buffers used by upper layer. This is not the optimal code path. A better way
173038c8a9a5SSteve French * to do it is to not have the upper layer allocate its receive buffers but rather
173138c8a9a5SSteve French * borrow the buffer from the reassembly queue, and return it after data is
173238c8a9a5SSteve French * consumed. But this will require more changes to upper layer code, and also
173338c8a9a5SSteve French * needs to consider packet boundaries while they are still being reassembled.
173438c8a9a5SSteve French */
173538c8a9a5SSteve French static int smbd_recv_buf(struct smbd_connection *info, char *buf,
173638c8a9a5SSteve French unsigned int size)
173738c8a9a5SSteve French {
173838c8a9a5SSteve French struct smbd_response *response;
173938c8a9a5SSteve French struct smbd_data_transfer *data_transfer;
174038c8a9a5SSteve French int to_copy, to_read, data_read, offset;
174138c8a9a5SSteve French u32 data_length, remaining_data_length, data_offset;
174238c8a9a5SSteve French int rc;
174338c8a9a5SSteve French
174438c8a9a5SSteve French again:
174538c8a9a5SSteve French /*
174638c8a9a5SSteve French * No need to hold the reassembly queue lock all the time as we are
174738c8a9a5SSteve French * the only one reading from the front of the queue. The transport
174838c8a9a5SSteve French * may add more entries to the back of the queue at the same time
174938c8a9a5SSteve French */
175038c8a9a5SSteve French log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
175138c8a9a5SSteve French info->reassembly_data_length);
175238c8a9a5SSteve French if (info->reassembly_data_length >= size) {
175338c8a9a5SSteve French int queue_length;
175438c8a9a5SSteve French int queue_removed = 0;
175538c8a9a5SSteve French
175638c8a9a5SSteve French /*
175738c8a9a5SSteve French * Need to make sure reassembly_data_length is read before
175838c8a9a5SSteve French * reading reassembly_queue_length and calling
175938c8a9a5SSteve French * _get_first_reassembly. This call is lock free
176038c8a9a5SSteve French * as we never read at the end of the queue, which is being
176138c8a9a5SSteve French * updated in SOFTIRQ as more data is received
176238c8a9a5SSteve French */
176338c8a9a5SSteve French virt_rmb();
176438c8a9a5SSteve French queue_length = info->reassembly_queue_length;
176538c8a9a5SSteve French data_read = 0;
176638c8a9a5SSteve French to_read = size;
176738c8a9a5SSteve French offset = info->first_entry_offset;
176838c8a9a5SSteve French while (data_read < size) {
176938c8a9a5SSteve French response = _get_first_reassembly(info);
177038c8a9a5SSteve French data_transfer = smbd_response_payload(response);
177138c8a9a5SSteve French data_length = le32_to_cpu(data_transfer->data_length);
177238c8a9a5SSteve French remaining_data_length =
177338c8a9a5SSteve French le32_to_cpu(
177438c8a9a5SSteve French data_transfer->remaining_data_length);
177538c8a9a5SSteve French data_offset = le32_to_cpu(data_transfer->data_offset);
177638c8a9a5SSteve French
177738c8a9a5SSteve French /*
177838c8a9a5SSteve French * The upper layer expects the RFC1002 length at the
177938c8a9a5SSteve French * beginning of the payload. Return it to indicate
178038c8a9a5SSteve French * the total length of the packet. This minimizes the
178138c8a9a5SSteve French * change to upper layer packet processing logic. This
178238c8a9a5SSteve French * will eventually be removed when an intermediate
178338c8a9a5SSteve French * transport layer is added
178438c8a9a5SSteve French */
178538c8a9a5SSteve French if (response->first_segment && size == 4) {
178638c8a9a5SSteve French unsigned int rfc1002_len =
178738c8a9a5SSteve French data_length + remaining_data_length;
178838c8a9a5SSteve French *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
178938c8a9a5SSteve French data_read = 4;
179038c8a9a5SSteve French response->first_segment = false;
179138c8a9a5SSteve French log_read(INFO, "returning rfc1002 length %d\n",
179238c8a9a5SSteve French rfc1002_len);
179338c8a9a5SSteve French goto read_rfc1002_done;
179438c8a9a5SSteve French }
179538c8a9a5SSteve French
179638c8a9a5SSteve French to_copy = min_t(int, data_length - offset, to_read);
179738c8a9a5SSteve French memcpy(
179838c8a9a5SSteve French buf + data_read,
179938c8a9a5SSteve French (char *)data_transfer + data_offset + offset,
180038c8a9a5SSteve French to_copy);
180138c8a9a5SSteve French
180238c8a9a5SSteve French /* move on to the next buffer?
*/ 180338c8a9a5SSteve French if (to_copy == data_length - offset) { 180438c8a9a5SSteve French queue_length--; 180538c8a9a5SSteve French /* 180638c8a9a5SSteve French * No need to lock if we are not at the 180738c8a9a5SSteve French * end of the queue 180838c8a9a5SSteve French */ 180938c8a9a5SSteve French if (queue_length) 181038c8a9a5SSteve French list_del(&response->list); 181138c8a9a5SSteve French else { 181238c8a9a5SSteve French spin_lock_irq( 181338c8a9a5SSteve French &info->reassembly_queue_lock); 181438c8a9a5SSteve French list_del(&response->list); 181538c8a9a5SSteve French spin_unlock_irq( 181638c8a9a5SSteve French &info->reassembly_queue_lock); 181738c8a9a5SSteve French } 181838c8a9a5SSteve French queue_removed++; 181938c8a9a5SSteve French info->count_reassembly_queue--; 182038c8a9a5SSteve French info->count_dequeue_reassembly_queue++; 182138c8a9a5SSteve French put_receive_buffer(info, response); 182238c8a9a5SSteve French offset = 0; 182338c8a9a5SSteve French log_read(INFO, "put_receive_buffer offset=0\n"); 182438c8a9a5SSteve French } else 182538c8a9a5SSteve French offset += to_copy; 182638c8a9a5SSteve French 182738c8a9a5SSteve French to_read -= to_copy; 182838c8a9a5SSteve French data_read += to_copy; 182938c8a9a5SSteve French 183038c8a9a5SSteve French log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", 183138c8a9a5SSteve French to_copy, data_length - offset, 183238c8a9a5SSteve French to_read, data_read, offset); 183338c8a9a5SSteve French } 183438c8a9a5SSteve French 183538c8a9a5SSteve French spin_lock_irq(&info->reassembly_queue_lock); 183638c8a9a5SSteve French info->reassembly_data_length -= data_read; 183738c8a9a5SSteve French info->reassembly_queue_length -= queue_removed; 183838c8a9a5SSteve French spin_unlock_irq(&info->reassembly_queue_lock); 183938c8a9a5SSteve French 184038c8a9a5SSteve French info->first_entry_offset = offset; 184138c8a9a5SSteve French log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 184238c8a9a5SSteve French data_read, info->reassembly_data_length, 184338c8a9a5SSteve French info->first_entry_offset); 184438c8a9a5SSteve French read_rfc1002_done: 184538c8a9a5SSteve French return data_read; 184638c8a9a5SSteve French } 184738c8a9a5SSteve French 184838c8a9a5SSteve French log_read(INFO, "wait_event on more data\n"); 184938c8a9a5SSteve French rc = wait_event_interruptible( 185038c8a9a5SSteve French info->wait_reassembly_queue, 185138c8a9a5SSteve French info->reassembly_data_length >= size || 185238c8a9a5SSteve French info->transport_status != SMBD_CONNECTED); 185338c8a9a5SSteve French /* Don't return any data if interrupted */ 185438c8a9a5SSteve French if (rc) 185538c8a9a5SSteve French return rc; 185638c8a9a5SSteve French 185738c8a9a5SSteve French if (info->transport_status != SMBD_CONNECTED) { 185838c8a9a5SSteve French log_read(ERR, "disconnected\n"); 185938c8a9a5SSteve French return -ECONNABORTED; 186038c8a9a5SSteve French } 186138c8a9a5SSteve French 186238c8a9a5SSteve French goto again; 186338c8a9a5SSteve French } 186438c8a9a5SSteve French 186538c8a9a5SSteve French /* 186638c8a9a5SSteve French * Receive a page from receive reassembly queue 186738c8a9a5SSteve French * page: the page to read data into 186838c8a9a5SSteve French * to_read: the length of data to read 186938c8a9a5SSteve French * return value: actual data read 187038c8a9a5SSteve French */ 187138c8a9a5SSteve French static int smbd_recv_page(struct smbd_connection *info, 
187238c8a9a5SSteve French struct page *page, unsigned int page_offset, 187338c8a9a5SSteve French unsigned int to_read) 187438c8a9a5SSteve French { 187538c8a9a5SSteve French int ret; 187638c8a9a5SSteve French char *to_address; 187738c8a9a5SSteve French void *page_address; 187838c8a9a5SSteve French 187938c8a9a5SSteve French /* make sure we have the page ready for read */ 188038c8a9a5SSteve French ret = wait_event_interruptible( 188138c8a9a5SSteve French info->wait_reassembly_queue, 188238c8a9a5SSteve French info->reassembly_data_length >= to_read || 188338c8a9a5SSteve French info->transport_status != SMBD_CONNECTED); 188438c8a9a5SSteve French if (ret) 188538c8a9a5SSteve French return ret; 188638c8a9a5SSteve French 188738c8a9a5SSteve French /* now we can read from reassembly queue and not sleep */ 188838c8a9a5SSteve French page_address = kmap_atomic(page); 188938c8a9a5SSteve French to_address = (char *) page_address + page_offset; 189038c8a9a5SSteve French 189138c8a9a5SSteve French log_read(INFO, "reading from page=%p address=%p to_read=%d\n", 189238c8a9a5SSteve French page, to_address, to_read); 189338c8a9a5SSteve French 189438c8a9a5SSteve French ret = smbd_recv_buf(info, to_address, to_read); 189538c8a9a5SSteve French kunmap_atomic(page_address); 189638c8a9a5SSteve French 189738c8a9a5SSteve French return ret; 189838c8a9a5SSteve French } 189938c8a9a5SSteve French 190038c8a9a5SSteve French /* 190138c8a9a5SSteve French * Receive data from transport 190238c8a9a5SSteve French * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC 190338c8a9a5SSteve French * return: total bytes read, or 0. SMB Direct will not do partial read. 190438c8a9a5SSteve French */ 190538c8a9a5SSteve French int smbd_recv(struct smbd_connection *info, struct msghdr *msg) 190638c8a9a5SSteve French { 190738c8a9a5SSteve French char *buf; 190838c8a9a5SSteve French struct page *page; 190938c8a9a5SSteve French unsigned int to_read, page_offset; 191038c8a9a5SSteve French int rc; 191138c8a9a5SSteve French 191238c8a9a5SSteve French if (iov_iter_rw(&msg->msg_iter) == WRITE) { 191338c8a9a5SSteve French /* It's a bug in upper layer to get there */ 191438c8a9a5SSteve French cifs_dbg(VFS, "Invalid msg iter dir %u\n", 191538c8a9a5SSteve French iov_iter_rw(&msg->msg_iter)); 191638c8a9a5SSteve French rc = -EINVAL; 191738c8a9a5SSteve French goto out; 191838c8a9a5SSteve French } 191938c8a9a5SSteve French 192038c8a9a5SSteve French switch (iov_iter_type(&msg->msg_iter)) { 192138c8a9a5SSteve French case ITER_KVEC: 192238c8a9a5SSteve French buf = msg->msg_iter.kvec->iov_base; 192338c8a9a5SSteve French to_read = msg->msg_iter.kvec->iov_len; 192438c8a9a5SSteve French rc = smbd_recv_buf(info, buf, to_read); 192538c8a9a5SSteve French break; 192638c8a9a5SSteve French 192738c8a9a5SSteve French case ITER_BVEC: 192838c8a9a5SSteve French page = msg->msg_iter.bvec->bv_page; 192938c8a9a5SSteve French page_offset = msg->msg_iter.bvec->bv_offset; 193038c8a9a5SSteve French to_read = msg->msg_iter.bvec->bv_len; 193138c8a9a5SSteve French rc = smbd_recv_page(info, page, page_offset, to_read); 193238c8a9a5SSteve French break; 193338c8a9a5SSteve French 193438c8a9a5SSteve French default: 193538c8a9a5SSteve French /* It's a bug in upper layer to get there */ 193638c8a9a5SSteve French cifs_dbg(VFS, "Invalid msg type %d\n", 193738c8a9a5SSteve French iov_iter_type(&msg->msg_iter)); 193838c8a9a5SSteve French rc = -EINVAL; 193938c8a9a5SSteve French } 194038c8a9a5SSteve French 194138c8a9a5SSteve French out: 194238c8a9a5SSteve French /* SMBDirect will read it all 
or nothing */ 194338c8a9a5SSteve French if (rc > 0) 194438c8a9a5SSteve French msg->msg_iter.count = 0; 194538c8a9a5SSteve French return rc; 194638c8a9a5SSteve French } 194738c8a9a5SSteve French 194838c8a9a5SSteve French /* 194938c8a9a5SSteve French * Send data to transport 195038c8a9a5SSteve French * Each rqst is transported as a SMBDirect payload 195138c8a9a5SSteve French * rqst: the data to write 195238c8a9a5SSteve French * return value: 0 if successfully write, otherwise error code 195338c8a9a5SSteve French */ 195438c8a9a5SSteve French int smbd_send(struct TCP_Server_Info *server, 195538c8a9a5SSteve French int num_rqst, struct smb_rqst *rqst_array) 195638c8a9a5SSteve French { 195738c8a9a5SSteve French struct smbd_connection *info = server->smbd_conn; 195838c8a9a5SSteve French struct smb_rqst *rqst; 195938c8a9a5SSteve French struct iov_iter iter; 196038c8a9a5SSteve French unsigned int remaining_data_length, klen; 196138c8a9a5SSteve French int rc, i, rqst_idx; 196238c8a9a5SSteve French 196338c8a9a5SSteve French if (info->transport_status != SMBD_CONNECTED) 196438c8a9a5SSteve French return -EAGAIN; 196538c8a9a5SSteve French 196638c8a9a5SSteve French /* 196738c8a9a5SSteve French * Add in the page array if there is one. The caller needs to set 196838c8a9a5SSteve French * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and 196938c8a9a5SSteve French * ends at page boundary 197038c8a9a5SSteve French */ 197138c8a9a5SSteve French remaining_data_length = 0; 197238c8a9a5SSteve French for (i = 0; i < num_rqst; i++) 197338c8a9a5SSteve French remaining_data_length += smb_rqst_len(server, &rqst_array[i]); 197438c8a9a5SSteve French 197538c8a9a5SSteve French if (unlikely(remaining_data_length > info->max_fragmented_send_size)) { 197638c8a9a5SSteve French /* assertion: payload never exceeds negotiated maximum */ 197738c8a9a5SSteve French log_write(ERR, "payload size %d > max size %d\n", 197838c8a9a5SSteve French remaining_data_length, info->max_fragmented_send_size); 197938c8a9a5SSteve French return -EINVAL; 198038c8a9a5SSteve French } 198138c8a9a5SSteve French 198238c8a9a5SSteve French log_write(INFO, "num_rqst=%d total length=%u\n", 198338c8a9a5SSteve French num_rqst, remaining_data_length); 198438c8a9a5SSteve French 198538c8a9a5SSteve French rqst_idx = 0; 198638c8a9a5SSteve French do { 198738c8a9a5SSteve French rqst = &rqst_array[rqst_idx]; 198838c8a9a5SSteve French 198938c8a9a5SSteve French cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", 199038c8a9a5SSteve French rqst_idx, smb_rqst_len(server, rqst)); 199138c8a9a5SSteve French for (i = 0; i < rqst->rq_nvec; i++) 199238c8a9a5SSteve French dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len); 199338c8a9a5SSteve French 199438c8a9a5SSteve French log_write(INFO, "RDMA-WR[%u] nvec=%d len=%u iter=%zu rqlen=%lu\n", 199538c8a9a5SSteve French rqst_idx, rqst->rq_nvec, remaining_data_length, 199638c8a9a5SSteve French iov_iter_count(&rqst->rq_iter), smb_rqst_len(server, rqst)); 199738c8a9a5SSteve French 199838c8a9a5SSteve French /* Send the metadata pages. 
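 *
 * remaining_data_length was initialized above to the total size of all
 * requests in the compound; each post is expected to reduce it so that every
 * SMBDirect data transfer header carries the number of bytes still to follow.
 * A rough worked example (illustrative numbers only): a compound of
 * 200 + 100 bytes starts with remaining_data_length = 300; the first post
 * carries 200 bytes with 100 remaining, the second carries 100 bytes with
 * 0 remaining.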
*/ 199938c8a9a5SSteve French klen = 0; 200038c8a9a5SSteve French for (i = 0; i < rqst->rq_nvec; i++) 200138c8a9a5SSteve French klen += rqst->rq_iov[i].iov_len; 200238c8a9a5SSteve French iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); 200338c8a9a5SSteve French 200438c8a9a5SSteve French rc = smbd_post_send_iter(info, &iter, &remaining_data_length); 200538c8a9a5SSteve French if (rc < 0) 200638c8a9a5SSteve French break; 200738c8a9a5SSteve French 200838c8a9a5SSteve French if (iov_iter_count(&rqst->rq_iter) > 0) { 200938c8a9a5SSteve French /* And then the data pages if there are any */ 201038c8a9a5SSteve French rc = smbd_post_send_iter(info, &rqst->rq_iter, 201138c8a9a5SSteve French &remaining_data_length); 201238c8a9a5SSteve French if (rc < 0) 201338c8a9a5SSteve French break; 201438c8a9a5SSteve French } 201538c8a9a5SSteve French 201638c8a9a5SSteve French } while (++rqst_idx < num_rqst); 201738c8a9a5SSteve French 201838c8a9a5SSteve French /* 201938c8a9a5SSteve French * As an optimization, we don't wait for individual I/O to finish 202038c8a9a5SSteve French * before sending the next one. 202138c8a9a5SSteve French * Send them all and wait for pending send count to get to 0 202238c8a9a5SSteve French * that means all the I/Os have been out and we are good to return 202338c8a9a5SSteve French */ 202438c8a9a5SSteve French 202538c8a9a5SSteve French wait_event(info->wait_send_pending, 202638c8a9a5SSteve French atomic_read(&info->send_pending) == 0); 202738c8a9a5SSteve French 202838c8a9a5SSteve French return rc; 202938c8a9a5SSteve French } 203038c8a9a5SSteve French 203138c8a9a5SSteve French static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) 203238c8a9a5SSteve French { 203338c8a9a5SSteve French struct smbd_mr *mr; 203438c8a9a5SSteve French struct ib_cqe *cqe; 203538c8a9a5SSteve French 203638c8a9a5SSteve French if (wc->status) { 203738c8a9a5SSteve French log_rdma_mr(ERR, "status=%d\n", wc->status); 203838c8a9a5SSteve French cqe = wc->wr_cqe; 203938c8a9a5SSteve French mr = container_of(cqe, struct smbd_mr, cqe); 204038c8a9a5SSteve French smbd_disconnect_rdma_connection(mr->conn); 204138c8a9a5SSteve French } 204238c8a9a5SSteve French } 204338c8a9a5SSteve French 204438c8a9a5SSteve French /* 204538c8a9a5SSteve French * The work queue function that recovers MRs 204638c8a9a5SSteve French * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used 204738c8a9a5SSteve French * again. Both calls are slow, so finish them in a workqueue. This will not 204838c8a9a5SSteve French * block I/O path. 204938c8a9a5SSteve French * There is one workqueue that recovers MRs, there is no need to lock as the 205038c8a9a5SSteve French * I/O requests calling smbd_register_mr will never update the links in the 205138c8a9a5SSteve French * mr_list. 
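 *
 * MR states as used in this file (summary for orientation):
 *
 *	MR_READY	on mr_list and available to get_mr()
 *	MR_REGISTERED	handed out by get_mr() for an RDMA read/write
 *	MR_ERROR	needs recovery; this work re-creates the ib_mr and
 *			marks the entry MR_READY again
 *	MR_INVALIDATED	invalidated after I/O; destroy_mr_list() unmaps its
 *			DMA mapping at teardown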
205238c8a9a5SSteve French */ 205338c8a9a5SSteve French static void smbd_mr_recovery_work(struct work_struct *work) 205438c8a9a5SSteve French { 205538c8a9a5SSteve French struct smbd_connection *info = 205638c8a9a5SSteve French container_of(work, struct smbd_connection, mr_recovery_work); 205738c8a9a5SSteve French struct smbd_mr *smbdirect_mr; 205838c8a9a5SSteve French int rc; 205938c8a9a5SSteve French 206038c8a9a5SSteve French list_for_each_entry(smbdirect_mr, &info->mr_list, list) { 206138c8a9a5SSteve French if (smbdirect_mr->state == MR_ERROR) { 206238c8a9a5SSteve French 206338c8a9a5SSteve French /* recover this MR entry */ 206438c8a9a5SSteve French rc = ib_dereg_mr(smbdirect_mr->mr); 206538c8a9a5SSteve French if (rc) { 206638c8a9a5SSteve French log_rdma_mr(ERR, 206738c8a9a5SSteve French "ib_dereg_mr failed rc=%x\n", 206838c8a9a5SSteve French rc); 206938c8a9a5SSteve French smbd_disconnect_rdma_connection(info); 207038c8a9a5SSteve French continue; 207138c8a9a5SSteve French } 207238c8a9a5SSteve French 207338c8a9a5SSteve French smbdirect_mr->mr = ib_alloc_mr( 207438c8a9a5SSteve French info->pd, info->mr_type, 207538c8a9a5SSteve French info->max_frmr_depth); 207638c8a9a5SSteve French if (IS_ERR(smbdirect_mr->mr)) { 207738c8a9a5SSteve French log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 207838c8a9a5SSteve French info->mr_type, 207938c8a9a5SSteve French info->max_frmr_depth); 208038c8a9a5SSteve French smbd_disconnect_rdma_connection(info); 208138c8a9a5SSteve French continue; 208238c8a9a5SSteve French } 208338c8a9a5SSteve French } else 208438c8a9a5SSteve French /* This MR is being used, don't recover it */ 208538c8a9a5SSteve French continue; 208638c8a9a5SSteve French 208738c8a9a5SSteve French smbdirect_mr->state = MR_READY; 208838c8a9a5SSteve French 208938c8a9a5SSteve French /* smbdirect_mr->state is updated by this function 209038c8a9a5SSteve French * and is read and updated by I/O issuing CPUs trying 209138c8a9a5SSteve French * to get a MR, the call to atomic_inc_return 209238c8a9a5SSteve French * implicates a memory barrier and guarantees this 209338c8a9a5SSteve French * value is updated before waking up any calls to 209438c8a9a5SSteve French * get_mr() from the I/O issuing CPUs 209538c8a9a5SSteve French */ 209638c8a9a5SSteve French if (atomic_inc_return(&info->mr_ready_count) == 1) 209738c8a9a5SSteve French wake_up_interruptible(&info->wait_mr); 209838c8a9a5SSteve French } 209938c8a9a5SSteve French } 210038c8a9a5SSteve French 210138c8a9a5SSteve French static void destroy_mr_list(struct smbd_connection *info) 210238c8a9a5SSteve French { 210338c8a9a5SSteve French struct smbd_mr *mr, *tmp; 210438c8a9a5SSteve French 210538c8a9a5SSteve French cancel_work_sync(&info->mr_recovery_work); 210638c8a9a5SSteve French list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { 210738c8a9a5SSteve French if (mr->state == MR_INVALIDATED) 210838c8a9a5SSteve French ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, 210938c8a9a5SSteve French mr->sgt.nents, mr->dir); 211038c8a9a5SSteve French ib_dereg_mr(mr->mr); 211138c8a9a5SSteve French kfree(mr->sgt.sgl); 211238c8a9a5SSteve French kfree(mr); 211338c8a9a5SSteve French } 211438c8a9a5SSteve French } 211538c8a9a5SSteve French 211638c8a9a5SSteve French /* 211738c8a9a5SSteve French * Allocate MRs used for RDMA read/write 211838c8a9a5SSteve French * The number of MRs will not exceed hardware capability in responder_resources 211938c8a9a5SSteve French * All MRs are kept in mr_list. 
The MR can be recovered after it's used 212038c8a9a5SSteve French * Recovery is done in smbd_mr_recovery_work. The content of list entry changes 212138c8a9a5SSteve French * as MRs are used and recovered for I/O, but the list links will not change 212238c8a9a5SSteve French */ 212338c8a9a5SSteve French static int allocate_mr_list(struct smbd_connection *info) 212438c8a9a5SSteve French { 212538c8a9a5SSteve French int i; 212638c8a9a5SSteve French struct smbd_mr *smbdirect_mr, *tmp; 212738c8a9a5SSteve French 212838c8a9a5SSteve French INIT_LIST_HEAD(&info->mr_list); 212938c8a9a5SSteve French init_waitqueue_head(&info->wait_mr); 213038c8a9a5SSteve French spin_lock_init(&info->mr_list_lock); 213138c8a9a5SSteve French atomic_set(&info->mr_ready_count, 0); 213238c8a9a5SSteve French atomic_set(&info->mr_used_count, 0); 213338c8a9a5SSteve French init_waitqueue_head(&info->wait_for_mr_cleanup); 213438c8a9a5SSteve French INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); 213538c8a9a5SSteve French /* Allocate more MRs (2x) than hardware responder_resources */ 213638c8a9a5SSteve French for (i = 0; i < info->responder_resources * 2; i++) { 213738c8a9a5SSteve French smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); 213838c8a9a5SSteve French if (!smbdirect_mr) 213938c8a9a5SSteve French goto out; 214038c8a9a5SSteve French smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, 214138c8a9a5SSteve French info->max_frmr_depth); 214238c8a9a5SSteve French if (IS_ERR(smbdirect_mr->mr)) { 214338c8a9a5SSteve French log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 214438c8a9a5SSteve French info->mr_type, info->max_frmr_depth); 214538c8a9a5SSteve French goto out; 214638c8a9a5SSteve French } 214738c8a9a5SSteve French smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, 214838c8a9a5SSteve French sizeof(struct scatterlist), 214938c8a9a5SSteve French GFP_KERNEL); 215038c8a9a5SSteve French if (!smbdirect_mr->sgt.sgl) { 215138c8a9a5SSteve French log_rdma_mr(ERR, "failed to allocate sgl\n"); 215238c8a9a5SSteve French ib_dereg_mr(smbdirect_mr->mr); 215338c8a9a5SSteve French goto out; 215438c8a9a5SSteve French } 215538c8a9a5SSteve French smbdirect_mr->state = MR_READY; 215638c8a9a5SSteve French smbdirect_mr->conn = info; 215738c8a9a5SSteve French 215838c8a9a5SSteve French list_add_tail(&smbdirect_mr->list, &info->mr_list); 215938c8a9a5SSteve French atomic_inc(&info->mr_ready_count); 216038c8a9a5SSteve French } 216138c8a9a5SSteve French return 0; 216238c8a9a5SSteve French 216338c8a9a5SSteve French out: 216438c8a9a5SSteve French kfree(smbdirect_mr); 216538c8a9a5SSteve French 216638c8a9a5SSteve French list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { 216738c8a9a5SSteve French list_del(&smbdirect_mr->list); 216838c8a9a5SSteve French ib_dereg_mr(smbdirect_mr->mr); 216938c8a9a5SSteve French kfree(smbdirect_mr->sgt.sgl); 217038c8a9a5SSteve French kfree(smbdirect_mr); 217138c8a9a5SSteve French } 217238c8a9a5SSteve French return -ENOMEM; 217338c8a9a5SSteve French } 217438c8a9a5SSteve French 217538c8a9a5SSteve French /* 217638c8a9a5SSteve French * Get a MR from mr_list. This function waits until there is at least one 217738c8a9a5SSteve French * MR available in the list. It may access the list while the 217838c8a9a5SSteve French * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock 217938c8a9a5SSteve French * as they never modify the same places. 
However, there may be several CPUs
218038c8a9a5SSteve French * issuing I/O trying to get an MR at the same time, mr_list_lock is used to
218138c8a9a5SSteve French * protect against this situation.
218238c8a9a5SSteve French */
218338c8a9a5SSteve French static struct smbd_mr *get_mr(struct smbd_connection *info)
218438c8a9a5SSteve French {
218538c8a9a5SSteve French struct smbd_mr *ret;
218638c8a9a5SSteve French int rc;
218738c8a9a5SSteve French again:
218838c8a9a5SSteve French rc = wait_event_interruptible(info->wait_mr,
218938c8a9a5SSteve French atomic_read(&info->mr_ready_count) ||
219038c8a9a5SSteve French info->transport_status != SMBD_CONNECTED);
219138c8a9a5SSteve French if (rc) {
219238c8a9a5SSteve French log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
219338c8a9a5SSteve French return NULL;
219438c8a9a5SSteve French }
219538c8a9a5SSteve French
219638c8a9a5SSteve French if (info->transport_status != SMBD_CONNECTED) {
219738c8a9a5SSteve French log_rdma_mr(ERR, "info->transport_status=%x\n",
219838c8a9a5SSteve French info->transport_status);
219938c8a9a5SSteve French return NULL;
220038c8a9a5SSteve French }
220138c8a9a5SSteve French
220238c8a9a5SSteve French spin_lock(&info->mr_list_lock);
220338c8a9a5SSteve French list_for_each_entry(ret, &info->mr_list, list) {
220438c8a9a5SSteve French if (ret->state == MR_READY) {
220538c8a9a5SSteve French ret->state = MR_REGISTERED;
220638c8a9a5SSteve French spin_unlock(&info->mr_list_lock);
220738c8a9a5SSteve French atomic_dec(&info->mr_ready_count);
220838c8a9a5SSteve French atomic_inc(&info->mr_used_count);
220938c8a9a5SSteve French return ret;
221038c8a9a5SSteve French }
221138c8a9a5SSteve French }
221238c8a9a5SSteve French
221338c8a9a5SSteve French spin_unlock(&info->mr_list_lock);
221438c8a9a5SSteve French /*
221538c8a9a5SSteve French * It is possible that we could fail to get an MR because other processes may
221638c8a9a5SSteve French * try to acquire an MR at the same time. If this is the case, retry it.
221738c8a9a5SSteve French */
221838c8a9a5SSteve French goto again;
221938c8a9a5SSteve French }
222038c8a9a5SSteve French
222138c8a9a5SSteve French /*
222238c8a9a5SSteve French * Transcribe the pages from an iterator into an MR scatterlist.
222338c8a9a5SSteve French */
222438c8a9a5SSteve French static int smbd_iter_to_mr(struct smbd_connection *info,
222538c8a9a5SSteve French struct iov_iter *iter,
222638c8a9a5SSteve French struct sg_table *sgt,
222738c8a9a5SSteve French unsigned int max_sg)
222838c8a9a5SSteve French {
222938c8a9a5SSteve French int ret;
223038c8a9a5SSteve French
223138c8a9a5SSteve French memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));
223238c8a9a5SSteve French
22330d7aeb68SDavid Howells ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
223438c8a9a5SSteve French WARN_ON(ret < 0);
223538c8a9a5SSteve French if (sgt->nents > 0)
223638c8a9a5SSteve French sg_mark_end(&sgt->sgl[sgt->nents - 1]);
223738c8a9a5SSteve French return ret;
223838c8a9a5SSteve French }
223938c8a9a5SSteve French
224038c8a9a5SSteve French /*
224138c8a9a5SSteve French * Register memory for RDMA read/write
224238c8a9a5SSteve French * iter: the buffer to register memory with
224338c8a9a5SSteve French * writing: true if this is an RDMA write (SMB read), false for RDMA read
224438c8a9a5SSteve French * need_invalidate: true if this MR needs to be locally invalidated after I/O
224538c8a9a5SSteve French * return value: the MR registered, NULL if failed.
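 *
 * A rough sketch of the expected calling pattern (illustrative only, not
 * lifted from a real caller; the SMB2 read/write paths do the equivalent,
 * and "iter"/"need_invalidate" stand for whatever the caller has at hand):
 *
 *	mr = smbd_register_mr(info, &iter, true, need_invalidate);
 *	if (!mr)
 *		return -EAGAIN;
 *	(advertise mr->mr->iova, mr->mr->length and mr->mr->rkey to the peer
 *	 in the buffer descriptor, wait for the peer to finish the RDMA I/O)
 *	rc = smbd_deregister_mr(mr);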
224638c8a9a5SSteve French */ 224738c8a9a5SSteve French struct smbd_mr *smbd_register_mr(struct smbd_connection *info, 224838c8a9a5SSteve French struct iov_iter *iter, 224938c8a9a5SSteve French bool writing, bool need_invalidate) 225038c8a9a5SSteve French { 225138c8a9a5SSteve French struct smbd_mr *smbdirect_mr; 225238c8a9a5SSteve French int rc, num_pages; 225338c8a9a5SSteve French enum dma_data_direction dir; 225438c8a9a5SSteve French struct ib_reg_wr *reg_wr; 225538c8a9a5SSteve French 225638c8a9a5SSteve French num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1); 225738c8a9a5SSteve French if (num_pages > info->max_frmr_depth) { 225838c8a9a5SSteve French log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 225938c8a9a5SSteve French num_pages, info->max_frmr_depth); 226038c8a9a5SSteve French WARN_ON_ONCE(1); 226138c8a9a5SSteve French return NULL; 226238c8a9a5SSteve French } 226338c8a9a5SSteve French 226438c8a9a5SSteve French smbdirect_mr = get_mr(info); 226538c8a9a5SSteve French if (!smbdirect_mr) { 226638c8a9a5SSteve French log_rdma_mr(ERR, "get_mr returning NULL\n"); 226738c8a9a5SSteve French return NULL; 226838c8a9a5SSteve French } 226938c8a9a5SSteve French 227038c8a9a5SSteve French dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 227138c8a9a5SSteve French smbdirect_mr->dir = dir; 227238c8a9a5SSteve French smbdirect_mr->need_invalidate = need_invalidate; 227338c8a9a5SSteve French smbdirect_mr->sgt.nents = 0; 227438c8a9a5SSteve French smbdirect_mr->sgt.orig_nents = 0; 227538c8a9a5SSteve French 227638c8a9a5SSteve French log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 227738c8a9a5SSteve French num_pages, iov_iter_count(iter), info->max_frmr_depth); 227838c8a9a5SSteve French smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); 227938c8a9a5SSteve French 228038c8a9a5SSteve French rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, 228138c8a9a5SSteve French smbdirect_mr->sgt.nents, dir); 228238c8a9a5SSteve French if (!rc) { 228338c8a9a5SSteve French log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 228438c8a9a5SSteve French num_pages, dir, rc); 228538c8a9a5SSteve French goto dma_map_error; 228638c8a9a5SSteve French } 228738c8a9a5SSteve French 228838c8a9a5SSteve French rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, 228938c8a9a5SSteve French smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); 229038c8a9a5SSteve French if (rc != smbdirect_mr->sgt.nents) { 229138c8a9a5SSteve French log_rdma_mr(ERR, 229238c8a9a5SSteve French "ib_map_mr_sg failed rc = %d nents = %x\n", 229338c8a9a5SSteve French rc, smbdirect_mr->sgt.nents); 229438c8a9a5SSteve French goto map_mr_error; 229538c8a9a5SSteve French } 229638c8a9a5SSteve French 229738c8a9a5SSteve French ib_update_fast_reg_key(smbdirect_mr->mr, 229838c8a9a5SSteve French ib_inc_rkey(smbdirect_mr->mr->rkey)); 229938c8a9a5SSteve French reg_wr = &smbdirect_mr->wr; 230038c8a9a5SSteve French reg_wr->wr.opcode = IB_WR_REG_MR; 230138c8a9a5SSteve French smbdirect_mr->cqe.done = register_mr_done; 230238c8a9a5SSteve French reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; 230338c8a9a5SSteve French reg_wr->wr.num_sge = 0; 230438c8a9a5SSteve French reg_wr->wr.send_flags = IB_SEND_SIGNALED; 230538c8a9a5SSteve French reg_wr->mr = smbdirect_mr->mr; 230638c8a9a5SSteve French reg_wr->key = smbdirect_mr->mr->rkey; 230738c8a9a5SSteve French reg_wr->access = writing ? 
230838c8a9a5SSteve French IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
230938c8a9a5SSteve French IB_ACCESS_REMOTE_READ;
231038c8a9a5SSteve French
231138c8a9a5SSteve French /*
231238c8a9a5SSteve French * There is no need to wait for completion on ib_post_send
231338c8a9a5SSteve French * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
231438c8a9a5SSteve French * on the next ib_post_send when we actually send I/O to the remote peer
231538c8a9a5SSteve French */
231638c8a9a5SSteve French rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
231738c8a9a5SSteve French if (!rc)
231838c8a9a5SSteve French return smbdirect_mr;
231938c8a9a5SSteve French
232038c8a9a5SSteve French log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
232138c8a9a5SSteve French rc, reg_wr->key);
232238c8a9a5SSteve French
232338c8a9a5SSteve French /* If all failed, attempt to recover this MR by setting it to MR_ERROR */
232438c8a9a5SSteve French map_mr_error:
232538c8a9a5SSteve French ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl,
232638c8a9a5SSteve French smbdirect_mr->sgt.nents, smbdirect_mr->dir);
232738c8a9a5SSteve French
232838c8a9a5SSteve French dma_map_error:
232938c8a9a5SSteve French smbdirect_mr->state = MR_ERROR;
233038c8a9a5SSteve French if (atomic_dec_and_test(&info->mr_used_count))
233138c8a9a5SSteve French wake_up(&info->wait_for_mr_cleanup);
233238c8a9a5SSteve French
233338c8a9a5SSteve French smbd_disconnect_rdma_connection(info);
233438c8a9a5SSteve French
233538c8a9a5SSteve French return NULL;
233638c8a9a5SSteve French }
233738c8a9a5SSteve French
233838c8a9a5SSteve French static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
233938c8a9a5SSteve French {
234038c8a9a5SSteve French struct smbd_mr *smbdirect_mr;
234138c8a9a5SSteve French struct ib_cqe *cqe;
234238c8a9a5SSteve French
234338c8a9a5SSteve French cqe = wc->wr_cqe;
234438c8a9a5SSteve French smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
234538c8a9a5SSteve French smbdirect_mr->state = MR_INVALIDATED;
234638c8a9a5SSteve French if (wc->status != IB_WC_SUCCESS) {
234738c8a9a5SSteve French log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
234838c8a9a5SSteve French smbdirect_mr->state = MR_ERROR;
234938c8a9a5SSteve French }
235038c8a9a5SSteve French complete(&smbdirect_mr->invalidate_done);
235138c8a9a5SSteve French }
235238c8a9a5SSteve French
235338c8a9a5SSteve French /*
235438c8a9a5SSteve French * Deregister an MR after I/O is done
235538c8a9a5SSteve French * This function may wait if remote invalidation is not used
235638c8a9a5SSteve French * and we have to locally invalidate the buffer to prevent the data from being
235738c8a9a5SSteve French * modified by the remote peer after the upper layer has consumed it
235838c8a9a5SSteve French */
235938c8a9a5SSteve French int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
236038c8a9a5SSteve French {
236138c8a9a5SSteve French struct ib_send_wr *wr;
236238c8a9a5SSteve French struct smbd_connection *info = smbdirect_mr->conn;
236338c8a9a5SSteve French int rc = 0;
236438c8a9a5SSteve French
236538c8a9a5SSteve French if (smbdirect_mr->need_invalidate) {
236638c8a9a5SSteve French /* Need to finish local invalidation before returning */
236738c8a9a5SSteve French wr = &smbdirect_mr->inv_wr;
236838c8a9a5SSteve French wr->opcode = IB_WR_LOCAL_INV;
236938c8a9a5SSteve French smbdirect_mr->cqe.done = local_inv_done;
237038c8a9a5SSteve French wr->wr_cqe = &smbdirect_mr->cqe;
237138c8a9a5SSteve French wr->num_sge = 0;
237238c8a9a5SSteve French wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
237338c8a9a5SSteve French wr->send_flags = IB_SEND_SIGNALED;
237438c8a9a5SSteve French
237538c8a9a5SSteve French init_completion(&smbdirect_mr->invalidate_done);
237638c8a9a5SSteve French rc = ib_post_send(info->id->qp, wr, NULL);
237738c8a9a5SSteve French if (rc) {
237838c8a9a5SSteve French log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
237938c8a9a5SSteve French smbd_disconnect_rdma_connection(info);
238038c8a9a5SSteve French goto done;
238138c8a9a5SSteve French }
238238c8a9a5SSteve French wait_for_completion(&smbdirect_mr->invalidate_done);
238338c8a9a5SSteve French smbdirect_mr->need_invalidate = false;
238438c8a9a5SSteve French } else
238538c8a9a5SSteve French /*
238638c8a9a5SSteve French * The remote peer has already invalidated this MR, so just mark it
238738c8a9a5SSteve French * MR_INVALIDATED; it is unmapped and made MR_READY again below
238838c8a9a5SSteve French */
238938c8a9a5SSteve French smbdirect_mr->state = MR_INVALIDATED;
239038c8a9a5SSteve French
239138c8a9a5SSteve French if (smbdirect_mr->state == MR_INVALIDATED) {
239238c8a9a5SSteve French ib_dma_unmap_sg(
239338c8a9a5SSteve French info->id->device, smbdirect_mr->sgt.sgl,
239438c8a9a5SSteve French smbdirect_mr->sgt.nents,
239538c8a9a5SSteve French smbdirect_mr->dir);
239638c8a9a5SSteve French smbdirect_mr->state = MR_READY;
239738c8a9a5SSteve French if (atomic_inc_return(&info->mr_ready_count) == 1)
239838c8a9a5SSteve French wake_up_interruptible(&info->wait_mr);
239938c8a9a5SSteve French } else
240038c8a9a5SSteve French /*
240138c8a9a5SSteve French * Schedule the work to do MR recovery for future I/Os. MR
240238c8a9a5SSteve French * recovery is slow and we don't want it to block the current I/O
240338c8a9a5SSteve French */
240438c8a9a5SSteve French queue_work(info->workqueue, &info->mr_recovery_work);
240538c8a9a5SSteve French
240638c8a9a5SSteve French done:
240738c8a9a5SSteve French if (atomic_dec_and_test(&info->mr_used_count))
240838c8a9a5SSteve French wake_up(&info->wait_for_mr_cleanup);
240938c8a9a5SSteve French
241038c8a9a5SSteve French return rc;
241138c8a9a5SSteve French }
241238c8a9a5SSteve French
241338c8a9a5SSteve French static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
241438c8a9a5SSteve French struct page *lowest_page, size_t off, size_t len)
241538c8a9a5SSteve French {
241638c8a9a5SSteve French struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
241738c8a9a5SSteve French u64 addr;
241838c8a9a5SSteve French
241938c8a9a5SSteve French addr = ib_dma_map_page(rdma->device, lowest_page,
242038c8a9a5SSteve French off, len, rdma->direction);
242138c8a9a5SSteve French if (ib_dma_mapping_error(rdma->device, addr))
242238c8a9a5SSteve French return false;
242338c8a9a5SSteve French
242438c8a9a5SSteve French sge->addr = addr;
242538c8a9a5SSteve French sge->length = len;
242638c8a9a5SSteve French sge->lkey = rdma->local_dma_lkey;
242738c8a9a5SSteve French rdma->nr_sge++;
242838c8a9a5SSteve French return true;
242938c8a9a5SSteve French }
243038c8a9a5SSteve French
243138c8a9a5SSteve French /*
243238c8a9a5SSteve French * Extract page fragments from a BVEC-class iterator and add them to an RDMA
243338c8a9a5SSteve French * element list. The pages are not pinned.
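 * Extraction starts iter->iov_offset bytes into the first segment and stops
 * once maxsize bytes have been added or rdma->max_sge entries are in use.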
243438c8a9a5SSteve French */ 243538c8a9a5SSteve French static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, 243638c8a9a5SSteve French struct smb_extract_to_rdma *rdma, 243738c8a9a5SSteve French ssize_t maxsize) 243838c8a9a5SSteve French { 243938c8a9a5SSteve French const struct bio_vec *bv = iter->bvec; 244038c8a9a5SSteve French unsigned long start = iter->iov_offset; 244138c8a9a5SSteve French unsigned int i; 244238c8a9a5SSteve French ssize_t ret = 0; 244338c8a9a5SSteve French 244438c8a9a5SSteve French for (i = 0; i < iter->nr_segs; i++) { 244538c8a9a5SSteve French size_t off, len; 244638c8a9a5SSteve French 244738c8a9a5SSteve French len = bv[i].bv_len; 244838c8a9a5SSteve French if (start >= len) { 244938c8a9a5SSteve French start -= len; 245038c8a9a5SSteve French continue; 245138c8a9a5SSteve French } 245238c8a9a5SSteve French 245338c8a9a5SSteve French len = min_t(size_t, maxsize, len - start); 245438c8a9a5SSteve French off = bv[i].bv_offset + start; 245538c8a9a5SSteve French 245638c8a9a5SSteve French if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) 245738c8a9a5SSteve French return -EIO; 245838c8a9a5SSteve French 245938c8a9a5SSteve French ret += len; 246038c8a9a5SSteve French maxsize -= len; 246138c8a9a5SSteve French if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 246238c8a9a5SSteve French break; 246338c8a9a5SSteve French start = 0; 246438c8a9a5SSteve French } 246538c8a9a5SSteve French 246638c8a9a5SSteve French return ret; 246738c8a9a5SSteve French } 246838c8a9a5SSteve French 246938c8a9a5SSteve French /* 247038c8a9a5SSteve French * Extract fragments from a KVEC-class iterator and add them to an RDMA list. 247138c8a9a5SSteve French * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 247238c8a9a5SSteve French * The pages are not pinned. 
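 * Each segment is walked one page at a time: vmalloc/module addresses are
 * translated with vmalloc_to_page() and linear addresses with virt_to_page(),
 * so each SGE covers at most one page.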
247338c8a9a5SSteve French */ 247438c8a9a5SSteve French static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, 247538c8a9a5SSteve French struct smb_extract_to_rdma *rdma, 247638c8a9a5SSteve French ssize_t maxsize) 247738c8a9a5SSteve French { 247838c8a9a5SSteve French const struct kvec *kv = iter->kvec; 247938c8a9a5SSteve French unsigned long start = iter->iov_offset; 248038c8a9a5SSteve French unsigned int i; 248138c8a9a5SSteve French ssize_t ret = 0; 248238c8a9a5SSteve French 248338c8a9a5SSteve French for (i = 0; i < iter->nr_segs; i++) { 248438c8a9a5SSteve French struct page *page; 248538c8a9a5SSteve French unsigned long kaddr; 248638c8a9a5SSteve French size_t off, len, seg; 248738c8a9a5SSteve French 248838c8a9a5SSteve French len = kv[i].iov_len; 248938c8a9a5SSteve French if (start >= len) { 249038c8a9a5SSteve French start -= len; 249138c8a9a5SSteve French continue; 249238c8a9a5SSteve French } 249338c8a9a5SSteve French 249438c8a9a5SSteve French kaddr = (unsigned long)kv[i].iov_base + start; 249538c8a9a5SSteve French off = kaddr & ~PAGE_MASK; 249638c8a9a5SSteve French len = min_t(size_t, maxsize, len - start); 249738c8a9a5SSteve French kaddr &= PAGE_MASK; 249838c8a9a5SSteve French 249938c8a9a5SSteve French maxsize -= len; 250038c8a9a5SSteve French do { 250138c8a9a5SSteve French seg = min_t(size_t, len, PAGE_SIZE - off); 250238c8a9a5SSteve French 250338c8a9a5SSteve French if (is_vmalloc_or_module_addr((void *)kaddr)) 250438c8a9a5SSteve French page = vmalloc_to_page((void *)kaddr); 250538c8a9a5SSteve French else 25063b1ddbb6SArnd Bergmann page = virt_to_page((void *)kaddr); 250738c8a9a5SSteve French 250838c8a9a5SSteve French if (!smb_set_sge(rdma, page, off, seg)) 250938c8a9a5SSteve French return -EIO; 251038c8a9a5SSteve French 251138c8a9a5SSteve French ret += seg; 251238c8a9a5SSteve French len -= seg; 251338c8a9a5SSteve French kaddr += PAGE_SIZE; 251438c8a9a5SSteve French off = 0; 251538c8a9a5SSteve French } while (len > 0 && rdma->nr_sge < rdma->max_sge); 251638c8a9a5SSteve French 251738c8a9a5SSteve French if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 251838c8a9a5SSteve French break; 251938c8a9a5SSteve French start = 0; 252038c8a9a5SSteve French } 252138c8a9a5SSteve French 252238c8a9a5SSteve French return ret; 252338c8a9a5SSteve French } 252438c8a9a5SSteve French 252538c8a9a5SSteve French /* 252638c8a9a5SSteve French * Extract folio fragments from an XARRAY-class iterator and add them to an 252738c8a9a5SSteve French * RDMA list. The folios are not pinned. 
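 * The folios are looked up under the RCU read lock; since nothing here takes
 * a reference, the caller must keep them in the xarray until the RDMA
 * operation that uses these SGEs has completed.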
252838c8a9a5SSteve French */ 252938c8a9a5SSteve French static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter, 253038c8a9a5SSteve French struct smb_extract_to_rdma *rdma, 253138c8a9a5SSteve French ssize_t maxsize) 253238c8a9a5SSteve French { 253338c8a9a5SSteve French struct xarray *xa = iter->xarray; 253438c8a9a5SSteve French struct folio *folio; 253538c8a9a5SSteve French loff_t start = iter->xarray_start + iter->iov_offset; 253638c8a9a5SSteve French pgoff_t index = start / PAGE_SIZE; 253738c8a9a5SSteve French ssize_t ret = 0; 253838c8a9a5SSteve French size_t off, len; 253938c8a9a5SSteve French XA_STATE(xas, xa, index); 254038c8a9a5SSteve French 254138c8a9a5SSteve French rcu_read_lock(); 254238c8a9a5SSteve French 254338c8a9a5SSteve French xas_for_each(&xas, folio, ULONG_MAX) { 254438c8a9a5SSteve French if (xas_retry(&xas, folio)) 254538c8a9a5SSteve French continue; 254638c8a9a5SSteve French if (WARN_ON(xa_is_value(folio))) 254738c8a9a5SSteve French break; 254838c8a9a5SSteve French if (WARN_ON(folio_test_hugetlb(folio))) 254938c8a9a5SSteve French break; 255038c8a9a5SSteve French 255138c8a9a5SSteve French off = offset_in_folio(folio, start); 255238c8a9a5SSteve French len = min_t(size_t, maxsize, folio_size(folio) - off); 255338c8a9a5SSteve French 255438c8a9a5SSteve French if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) { 255538c8a9a5SSteve French rcu_read_unlock(); 255638c8a9a5SSteve French return -EIO; 255738c8a9a5SSteve French } 255838c8a9a5SSteve French 255938c8a9a5SSteve French maxsize -= len; 256038c8a9a5SSteve French ret += len; 256138c8a9a5SSteve French if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 256238c8a9a5SSteve French break; 256338c8a9a5SSteve French } 256438c8a9a5SSteve French 256538c8a9a5SSteve French rcu_read_unlock(); 256638c8a9a5SSteve French return ret; 256738c8a9a5SSteve French } 256838c8a9a5SSteve French 256938c8a9a5SSteve French /* 257038c8a9a5SSteve French * Extract page fragments from up to the given amount of the source iterator 257138c8a9a5SSteve French * and build up an RDMA list that refers to all of those bits. The RDMA list 257238c8a9a5SSteve French * is appended to, up to the maximum number of elements set in the parameter 257338c8a9a5SSteve French * block. 257438c8a9a5SSteve French * 257538c8a9a5SSteve French * The extracted page fragments are not pinned or ref'd in any way; if an 257638c8a9a5SSteve French * IOVEC/UBUF-type iterator is to be used, it should be converted to a 257738c8a9a5SSteve French * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 257838c8a9a5SSteve French * way. 
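 *
 * A minimal usage sketch ("sges" stands in for a caller-provided array of
 * struct ib_sge; the send path earlier in this file sets things up in much
 * the same way):
 *
 *	struct smb_extract_to_rdma extract = {
 *		.nr_sge		= 0,
 *		.max_sge	= ARRAY_SIZE(sges),
 *		.sge		= sges,
 *		.device		= info->id->device,
 *		.local_dma_lkey	= info->pd->local_dma_lkey,
 *		.direction	= DMA_TO_DEVICE,
 *	};
 *	ssize_t n = smb_extract_iter_to_rdma(iter, len, &extract);
 *
 * On success the iterator has been advanced by the number of bytes added;
 * on failure any SGEs added by this call are unmapped again before returning.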
257938c8a9a5SSteve French */
258038c8a9a5SSteve French static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
258138c8a9a5SSteve French struct smb_extract_to_rdma *rdma)
258238c8a9a5SSteve French {
258338c8a9a5SSteve French ssize_t ret;
258438c8a9a5SSteve French int before = rdma->nr_sge;
258538c8a9a5SSteve French
258638c8a9a5SSteve French switch (iov_iter_type(iter)) {
258738c8a9a5SSteve French case ITER_BVEC:
258838c8a9a5SSteve French ret = smb_extract_bvec_to_rdma(iter, rdma, len);
258938c8a9a5SSteve French break;
259038c8a9a5SSteve French case ITER_KVEC:
259138c8a9a5SSteve French ret = smb_extract_kvec_to_rdma(iter, rdma, len);
259238c8a9a5SSteve French break;
259338c8a9a5SSteve French case ITER_XARRAY:
259438c8a9a5SSteve French ret = smb_extract_xarray_to_rdma(iter, rdma, len);
259538c8a9a5SSteve French break;
259638c8a9a5SSteve French default:
259738c8a9a5SSteve French WARN_ON_ONCE(1);
259838c8a9a5SSteve French return -EIO;
259938c8a9a5SSteve French }
260038c8a9a5SSteve French
260138c8a9a5SSteve French if (ret > 0) {
260238c8a9a5SSteve French iov_iter_advance(iter, ret);
260338c8a9a5SSteve French } else if (ret < 0) {
 /* Unwind: unmap the SGEs this call added; pre-decrement so the last filled entry is included */
260438c8a9a5SSteve French while (rdma->nr_sge > before) {
260538c8a9a5SSteve French struct ib_sge *sge = &rdma->sge[--rdma->nr_sge];
260638c8a9a5SSteve French
260738c8a9a5SSteve French ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
260838c8a9a5SSteve French rdma->direction);
260938c8a9a5SSteve French sge->addr = 0;
261038c8a9a5SSteve French }
261138c8a9a5SSteve French }
261238c8a9a5SSteve French
261338c8a9a5SSteve French return ret;
261438c8a9a5SSteve French }