xref: /openbmc/linux/fs/smb/client/transport.c (revision 63186a89)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   Copyright (C) International Business Machines  Corp., 2002,2008
5  *   Author(s): Steve French (sfrench@us.ibm.com)
6  *   Jeremy Allison (jra@samba.org) 2006.
7  *
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/list.h>
12 #include <linux/gfp.h>
13 #include <linux/wait.h>
14 #include <linux/net.h>
15 #include <linux/delay.h>
16 #include <linux/freezer.h>
17 #include <linux/tcp.h>
18 #include <linux/bvec.h>
19 #include <linux/highmem.h>
20 #include <linux/uaccess.h>
21 #include <asm/processor.h>
22 #include <linux/mempool.h>
23 #include <linux/sched/signal.h>
24 #include <linux/task_io_accounting_ops.h>
25 #include "cifspdu.h"
26 #include "cifsglob.h"
27 #include "cifsproto.h"
28 #include "cifs_debug.h"
29 #include "smb2proto.h"
30 #include "smbdirect.h"
31 
32 /* Max number of iovectors we can use off the stack when sending requests. */
33 #define CIFS_MAX_IOV_SIZE 8
34 
35 void
36 cifs_wake_up_task(struct mid_q_entry *mid)
37 {
38 	wake_up_process(mid->callback_data);
39 }
40 
41 static struct mid_q_entry *
42 alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
43 {
44 	struct mid_q_entry *temp;
45 
46 	if (server == NULL) {
47 		cifs_dbg(VFS, "%s: null TCP session\n", __func__);
48 		return NULL;
49 	}
50 
51 	temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
52 	memset(temp, 0, sizeof(struct mid_q_entry));
53 	kref_init(&temp->refcount);
54 	temp->mid = get_mid(smb_buffer);
55 	temp->pid = current->pid;
56 	temp->command = cpu_to_le16(smb_buffer->Command);
57 	cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
58 	/* easier to use jiffies */
59 	/* when mid allocated can be before when sent */
60 	temp->when_alloc = jiffies;
61 	temp->server = server;
62 
63 	/*
64 	 * The default is for the mid to be synchronous, so the
65 	 * default callback just wakes up the current task.
66 	 */
67 	get_task_struct(current);
68 	temp->creator = current;
69 	temp->callback = cifs_wake_up_task;
70 	temp->callback_data = current;
71 
72 	atomic_inc(&mid_count);
73 	temp->mid_state = MID_REQUEST_ALLOCATED;
74 	return temp;
75 }
76 
77 static void __release_mid(struct kref *refcount)
78 {
79 	struct mid_q_entry *midEntry =
80 			container_of(refcount, struct mid_q_entry, refcount);
81 #ifdef CONFIG_CIFS_STATS2
82 	__le16 command = midEntry->server->vals->lock_cmd;
83 	__u16 smb_cmd = le16_to_cpu(midEntry->command);
84 	unsigned long now;
85 	unsigned long roundtrip_time;
86 #endif
87 	struct TCP_Server_Info *server = midEntry->server;
88 
89 	if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) &&
90 	    midEntry->mid_state == MID_RESPONSE_RECEIVED &&
91 	    server->ops->handle_cancelled_mid)
92 		server->ops->handle_cancelled_mid(midEntry, server);
93 
94 	midEntry->mid_state = MID_FREE;
95 	atomic_dec(&mid_count);
96 	if (midEntry->large_buf)
97 		cifs_buf_release(midEntry->resp_buf);
98 	else
99 		cifs_small_buf_release(midEntry->resp_buf);
100 #ifdef CONFIG_CIFS_STATS2
101 	now = jiffies;
102 	if (now < midEntry->when_alloc)
103 		cifs_server_dbg(VFS, "Invalid mid allocation time\n");
104 	roundtrip_time = now - midEntry->when_alloc;
105 
106 	if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
107 		if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
108 			server->slowest_cmd[smb_cmd] = roundtrip_time;
109 			server->fastest_cmd[smb_cmd] = roundtrip_time;
110 		} else {
111 			if (server->slowest_cmd[smb_cmd] < roundtrip_time)
112 				server->slowest_cmd[smb_cmd] = roundtrip_time;
113 			else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
114 				server->fastest_cmd[smb_cmd] = roundtrip_time;
115 		}
116 		cifs_stats_inc(&server->num_cmds[smb_cmd]);
117 		server->time_per_cmd[smb_cmd] += roundtrip_time;
118 	}
119 	/*
120 	 * commands taking longer than one second (default) can be indications
121 	 * that something is wrong, unless it is quite a slow link or a very
122 	 * busy server. Note that this calc is unlikely or impossible to wrap
123 	 * as long as slow_rsp_threshold is not set way above recommended max
124 	 * value (32767 ie 9 hours) and is generally harmless even if wrong
125 	 * since only affects debug counters - so leaving the calc as simple
126 	 * comparison rather than doing multiple conversions and overflow
127 	 * checks
128 	 */
129 	if ((slow_rsp_threshold != 0) &&
130 	    time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
131 	    (midEntry->command != command)) {
132 		/*
133 		 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
134 		 * NB: le16_to_cpu returns unsigned so can not be negative below
135 		 */
136 		if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
137 			cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);
138 
139 		trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
140 			       midEntry->when_sent, midEntry->when_received);
141 		if (cifsFYI & CIFS_TIMER) {
142 			pr_debug("slow rsp: cmd %d mid %llu",
143 				 midEntry->command, midEntry->mid);
144 			cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
145 				  now - midEntry->when_alloc,
146 				  now - midEntry->when_sent,
147 				  now - midEntry->when_received);
148 		}
149 	}
150 #endif
151 	put_task_struct(midEntry->creator);
152 
153 	mempool_free(midEntry, cifs_mid_poolp);
154 }
155 
156 void release_mid(struct mid_q_entry *mid)
157 {
158 	struct TCP_Server_Info *server = mid->server;
159 
160 	spin_lock(&server->mid_lock);
161 	kref_put(&mid->refcount, __release_mid);
162 	spin_unlock(&server->mid_lock);
163 }
164 
165 void
166 delete_mid(struct mid_q_entry *mid)
167 {
168 	spin_lock(&mid->server->mid_lock);
169 	if (!(mid->mid_flags & MID_DELETED)) {
170 		list_del_init(&mid->qhead);
171 		mid->mid_flags |= MID_DELETED;
172 	}
173 	spin_unlock(&mid->server->mid_lock);
174 
175 	release_mid(mid);
176 }
177 
178 /*
179  * smb_send_kvec - send an array of kvecs to the server
180  * @server:	Server to send the data to
181  * @smb_msg:	Message to send
182  * @sent:	amount of data sent on socket is stored here
183  *
184  * Our basic "send data to server" function. Should be called with srv_mutex
185  * held. The caller is responsible for handling the results.
186  */
187 static int
188 smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
189 	      size_t *sent)
190 {
191 	int rc = 0;
192 	int retries = 0;
193 	struct socket *ssocket = server->ssocket;
194 
195 	*sent = 0;
196 
197 	if (server->noblocksnd)
198 		smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
199 	else
200 		smb_msg->msg_flags = MSG_NOSIGNAL;
201 
202 	while (msg_data_left(smb_msg)) {
203 		/*
204 		 * If blocking send, we try 3 times, since each can block
205 		 * for 5 seconds. For nonblocking  we have to try more
206 		 * but wait increasing amounts of time allowing time for
207 		 * socket to clear.  The overall time we wait in either
208 		 * case to send on the socket is about 15 seconds.
209 		 * Similarly we wait for 15 seconds for a response from
210 		 * the server in SendReceive[2] for the server to send
211 		 * a response back for most types of requests (except
212 		 * SMB Write past end of file which can be slow, and
213 		 * blocking lock operations). NFS waits slightly longer
214 		 * than CIFS, but this can make it take longer for
215 		 * nonresponsive servers to be detected and 15 seconds
216 		 * is more than enough time for modern networks to
217 		 * send a packet.  In most cases if we fail to send
218 		 * after the retries we will kill the socket and
219 		 * reconnect which may clear the network problem.
220 		 */
221 		rc = sock_sendmsg(ssocket, smb_msg);
222 		if (rc == -EAGAIN) {
223 			retries++;
224 			if (retries >= 14 ||
225 			    (!server->noblocksnd && (retries > 2))) {
226 				cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
227 					 ssocket);
228 				return -EAGAIN;
229 			}
230 			msleep(1 << retries);
231 			continue;
232 		}
233 
234 		if (rc < 0)
235 			return rc;
236 
237 		if (rc == 0) {
238 			/* should never happen, letting socket clear before
239 			   retrying is our only obvious option here */
240 			cifs_server_dbg(VFS, "tcp sent no data\n");
241 			msleep(500);
242 			continue;
243 		}
244 
245 		/* send was at least partially successful */
246 		*sent += rc;
247 		retries = 0; /* in case we get ENOSPC on the next send */
248 	}
249 	return 0;
250 }
251 
252 unsigned long
253 smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
254 {
255 	unsigned int i;
256 	struct kvec *iov;
257 	int nvec;
258 	unsigned long buflen = 0;
259 
260 	if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
261 	    rqst->rq_iov[0].iov_len == 4) {
262 		iov = &rqst->rq_iov[1];
263 		nvec = rqst->rq_nvec - 1;
264 	} else {
265 		iov = rqst->rq_iov;
266 		nvec = rqst->rq_nvec;
267 	}
268 
269 	/* total up iov array first */
270 	for (i = 0; i < nvec; i++)
271 		buflen += iov[i].iov_len;
272 
273 	buflen += iov_iter_count(&rqst->rq_iter);
274 	return buflen;
275 }
276 
277 static int
278 __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
279 		struct smb_rqst *rqst)
280 {
281 	int rc;
282 	struct kvec *iov;
283 	int n_vec;
284 	unsigned int send_length = 0;
285 	unsigned int i, j;
286 	sigset_t mask, oldmask;
287 	size_t total_len = 0, sent, size;
288 	struct socket *ssocket = server->ssocket;
289 	struct msghdr smb_msg = {};
290 	__be32 rfc1002_marker;
291 
292 	cifs_in_send_inc(server);
293 	if (cifs_rdma_enabled(server)) {
294 		/* return -EAGAIN when connecting or reconnecting */
295 		rc = -EAGAIN;
296 		if (server->smbd_conn)
297 			rc = smbd_send(server, num_rqst, rqst);
298 		goto smbd_done;
299 	}
300 
301 	rc = -EAGAIN;
302 	if (ssocket == NULL)
303 		goto out;
304 
305 	rc = -ERESTARTSYS;
306 	if (fatal_signal_pending(current)) {
307 		cifs_dbg(FYI, "signal pending before send request\n");
308 		goto out;
309 	}
310 
311 	rc = 0;
312 	/* cork the socket */
313 	tcp_sock_set_cork(ssocket->sk, true);
314 
315 	for (j = 0; j < num_rqst; j++)
316 		send_length += smb_rqst_len(server, &rqst[j]);
317 	rfc1002_marker = cpu_to_be32(send_length);
318 
319 	/*
320 	 * We should not allow signals to interrupt the network send because
321 	 * any partial send will cause session reconnects thus increasing
322 	 * latency of system calls and overload a server with unnecessary
323 	 * requests.
324 	 */
325 
326 	sigfillset(&mask);
327 	sigprocmask(SIG_BLOCK, &mask, &oldmask);
328 
329 	/* Generate a rfc1002 marker for SMB2+ */
330 	if (!is_smb1(server)) {
331 		struct kvec hiov = {
332 			.iov_base = &rfc1002_marker,
333 			.iov_len  = 4
334 		};
335 		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
336 		rc = smb_send_kvec(server, &smb_msg, &sent);
337 		if (rc < 0)
338 			goto unmask;
339 
340 		total_len += sent;
341 		send_length += 4;
342 	}
343 
344 	cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);
345 
346 	for (j = 0; j < num_rqst; j++) {
347 		iov = rqst[j].rq_iov;
348 		n_vec = rqst[j].rq_nvec;
349 
350 		size = 0;
351 		for (i = 0; i < n_vec; i++) {
352 			dump_smb(iov[i].iov_base, iov[i].iov_len);
353 			size += iov[i].iov_len;
354 		}
355 
356 		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);
357 
358 		rc = smb_send_kvec(server, &smb_msg, &sent);
359 		if (rc < 0)
360 			goto unmask;
361 
362 		total_len += sent;
363 
364 		if (iov_iter_count(&rqst[j].rq_iter) > 0) {
365 			smb_msg.msg_iter = rqst[j].rq_iter;
366 			rc = smb_send_kvec(server, &smb_msg, &sent);
367 			if (rc < 0)
368 				break;
369 			total_len += sent;
370 		}
371 
372 }
373 
374 unmask:
375 	sigprocmask(SIG_SETMASK, &oldmask, NULL);
376 
377 	/*
378 	 * If signal is pending but we have already sent the whole packet to
379 	 * the server we need to return success status to allow a corresponding
380 	 * mid entry to be kept in the pending requests queue thus allowing
381 	 * to handle responses from the server by the client.
382 	 *
383 	 * If only part of the packet has been sent there is no need to hide
384 	 * interrupt because the session will be reconnected anyway, so there
385 	 * won't be any response from the server to handle.
386 	 */
387 
388 	if (signal_pending(current) && (total_len != send_length)) {
389 		cifs_dbg(FYI, "signal is pending after attempt to send\n");
390 		rc = -ERESTARTSYS;
391 	}
392 
393 	/* uncork it */
394 	tcp_sock_set_cork(ssocket->sk, false);
395 
396 	if ((total_len > 0) && (total_len != send_length)) {
397 		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
398 			 send_length, total_len);
399 		/*
400 		 * If we have only sent part of an SMB then the next SMB could
401 		 * be taken as the remainder of this one. We need to kill the
402 		 * socket so the server throws away the partial SMB
403 		 */
404 		cifs_signal_cifsd_for_reconnect(server, false);
405 		trace_smb3_partial_send_reconnect(server->CurrentMid,
406 						  server->conn_id, server->hostname);
407 	}
408 smbd_done:
409 	if (rc < 0 && rc != -EINTR)
410 		cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
411 			 rc);
412 	else if (rc > 0)
413 		rc = 0;
414 out:
415 	cifs_in_send_dec(server);
416 	return rc;
417 }
418 
419 static int
420 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
421 	      struct smb_rqst *rqst, int flags)
422 {
423 	struct kvec iov;
424 	struct smb2_transform_hdr *tr_hdr;
425 	struct smb_rqst cur_rqst[MAX_COMPOUND];
426 	int rc;
427 
428 	if (!(flags & CIFS_TRANSFORM_REQ))
429 		return __smb_send_rqst(server, num_rqst, rqst);
430 
431 	if (num_rqst > MAX_COMPOUND - 1)
432 		return -ENOMEM;
433 
434 	if (!server->ops->init_transform_rq) {
435 		cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
436 		return -EIO;
437 	}
438 
439 	tr_hdr = kzalloc(sizeof(*tr_hdr), GFP_NOFS);
440 	if (!tr_hdr)
441 		return -ENOMEM;
442 
443 	memset(&cur_rqst[0], 0, sizeof(cur_rqst));
444 	memset(&iov, 0, sizeof(iov));
445 
446 	iov.iov_base = tr_hdr;
447 	iov.iov_len = sizeof(*tr_hdr);
448 	cur_rqst[0].rq_iov = &iov;
449 	cur_rqst[0].rq_nvec = 1;
450 
451 	rc = server->ops->init_transform_rq(server, num_rqst + 1,
452 					    &cur_rqst[0], rqst);
453 	if (rc)
454 		goto out;
455 
456 	rc = __smb_send_rqst(server, num_rqst + 1, &cur_rqst[0]);
457 	smb3_free_compound_rqst(num_rqst, &cur_rqst[1]);
458 out:
459 	kfree(tr_hdr);
460 	return rc;
461 }
462 
463 int
464 smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
465 	 unsigned int smb_buf_length)
466 {
467 	struct kvec iov[2];
468 	struct smb_rqst rqst = { .rq_iov = iov,
469 				 .rq_nvec = 2 };
470 
471 	iov[0].iov_base = smb_buffer;
472 	iov[0].iov_len = 4;
473 	iov[1].iov_base = (char *)smb_buffer + 4;
474 	iov[1].iov_len = smb_buf_length;
475 
476 	return __smb_send_rqst(server, 1, &rqst);
477 }
478 
479 static int
480 wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
481 		      const int timeout, const int flags,
482 		      unsigned int *instance)
483 {
484 	long rc;
485 	int *credits;
486 	int optype;
487 	long int t;
488 	int scredits, in_flight;
489 
490 	if (timeout < 0)
491 		t = MAX_JIFFY_OFFSET;
492 	else
493 		t = msecs_to_jiffies(timeout);
494 
495 	optype = flags & CIFS_OP_MASK;
496 
497 	*instance = 0;
498 
499 	credits = server->ops->get_credits_field(server, optype);
500 	/* Since an echo is already inflight, no need to wait to send another */
501 	if (*credits <= 0 && optype == CIFS_ECHO_OP)
502 		return -EAGAIN;
503 
504 	spin_lock(&server->req_lock);
505 	if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
506 		/* oplock breaks must not be held up */
507 		server->in_flight++;
508 		if (server->in_flight > server->max_in_flight)
509 			server->max_in_flight = server->in_flight;
510 		*credits -= 1;
511 		*instance = server->reconnect_instance;
512 		scredits = *credits;
513 		in_flight = server->in_flight;
514 		spin_unlock(&server->req_lock);
515 
516 		trace_smb3_nblk_credits(server->CurrentMid,
517 				server->conn_id, server->hostname, scredits, -1, in_flight);
518 		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
519 				__func__, 1, scredits);
520 
521 		return 0;
522 	}
523 
524 	while (1) {
525 		spin_unlock(&server->req_lock);
526 
527 		spin_lock(&server->srv_lock);
528 		if (server->tcpStatus == CifsExiting) {
529 			spin_unlock(&server->srv_lock);
530 			return -ENOENT;
531 		}
532 		spin_unlock(&server->srv_lock);
533 
534 		spin_lock(&server->req_lock);
535 		if (*credits < num_credits) {
536 			scredits = *credits;
537 			spin_unlock(&server->req_lock);
538 
539 			cifs_num_waiters_inc(server);
540 			rc = wait_event_killable_timeout(server->request_q,
541 				has_credits(server, credits, num_credits), t);
542 			cifs_num_waiters_dec(server);
543 			if (!rc) {
544 				spin_lock(&server->req_lock);
545 				scredits = *credits;
546 				in_flight = server->in_flight;
547 				spin_unlock(&server->req_lock);
548 
549 				trace_smb3_credit_timeout(server->CurrentMid,
550 						server->conn_id, server->hostname, scredits,
551 						num_credits, in_flight);
552 				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
553 						timeout);
554 				return -EBUSY;
555 			}
556 			if (rc == -ERESTARTSYS)
557 				return -ERESTARTSYS;
558 			spin_lock(&server->req_lock);
559 		} else {
560 			/*
561 			 * For normal commands, reserve the last MAX_COMPOUND
562 			 * credits to compound requests.
563 			 * Otherwise these compounds could be permanently
564 			 * starved for credits by single-credit requests.
565 			 *
566 			 * To prevent spinning CPU, block this thread until
567 			 * there are >MAX_COMPOUND credits available.
568 			 * But only do this is we already have a lot of
569 			 * credits in flight to avoid triggering this check
570 			 * for servers that are slow to hand out credits on
571 			 * new sessions.
572 			 */
573 			if (!optype && num_credits == 1 &&
574 			    server->in_flight > 2 * MAX_COMPOUND &&
575 			    *credits <= MAX_COMPOUND) {
576 				spin_unlock(&server->req_lock);
577 
578 				cifs_num_waiters_inc(server);
579 				rc = wait_event_killable_timeout(
580 					server->request_q,
581 					has_credits(server, credits,
582 						    MAX_COMPOUND + 1),
583 					t);
584 				cifs_num_waiters_dec(server);
585 				if (!rc) {
586 					spin_lock(&server->req_lock);
587 					scredits = *credits;
588 					in_flight = server->in_flight;
589 					spin_unlock(&server->req_lock);
590 
591 					trace_smb3_credit_timeout(
592 							server->CurrentMid,
593 							server->conn_id, server->hostname,
594 							scredits, num_credits, in_flight);
595 					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
596 							timeout);
597 					return -EBUSY;
598 				}
599 				if (rc == -ERESTARTSYS)
600 					return -ERESTARTSYS;
601 				spin_lock(&server->req_lock);
602 				continue;
603 			}
604 
605 			/*
606 			 * Can not count locking commands against total
607 			 * as they are allowed to block on server.
608 			 */
609 
610 			/* update # of requests on the wire to server */
611 			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
612 				*credits -= num_credits;
613 				server->in_flight += num_credits;
614 				if (server->in_flight > server->max_in_flight)
615 					server->max_in_flight = server->in_flight;
616 				*instance = server->reconnect_instance;
617 			}
618 			scredits = *credits;
619 			in_flight = server->in_flight;
620 			spin_unlock(&server->req_lock);
621 
622 			trace_smb3_waitff_credits(server->CurrentMid,
623 					server->conn_id, server->hostname, scredits,
624 					-(num_credits), in_flight);
625 			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
626 					__func__, num_credits, scredits);
627 			break;
628 		}
629 	}
630 	return 0;
631 }
632 
/*
 * Reserve a single credit for one request, with no upper bound on the wait
 * other than MAX_JIFFY_OFFSET. See wait_for_free_credits() for the meaning
 * of @flags, @instance and the return value.
 */
static int
wait_for_free_request(struct TCP_Server_Info *server, const int flags,
		      unsigned int *instance)
{
	const int one_credit = 1;
	const int no_timeout = -1;	/* negative selects MAX_JIFFY_OFFSET */

	return wait_for_free_credits(server, one_credit, no_timeout, flags,
				     instance);
}
640 
641 static int
642 wait_for_compound_request(struct TCP_Server_Info *server, int num,
643 			  const int flags, unsigned int *instance)
644 {
645 	int *credits;
646 	int scredits, in_flight;
647 
648 	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
649 
650 	spin_lock(&server->req_lock);
651 	scredits = *credits;
652 	in_flight = server->in_flight;
653 
654 	if (*credits < num) {
655 		/*
656 		 * If the server is tight on resources or just gives us less
657 		 * credits for other reasons (e.g. requests are coming out of
658 		 * order and the server delays granting more credits until it
659 		 * processes a missing mid) and we exhausted most available
660 		 * credits there may be situations when we try to send
661 		 * a compound request but we don't have enough credits. At this
662 		 * point the client needs to decide if it should wait for
663 		 * additional credits or fail the request. If at least one
664 		 * request is in flight there is a high probability that the
665 		 * server will return enough credits to satisfy this compound
666 		 * request.
667 		 *
668 		 * Return immediately if no requests in flight since we will be
669 		 * stuck on waiting for credits.
670 		 */
671 		if (server->in_flight == 0) {
672 			spin_unlock(&server->req_lock);
673 			trace_smb3_insufficient_credits(server->CurrentMid,
674 					server->conn_id, server->hostname, scredits,
675 					num, in_flight);
676 			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
677 					__func__, in_flight, num, scredits);
678 			return -EDEADLK;
679 		}
680 	}
681 	spin_unlock(&server->req_lock);
682 
683 	return wait_for_free_credits(server, num, 60000, flags,
684 				     instance);
685 }
686 
687 int
688 cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
689 		      unsigned int *num, struct cifs_credits *credits)
690 {
691 	*num = size;
692 	credits->value = 0;
693 	credits->instance = server->reconnect_instance;
694 	return 0;
695 }
696 
697 static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
698 			struct mid_q_entry **ppmidQ)
699 {
700 	spin_lock(&ses->ses_lock);
701 	if (ses->ses_status == SES_NEW) {
702 		if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
703 			(in_buf->Command != SMB_COM_NEGOTIATE)) {
704 			spin_unlock(&ses->ses_lock);
705 			return -EAGAIN;
706 		}
707 		/* else ok - we are setting up session */
708 	}
709 
710 	if (ses->ses_status == SES_EXITING) {
711 		/* check if SMB session is bad because we are setting it up */
712 		if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
713 			spin_unlock(&ses->ses_lock);
714 			return -EAGAIN;
715 		}
716 		/* else ok - we are shutting down session */
717 	}
718 	spin_unlock(&ses->ses_lock);
719 
720 	*ppmidQ = alloc_mid(in_buf, ses->server);
721 	if (*ppmidQ == NULL)
722 		return -ENOMEM;
723 	spin_lock(&ses->server->mid_lock);
724 	list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q);
725 	spin_unlock(&ses->server->mid_lock);
726 	return 0;
727 }
728 
729 static int
730 wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
731 {
732 	int error;
733 
734 	error = wait_event_state(server->response_q,
735 				 midQ->mid_state != MID_REQUEST_SUBMITTED,
736 				 (TASK_KILLABLE|TASK_FREEZABLE_UNSAFE));
737 	if (error < 0)
738 		return -ERESTARTSYS;
739 
740 	return 0;
741 }
742 
743 struct mid_q_entry *
744 cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
745 {
746 	int rc;
747 	struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
748 	struct mid_q_entry *mid;
749 
750 	if (rqst->rq_iov[0].iov_len != 4 ||
751 	    rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
752 		return ERR_PTR(-EIO);
753 
754 	/* enable signing if server requires it */
755 	if (server->sign)
756 		hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
757 
758 	mid = alloc_mid(hdr, server);
759 	if (mid == NULL)
760 		return ERR_PTR(-ENOMEM);
761 
762 	rc = cifs_sign_rqst(rqst, server, &mid->sequence_number);
763 	if (rc) {
764 		release_mid(mid);
765 		return ERR_PTR(rc);
766 	}
767 
768 	return mid;
769 }
770 
771 /*
772  * Send a SMB request and set the callback function in the mid to handle
773  * the result. Caller is responsible for dealing with timeouts.
774  */
775 int
776 cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
777 		mid_receive_t *receive, mid_callback_t *callback,
778 		mid_handle_t *handle, void *cbdata, const int flags,
779 		const struct cifs_credits *exist_credits)
780 {
781 	int rc;
782 	struct mid_q_entry *mid;
783 	struct cifs_credits credits = { .value = 0, .instance = 0 };
784 	unsigned int instance;
785 	int optype;
786 
787 	optype = flags & CIFS_OP_MASK;
788 
789 	if ((flags & CIFS_HAS_CREDITS) == 0) {
790 		rc = wait_for_free_request(server, flags, &instance);
791 		if (rc)
792 			return rc;
793 		credits.value = 1;
794 		credits.instance = instance;
795 	} else
796 		instance = exist_credits->instance;
797 
798 	cifs_server_lock(server);
799 
800 	/*
801 	 * We can't use credits obtained from the previous session to send this
802 	 * request. Check if there were reconnects after we obtained credits and
803 	 * return -EAGAIN in such cases to let callers handle it.
804 	 */
805 	if (instance != server->reconnect_instance) {
806 		cifs_server_unlock(server);
807 		add_credits_and_wake_if(server, &credits, optype);
808 		return -EAGAIN;
809 	}
810 
811 	mid = server->ops->setup_async_request(server, rqst);
812 	if (IS_ERR(mid)) {
813 		cifs_server_unlock(server);
814 		add_credits_and_wake_if(server, &credits, optype);
815 		return PTR_ERR(mid);
816 	}
817 
818 	mid->receive = receive;
819 	mid->callback = callback;
820 	mid->callback_data = cbdata;
821 	mid->handle = handle;
822 	mid->mid_state = MID_REQUEST_SUBMITTED;
823 
824 	/* put it on the pending_mid_q */
825 	spin_lock(&server->mid_lock);
826 	list_add_tail(&mid->qhead, &server->pending_mid_q);
827 	spin_unlock(&server->mid_lock);
828 
829 	/*
830 	 * Need to store the time in mid before calling I/O. For call_async,
831 	 * I/O response may come back and free the mid entry on another thread.
832 	 */
833 	cifs_save_when_sent(mid);
834 	rc = smb_send_rqst(server, 1, rqst, flags);
835 
836 	if (rc < 0) {
837 		revert_current_mid(server, mid->credits);
838 		server->sequence_number -= 2;
839 		delete_mid(mid);
840 	}
841 
842 	cifs_server_unlock(server);
843 
844 	if (rc == 0)
845 		return 0;
846 
847 	add_credits_and_wake_if(server, &credits, optype);
848 	return rc;
849 }
850 
851 /*
852  *
853  * Send an SMB Request.  No response info (other than return code)
854  * needs to be parsed.
855  *
856  * flags indicate the type of request buffer and how long to wait
857  * and whether to log NT STATUS code (error) before mapping it to POSIX error
858  *
859  */
860 int
861 SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
862 		 char *in_buf, int flags)
863 {
864 	int rc;
865 	struct kvec iov[1];
866 	struct kvec rsp_iov;
867 	int resp_buf_type;
868 
869 	iov[0].iov_base = in_buf;
870 	iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
871 	flags |= CIFS_NO_RSP_BUF;
872 	rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
873 	cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc);
874 
875 	return rc;
876 }
877 
878 static int
879 cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
880 {
881 	int rc = 0;
882 
883 	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
884 		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);
885 
886 	spin_lock(&server->mid_lock);
887 	switch (mid->mid_state) {
888 	case MID_RESPONSE_RECEIVED:
889 		spin_unlock(&server->mid_lock);
890 		return rc;
891 	case MID_RETRY_NEEDED:
892 		rc = -EAGAIN;
893 		break;
894 	case MID_RESPONSE_MALFORMED:
895 		rc = -EIO;
896 		break;
897 	case MID_SHUTDOWN:
898 		rc = -EHOSTDOWN;
899 		break;
900 	default:
901 		if (!(mid->mid_flags & MID_DELETED)) {
902 			list_del_init(&mid->qhead);
903 			mid->mid_flags |= MID_DELETED;
904 		}
905 		cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
906 			 __func__, mid->mid, mid->mid_state);
907 		rc = -EIO;
908 	}
909 	spin_unlock(&server->mid_lock);
910 
911 	release_mid(mid);
912 	return rc;
913 }
914 
915 static inline int
916 send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
917 	    struct mid_q_entry *mid)
918 {
919 	return server->ops->send_cancel ?
920 				server->ops->send_cancel(server, rqst, mid) : 0;
921 }
922 
923 int
924 cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
925 		   bool log_error)
926 {
927 	unsigned int len = get_rfc1002_length(mid->resp_buf) + 4;
928 
929 	dump_smb(mid->resp_buf, min_t(u32, 92, len));
930 
931 	/* convert the length into a more usable form */
932 	if (server->sign) {
933 		struct kvec iov[2];
934 		int rc = 0;
935 		struct smb_rqst rqst = { .rq_iov = iov,
936 					 .rq_nvec = 2 };
937 
938 		iov[0].iov_base = mid->resp_buf;
939 		iov[0].iov_len = 4;
940 		iov[1].iov_base = (char *)mid->resp_buf + 4;
941 		iov[1].iov_len = len - 4;
942 		/* FIXME: add code to kill session */
943 		rc = cifs_verify_signature(&rqst, server,
944 					   mid->sequence_number);
945 		if (rc)
946 			cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n",
947 				 rc);
948 	}
949 
950 	/* BB special case reconnect tid and uid here? */
951 	return map_and_check_smb_error(mid, log_error);
952 }
953 
954 struct mid_q_entry *
955 cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored,
956 		   struct smb_rqst *rqst)
957 {
958 	int rc;
959 	struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
960 	struct mid_q_entry *mid;
961 
962 	if (rqst->rq_iov[0].iov_len != 4 ||
963 	    rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
964 		return ERR_PTR(-EIO);
965 
966 	rc = allocate_mid(ses, hdr, &mid);
967 	if (rc)
968 		return ERR_PTR(rc);
969 	rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number);
970 	if (rc) {
971 		delete_mid(mid);
972 		return ERR_PTR(rc);
973 	}
974 	return mid;
975 }
976 
977 static void
978 cifs_compound_callback(struct mid_q_entry *mid)
979 {
980 	struct TCP_Server_Info *server = mid->server;
981 	struct cifs_credits credits;
982 
983 	credits.value = server->ops->get_credits(mid);
984 	credits.instance = server->reconnect_instance;
985 
986 	add_credits(server, &credits, mid->optype);
987 }
988 
/*
 * Mid callback that returns the credits for @mid and then wakes the task
 * stored in the mid's callback_data.
 */
static void cifs_compound_last_callback(struct mid_q_entry *mid)
{
	/* credits first, so they are back before the waiter runs */
	cifs_compound_callback(mid);
	cifs_wake_up_task(mid);
}
995 
/*
 * Mid callback that returns the credits for @mid and then drops a
 * reference on the mid.
 */
static void cifs_cancelled_callback(struct mid_q_entry *mid)
{
	cifs_compound_callback(mid);
	/* @mid may be freed by this call; do not touch it afterwards */
	release_mid(mid);
}
1002 
1003 /*
1004  * Return a channel (master if none) of @ses that can be used to send
1005  * regular requests.
1006  *
1007  * If we are currently binding a new channel (negprot/sess.setup),
1008  * return the new incomplete channel.
1009  */
1010 struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
1011 {
1012 	uint index = 0;
1013 	unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
1014 	struct TCP_Server_Info *server = NULL;
1015 	int i;
1016 
1017 	if (!ses)
1018 		return NULL;
1019 
1020 	spin_lock(&ses->chan_lock);
1021 	for (i = 0; i < ses->chan_count; i++) {
1022 		server = ses->chans[i].server;
1023 		if (!server)
1024 			continue;
1025 
1026 		/*
1027 		 * strictly speaking, we should pick up req_lock to read
1028 		 * server->in_flight. But it shouldn't matter much here if we
1029 		 * race while reading this data. The worst that can happen is
1030 		 * that we could use a channel that's not least loaded. Avoiding
1031 		 * taking the lock could help reduce wait time, which is
1032 		 * important for this function
1033 		 */
1034 		if (server->in_flight < min_in_flight) {
1035 			min_in_flight = server->in_flight;
1036 			index = i;
1037 		}
1038 		if (server->in_flight > max_in_flight)
1039 			max_in_flight = server->in_flight;
1040 	}
1041 
1042 	/* if all channels are equally loaded, fall back to round-robin */
1043 	if (min_in_flight == max_in_flight) {
1044 		index = (uint)atomic_inc_return(&ses->chan_seq);
1045 		index %= ses->chan_count;
1046 	}
1047 	spin_unlock(&ses->chan_lock);
1048 
1049 	return ses->chans[index].server;
1050 }
1051 
/*
 * Send @num_rqst requests on @server as one compound chain and wait for
 * the responses.
 *
 * @xid:           not referenced in this function
 * @ses:           session the requests are issued for
 * @server:        channel the chain is sent on
 * @flags:         CIFS_* flags; the bits under CIFS_OP_MASK become the op
 *                 type used for credit accounting
 * @num_rqst:      number of entries in @rqst, @resp_buf_type and @resp_iov
 * @rqst:          the requests to send
 * @resp_buf_type: per-response buffer type; set to CIFS_NO_BUFFER up front
 *                 and to CIFS_SMALL_BUFFER / CIFS_LARGE_BUFFER once a
 *                 response has been attached
 * @resp_iov:      per-response base/length of the received buffer
 *
 * Returns 0 on success or a negative errno. Errors produced directly here:
 * -EIO for a missing session/server or an unexpected mid state, -ENOENT
 * when the server is exiting, -EAGAIN when the server reconnected after
 * the credits were obtained. Other values come from the helpers called.
 *
 * NOTE(review): the on-stack arrays hold MAX_COMPOUND entries and
 * @num_rqst is not range-checked in this function; presumably callers
 * guarantee num_rqst <= MAX_COMPOUND -- confirm.
 */
int
compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
		   struct TCP_Server_Info *server,
		   const int flags, const int num_rqst, struct smb_rqst *rqst,
		   int *resp_buf_type, struct kvec *resp_iov)
{
	int i, j, optype, rc = 0;
	struct mid_q_entry *midQ[MAX_COMPOUND];
	/* true for mids that must NOT be passed to delete_mid() at "out" */
	bool cancelled_mid[MAX_COMPOUND] = {false};
	struct cifs_credits credits[MAX_COMPOUND] = {
		{ .value = 0, .instance = 0 }
	};
	unsigned int instance;
	char *buf;

	optype = flags & CIFS_OP_MASK;

	for (i = 0; i < num_rqst; i++)
		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */

	if (!ses || !ses->server || !server) {
		cifs_dbg(VFS, "Null session\n");
		return -EIO;
	}

	spin_lock(&server->srv_lock);
	if (server->tcpStatus == CifsExiting) {
		spin_unlock(&server->srv_lock);
		return -ENOENT;
	}
	spin_unlock(&server->srv_lock);

	/*
	 * Wait for all the requests to become available.
	 * This approach still leaves the possibility to be stuck waiting for
	 * credits if the server doesn't grant credits to the outstanding
	 * requests and if the client is completely idle, not generating any
	 * other requests.
	 * This can be handled by the eventual session reconnect.
	 */
	rc = wait_for_compound_request(server, num_rqst, flags,
				       &instance);
	if (rc)
		return rc;

	/* one credit per request, all tagged with the instance we waited on */
	for (i = 0; i < num_rqst; i++) {
		credits[i].value = 1;
		credits[i].instance = instance;
	}

	/*
	 * Make sure that we sign in the same order that we send on this socket
	 * and avoid races inside tcp sendmsg code that could cause corruption
	 * of smb data.
	 */

	cifs_server_lock(server);

	/*
	 * All the parts of the compound chain must have obtained their
	 * credits from the same session. We can not use credits obtained from
	 * the previous session to send this request. Check if there were
	 * reconnects after we obtained credits and return -EAGAIN in such
	 * cases to let callers handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		for (j = 0; j < num_rqst; j++)
			add_credits(server, &credits[j], optype);
		return -EAGAIN;
	}

	for (i = 0; i < num_rqst; i++) {
		midQ[i] = server->ops->setup_request(ses, server, &rqst[i]);
		if (IS_ERR(midQ[i])) {
			/* undo the i mids set up so far, then give all credits back */
			revert_current_mid(server, i);
			for (j = 0; j < i; j++)
				delete_mid(midQ[j]);
			cifs_server_unlock(server);

			/* Update # of requests on wire to server */
			for (j = 0; j < num_rqst; j++)
				add_credits(server, &credits[j], optype);
			return PTR_ERR(midQ[i]);
		}

		midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
		midQ[i]->optype = optype;
		/*
		 * Invoke callback for every part of the compound chain
		 * to calculate credits properly. Wake up this thread only when
		 * the last element is received.
		 */
		if (i < num_rqst - 1)
			midQ[i]->callback = cifs_compound_callback;
		else
			midQ[i]->callback = cifs_compound_last_callback;
	}
	rc = smb_send_rqst(server, num_rqst, rqst, flags);

	for (i = 0; i < num_rqst; i++)
		cifs_save_when_sent(midQ[i]);

	if (rc < 0) {
		revert_current_mid(server, num_rqst);
		/*
		 * NOTE(review): presumably rolls back the sequence numbers
		 * consumed while signing -- confirm against the signing code.
		 */
		server->sequence_number -= 2;
	}

	cifs_server_unlock(server);

	/*
	 * If sending failed for some reason or it is an oplock break that we
	 * will not receive a response to - return credits back
	 */
	if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
		for (i = 0; i < num_rqst; i++)
			add_credits(server, &credits[i], optype);
		goto out;
	}

	/*
	 * At this point the request is passed to the network stack - we assume
	 * that any credits taken from the server structure on the client have
	 * been spent and we can't return them back. Once we receive responses
	 * we will collect credits granted by the server in the mid callbacks
	 * and add those credits to the server structure.
	 */

	/*
	 * Compounding is never used during session establish.
	 * Only the first request is fed into the preauth hash here.
	 * ses_lock is dropped around the hash update and re-taken afterwards
	 * so that the unlock below is balanced on both paths.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		spin_unlock(&ses->ses_lock);

		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
		cifs_server_unlock(server);

		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

	/* wait for the responses in order; stop at the first failed wait */
	for (i = 0; i < num_rqst; i++) {
		rc = wait_for_response(server, midQ[i]);
		if (rc != 0)
			break;
	}
	if (rc != 0) {
		/* cancel the mid whose wait failed and every mid after it */
		for (; i < num_rqst; i++) {
			cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
				 midQ[i]->mid, le16_to_cpu(midQ[i]->command));
			send_cancel(server, &rqst[i], midQ[i]);
			spin_lock(&server->mid_lock);
			midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
			if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
				/*
				 * Still outstanding: from now on the callback
				 * releases the mid, so it must not be deleted
				 * at "out".
				 */
				midQ[i]->callback = cifs_cancelled_callback;
				cancelled_mid[i] = true;
				credits[i].value = 0;
			}
			spin_unlock(&server->mid_lock);
		}
	}

	for (i = 0; i < num_rqst; i++) {
		if (rc < 0)
			goto out;

		rc = cifs_sync_mid_result(midQ[i], server);
		if (rc != 0) {
			/* mark this mid as cancelled to not free it below */
			cancelled_mid[i] = true;
			goto out;
		}

		if (!midQ[i]->resp_buf ||
		    midQ[i]->mid_state != MID_RESPONSE_RECEIVED) {
			rc = -EIO;
			cifs_dbg(FYI, "Bad MID state?\n");
			goto out;
		}

		/* expose the response buffer to the caller */
		buf = (char *)midQ[i]->resp_buf;
		resp_iov[i].iov_base = buf;
		resp_iov[i].iov_len = midQ[i]->resp_buf_size +
			HEADER_PREAMBLE_SIZE(server);

		if (midQ[i]->large_buf)
			resp_buf_type[i] = CIFS_LARGE_BUFFER;
		else
			resp_buf_type[i] = CIFS_SMALL_BUFFER;

		/* rc is overwritten on every iteration; the last one is returned */
		rc = server->ops->check_receive(midQ[i], server,
						     flags & CIFS_LOG_ERROR);

		/* mark it so buf will not be freed by delete_mid */
		if ((flags & CIFS_NO_RSP_BUF) == 0)
			midQ[i]->resp_buf = NULL;

	}

	/*
	 * Compounding is never used during session establish.
	 * Only the first response is fed into the preauth hash here.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		struct kvec iov = {
			.iov_base = resp_iov[0].iov_base,
			.iov_len = resp_iov[0].iov_len
		};
		spin_unlock(&ses->ses_lock);
		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, &iov, 1);
		cifs_server_unlock(server);
		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

out:
	/*
	 * This will dequeue all mids. After this it is important that the
	 * demultiplex_thread will not process any of these mids any further.
	 * This is prevented above by using a noop callback that will not
	 * wake this thread except for the very last PDU.
	 */
	for (i = 0; i < num_rqst; i++) {
		if (!cancelled_mid[i])
			delete_mid(midQ[i]);
	}

	return rc;
}
1284 
/*
 * Send a single request and wait for its response: a compound chain of
 * length one. All arguments are passed straight to compound_send_recv().
 */
int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
		   struct TCP_Server_Info *server,
		   struct smb_rqst *rqst, int *resp_buf_type, const int flags,
		   struct kvec *resp_iov)
{
	const int single_rqst = 1;

	return compound_send_recv(xid, ses, server, flags, single_rqst,
				  rqst, resp_buf_type, resp_iov);
}
1294 
1295 int
1296 SendReceive2(const unsigned int xid, struct cifs_ses *ses,
1297 	     struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
1298 	     const int flags, struct kvec *resp_iov)
1299 {
1300 	struct smb_rqst rqst;
1301 	struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
1302 	int rc;
1303 
1304 	if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
1305 		new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec),
1306 					GFP_KERNEL);
1307 		if (!new_iov) {
1308 			/* otherwise cifs_send_recv below sets resp_buf_type */
1309 			*resp_buf_type = CIFS_NO_BUFFER;
1310 			return -ENOMEM;
1311 		}
1312 	} else
1313 		new_iov = s_iov;
1314 
1315 	/* 1st iov is a RFC1001 length followed by the rest of the packet */
1316 	memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
1317 
1318 	new_iov[0].iov_base = new_iov[1].iov_base;
1319 	new_iov[0].iov_len = 4;
1320 	new_iov[1].iov_base += 4;
1321 	new_iov[1].iov_len -= 4;
1322 
1323 	memset(&rqst, 0, sizeof(struct smb_rqst));
1324 	rqst.rq_iov = new_iov;
1325 	rqst.rq_nvec = n_vec + 1;
1326 
1327 	rc = cifs_send_recv(xid, ses, ses->server,
1328 			    &rqst, resp_buf_type, flags, resp_iov);
1329 	if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
1330 		kfree(new_iov);
1331 	return rc;
1332 }
1333 
1334 int
1335 SendReceive(const unsigned int xid, struct cifs_ses *ses,
1336 	    struct smb_hdr *in_buf, struct smb_hdr *out_buf,
1337 	    int *pbytes_returned, const int flags)
1338 {
1339 	int rc = 0;
1340 	struct mid_q_entry *midQ;
1341 	unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
1342 	struct kvec iov = { .iov_base = in_buf, .iov_len = len };
1343 	struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
1344 	struct cifs_credits credits = { .value = 1, .instance = 0 };
1345 	struct TCP_Server_Info *server;
1346 
1347 	if (ses == NULL) {
1348 		cifs_dbg(VFS, "Null smb session\n");
1349 		return -EIO;
1350 	}
1351 	server = ses->server;
1352 	if (server == NULL) {
1353 		cifs_dbg(VFS, "Null tcp session\n");
1354 		return -EIO;
1355 	}
1356 
1357 	spin_lock(&server->srv_lock);
1358 	if (server->tcpStatus == CifsExiting) {
1359 		spin_unlock(&server->srv_lock);
1360 		return -ENOENT;
1361 	}
1362 	spin_unlock(&server->srv_lock);
1363 
1364 	/* Ensure that we do not send more than 50 overlapping requests
1365 	   to the same server. We may make this configurable later or
1366 	   use ses->maxReq */
1367 
1368 	if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
1369 		cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
1370 				len);
1371 		return -EIO;
1372 	}
1373 
1374 	rc = wait_for_free_request(server, flags, &credits.instance);
1375 	if (rc)
1376 		return rc;
1377 
1378 	/* make sure that we sign in the same order that we send on this socket
1379 	   and avoid races inside tcp sendmsg code that could cause corruption
1380 	   of smb data */
1381 
1382 	cifs_server_lock(server);
1383 
1384 	rc = allocate_mid(ses, in_buf, &midQ);
1385 	if (rc) {
1386 		cifs_server_unlock(server);
1387 		/* Update # of requests on wire to server */
1388 		add_credits(server, &credits, 0);
1389 		return rc;
1390 	}
1391 
1392 	rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
1393 	if (rc) {
1394 		cifs_server_unlock(server);
1395 		goto out;
1396 	}
1397 
1398 	midQ->mid_state = MID_REQUEST_SUBMITTED;
1399 
1400 	rc = smb_send(server, in_buf, len);
1401 	cifs_save_when_sent(midQ);
1402 
1403 	if (rc < 0)
1404 		server->sequence_number -= 2;
1405 
1406 	cifs_server_unlock(server);
1407 
1408 	if (rc < 0)
1409 		goto out;
1410 
1411 	rc = wait_for_response(server, midQ);
1412 	if (rc != 0) {
1413 		send_cancel(server, &rqst, midQ);
1414 		spin_lock(&server->mid_lock);
1415 		if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
1416 			/* no longer considered to be "in-flight" */
1417 			midQ->callback = release_mid;
1418 			spin_unlock(&server->mid_lock);
1419 			add_credits(server, &credits, 0);
1420 			return rc;
1421 		}
1422 		spin_unlock(&server->mid_lock);
1423 	}
1424 
1425 	rc = cifs_sync_mid_result(midQ, server);
1426 	if (rc != 0) {
1427 		add_credits(server, &credits, 0);
1428 		return rc;
1429 	}
1430 
1431 	if (!midQ->resp_buf || !out_buf ||
1432 	    midQ->mid_state != MID_RESPONSE_RECEIVED) {
1433 		rc = -EIO;
1434 		cifs_server_dbg(VFS, "Bad MID state?\n");
1435 		goto out;
1436 	}
1437 
1438 	*pbytes_returned = get_rfc1002_length(midQ->resp_buf);
1439 	memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
1440 	rc = cifs_check_receive(midQ, server, 0);
1441 out:
1442 	delete_mid(midQ);
1443 	add_credits(server, &credits, 0);
1444 
1445 	return rc;
1446 }
1447 
1448 /* We send a LOCKINGX_CANCEL_LOCK to cause the Windows
1449    blocking lock to return. */
1450 
1451 static int
1452 send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
1453 			struct smb_hdr *in_buf,
1454 			struct smb_hdr *out_buf)
1455 {
1456 	int bytes_returned;
1457 	struct cifs_ses *ses = tcon->ses;
1458 	LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
1459 
1460 	/* We just modify the current in_buf to change
1461 	   the type of lock from LOCKING_ANDX_SHARED_LOCK
1462 	   or LOCKING_ANDX_EXCLUSIVE_LOCK to
1463 	   LOCKING_ANDX_CANCEL_LOCK. */
1464 
1465 	pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
1466 	pSMB->Timeout = 0;
1467 	pSMB->hdr.Mid = get_next_mid(ses->server);
1468 
1469 	return SendReceive(xid, ses, in_buf, out_buf,
1470 			&bytes_returned, 0);
1471 }
1472 
1473 int
1474 SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
1475 	    struct smb_hdr *in_buf, struct smb_hdr *out_buf,
1476 	    int *pbytes_returned)
1477 {
1478 	int rc = 0;
1479 	int rstart = 0;
1480 	struct mid_q_entry *midQ;
1481 	struct cifs_ses *ses;
1482 	unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
1483 	struct kvec iov = { .iov_base = in_buf, .iov_len = len };
1484 	struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
1485 	unsigned int instance;
1486 	struct TCP_Server_Info *server;
1487 
1488 	if (tcon == NULL || tcon->ses == NULL) {
1489 		cifs_dbg(VFS, "Null smb session\n");
1490 		return -EIO;
1491 	}
1492 	ses = tcon->ses;
1493 	server = ses->server;
1494 
1495 	if (server == NULL) {
1496 		cifs_dbg(VFS, "Null tcp session\n");
1497 		return -EIO;
1498 	}
1499 
1500 	spin_lock(&server->srv_lock);
1501 	if (server->tcpStatus == CifsExiting) {
1502 		spin_unlock(&server->srv_lock);
1503 		return -ENOENT;
1504 	}
1505 	spin_unlock(&server->srv_lock);
1506 
1507 	/* Ensure that we do not send more than 50 overlapping requests
1508 	   to the same server. We may make this configurable later or
1509 	   use ses->maxReq */
1510 
1511 	if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
1512 		cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
1513 			      len);
1514 		return -EIO;
1515 	}
1516 
1517 	rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance);
1518 	if (rc)
1519 		return rc;
1520 
1521 	/* make sure that we sign in the same order that we send on this socket
1522 	   and avoid races inside tcp sendmsg code that could cause corruption
1523 	   of smb data */
1524 
1525 	cifs_server_lock(server);
1526 
1527 	rc = allocate_mid(ses, in_buf, &midQ);
1528 	if (rc) {
1529 		cifs_server_unlock(server);
1530 		return rc;
1531 	}
1532 
1533 	rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
1534 	if (rc) {
1535 		delete_mid(midQ);
1536 		cifs_server_unlock(server);
1537 		return rc;
1538 	}
1539 
1540 	midQ->mid_state = MID_REQUEST_SUBMITTED;
1541 	rc = smb_send(server, in_buf, len);
1542 	cifs_save_when_sent(midQ);
1543 
1544 	if (rc < 0)
1545 		server->sequence_number -= 2;
1546 
1547 	cifs_server_unlock(server);
1548 
1549 	if (rc < 0) {
1550 		delete_mid(midQ);
1551 		return rc;
1552 	}
1553 
1554 	/* Wait for a reply - allow signals to interrupt. */
1555 	rc = wait_event_interruptible(server->response_q,
1556 		(!(midQ->mid_state == MID_REQUEST_SUBMITTED)) ||
1557 		((server->tcpStatus != CifsGood) &&
1558 		 (server->tcpStatus != CifsNew)));
1559 
1560 	/* Were we interrupted by a signal ? */
1561 	spin_lock(&server->srv_lock);
1562 	if ((rc == -ERESTARTSYS) &&
1563 		(midQ->mid_state == MID_REQUEST_SUBMITTED) &&
1564 		((server->tcpStatus == CifsGood) ||
1565 		 (server->tcpStatus == CifsNew))) {
1566 		spin_unlock(&server->srv_lock);
1567 
1568 		if (in_buf->Command == SMB_COM_TRANSACTION2) {
1569 			/* POSIX lock. We send a NT_CANCEL SMB to cause the
1570 			   blocking lock to return. */
1571 			rc = send_cancel(server, &rqst, midQ);
1572 			if (rc) {
1573 				delete_mid(midQ);
1574 				return rc;
1575 			}
1576 		} else {
1577 			/* Windows lock. We send a LOCKINGX_CANCEL_LOCK
1578 			   to cause the blocking lock to return. */
1579 
1580 			rc = send_lock_cancel(xid, tcon, in_buf, out_buf);
1581 
1582 			/* If we get -ENOLCK back the lock may have
1583 			   already been removed. Don't exit in this case. */
1584 			if (rc && rc != -ENOLCK) {
1585 				delete_mid(midQ);
1586 				return rc;
1587 			}
1588 		}
1589 
1590 		rc = wait_for_response(server, midQ);
1591 		if (rc) {
1592 			send_cancel(server, &rqst, midQ);
1593 			spin_lock(&server->mid_lock);
1594 			if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
1595 				/* no longer considered to be "in-flight" */
1596 				midQ->callback = release_mid;
1597 				spin_unlock(&server->mid_lock);
1598 				return rc;
1599 			}
1600 			spin_unlock(&server->mid_lock);
1601 		}
1602 
1603 		/* We got the response - restart system call. */
1604 		rstart = 1;
1605 		spin_lock(&server->srv_lock);
1606 	}
1607 	spin_unlock(&server->srv_lock);
1608 
1609 	rc = cifs_sync_mid_result(midQ, server);
1610 	if (rc != 0)
1611 		return rc;
1612 
1613 	/* rcvd frame is ok */
1614 	if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_RECEIVED) {
1615 		rc = -EIO;
1616 		cifs_tcon_dbg(VFS, "Bad MID state?\n");
1617 		goto out;
1618 	}
1619 
1620 	*pbytes_returned = get_rfc1002_length(midQ->resp_buf);
1621 	memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
1622 	rc = cifs_check_receive(midQ, server, 0);
1623 out:
1624 	delete_mid(midQ);
1625 	if (rstart && rc == -EACCES)
1626 		return -ERESTARTSYS;
1627 	return rc;
1628 }
1629 
1630 /*
1631  * Discard any remaining data in the current SMB. To do this, we borrow the
1632  * current bigbuf.
1633  */
1634 int
1635 cifs_discard_remaining_data(struct TCP_Server_Info *server)
1636 {
1637 	unsigned int rfclen = server->pdu_size;
1638 	size_t remaining = rfclen + HEADER_PREAMBLE_SIZE(server) -
1639 		server->total_read;
1640 
1641 	while (remaining > 0) {
1642 		ssize_t length;
1643 
1644 		length = cifs_discard_from_socket(server,
1645 				min_t(size_t, remaining,
1646 				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
1647 		if (length < 0)
1648 			return length;
1649 		server->total_read += length;
1650 		remaining -= length;
1651 	}
1652 
1653 	return 0;
1654 }
1655 
1656 static int
1657 __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1658 		     bool malformed)
1659 {
1660 	int length;
1661 
1662 	length = cifs_discard_remaining_data(server);
1663 	dequeue_mid(mid, malformed);
1664 	mid->resp_buf = server->smallbuf;
1665 	server->smallbuf = NULL;
1666 	return length;
1667 }
1668 
1669 static int
1670 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1671 {
1672 	struct cifs_readdata *rdata = mid->callback_data;
1673 
1674 	return  __cifs_readv_discard(server, mid, rdata->result);
1675 }
1676 
1677 int
1678 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1679 {
1680 	int length, len;
1681 	unsigned int data_offset, data_len;
1682 	struct cifs_readdata *rdata = mid->callback_data;
1683 	char *buf = server->smallbuf;
1684 	unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server);
1685 	bool use_rdma_mr = false;
1686 
1687 	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
1688 		 __func__, mid->mid, rdata->offset, rdata->bytes);
1689 
1690 	/*
1691 	 * read the rest of READ_RSP header (sans Data array), or whatever we
1692 	 * can if there's not enough data. At this point, we've read down to
1693 	 * the Mid.
1694 	 */
1695 	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
1696 							HEADER_SIZE(server) + 1;
1697 
1698 	length = cifs_read_from_socket(server,
1699 				       buf + HEADER_SIZE(server) - 1, len);
1700 	if (length < 0)
1701 		return length;
1702 	server->total_read += length;
1703 
1704 	if (server->ops->is_session_expired &&
1705 	    server->ops->is_session_expired(buf)) {
1706 		cifs_reconnect(server, true);
1707 		return -1;
1708 	}
1709 
1710 	if (server->ops->is_status_pending &&
1711 	    server->ops->is_status_pending(buf, server)) {
1712 		cifs_discard_remaining_data(server);
1713 		return -1;
1714 	}
1715 
1716 	/* set up first two iov for signature check and to get credits */
1717 	rdata->iov[0].iov_base = buf;
1718 	rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server);
1719 	rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server);
1720 	rdata->iov[1].iov_len =
1721 		server->total_read - HEADER_PREAMBLE_SIZE(server);
1722 	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
1723 		 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1724 	cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
1725 		 rdata->iov[1].iov_base, rdata->iov[1].iov_len);
1726 
1727 	/* Was the SMB read successful? */
1728 	rdata->result = server->ops->map_error(buf, false);
1729 	if (rdata->result != 0) {
1730 		cifs_dbg(FYI, "%s: server returned error %d\n",
1731 			 __func__, rdata->result);
1732 		/* normal error on read response */
1733 		return __cifs_readv_discard(server, mid, false);
1734 	}
1735 
1736 	/* Is there enough to get to the rest of the READ_RSP header? */
1737 	if (server->total_read < server->vals->read_rsp_size) {
1738 		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
1739 			 __func__, server->total_read,
1740 			 server->vals->read_rsp_size);
1741 		rdata->result = -EIO;
1742 		return cifs_readv_discard(server, mid);
1743 	}
1744 
1745 	data_offset = server->ops->read_data_offset(buf) +
1746 		HEADER_PREAMBLE_SIZE(server);
1747 	if (data_offset < server->total_read) {
1748 		/*
1749 		 * win2k8 sometimes sends an offset of 0 when the read
1750 		 * is beyond the EOF. Treat it as if the data starts just after
1751 		 * the header.
1752 		 */
1753 		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
1754 			 __func__, data_offset);
1755 		data_offset = server->total_read;
1756 	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
1757 		/* data_offset is beyond the end of smallbuf */
1758 		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
1759 			 __func__, data_offset);
1760 		rdata->result = -EIO;
1761 		return cifs_readv_discard(server, mid);
1762 	}
1763 
1764 	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
1765 		 __func__, server->total_read, data_offset);
1766 
1767 	len = data_offset - server->total_read;
1768 	if (len > 0) {
1769 		/* read any junk before data into the rest of smallbuf */
1770 		length = cifs_read_from_socket(server,
1771 					       buf + server->total_read, len);
1772 		if (length < 0)
1773 			return length;
1774 		server->total_read += length;
1775 	}
1776 
1777 	/* how much data is in the response? */
1778 #ifdef CONFIG_CIFS_SMB_DIRECT
1779 	use_rdma_mr = rdata->mr;
1780 #endif
1781 	data_len = server->ops->read_data_length(buf, use_rdma_mr);
1782 	if (!use_rdma_mr && (data_offset + data_len > buflen)) {
1783 		/* data_len is corrupt -- discard frame */
1784 		rdata->result = -EIO;
1785 		return cifs_readv_discard(server, mid);
1786 	}
1787 
1788 #ifdef CONFIG_CIFS_SMB_DIRECT
1789 	if (rdata->mr)
1790 		length = data_len; /* An RDMA read is already done. */
1791 	else
1792 #endif
1793 		length = cifs_read_iter_from_socket(server, &rdata->iter,
1794 						    data_len);
1795 	if (length > 0)
1796 		rdata->got_bytes += length;
1797 	server->total_read += length;
1798 
1799 	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
1800 		 server->total_read, buflen, data_len);
1801 
1802 	/* discard anything left over */
1803 	if (server->total_read < buflen)
1804 		return cifs_readv_discard(server, mid);
1805 
1806 	dequeue_mid(mid, false);
1807 	mid->resp_buf = server->smallbuf;
1808 	server->smallbuf = NULL;
1809 	return length;
1810 }
1811