xref: /openbmc/linux/net/ceph/messenger_v2.c (revision d7955ce4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Ceph msgr2 protocol implementation
4  *
5  * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
6  */
7 
8 #include <linux/ceph/ceph_debug.h>
9 
10 #include <crypto/aead.h>
11 #include <crypto/algapi.h>  /* for crypto_memneq() */
12 #include <crypto/hash.h>
13 #include <crypto/sha2.h>
14 #include <linux/bvec.h>
15 #include <linux/crc32c.h>
16 #include <linux/net.h>
17 #include <linux/scatterlist.h>
18 #include <linux/socket.h>
19 #include <linux/sched/mm.h>
20 #include <net/sock.h>
21 #include <net/tcp.h>
22 
23 #include <linux/ceph/ceph_features.h>
24 #include <linux/ceph/decode.h>
25 #include <linux/ceph/libceph.h>
26 #include <linux/ceph/messenger.h>
27 
28 #include "crypto.h"  /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */
29 
30 #define FRAME_TAG_HELLO			1
31 #define FRAME_TAG_AUTH_REQUEST		2
32 #define FRAME_TAG_AUTH_BAD_METHOD	3
33 #define FRAME_TAG_AUTH_REPLY_MORE	4
34 #define FRAME_TAG_AUTH_REQUEST_MORE	5
35 #define FRAME_TAG_AUTH_DONE		6
36 #define FRAME_TAG_AUTH_SIGNATURE	7
37 #define FRAME_TAG_CLIENT_IDENT		8
38 #define FRAME_TAG_SERVER_IDENT		9
39 #define FRAME_TAG_IDENT_MISSING_FEATURES 10
40 #define FRAME_TAG_SESSION_RECONNECT	11
41 #define FRAME_TAG_SESSION_RESET		12
42 #define FRAME_TAG_SESSION_RETRY		13
43 #define FRAME_TAG_SESSION_RETRY_GLOBAL	14
44 #define FRAME_TAG_SESSION_RECONNECT_OK	15
45 #define FRAME_TAG_WAIT			16
46 #define FRAME_TAG_MESSAGE		17
47 #define FRAME_TAG_KEEPALIVE2		18
48 #define FRAME_TAG_KEEPALIVE2_ACK	19
49 #define FRAME_TAG_ACK			20
50 
51 #define FRAME_LATE_STATUS_ABORTED	0x1
52 #define FRAME_LATE_STATUS_COMPLETE	0xe
53 #define FRAME_LATE_STATUS_ABORTED_MASK	0xf
54 
55 #define IN_S_HANDLE_PREAMBLE		1
56 #define IN_S_HANDLE_CONTROL		2
57 #define IN_S_HANDLE_CONTROL_REMAINDER	3
58 #define IN_S_PREPARE_READ_DATA		4
59 #define IN_S_PREPARE_READ_DATA_CONT	5
60 #define IN_S_PREPARE_READ_ENC_PAGE	6
61 #define IN_S_HANDLE_EPILOGUE		7
62 #define IN_S_FINISH_SKIP		8
63 
64 #define OUT_S_QUEUE_DATA		1
65 #define OUT_S_QUEUE_DATA_CONT		2
66 #define OUT_S_QUEUE_ENC_PAGE		3
67 #define OUT_S_QUEUE_ZEROS		4
68 #define OUT_S_FINISH_MESSAGE		5
69 #define OUT_S_GET_NEXT			6
70 
71 #define CTRL_BODY(p)	((void *)(p) + CEPH_PREAMBLE_LEN)
72 #define FRONT_PAD(p)	((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
73 #define MIDDLE_PAD(p)	(FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
74 #define DATA_PAD(p)	(MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)
75 
76 #define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
77 
78 static int do_recvmsg(struct socket *sock, struct iov_iter *it)
79 {
80 	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
81 	int ret;
82 
83 	msg.msg_iter = *it;
84 	while (iov_iter_count(it)) {
85 		ret = sock_recvmsg(sock, &msg, msg.msg_flags);
86 		if (ret <= 0) {
87 			if (ret == -EAGAIN)
88 				ret = 0;
89 			return ret;
90 		}
91 
92 		iov_iter_advance(it, ret);
93 	}
94 
95 	WARN_ON(msg_data_left(&msg));
96 	return 1;
97 }
98 
99 /*
100  * Read as much as possible.
101  *
102  * Return:
103  *   1 - done, nothing (else) to read
104  *   0 - socket is empty, need to wait
105  *  <0 - error
106  */
107 static int ceph_tcp_recv(struct ceph_connection *con)
108 {
109 	int ret;
110 
111 	dout("%s con %p %s %zu\n", __func__, con,
112 	     iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
113 	     iov_iter_count(&con->v2.in_iter));
114 	ret = do_recvmsg(con->sock, &con->v2.in_iter);
115 	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
116 	     iov_iter_count(&con->v2.in_iter));
117 	return ret;
118 }
119 
120 static int do_sendmsg(struct socket *sock, struct iov_iter *it)
121 {
122 	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
123 	int ret;
124 
125 	msg.msg_iter = *it;
126 	while (iov_iter_count(it)) {
127 		ret = sock_sendmsg(sock, &msg);
128 		if (ret <= 0) {
129 			if (ret == -EAGAIN)
130 				ret = 0;
131 			return ret;
132 		}
133 
134 		iov_iter_advance(it, ret);
135 	}
136 
137 	WARN_ON(msg_data_left(&msg));
138 	return 1;
139 }
140 
141 static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
142 {
143 	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
144 	struct bio_vec bv;
145 	int ret;
146 
147 	if (WARN_ON(!iov_iter_is_bvec(it)))
148 		return -EINVAL;
149 
150 	while (iov_iter_count(it)) {
151 		/* iov_iter_iovec() for ITER_BVEC */
152 		bvec_set_page(&bv, it->bvec->bv_page,
153 			      min(iov_iter_count(it),
154 				  it->bvec->bv_len - it->iov_offset),
155 			      it->bvec->bv_offset + it->iov_offset);
156 
157 		/*
158 		 * MSG_SPLICE_PAGES cannot properly handle pages with
159 		 * page_count == 0, we need to fall back to sendmsg if
160 		 * that's the case.
161 		 *
162 		 * Same goes for slab pages: skb_can_coalesce() allows
163 		 * coalescing neighboring slab objects into a single frag
164 		 * which triggers one of hardened usercopy checks.
165 		 */
166 		if (sendpage_ok(bv.bv_page))
167 			msg.msg_flags |= MSG_SPLICE_PAGES;
168 		else
169 			msg.msg_flags &= ~MSG_SPLICE_PAGES;
170 
171 		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
172 		ret = sock_sendmsg(sock, &msg);
173 		if (ret <= 0) {
174 			if (ret == -EAGAIN)
175 				ret = 0;
176 			return ret;
177 		}
178 
179 		iov_iter_advance(it, ret);
180 	}
181 
182 	return 1;
183 }
184 
185 /*
186  * Write as much as possible.  The socket is expected to be corked,
187  * so we don't bother with MSG_MORE here.
188  *
189  * Return:
190  *   1 - done, nothing (else) to write
191  *   0 - socket is full, need to wait
192  *  <0 - error
193  */
194 static int ceph_tcp_send(struct ceph_connection *con)
195 {
196 	int ret;
197 
198 	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
199 	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
200 	if (con->v2.out_iter_sendpage)
201 		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
202 	else
203 		ret = do_sendmsg(con->sock, &con->v2.out_iter);
204 	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
205 	     iov_iter_count(&con->v2.out_iter));
206 	return ret;
207 }
208 
209 static void add_in_kvec(struct ceph_connection *con, void *buf, int len)
210 {
211 	BUG_ON(con->v2.in_kvec_cnt >= ARRAY_SIZE(con->v2.in_kvecs));
212 	WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
213 
214 	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_base = buf;
215 	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_len = len;
216 	con->v2.in_kvec_cnt++;
217 
218 	con->v2.in_iter.nr_segs++;
219 	con->v2.in_iter.count += len;
220 }
221 
222 static void reset_in_kvecs(struct ceph_connection *con)
223 {
224 	WARN_ON(iov_iter_count(&con->v2.in_iter));
225 
226 	con->v2.in_kvec_cnt = 0;
227 	iov_iter_kvec(&con->v2.in_iter, ITER_DEST, con->v2.in_kvecs, 0, 0);
228 }
229 
230 static void set_in_bvec(struct ceph_connection *con, const struct bio_vec *bv)
231 {
232 	WARN_ON(iov_iter_count(&con->v2.in_iter));
233 
234 	con->v2.in_bvec = *bv;
235 	iov_iter_bvec(&con->v2.in_iter, ITER_DEST, &con->v2.in_bvec, 1, bv->bv_len);
236 }
237 
238 static void set_in_skip(struct ceph_connection *con, int len)
239 {
240 	WARN_ON(iov_iter_count(&con->v2.in_iter));
241 
242 	dout("%s con %p len %d\n", __func__, con, len);
243 	iov_iter_discard(&con->v2.in_iter, ITER_DEST, len);
244 }
245 
246 static void add_out_kvec(struct ceph_connection *con, void *buf, int len)
247 {
248 	BUG_ON(con->v2.out_kvec_cnt >= ARRAY_SIZE(con->v2.out_kvecs));
249 	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
250 	WARN_ON(con->v2.out_zero);
251 
252 	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_base = buf;
253 	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_len = len;
254 	con->v2.out_kvec_cnt++;
255 
256 	con->v2.out_iter.nr_segs++;
257 	con->v2.out_iter.count += len;
258 }
259 
260 static void reset_out_kvecs(struct ceph_connection *con)
261 {
262 	WARN_ON(iov_iter_count(&con->v2.out_iter));
263 	WARN_ON(con->v2.out_zero);
264 
265 	con->v2.out_kvec_cnt = 0;
266 
267 	iov_iter_kvec(&con->v2.out_iter, ITER_SOURCE, con->v2.out_kvecs, 0, 0);
268 	con->v2.out_iter_sendpage = false;
269 }
270 
271 static void set_out_bvec(struct ceph_connection *con, const struct bio_vec *bv,
272 			 bool zerocopy)
273 {
274 	WARN_ON(iov_iter_count(&con->v2.out_iter));
275 	WARN_ON(con->v2.out_zero);
276 
277 	con->v2.out_bvec = *bv;
278 	con->v2.out_iter_sendpage = zerocopy;
279 	iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
280 		      con->v2.out_bvec.bv_len);
281 }
282 
283 static void set_out_bvec_zero(struct ceph_connection *con)
284 {
285 	WARN_ON(iov_iter_count(&con->v2.out_iter));
286 	WARN_ON(!con->v2.out_zero);
287 
288 	bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
289 		      min(con->v2.out_zero, (int)PAGE_SIZE), 0);
290 	con->v2.out_iter_sendpage = true;
291 	iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
292 		      con->v2.out_bvec.bv_len);
293 }
294 
295 static void out_zero_add(struct ceph_connection *con, int len)
296 {
297 	dout("%s con %p len %d\n", __func__, con, len);
298 	con->v2.out_zero += len;
299 }
300 
301 static void *alloc_conn_buf(struct ceph_connection *con, int len)
302 {
303 	void *buf;
304 
305 	dout("%s con %p len %d\n", __func__, con, len);
306 
307 	if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
308 		return NULL;
309 
310 	buf = kvmalloc(len, GFP_NOIO);
311 	if (!buf)
312 		return NULL;
313 
314 	con->v2.conn_bufs[con->v2.conn_buf_cnt++] = buf;
315 	return buf;
316 }
317 
318 static void free_conn_bufs(struct ceph_connection *con)
319 {
320 	while (con->v2.conn_buf_cnt)
321 		kvfree(con->v2.conn_bufs[--con->v2.conn_buf_cnt]);
322 }
323 
324 static void add_in_sign_kvec(struct ceph_connection *con, void *buf, int len)
325 {
326 	BUG_ON(con->v2.in_sign_kvec_cnt >= ARRAY_SIZE(con->v2.in_sign_kvecs));
327 
328 	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_base = buf;
329 	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_len = len;
330 	con->v2.in_sign_kvec_cnt++;
331 }
332 
333 static void clear_in_sign_kvecs(struct ceph_connection *con)
334 {
335 	con->v2.in_sign_kvec_cnt = 0;
336 }
337 
338 static void add_out_sign_kvec(struct ceph_connection *con, void *buf, int len)
339 {
340 	BUG_ON(con->v2.out_sign_kvec_cnt >= ARRAY_SIZE(con->v2.out_sign_kvecs));
341 
342 	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_base = buf;
343 	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_len = len;
344 	con->v2.out_sign_kvec_cnt++;
345 }
346 
347 static void clear_out_sign_kvecs(struct ceph_connection *con)
348 {
349 	con->v2.out_sign_kvec_cnt = 0;
350 }
351 
352 static bool con_secure(struct ceph_connection *con)
353 {
354 	return con->v2.con_mode == CEPH_CON_MODE_SECURE;
355 }
356 
357 static int front_len(const struct ceph_msg *msg)
358 {
359 	return le32_to_cpu(msg->hdr.front_len);
360 }
361 
362 static int middle_len(const struct ceph_msg *msg)
363 {
364 	return le32_to_cpu(msg->hdr.middle_len);
365 }
366 
367 static int data_len(const struct ceph_msg *msg)
368 {
369 	return le32_to_cpu(msg->hdr.data_len);
370 }
371 
372 static bool need_padding(int len)
373 {
374 	return !IS_ALIGNED(len, CEPH_GCM_BLOCK_LEN);
375 }
376 
377 static int padded_len(int len)
378 {
379 	return ALIGN(len, CEPH_GCM_BLOCK_LEN);
380 }
381 
382 static int padding_len(int len)
383 {
384 	return padded_len(len) - len;
385 }
386 
387 /* preamble + control segment */
388 static int head_onwire_len(int ctrl_len, bool secure)
389 {
390 	int head_len;
391 	int rem_len;
392 
393 	BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
394 
395 	if (secure) {
396 		head_len = CEPH_PREAMBLE_SECURE_LEN;
397 		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
398 			rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
399 			head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
400 		}
401 	} else {
402 		head_len = CEPH_PREAMBLE_PLAIN_LEN;
403 		if (ctrl_len)
404 			head_len += ctrl_len + CEPH_CRC_LEN;
405 	}
406 	return head_len;
407 }
408 
409 /* front, middle and data segments + epilogue */
410 static int __tail_onwire_len(int front_len, int middle_len, int data_len,
411 			     bool secure)
412 {
413 	BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
414 	       middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
415 	       data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
416 
417 	if (!front_len && !middle_len && !data_len)
418 		return 0;
419 
420 	if (!secure)
421 		return front_len + middle_len + data_len +
422 		       CEPH_EPILOGUE_PLAIN_LEN;
423 
424 	return padded_len(front_len) + padded_len(middle_len) +
425 	       padded_len(data_len) + CEPH_EPILOGUE_SECURE_LEN;
426 }
427 
428 static int tail_onwire_len(const struct ceph_msg *msg, bool secure)
429 {
430 	return __tail_onwire_len(front_len(msg), middle_len(msg),
431 				 data_len(msg), secure);
432 }
433 
434 /* head_onwire_len(sizeof(struct ceph_msg_header2), false) */
435 #define MESSAGE_HEAD_PLAIN_LEN	(CEPH_PREAMBLE_PLAIN_LEN +		\
436 				 sizeof(struct ceph_msg_header2) +	\
437 				 CEPH_CRC_LEN)
438 
439 static const int frame_aligns[] = {
440 	sizeof(void *),
441 	sizeof(void *),
442 	sizeof(void *),
443 	PAGE_SIZE
444 };
445 
446 /*
447  * Discards trailing empty segments, unless there is just one segment.
448  * A frame always has at least one (possibly empty) segment.
449  */
450 static int calc_segment_count(const int *lens, int len_cnt)
451 {
452 	int i;
453 
454 	for (i = len_cnt - 1; i >= 0; i--) {
455 		if (lens[i])
456 			return i + 1;
457 	}
458 
459 	return 1;
460 }
461 
462 static void init_frame_desc(struct ceph_frame_desc *desc, int tag,
463 			    const int *lens, int len_cnt)
464 {
465 	int i;
466 
467 	memset(desc, 0, sizeof(*desc));
468 
469 	desc->fd_tag = tag;
470 	desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
471 	BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT);
472 	for (i = 0; i < desc->fd_seg_cnt; i++) {
473 		desc->fd_lens[i] = lens[i];
474 		desc->fd_aligns[i] = frame_aligns[i];
475 	}
476 }
477 
478 /*
479  * Preamble crc covers everything up to itself (28 bytes) and
480  * is calculated and verified irrespective of the connection mode
481  * (i.e. even if the frame is encrypted).
482  */
483 static void encode_preamble(const struct ceph_frame_desc *desc, void *p)
484 {
485 	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
486 	void *start = p;
487 	int i;
488 
489 	memset(p, 0, CEPH_PREAMBLE_LEN);
490 
491 	ceph_encode_8(&p, desc->fd_tag);
492 	ceph_encode_8(&p, desc->fd_seg_cnt);
493 	for (i = 0; i < desc->fd_seg_cnt; i++) {
494 		ceph_encode_32(&p, desc->fd_lens[i]);
495 		ceph_encode_16(&p, desc->fd_aligns[i]);
496 	}
497 
498 	put_unaligned_le32(crc32c(0, start, crcp - start), crcp);
499 }
500 
501 static int decode_preamble(void *p, struct ceph_frame_desc *desc)
502 {
503 	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
504 	u32 crc, expected_crc;
505 	int i;
506 
507 	crc = crc32c(0, p, crcp - p);
508 	expected_crc = get_unaligned_le32(crcp);
509 	if (crc != expected_crc) {
510 		pr_err("bad preamble crc, calculated %u, expected %u\n",
511 		       crc, expected_crc);
512 		return -EBADMSG;
513 	}
514 
515 	memset(desc, 0, sizeof(*desc));
516 
517 	desc->fd_tag = ceph_decode_8(&p);
518 	desc->fd_seg_cnt = ceph_decode_8(&p);
519 	if (desc->fd_seg_cnt < 1 ||
520 	    desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT) {
521 		pr_err("bad segment count %d\n", desc->fd_seg_cnt);
522 		return -EINVAL;
523 	}
524 	for (i = 0; i < desc->fd_seg_cnt; i++) {
525 		desc->fd_lens[i] = ceph_decode_32(&p);
526 		desc->fd_aligns[i] = ceph_decode_16(&p);
527 	}
528 
529 	if (desc->fd_lens[0] < 0 ||
530 	    desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
531 		pr_err("bad control segment length %d\n", desc->fd_lens[0]);
532 		return -EINVAL;
533 	}
534 	if (desc->fd_lens[1] < 0 ||
535 	    desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
536 		pr_err("bad front segment length %d\n", desc->fd_lens[1]);
537 		return -EINVAL;
538 	}
539 	if (desc->fd_lens[2] < 0 ||
540 	    desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
541 		pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
542 		return -EINVAL;
543 	}
544 	if (desc->fd_lens[3] < 0 ||
545 	    desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
546 		pr_err("bad data segment length %d\n", desc->fd_lens[3]);
547 		return -EINVAL;
548 	}
549 
550 	/*
551 	 * This would fire for FRAME_TAG_WAIT (it has one empty
552 	 * segment), but we should never get it as client.
553 	 */
554 	if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
555 		pr_err("last segment empty, segment count %d\n",
556 		       desc->fd_seg_cnt);
557 		return -EINVAL;
558 	}
559 
560 	return 0;
561 }
562 
563 static void encode_epilogue_plain(struct ceph_connection *con, bool aborted)
564 {
565 	con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
566 						 FRAME_LATE_STATUS_COMPLETE;
567 	cpu_to_le32s(&con->v2.out_epil.front_crc);
568 	cpu_to_le32s(&con->v2.out_epil.middle_crc);
569 	cpu_to_le32s(&con->v2.out_epil.data_crc);
570 }
571 
572 static void encode_epilogue_secure(struct ceph_connection *con, bool aborted)
573 {
574 	memset(&con->v2.out_epil, 0, sizeof(con->v2.out_epil));
575 	con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
576 						 FRAME_LATE_STATUS_COMPLETE;
577 }
578 
579 static int decode_epilogue(void *p, u32 *front_crc, u32 *middle_crc,
580 			   u32 *data_crc)
581 {
582 	u8 late_status;
583 
584 	late_status = ceph_decode_8(&p);
585 	if ((late_status & FRAME_LATE_STATUS_ABORTED_MASK) !=
586 			FRAME_LATE_STATUS_COMPLETE) {
587 		/* we should never get an aborted message as client */
588 		pr_err("bad late_status 0x%x\n", late_status);
589 		return -EINVAL;
590 	}
591 
592 	if (front_crc && middle_crc && data_crc) {
593 		*front_crc = ceph_decode_32(&p);
594 		*middle_crc = ceph_decode_32(&p);
595 		*data_crc = ceph_decode_32(&p);
596 	}
597 
598 	return 0;
599 }
600 
601 static void fill_header(struct ceph_msg_header *hdr,
602 			const struct ceph_msg_header2 *hdr2,
603 			int front_len, int middle_len, int data_len,
604 			const struct ceph_entity_name *peer_name)
605 {
606 	hdr->seq = hdr2->seq;
607 	hdr->tid = hdr2->tid;
608 	hdr->type = hdr2->type;
609 	hdr->priority = hdr2->priority;
610 	hdr->version = hdr2->version;
611 	hdr->front_len = cpu_to_le32(front_len);
612 	hdr->middle_len = cpu_to_le32(middle_len);
613 	hdr->data_len = cpu_to_le32(data_len);
614 	hdr->data_off = hdr2->data_off;
615 	hdr->src = *peer_name;
616 	hdr->compat_version = hdr2->compat_version;
617 	hdr->reserved = 0;
618 	hdr->crc = 0;
619 }
620 
621 static void fill_header2(struct ceph_msg_header2 *hdr2,
622 			 const struct ceph_msg_header *hdr, u64 ack_seq)
623 {
624 	hdr2->seq = hdr->seq;
625 	hdr2->tid = hdr->tid;
626 	hdr2->type = hdr->type;
627 	hdr2->priority = hdr->priority;
628 	hdr2->version = hdr->version;
629 	hdr2->data_pre_padding_len = 0;
630 	hdr2->data_off = hdr->data_off;
631 	hdr2->ack_seq = cpu_to_le64(ack_seq);
632 	hdr2->flags = 0;
633 	hdr2->compat_version = hdr->compat_version;
634 	hdr2->reserved = 0;
635 }
636 
637 static int verify_control_crc(struct ceph_connection *con)
638 {
639 	int ctrl_len = con->v2.in_desc.fd_lens[0];
640 	u32 crc, expected_crc;
641 
642 	WARN_ON(con->v2.in_kvecs[0].iov_len != ctrl_len);
643 	WARN_ON(con->v2.in_kvecs[1].iov_len != CEPH_CRC_LEN);
644 
645 	crc = crc32c(-1, con->v2.in_kvecs[0].iov_base, ctrl_len);
646 	expected_crc = get_unaligned_le32(con->v2.in_kvecs[1].iov_base);
647 	if (crc != expected_crc) {
648 		pr_err("bad control crc, calculated %u, expected %u\n",
649 		       crc, expected_crc);
650 		return -EBADMSG;
651 	}
652 
653 	return 0;
654 }
655 
656 static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
657 				u32 middle_crc, u32 data_crc)
658 {
659 	if (front_len(con->in_msg)) {
660 		con->in_front_crc = crc32c(-1, con->in_msg->front.iov_base,
661 					   front_len(con->in_msg));
662 	} else {
663 		WARN_ON(!middle_len(con->in_msg) && !data_len(con->in_msg));
664 		con->in_front_crc = -1;
665 	}
666 
667 	if (middle_len(con->in_msg))
668 		con->in_middle_crc = crc32c(-1,
669 					    con->in_msg->middle->vec.iov_base,
670 					    middle_len(con->in_msg));
671 	else if (data_len(con->in_msg))
672 		con->in_middle_crc = -1;
673 	else
674 		con->in_middle_crc = 0;
675 
676 	if (!data_len(con->in_msg))
677 		con->in_data_crc = 0;
678 
679 	dout("%s con %p msg %p crcs %u %u %u\n", __func__, con, con->in_msg,
680 	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
681 
682 	if (con->in_front_crc != front_crc) {
683 		pr_err("bad front crc, calculated %u, expected %u\n",
684 		       con->in_front_crc, front_crc);
685 		return -EBADMSG;
686 	}
687 	if (con->in_middle_crc != middle_crc) {
688 		pr_err("bad middle crc, calculated %u, expected %u\n",
689 		       con->in_middle_crc, middle_crc);
690 		return -EBADMSG;
691 	}
692 	if (con->in_data_crc != data_crc) {
693 		pr_err("bad data crc, calculated %u, expected %u\n",
694 		       con->in_data_crc, data_crc);
695 		return -EBADMSG;
696 	}
697 
698 	return 0;
699 }
700 
701 static int setup_crypto(struct ceph_connection *con,
702 			const u8 *session_key, int session_key_len,
703 			const u8 *con_secret, int con_secret_len)
704 {
705 	unsigned int noio_flag;
706 	int ret;
707 
708 	dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
709 	     __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
710 	WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req);
711 
712 	if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
713 	    con->v2.con_mode != CEPH_CON_MODE_SECURE) {
714 		pr_err("bad con_mode %d\n", con->v2.con_mode);
715 		return -EINVAL;
716 	}
717 
718 	if (!session_key_len) {
719 		WARN_ON(con->v2.con_mode != CEPH_CON_MODE_CRC);
720 		WARN_ON(con_secret_len);
721 		return 0;  /* auth_none */
722 	}
723 
724 	noio_flag = memalloc_noio_save();
725 	con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
726 	memalloc_noio_restore(noio_flag);
727 	if (IS_ERR(con->v2.hmac_tfm)) {
728 		ret = PTR_ERR(con->v2.hmac_tfm);
729 		con->v2.hmac_tfm = NULL;
730 		pr_err("failed to allocate hmac tfm context: %d\n", ret);
731 		return ret;
732 	}
733 
734 	WARN_ON((unsigned long)session_key &
735 		crypto_shash_alignmask(con->v2.hmac_tfm));
736 	ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
737 				  session_key_len);
738 	if (ret) {
739 		pr_err("failed to set hmac key: %d\n", ret);
740 		return ret;
741 	}
742 
743 	if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
744 		WARN_ON(con_secret_len);
745 		return 0;  /* auth_x, plain mode */
746 	}
747 
748 	if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
749 		pr_err("con_secret too small %d\n", con_secret_len);
750 		return -EINVAL;
751 	}
752 
753 	noio_flag = memalloc_noio_save();
754 	con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
755 	memalloc_noio_restore(noio_flag);
756 	if (IS_ERR(con->v2.gcm_tfm)) {
757 		ret = PTR_ERR(con->v2.gcm_tfm);
758 		con->v2.gcm_tfm = NULL;
759 		pr_err("failed to allocate gcm tfm context: %d\n", ret);
760 		return ret;
761 	}
762 
763 	WARN_ON((unsigned long)con_secret &
764 		crypto_aead_alignmask(con->v2.gcm_tfm));
765 	ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
766 	if (ret) {
767 		pr_err("failed to set gcm key: %d\n", ret);
768 		return ret;
769 	}
770 
771 	WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
772 	ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
773 	if (ret) {
774 		pr_err("failed to set gcm tag size: %d\n", ret);
775 		return ret;
776 	}
777 
778 	con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO);
779 	if (!con->v2.gcm_req) {
780 		pr_err("failed to allocate gcm request\n");
781 		return -ENOMEM;
782 	}
783 
784 	crypto_init_wait(&con->v2.gcm_wait);
785 	aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
786 				  crypto_req_done, &con->v2.gcm_wait);
787 
788 	memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
789 	       CEPH_GCM_IV_LEN);
790 	memcpy(&con->v2.out_gcm_nonce,
791 	       con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
792 	       CEPH_GCM_IV_LEN);
793 	return 0;  /* auth_x, secure mode */
794 }
795 
796 static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
797 		       int kvec_cnt, u8 *hmac)
798 {
799 	SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm);  /* tfm arg is ignored */
800 	int ret;
801 	int i;
802 
803 	dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con,
804 	     con->v2.hmac_tfm, kvec_cnt);
805 
806 	if (!con->v2.hmac_tfm) {
807 		memset(hmac, 0, SHA256_DIGEST_SIZE);
808 		return 0;  /* auth_none */
809 	}
810 
811 	desc->tfm = con->v2.hmac_tfm;
812 	ret = crypto_shash_init(desc);
813 	if (ret)
814 		goto out;
815 
816 	for (i = 0; i < kvec_cnt; i++) {
817 		WARN_ON((unsigned long)kvecs[i].iov_base &
818 			crypto_shash_alignmask(con->v2.hmac_tfm));
819 		ret = crypto_shash_update(desc, kvecs[i].iov_base,
820 					  kvecs[i].iov_len);
821 		if (ret)
822 			goto out;
823 	}
824 
825 	ret = crypto_shash_final(desc, hmac);
826 
827 out:
828 	shash_desc_zero(desc);
829 	return ret;  /* auth_x, both plain and secure modes */
830 }
831 
832 static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
833 {
834 	u64 counter;
835 
836 	counter = le64_to_cpu(nonce->counter);
837 	nonce->counter = cpu_to_le64(counter + 1);
838 }
839 
840 static int gcm_crypt(struct ceph_connection *con, bool encrypt,
841 		     struct scatterlist *src, struct scatterlist *dst,
842 		     int src_len)
843 {
844 	struct ceph_gcm_nonce *nonce;
845 	int ret;
846 
847 	nonce = encrypt ? &con->v2.out_gcm_nonce : &con->v2.in_gcm_nonce;
848 
849 	aead_request_set_ad(con->v2.gcm_req, 0);  /* no AAD */
850 	aead_request_set_crypt(con->v2.gcm_req, src, dst, src_len, (u8 *)nonce);
851 	ret = crypto_wait_req(encrypt ? crypto_aead_encrypt(con->v2.gcm_req) :
852 					crypto_aead_decrypt(con->v2.gcm_req),
853 			      &con->v2.gcm_wait);
854 	if (ret)
855 		return ret;
856 
857 	gcm_inc_nonce(nonce);
858 	return 0;
859 }
860 
861 static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
862 			struct bio_vec *bv)
863 {
864 	struct page *page;
865 	size_t off, len;
866 
867 	WARN_ON(!cursor->total_resid);
868 
869 	/* skip zero-length data items */
870 	while (!cursor->resid)
871 		ceph_msg_data_advance(cursor, 0);
872 
873 	/* get a piece of data, cursor isn't advanced */
874 	page = ceph_msg_data_next(cursor, &off, &len);
875 	bvec_set_page(bv, page, len, off);
876 }
877 
878 static int calc_sg_cnt(void *buf, int buf_len)
879 {
880 	int sg_cnt;
881 
882 	if (!buf_len)
883 		return 0;
884 
885 	sg_cnt = need_padding(buf_len) ? 1 : 0;
886 	if (is_vmalloc_addr(buf)) {
887 		WARN_ON(offset_in_page(buf));
888 		sg_cnt += PAGE_ALIGN(buf_len) >> PAGE_SHIFT;
889 	} else {
890 		sg_cnt++;
891 	}
892 
893 	return sg_cnt;
894 }
895 
896 static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor *cursor)
897 {
898 	int data_len = cursor->total_resid;
899 	struct bio_vec bv;
900 	int sg_cnt;
901 
902 	if (!data_len)
903 		return 0;
904 
905 	sg_cnt = need_padding(data_len) ? 1 : 0;
906 	do {
907 		get_bvec_at(cursor, &bv);
908 		sg_cnt++;
909 
910 		ceph_msg_data_advance(cursor, bv.bv_len);
911 	} while (cursor->total_resid);
912 
913 	return sg_cnt;
914 }
915 
916 static void init_sgs(struct scatterlist **sg, void *buf, int buf_len, u8 *pad)
917 {
918 	void *end = buf + buf_len;
919 	struct page *page;
920 	int len;
921 	void *p;
922 
923 	if (!buf_len)
924 		return;
925 
926 	if (is_vmalloc_addr(buf)) {
927 		p = buf;
928 		do {
929 			page = vmalloc_to_page(p);
930 			len = min_t(int, end - p, PAGE_SIZE);
931 			WARN_ON(!page || !len || offset_in_page(p));
932 			sg_set_page(*sg, page, len, 0);
933 			*sg = sg_next(*sg);
934 			p += len;
935 		} while (p != end);
936 	} else {
937 		sg_set_buf(*sg, buf, buf_len);
938 		*sg = sg_next(*sg);
939 	}
940 
941 	if (need_padding(buf_len)) {
942 		sg_set_buf(*sg, pad, padding_len(buf_len));
943 		*sg = sg_next(*sg);
944 	}
945 }
946 
947 static void init_sgs_cursor(struct scatterlist **sg,
948 			    struct ceph_msg_data_cursor *cursor, u8 *pad)
949 {
950 	int data_len = cursor->total_resid;
951 	struct bio_vec bv;
952 
953 	if (!data_len)
954 		return;
955 
956 	do {
957 		get_bvec_at(cursor, &bv);
958 		sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
959 		*sg = sg_next(*sg);
960 
961 		ceph_msg_data_advance(cursor, bv.bv_len);
962 	} while (cursor->total_resid);
963 
964 	if (need_padding(data_len)) {
965 		sg_set_buf(*sg, pad, padding_len(data_len));
966 		*sg = sg_next(*sg);
967 	}
968 }
969 
970 static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
971 			     u8 *front_pad, u8 *middle_pad, u8 *data_pad,
972 			     void *epilogue, bool add_tag)
973 {
974 	struct ceph_msg_data_cursor cursor;
975 	struct scatterlist *cur_sg;
976 	int sg_cnt;
977 	int ret;
978 
979 	if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
980 		return 0;
981 
982 	sg_cnt = 1;  /* epilogue + [auth tag] */
983 	if (front_len(msg))
984 		sg_cnt += calc_sg_cnt(msg->front.iov_base,
985 				      front_len(msg));
986 	if (middle_len(msg))
987 		sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
988 				      middle_len(msg));
989 	if (data_len(msg)) {
990 		ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
991 		sg_cnt += calc_sg_cnt_cursor(&cursor);
992 	}
993 
994 	ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
995 	if (ret)
996 		return ret;
997 
998 	cur_sg = sgt->sgl;
999 	if (front_len(msg))
1000 		init_sgs(&cur_sg, msg->front.iov_base, front_len(msg),
1001 			 front_pad);
1002 	if (middle_len(msg))
1003 		init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
1004 			 middle_pad);
1005 	if (data_len(msg)) {
1006 		ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
1007 		init_sgs_cursor(&cur_sg, &cursor, data_pad);
1008 	}
1009 
1010 	WARN_ON(!sg_is_last(cur_sg));
1011 	sg_set_buf(cur_sg, epilogue,
1012 		   CEPH_GCM_BLOCK_LEN + (add_tag ? CEPH_GCM_TAG_LEN : 0));
1013 	return 0;
1014 }
1015 
1016 static int decrypt_preamble(struct ceph_connection *con)
1017 {
1018 	struct scatterlist sg;
1019 
1020 	sg_init_one(&sg, con->v2.in_buf, CEPH_PREAMBLE_SECURE_LEN);
1021 	return gcm_crypt(con, false, &sg, &sg, CEPH_PREAMBLE_SECURE_LEN);
1022 }
1023 
1024 static int decrypt_control_remainder(struct ceph_connection *con)
1025 {
1026 	int ctrl_len = con->v2.in_desc.fd_lens[0];
1027 	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1028 	int pt_len = padding_len(rem_len) + CEPH_GCM_TAG_LEN;
1029 	struct scatterlist sgs[2];
1030 
1031 	WARN_ON(con->v2.in_kvecs[0].iov_len != rem_len);
1032 	WARN_ON(con->v2.in_kvecs[1].iov_len != pt_len);
1033 
1034 	sg_init_table(sgs, 2);
1035 	sg_set_buf(&sgs[0], con->v2.in_kvecs[0].iov_base, rem_len);
1036 	sg_set_buf(&sgs[1], con->v2.in_buf, pt_len);
1037 
1038 	return gcm_crypt(con, false, sgs, sgs,
1039 			 padded_len(rem_len) + CEPH_GCM_TAG_LEN);
1040 }
1041 
1042 static int decrypt_tail(struct ceph_connection *con)
1043 {
1044 	struct sg_table enc_sgt = {};
1045 	struct sg_table sgt = {};
1046 	int tail_len;
1047 	int ret;
1048 
1049 	tail_len = tail_onwire_len(con->in_msg, true);
1050 	ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
1051 					con->v2.in_enc_page_cnt, 0, tail_len,
1052 					GFP_NOIO);
1053 	if (ret)
1054 		goto out;
1055 
1056 	ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
1057 			MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
1058 			con->v2.in_buf, true);
1059 	if (ret)
1060 		goto out;
1061 
1062 	dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
1063 	     con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
1064 	ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
1065 	if (ret)
1066 		goto out;
1067 
1068 	WARN_ON(!con->v2.in_enc_page_cnt);
1069 	ceph_release_page_vector(con->v2.in_enc_pages,
1070 				 con->v2.in_enc_page_cnt);
1071 	con->v2.in_enc_pages = NULL;
1072 	con->v2.in_enc_page_cnt = 0;
1073 
1074 out:
1075 	sg_free_table(&sgt);
1076 	sg_free_table(&enc_sgt);
1077 	return ret;
1078 }
1079 
1080 static int prepare_banner(struct ceph_connection *con)
1081 {
1082 	int buf_len = CEPH_BANNER_V2_LEN + 2 + 8 + 8;
1083 	void *buf, *p;
1084 
1085 	buf = alloc_conn_buf(con, buf_len);
1086 	if (!buf)
1087 		return -ENOMEM;
1088 
1089 	p = buf;
1090 	ceph_encode_copy(&p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN);
1091 	ceph_encode_16(&p, sizeof(u64) + sizeof(u64));
1092 	ceph_encode_64(&p, CEPH_MSGR2_SUPPORTED_FEATURES);
1093 	ceph_encode_64(&p, CEPH_MSGR2_REQUIRED_FEATURES);
1094 	WARN_ON(p != buf + buf_len);
1095 
1096 	add_out_kvec(con, buf, buf_len);
1097 	add_out_sign_kvec(con, buf, buf_len);
1098 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1099 	return 0;
1100 }
1101 
1102 /*
1103  * base:
1104  *   preamble
1105  *   control body (ctrl_len bytes)
1106  *   space for control crc
1107  *
1108  * extdata (optional):
1109  *   control body (extdata_len bytes)
1110  *
1111  * Compute control crc and gather base and extdata into:
1112  *
1113  *   preamble
1114  *   control body (ctrl_len + extdata_len bytes)
1115  *   control crc
1116  *
1117  * Preamble should already be encoded at the start of base.
1118  */
1119 static void prepare_head_plain(struct ceph_connection *con, void *base,
1120 			       int ctrl_len, void *extdata, int extdata_len,
1121 			       bool to_be_signed)
1122 {
1123 	int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN;
1124 	void *crcp = base + base_len - CEPH_CRC_LEN;
1125 	u32 crc;
1126 
1127 	crc = crc32c(-1, CTRL_BODY(base), ctrl_len);
1128 	if (extdata_len)
1129 		crc = crc32c(crc, extdata, extdata_len);
1130 	put_unaligned_le32(crc, crcp);
1131 
1132 	if (!extdata_len) {
1133 		add_out_kvec(con, base, base_len);
1134 		if (to_be_signed)
1135 			add_out_sign_kvec(con, base, base_len);
1136 		return;
1137 	}
1138 
1139 	add_out_kvec(con, base, crcp - base);
1140 	add_out_kvec(con, extdata, extdata_len);
1141 	add_out_kvec(con, crcp, CEPH_CRC_LEN);
1142 	if (to_be_signed) {
1143 		add_out_sign_kvec(con, base, crcp - base);
1144 		add_out_sign_kvec(con, extdata, extdata_len);
1145 		add_out_sign_kvec(con, crcp, CEPH_CRC_LEN);
1146 	}
1147 }
1148 
1149 static int prepare_head_secure_small(struct ceph_connection *con,
1150 				     void *base, int ctrl_len)
1151 {
1152 	struct scatterlist sg;
1153 	int ret;
1154 
1155 	/* inline buffer padding? */
1156 	if (ctrl_len < CEPH_PREAMBLE_INLINE_LEN)
1157 		memset(CTRL_BODY(base) + ctrl_len, 0,
1158 		       CEPH_PREAMBLE_INLINE_LEN - ctrl_len);
1159 
1160 	sg_init_one(&sg, base, CEPH_PREAMBLE_SECURE_LEN);
1161 	ret = gcm_crypt(con, true, &sg, &sg,
1162 			CEPH_PREAMBLE_SECURE_LEN - CEPH_GCM_TAG_LEN);
1163 	if (ret)
1164 		return ret;
1165 
1166 	add_out_kvec(con, base, CEPH_PREAMBLE_SECURE_LEN);
1167 	return 0;
1168 }
1169 
1170 /*
1171  * base:
1172  *   preamble
1173  *   control body (ctrl_len bytes)
1174  *   space for padding, if needed
1175  *   space for control remainder auth tag
1176  *   space for preamble auth tag
1177  *
1178  * Encrypt preamble and the inline portion, then encrypt the remainder
1179  * and gather into:
1180  *
1181  *   preamble
1182  *   control body (48 bytes)
1183  *   preamble auth tag
1184  *   control body (ctrl_len - 48 bytes)
1185  *   zero padding, if needed
1186  *   control remainder auth tag
1187  *
1188  * Preamble should already be encoded at the start of base.
1189  */
1190 static int prepare_head_secure_big(struct ceph_connection *con,
1191 				   void *base, int ctrl_len)
1192 {
1193 	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1194 	void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN;
1195 	void *rem_tag = rem + padded_len(rem_len);
1196 	void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN;
1197 	struct scatterlist sgs[2];
1198 	int ret;
1199 
1200 	sg_init_table(sgs, 2);
1201 	sg_set_buf(&sgs[0], base, rem - base);
1202 	sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
1203 	ret = gcm_crypt(con, true, sgs, sgs, rem - base);
1204 	if (ret)
1205 		return ret;
1206 
1207 	/* control remainder padding? */
1208 	if (need_padding(rem_len))
1209 		memset(rem + rem_len, 0, padding_len(rem_len));
1210 
1211 	sg_init_one(&sgs[0], rem, pmbl_tag - rem);
1212 	ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem);
1213 	if (ret)
1214 		return ret;
1215 
1216 	add_out_kvec(con, base, rem - base);
1217 	add_out_kvec(con, pmbl_tag, CEPH_GCM_TAG_LEN);
1218 	add_out_kvec(con, rem, pmbl_tag - rem);
1219 	return 0;
1220 }
1221 
1222 static int __prepare_control(struct ceph_connection *con, int tag,
1223 			     void *base, int ctrl_len, void *extdata,
1224 			     int extdata_len, bool to_be_signed)
1225 {
1226 	int total_len = ctrl_len + extdata_len;
1227 	struct ceph_frame_desc desc;
1228 	int ret;
1229 
1230 	dout("%s con %p tag %d len %d (%d+%d)\n", __func__, con, tag,
1231 	     total_len, ctrl_len, extdata_len);
1232 
1233 	/* extdata may be vmalloc'ed but not base */
1234 	if (WARN_ON(is_vmalloc_addr(base) || !ctrl_len))
1235 		return -EINVAL;
1236 
1237 	init_frame_desc(&desc, tag, &total_len, 1);
1238 	encode_preamble(&desc, base);
1239 
1240 	if (con_secure(con)) {
1241 		if (WARN_ON(extdata_len || to_be_signed))
1242 			return -EINVAL;
1243 
1244 		if (ctrl_len <= CEPH_PREAMBLE_INLINE_LEN)
1245 			/* fully inlined, inline buffer may need padding */
1246 			ret = prepare_head_secure_small(con, base, ctrl_len);
1247 		else
1248 			/* partially inlined, inline buffer is full */
1249 			ret = prepare_head_secure_big(con, base, ctrl_len);
1250 		if (ret)
1251 			return ret;
1252 	} else {
1253 		prepare_head_plain(con, base, ctrl_len, extdata, extdata_len,
1254 				   to_be_signed);
1255 	}
1256 
1257 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1258 	return 0;
1259 }
1260 
1261 static int prepare_control(struct ceph_connection *con, int tag,
1262 			   void *base, int ctrl_len)
1263 {
1264 	return __prepare_control(con, tag, base, ctrl_len, NULL, 0, false);
1265 }
1266 
1267 static int prepare_hello(struct ceph_connection *con)
1268 {
1269 	void *buf, *p;
1270 	int ctrl_len;
1271 
1272 	ctrl_len = 1 + ceph_entity_addr_encoding_len(&con->peer_addr);
1273 	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1274 	if (!buf)
1275 		return -ENOMEM;
1276 
1277 	p = CTRL_BODY(buf);
1278 	ceph_encode_8(&p, CEPH_ENTITY_TYPE_CLIENT);
1279 	ceph_encode_entity_addr(&p, &con->peer_addr);
1280 	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1281 
1282 	return __prepare_control(con, FRAME_TAG_HELLO, buf, ctrl_len,
1283 				 NULL, 0, true);
1284 }
1285 
1286 /* so that head_onwire_len(AUTH_BUF_LEN, false) is 512 */
1287 #define AUTH_BUF_LEN	(512 - CEPH_CRC_LEN - CEPH_PREAMBLE_PLAIN_LEN)
1288 
1289 static int prepare_auth_request(struct ceph_connection *con)
1290 {
1291 	void *authorizer, *authorizer_copy;
1292 	int ctrl_len, authorizer_len;
1293 	void *buf;
1294 	int ret;
1295 
1296 	ctrl_len = AUTH_BUF_LEN;
1297 	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1298 	if (!buf)
1299 		return -ENOMEM;
1300 
1301 	mutex_unlock(&con->mutex);
1302 	ret = con->ops->get_auth_request(con, CTRL_BODY(buf), &ctrl_len,
1303 					 &authorizer, &authorizer_len);
1304 	mutex_lock(&con->mutex);
1305 	if (con->state != CEPH_CON_S_V2_HELLO) {
1306 		dout("%s con %p state changed to %d\n", __func__, con,
1307 		     con->state);
1308 		return -EAGAIN;
1309 	}
1310 
1311 	dout("%s con %p get_auth_request ret %d\n", __func__, con, ret);
1312 	if (ret)
1313 		return ret;
1314 
1315 	authorizer_copy = alloc_conn_buf(con, authorizer_len);
1316 	if (!authorizer_copy)
1317 		return -ENOMEM;
1318 
1319 	memcpy(authorizer_copy, authorizer, authorizer_len);
1320 
1321 	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST, buf, ctrl_len,
1322 				 authorizer_copy, authorizer_len, true);
1323 }
1324 
1325 static int prepare_auth_request_more(struct ceph_connection *con,
1326 				     void *reply, int reply_len)
1327 {
1328 	int ctrl_len, authorizer_len;
1329 	void *authorizer;
1330 	void *buf;
1331 	int ret;
1332 
1333 	ctrl_len = AUTH_BUF_LEN;
1334 	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1335 	if (!buf)
1336 		return -ENOMEM;
1337 
1338 	mutex_unlock(&con->mutex);
1339 	ret = con->ops->handle_auth_reply_more(con, reply, reply_len,
1340 					       CTRL_BODY(buf), &ctrl_len,
1341 					       &authorizer, &authorizer_len);
1342 	mutex_lock(&con->mutex);
1343 	if (con->state != CEPH_CON_S_V2_AUTH) {
1344 		dout("%s con %p state changed to %d\n", __func__, con,
1345 		     con->state);
1346 		return -EAGAIN;
1347 	}
1348 
1349 	dout("%s con %p handle_auth_reply_more ret %d\n", __func__, con, ret);
1350 	if (ret)
1351 		return ret;
1352 
1353 	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST_MORE, buf,
1354 				 ctrl_len, authorizer, authorizer_len, true);
1355 }
1356 
1357 static int prepare_auth_signature(struct ceph_connection *con)
1358 {
1359 	void *buf;
1360 	int ret;
1361 
1362 	buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
1363 						  con_secure(con)));
1364 	if (!buf)
1365 		return -ENOMEM;
1366 
1367 	ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
1368 			  CTRL_BODY(buf));
1369 	if (ret)
1370 		return ret;
1371 
1372 	return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
1373 			       SHA256_DIGEST_SIZE);
1374 }
1375 
1376 static int prepare_client_ident(struct ceph_connection *con)
1377 {
1378 	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1379 	struct ceph_client *client = from_msgr(con->msgr);
1380 	u64 global_id = ceph_client_gid(client);
1381 	void *buf, *p;
1382 	int ctrl_len;
1383 
1384 	WARN_ON(con->v2.server_cookie);
1385 	WARN_ON(con->v2.connect_seq);
1386 	WARN_ON(con->v2.peer_global_seq);
1387 
1388 	if (!con->v2.client_cookie) {
1389 		do {
1390 			get_random_bytes(&con->v2.client_cookie,
1391 					 sizeof(con->v2.client_cookie));
1392 		} while (!con->v2.client_cookie);
1393 		dout("%s con %p generated cookie 0x%llx\n", __func__, con,
1394 		     con->v2.client_cookie);
1395 	} else {
1396 		dout("%s con %p cookie already set 0x%llx\n", __func__, con,
1397 		     con->v2.client_cookie);
1398 	}
1399 
1400 	dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
1401 	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1402 	     ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce),
1403 	     global_id, con->v2.global_seq, client->supported_features,
1404 	     client->required_features, con->v2.client_cookie);
1405 
1406 	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) +
1407 		   ceph_entity_addr_encoding_len(&con->peer_addr) + 6 * 8;
1408 	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1409 	if (!buf)
1410 		return -ENOMEM;
1411 
1412 	p = CTRL_BODY(buf);
1413 	ceph_encode_8(&p, 2);  /* addrvec marker */
1414 	ceph_encode_32(&p, 1);  /* addr_cnt */
1415 	ceph_encode_entity_addr(&p, my_addr);
1416 	ceph_encode_entity_addr(&p, &con->peer_addr);
1417 	ceph_encode_64(&p, global_id);
1418 	ceph_encode_64(&p, con->v2.global_seq);
1419 	ceph_encode_64(&p, client->supported_features);
1420 	ceph_encode_64(&p, client->required_features);
1421 	ceph_encode_64(&p, 0);  /* flags */
1422 	ceph_encode_64(&p, con->v2.client_cookie);
1423 	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1424 
1425 	return prepare_control(con, FRAME_TAG_CLIENT_IDENT, buf, ctrl_len);
1426 }
1427 
1428 static int prepare_session_reconnect(struct ceph_connection *con)
1429 {
1430 	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1431 	void *buf, *p;
1432 	int ctrl_len;
1433 
1434 	WARN_ON(!con->v2.client_cookie);
1435 	WARN_ON(!con->v2.server_cookie);
1436 	WARN_ON(!con->v2.connect_seq);
1437 	WARN_ON(!con->v2.peer_global_seq);
1438 
1439 	dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
1440 	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1441 	     con->v2.client_cookie, con->v2.server_cookie, con->v2.global_seq,
1442 	     con->v2.connect_seq, con->in_seq);
1443 
1444 	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) + 5 * 8;
1445 	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1446 	if (!buf)
1447 		return -ENOMEM;
1448 
1449 	p = CTRL_BODY(buf);
1450 	ceph_encode_8(&p, 2);  /* entity_addrvec_t marker */
1451 	ceph_encode_32(&p, 1);  /* my_addrs len */
1452 	ceph_encode_entity_addr(&p, my_addr);
1453 	ceph_encode_64(&p, con->v2.client_cookie);
1454 	ceph_encode_64(&p, con->v2.server_cookie);
1455 	ceph_encode_64(&p, con->v2.global_seq);
1456 	ceph_encode_64(&p, con->v2.connect_seq);
1457 	ceph_encode_64(&p, con->in_seq);
1458 	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1459 
1460 	return prepare_control(con, FRAME_TAG_SESSION_RECONNECT, buf, ctrl_len);
1461 }
1462 
1463 static int prepare_keepalive2(struct ceph_connection *con)
1464 {
1465 	struct ceph_timespec *ts = CTRL_BODY(con->v2.out_buf);
1466 	struct timespec64 now;
1467 
1468 	ktime_get_real_ts64(&now);
1469 	dout("%s con %p timestamp %lld.%09ld\n", __func__, con, now.tv_sec,
1470 	     now.tv_nsec);
1471 
1472 	ceph_encode_timespec64(ts, &now);
1473 
1474 	reset_out_kvecs(con);
1475 	return prepare_control(con, FRAME_TAG_KEEPALIVE2, con->v2.out_buf,
1476 			       sizeof(struct ceph_timespec));
1477 }
1478 
1479 static int prepare_ack(struct ceph_connection *con)
1480 {
1481 	void *p;
1482 
1483 	dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1484 	     con->in_seq_acked, con->in_seq);
1485 	con->in_seq_acked = con->in_seq;
1486 
1487 	p = CTRL_BODY(con->v2.out_buf);
1488 	ceph_encode_64(&p, con->in_seq_acked);
1489 
1490 	reset_out_kvecs(con);
1491 	return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
1492 }
1493 
1494 static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
1495 {
1496 	dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
1497 	     con->out_msg, aborted, con->v2.out_epil.front_crc,
1498 	     con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);
1499 
1500 	encode_epilogue_plain(con, aborted);
1501 	add_out_kvec(con, &con->v2.out_epil, CEPH_EPILOGUE_PLAIN_LEN);
1502 }
1503 
1504 /*
1505  * For "used" empty segments, crc is -1.  For unused (trailing)
1506  * segments, crc is 0.
1507  */
1508 static void prepare_message_plain(struct ceph_connection *con)
1509 {
1510 	struct ceph_msg *msg = con->out_msg;
1511 
1512 	prepare_head_plain(con, con->v2.out_buf,
1513 			   sizeof(struct ceph_msg_header2), NULL, 0, false);
1514 
1515 	if (!front_len(msg) && !middle_len(msg)) {
1516 		if (!data_len(msg)) {
1517 			/*
1518 			 * Empty message: once the head is written,
1519 			 * we are done -- there is no epilogue.
1520 			 */
1521 			con->v2.out_state = OUT_S_FINISH_MESSAGE;
1522 			return;
1523 		}
1524 
1525 		con->v2.out_epil.front_crc = -1;
1526 		con->v2.out_epil.middle_crc = -1;
1527 		con->v2.out_state = OUT_S_QUEUE_DATA;
1528 		return;
1529 	}
1530 
1531 	if (front_len(msg)) {
1532 		con->v2.out_epil.front_crc = crc32c(-1, msg->front.iov_base,
1533 						    front_len(msg));
1534 		add_out_kvec(con, msg->front.iov_base, front_len(msg));
1535 	} else {
1536 		/* middle (at least) is there, checked above */
1537 		con->v2.out_epil.front_crc = -1;
1538 	}
1539 
1540 	if (middle_len(msg)) {
1541 		con->v2.out_epil.middle_crc =
1542 			crc32c(-1, msg->middle->vec.iov_base, middle_len(msg));
1543 		add_out_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1544 	} else {
1545 		con->v2.out_epil.middle_crc = data_len(msg) ? -1 : 0;
1546 	}
1547 
1548 	if (data_len(msg)) {
1549 		con->v2.out_state = OUT_S_QUEUE_DATA;
1550 	} else {
1551 		con->v2.out_epil.data_crc = 0;
1552 		prepare_epilogue_plain(con, false);
1553 		con->v2.out_state = OUT_S_FINISH_MESSAGE;
1554 	}
1555 }
1556 
1557 /*
1558  * Unfortunately the kernel crypto API doesn't support streaming
1559  * (piecewise) operation for AEAD algorithms, so we can't get away
1560  * with a fixed size buffer and a couple sgs.  Instead, we have to
1561  * allocate pages for the entire tail of the message (currently up
1562  * to ~32M) and two sgs arrays (up to ~256K each)...
1563  */
1564 static int prepare_message_secure(struct ceph_connection *con)
1565 {
1566 	void *zerop = page_address(ceph_zero_page);
1567 	struct sg_table enc_sgt = {};
1568 	struct sg_table sgt = {};
1569 	struct page **enc_pages;
1570 	int enc_page_cnt;
1571 	int tail_len;
1572 	int ret;
1573 
1574 	ret = prepare_head_secure_small(con, con->v2.out_buf,
1575 					sizeof(struct ceph_msg_header2));
1576 	if (ret)
1577 		return ret;
1578 
1579 	tail_len = tail_onwire_len(con->out_msg, true);
1580 	if (!tail_len) {
1581 		/*
1582 		 * Empty message: once the head is written,
1583 		 * we are done -- there is no epilogue.
1584 		 */
1585 		con->v2.out_state = OUT_S_FINISH_MESSAGE;
1586 		return 0;
1587 	}
1588 
1589 	encode_epilogue_secure(con, false);
1590 	ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
1591 				&con->v2.out_epil, false);
1592 	if (ret)
1593 		goto out;
1594 
1595 	enc_page_cnt = calc_pages_for(0, tail_len);
1596 	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1597 	if (IS_ERR(enc_pages)) {
1598 		ret = PTR_ERR(enc_pages);
1599 		goto out;
1600 	}
1601 
1602 	WARN_ON(con->v2.out_enc_pages || con->v2.out_enc_page_cnt);
1603 	con->v2.out_enc_pages = enc_pages;
1604 	con->v2.out_enc_page_cnt = enc_page_cnt;
1605 	con->v2.out_enc_resid = tail_len;
1606 	con->v2.out_enc_i = 0;
1607 
1608 	ret = sg_alloc_table_from_pages(&enc_sgt, enc_pages, enc_page_cnt,
1609 					0, tail_len, GFP_NOIO);
1610 	if (ret)
1611 		goto out;
1612 
1613 	ret = gcm_crypt(con, true, sgt.sgl, enc_sgt.sgl,
1614 			tail_len - CEPH_GCM_TAG_LEN);
1615 	if (ret)
1616 		goto out;
1617 
1618 	dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
1619 	     con->out_msg, sgt.orig_nents, enc_page_cnt);
1620 	con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;
1621 
1622 out:
1623 	sg_free_table(&sgt);
1624 	sg_free_table(&enc_sgt);
1625 	return ret;
1626 }
1627 
1628 static int prepare_message(struct ceph_connection *con)
1629 {
1630 	int lens[] = {
1631 		sizeof(struct ceph_msg_header2),
1632 		front_len(con->out_msg),
1633 		middle_len(con->out_msg),
1634 		data_len(con->out_msg)
1635 	};
1636 	struct ceph_frame_desc desc;
1637 	int ret;
1638 
1639 	dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
1640 	     con->out_msg, lens[0], lens[1], lens[2], lens[3]);
1641 
1642 	if (con->in_seq > con->in_seq_acked) {
1643 		dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1644 		     con->in_seq_acked, con->in_seq);
1645 		con->in_seq_acked = con->in_seq;
1646 	}
1647 
1648 	reset_out_kvecs(con);
1649 	init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
1650 	encode_preamble(&desc, con->v2.out_buf);
1651 	fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr,
1652 		     con->in_seq_acked);
1653 
1654 	if (con_secure(con)) {
1655 		ret = prepare_message_secure(con);
1656 		if (ret)
1657 			return ret;
1658 	} else {
1659 		prepare_message_plain(con);
1660 	}
1661 
1662 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1663 	return 0;
1664 }
1665 
1666 static int prepare_read_banner_prefix(struct ceph_connection *con)
1667 {
1668 	void *buf;
1669 
1670 	buf = alloc_conn_buf(con, CEPH_BANNER_V2_PREFIX_LEN);
1671 	if (!buf)
1672 		return -ENOMEM;
1673 
1674 	reset_in_kvecs(con);
1675 	add_in_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1676 	add_in_sign_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1677 	con->state = CEPH_CON_S_V2_BANNER_PREFIX;
1678 	return 0;
1679 }
1680 
1681 static int prepare_read_banner_payload(struct ceph_connection *con,
1682 				       int payload_len)
1683 {
1684 	void *buf;
1685 
1686 	buf = alloc_conn_buf(con, payload_len);
1687 	if (!buf)
1688 		return -ENOMEM;
1689 
1690 	reset_in_kvecs(con);
1691 	add_in_kvec(con, buf, payload_len);
1692 	add_in_sign_kvec(con, buf, payload_len);
1693 	con->state = CEPH_CON_S_V2_BANNER_PAYLOAD;
1694 	return 0;
1695 }
1696 
1697 static void prepare_read_preamble(struct ceph_connection *con)
1698 {
1699 	reset_in_kvecs(con);
1700 	add_in_kvec(con, con->v2.in_buf,
1701 		    con_secure(con) ? CEPH_PREAMBLE_SECURE_LEN :
1702 				      CEPH_PREAMBLE_PLAIN_LEN);
1703 	con->v2.in_state = IN_S_HANDLE_PREAMBLE;
1704 }
1705 
1706 static int prepare_read_control(struct ceph_connection *con)
1707 {
1708 	int ctrl_len = con->v2.in_desc.fd_lens[0];
1709 	int head_len;
1710 	void *buf;
1711 
1712 	reset_in_kvecs(con);
1713 	if (con->state == CEPH_CON_S_V2_HELLO ||
1714 	    con->state == CEPH_CON_S_V2_AUTH) {
1715 		head_len = head_onwire_len(ctrl_len, false);
1716 		buf = alloc_conn_buf(con, head_len);
1717 		if (!buf)
1718 			return -ENOMEM;
1719 
1720 		/* preserve preamble */
1721 		memcpy(buf, con->v2.in_buf, CEPH_PREAMBLE_LEN);
1722 
1723 		add_in_kvec(con, CTRL_BODY(buf), ctrl_len);
1724 		add_in_kvec(con, CTRL_BODY(buf) + ctrl_len, CEPH_CRC_LEN);
1725 		add_in_sign_kvec(con, buf, head_len);
1726 	} else {
1727 		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
1728 			buf = alloc_conn_buf(con, ctrl_len);
1729 			if (!buf)
1730 				return -ENOMEM;
1731 
1732 			add_in_kvec(con, buf, ctrl_len);
1733 		} else {
1734 			add_in_kvec(con, CTRL_BODY(con->v2.in_buf), ctrl_len);
1735 		}
1736 		add_in_kvec(con, con->v2.in_buf, CEPH_CRC_LEN);
1737 	}
1738 	con->v2.in_state = IN_S_HANDLE_CONTROL;
1739 	return 0;
1740 }
1741 
1742 static int prepare_read_control_remainder(struct ceph_connection *con)
1743 {
1744 	int ctrl_len = con->v2.in_desc.fd_lens[0];
1745 	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1746 	void *buf;
1747 
1748 	buf = alloc_conn_buf(con, ctrl_len);
1749 	if (!buf)
1750 		return -ENOMEM;
1751 
1752 	memcpy(buf, CTRL_BODY(con->v2.in_buf), CEPH_PREAMBLE_INLINE_LEN);
1753 
1754 	reset_in_kvecs(con);
1755 	add_in_kvec(con, buf + CEPH_PREAMBLE_INLINE_LEN, rem_len);
1756 	add_in_kvec(con, con->v2.in_buf,
1757 		    padding_len(rem_len) + CEPH_GCM_TAG_LEN);
1758 	con->v2.in_state = IN_S_HANDLE_CONTROL_REMAINDER;
1759 	return 0;
1760 }
1761 
1762 static int prepare_read_data(struct ceph_connection *con)
1763 {
1764 	struct bio_vec bv;
1765 
1766 	con->in_data_crc = -1;
1767 	ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
1768 				  data_len(con->in_msg));
1769 
1770 	get_bvec_at(&con->v2.in_cursor, &bv);
1771 	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1772 		if (unlikely(!con->bounce_page)) {
1773 			con->bounce_page = alloc_page(GFP_NOIO);
1774 			if (!con->bounce_page) {
1775 				pr_err("failed to allocate bounce page\n");
1776 				return -ENOMEM;
1777 			}
1778 		}
1779 
1780 		bv.bv_page = con->bounce_page;
1781 		bv.bv_offset = 0;
1782 	}
1783 	set_in_bvec(con, &bv);
1784 	con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
1785 	return 0;
1786 }
1787 
1788 static void prepare_read_data_cont(struct ceph_connection *con)
1789 {
1790 	struct bio_vec bv;
1791 
1792 	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1793 		con->in_data_crc = crc32c(con->in_data_crc,
1794 					  page_address(con->bounce_page),
1795 					  con->v2.in_bvec.bv_len);
1796 
1797 		get_bvec_at(&con->v2.in_cursor, &bv);
1798 		memcpy_to_page(bv.bv_page, bv.bv_offset,
1799 			       page_address(con->bounce_page),
1800 			       con->v2.in_bvec.bv_len);
1801 	} else {
1802 		con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1803 						    con->v2.in_bvec.bv_page,
1804 						    con->v2.in_bvec.bv_offset,
1805 						    con->v2.in_bvec.bv_len);
1806 	}
1807 
1808 	ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
1809 	if (con->v2.in_cursor.total_resid) {
1810 		get_bvec_at(&con->v2.in_cursor, &bv);
1811 		if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1812 			bv.bv_page = con->bounce_page;
1813 			bv.bv_offset = 0;
1814 		}
1815 		set_in_bvec(con, &bv);
1816 		WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
1817 		return;
1818 	}
1819 
1820 	/*
1821 	 * We've read all data.  Prepare to read epilogue.
1822 	 */
1823 	reset_in_kvecs(con);
1824 	add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1825 	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1826 }
1827 
1828 static int prepare_read_tail_plain(struct ceph_connection *con)
1829 {
1830 	struct ceph_msg *msg = con->in_msg;
1831 
1832 	if (!front_len(msg) && !middle_len(msg)) {
1833 		WARN_ON(!data_len(msg));
1834 		return prepare_read_data(con);
1835 	}
1836 
1837 	reset_in_kvecs(con);
1838 	if (front_len(msg)) {
1839 		add_in_kvec(con, msg->front.iov_base, front_len(msg));
1840 		WARN_ON(msg->front.iov_len != front_len(msg));
1841 	}
1842 	if (middle_len(msg)) {
1843 		add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1844 		WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
1845 	}
1846 
1847 	if (data_len(msg)) {
1848 		con->v2.in_state = IN_S_PREPARE_READ_DATA;
1849 	} else {
1850 		add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1851 		con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1852 	}
1853 	return 0;
1854 }
1855 
1856 static void prepare_read_enc_page(struct ceph_connection *con)
1857 {
1858 	struct bio_vec bv;
1859 
1860 	dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
1861 	     con->v2.in_enc_resid);
1862 	WARN_ON(!con->v2.in_enc_resid);
1863 
1864 	bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
1865 		      min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);
1866 
1867 	set_in_bvec(con, &bv);
1868 	con->v2.in_enc_i++;
1869 	con->v2.in_enc_resid -= bv.bv_len;
1870 
1871 	if (con->v2.in_enc_resid) {
1872 		con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
1873 		return;
1874 	}
1875 
1876 	/*
1877 	 * We are set to read the last piece of ciphertext (ending
1878 	 * with epilogue) + auth tag.
1879 	 */
1880 	WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
1881 	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1882 }
1883 
1884 static int prepare_read_tail_secure(struct ceph_connection *con)
1885 {
1886 	struct page **enc_pages;
1887 	int enc_page_cnt;
1888 	int tail_len;
1889 
1890 	tail_len = tail_onwire_len(con->in_msg, true);
1891 	WARN_ON(!tail_len);
1892 
1893 	enc_page_cnt = calc_pages_for(0, tail_len);
1894 	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1895 	if (IS_ERR(enc_pages))
1896 		return PTR_ERR(enc_pages);
1897 
1898 	WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
1899 	con->v2.in_enc_pages = enc_pages;
1900 	con->v2.in_enc_page_cnt = enc_page_cnt;
1901 	con->v2.in_enc_resid = tail_len;
1902 	con->v2.in_enc_i = 0;
1903 
1904 	prepare_read_enc_page(con);
1905 	return 0;
1906 }
1907 
1908 static void __finish_skip(struct ceph_connection *con)
1909 {
1910 	con->in_seq++;
1911 	prepare_read_preamble(con);
1912 }
1913 
1914 static void prepare_skip_message(struct ceph_connection *con)
1915 {
1916 	struct ceph_frame_desc *desc = &con->v2.in_desc;
1917 	int tail_len;
1918 
1919 	dout("%s con %p %d+%d+%d\n", __func__, con, desc->fd_lens[1],
1920 	     desc->fd_lens[2], desc->fd_lens[3]);
1921 
1922 	tail_len = __tail_onwire_len(desc->fd_lens[1], desc->fd_lens[2],
1923 				     desc->fd_lens[3], con_secure(con));
1924 	if (!tail_len) {
1925 		__finish_skip(con);
1926 	} else {
1927 		set_in_skip(con, tail_len);
1928 		con->v2.in_state = IN_S_FINISH_SKIP;
1929 	}
1930 }
1931 
1932 static int process_banner_prefix(struct ceph_connection *con)
1933 {
1934 	int payload_len;
1935 	void *p;
1936 
1937 	WARN_ON(con->v2.in_kvecs[0].iov_len != CEPH_BANNER_V2_PREFIX_LEN);
1938 
1939 	p = con->v2.in_kvecs[0].iov_base;
1940 	if (memcmp(p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN)) {
1941 		if (!memcmp(p, CEPH_BANNER, CEPH_BANNER_LEN))
1942 			con->error_msg = "server is speaking msgr1 protocol";
1943 		else
1944 			con->error_msg = "protocol error, bad banner";
1945 		return -EINVAL;
1946 	}
1947 
1948 	p += CEPH_BANNER_V2_LEN;
1949 	payload_len = ceph_decode_16(&p);
1950 	dout("%s con %p payload_len %d\n", __func__, con, payload_len);
1951 
1952 	return prepare_read_banner_payload(con, payload_len);
1953 }
1954 
1955 static int process_banner_payload(struct ceph_connection *con)
1956 {
1957 	void *end = con->v2.in_kvecs[0].iov_base + con->v2.in_kvecs[0].iov_len;
1958 	u64 feat = CEPH_MSGR2_SUPPORTED_FEATURES;
1959 	u64 req_feat = CEPH_MSGR2_REQUIRED_FEATURES;
1960 	u64 server_feat, server_req_feat;
1961 	void *p;
1962 	int ret;
1963 
1964 	p = con->v2.in_kvecs[0].iov_base;
1965 	ceph_decode_64_safe(&p, end, server_feat, bad);
1966 	ceph_decode_64_safe(&p, end, server_req_feat, bad);
1967 
1968 	dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n",
1969 	     __func__, con, server_feat, server_req_feat);
1970 
1971 	if (req_feat & ~server_feat) {
1972 		pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
1973 		       server_feat, req_feat & ~server_feat);
1974 		con->error_msg = "missing required protocol features";
1975 		return -EINVAL;
1976 	}
1977 	if (server_req_feat & ~feat) {
1978 		pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
1979 		       feat, server_req_feat & ~feat);
1980 		con->error_msg = "missing required protocol features";
1981 		return -EINVAL;
1982 	}
1983 
1984 	/* no reset_out_kvecs() as our banner may still be pending */
1985 	ret = prepare_hello(con);
1986 	if (ret) {
1987 		pr_err("prepare_hello failed: %d\n", ret);
1988 		return ret;
1989 	}
1990 
1991 	con->state = CEPH_CON_S_V2_HELLO;
1992 	prepare_read_preamble(con);
1993 	return 0;
1994 
1995 bad:
1996 	pr_err("failed to decode banner payload\n");
1997 	return -EINVAL;
1998 }
1999 
2000 static int process_hello(struct ceph_connection *con, void *p, void *end)
2001 {
2002 	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
2003 	struct ceph_entity_addr addr_for_me;
2004 	u8 entity_type;
2005 	int ret;
2006 
2007 	if (con->state != CEPH_CON_S_V2_HELLO) {
2008 		con->error_msg = "protocol error, unexpected hello";
2009 		return -EINVAL;
2010 	}
2011 
2012 	ceph_decode_8_safe(&p, end, entity_type, bad);
2013 	ret = ceph_decode_entity_addr(&p, end, &addr_for_me);
2014 	if (ret) {
2015 		pr_err("failed to decode addr_for_me: %d\n", ret);
2016 		return ret;
2017 	}
2018 
2019 	dout("%s con %p entity_type %d addr_for_me %s\n", __func__, con,
2020 	     entity_type, ceph_pr_addr(&addr_for_me));
2021 
2022 	if (entity_type != con->peer_name.type) {
2023 		pr_err("bad peer type, want %d, got %d\n",
2024 		       con->peer_name.type, entity_type);
2025 		con->error_msg = "wrong peer at address";
2026 		return -EINVAL;
2027 	}
2028 
2029 	/*
2030 	 * Set our address to the address our first peer (i.e. monitor)
2031 	 * sees that we are connecting from.  If we are behind some sort
2032 	 * of NAT and want to be identified by some private (not NATed)
2033 	 * address, ip option should be used.
2034 	 */
2035 	if (ceph_addr_is_blank(my_addr)) {
2036 		memcpy(&my_addr->in_addr, &addr_for_me.in_addr,
2037 		       sizeof(my_addr->in_addr));
2038 		ceph_addr_set_port(my_addr, 0);
2039 		dout("%s con %p set my addr %s, as seen by peer %s\n",
2040 		     __func__, con, ceph_pr_addr(my_addr),
2041 		     ceph_pr_addr(&con->peer_addr));
2042 	} else {
2043 		dout("%s con %p my addr already set %s\n",
2044 		     __func__, con, ceph_pr_addr(my_addr));
2045 	}
2046 
2047 	WARN_ON(ceph_addr_is_blank(my_addr) || ceph_addr_port(my_addr));
2048 	WARN_ON(my_addr->type != CEPH_ENTITY_ADDR_TYPE_ANY);
2049 	WARN_ON(!my_addr->nonce);
2050 
2051 	/* no reset_out_kvecs() as our hello may still be pending */
2052 	ret = prepare_auth_request(con);
2053 	if (ret) {
2054 		if (ret != -EAGAIN)
2055 			pr_err("prepare_auth_request failed: %d\n", ret);
2056 		return ret;
2057 	}
2058 
2059 	con->state = CEPH_CON_S_V2_AUTH;
2060 	return 0;
2061 
2062 bad:
2063 	pr_err("failed to decode hello\n");
2064 	return -EINVAL;
2065 }
2066 
2067 static int process_auth_bad_method(struct ceph_connection *con,
2068 				   void *p, void *end)
2069 {
2070 	int allowed_protos[8], allowed_modes[8];
2071 	int allowed_proto_cnt, allowed_mode_cnt;
2072 	int used_proto, result;
2073 	int ret;
2074 	int i;
2075 
2076 	if (con->state != CEPH_CON_S_V2_AUTH) {
2077 		con->error_msg = "protocol error, unexpected auth_bad_method";
2078 		return -EINVAL;
2079 	}
2080 
2081 	ceph_decode_32_safe(&p, end, used_proto, bad);
2082 	ceph_decode_32_safe(&p, end, result, bad);
2083 	dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto,
2084 	     result);
2085 
2086 	ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad);
2087 	if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) {
2088 		pr_err("allowed_protos too big %d\n", allowed_proto_cnt);
2089 		return -EINVAL;
2090 	}
2091 	for (i = 0; i < allowed_proto_cnt; i++) {
2092 		ceph_decode_32_safe(&p, end, allowed_protos[i], bad);
2093 		dout("%s con %p allowed_protos[%d] %d\n", __func__, con,
2094 		     i, allowed_protos[i]);
2095 	}
2096 
2097 	ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad);
2098 	if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) {
2099 		pr_err("allowed_modes too big %d\n", allowed_mode_cnt);
2100 		return -EINVAL;
2101 	}
2102 	for (i = 0; i < allowed_mode_cnt; i++) {
2103 		ceph_decode_32_safe(&p, end, allowed_modes[i], bad);
2104 		dout("%s con %p allowed_modes[%d] %d\n", __func__, con,
2105 		     i, allowed_modes[i]);
2106 	}
2107 
2108 	mutex_unlock(&con->mutex);
2109 	ret = con->ops->handle_auth_bad_method(con, used_proto, result,
2110 					       allowed_protos,
2111 					       allowed_proto_cnt,
2112 					       allowed_modes,
2113 					       allowed_mode_cnt);
2114 	mutex_lock(&con->mutex);
2115 	if (con->state != CEPH_CON_S_V2_AUTH) {
2116 		dout("%s con %p state changed to %d\n", __func__, con,
2117 		     con->state);
2118 		return -EAGAIN;
2119 	}
2120 
2121 	dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret);
2122 	return ret;
2123 
2124 bad:
2125 	pr_err("failed to decode auth_bad_method\n");
2126 	return -EINVAL;
2127 }
2128 
2129 static int process_auth_reply_more(struct ceph_connection *con,
2130 				   void *p, void *end)
2131 {
2132 	int payload_len;
2133 	int ret;
2134 
2135 	if (con->state != CEPH_CON_S_V2_AUTH) {
2136 		con->error_msg = "protocol error, unexpected auth_reply_more";
2137 		return -EINVAL;
2138 	}
2139 
2140 	ceph_decode_32_safe(&p, end, payload_len, bad);
2141 	ceph_decode_need(&p, end, payload_len, bad);
2142 
2143 	dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2144 
2145 	reset_out_kvecs(con);
2146 	ret = prepare_auth_request_more(con, p, payload_len);
2147 	if (ret) {
2148 		if (ret != -EAGAIN)
2149 			pr_err("prepare_auth_request_more failed: %d\n", ret);
2150 		return ret;
2151 	}
2152 
2153 	return 0;
2154 
2155 bad:
2156 	pr_err("failed to decode auth_reply_more\n");
2157 	return -EINVAL;
2158 }
2159 
2160 /*
2161  * Align session_key and con_secret to avoid GFP_ATOMIC allocation
2162  * inside crypto_shash_setkey() and crypto_aead_setkey() called from
2163  * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
2164  * objects, so do it by hand.
2165  */
2166 static int process_auth_done(struct ceph_connection *con, void *p, void *end)
2167 {
2168 	u8 session_key_buf[CEPH_KEY_LEN + 16];
2169 	u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
2170 	u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
2171 	u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
2172 	int session_key_len, con_secret_len;
2173 	int payload_len;
2174 	u64 global_id;
2175 	int ret;
2176 
2177 	if (con->state != CEPH_CON_S_V2_AUTH) {
2178 		con->error_msg = "protocol error, unexpected auth_done";
2179 		return -EINVAL;
2180 	}
2181 
2182 	ceph_decode_64_safe(&p, end, global_id, bad);
2183 	ceph_decode_32_safe(&p, end, con->v2.con_mode, bad);
2184 	ceph_decode_32_safe(&p, end, payload_len, bad);
2185 
2186 	dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
2187 	     __func__, con, global_id, con->v2.con_mode, payload_len);
2188 
2189 	mutex_unlock(&con->mutex);
2190 	session_key_len = 0;
2191 	con_secret_len = 0;
2192 	ret = con->ops->handle_auth_done(con, global_id, p, payload_len,
2193 					 session_key, &session_key_len,
2194 					 con_secret, &con_secret_len);
2195 	mutex_lock(&con->mutex);
2196 	if (con->state != CEPH_CON_S_V2_AUTH) {
2197 		dout("%s con %p state changed to %d\n", __func__, con,
2198 		     con->state);
2199 		ret = -EAGAIN;
2200 		goto out;
2201 	}
2202 
2203 	dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret);
2204 	if (ret)
2205 		goto out;
2206 
2207 	ret = setup_crypto(con, session_key, session_key_len, con_secret,
2208 			   con_secret_len);
2209 	if (ret)
2210 		goto out;
2211 
2212 	reset_out_kvecs(con);
2213 	ret = prepare_auth_signature(con);
2214 	if (ret) {
2215 		pr_err("prepare_auth_signature failed: %d\n", ret);
2216 		goto out;
2217 	}
2218 
2219 	con->state = CEPH_CON_S_V2_AUTH_SIGNATURE;
2220 
2221 out:
2222 	memzero_explicit(session_key_buf, sizeof(session_key_buf));
2223 	memzero_explicit(con_secret_buf, sizeof(con_secret_buf));
2224 	return ret;
2225 
2226 bad:
2227 	pr_err("failed to decode auth_done\n");
2228 	return -EINVAL;
2229 }
2230 
2231 static int process_auth_signature(struct ceph_connection *con,
2232 				  void *p, void *end)
2233 {
2234 	u8 hmac[SHA256_DIGEST_SIZE];
2235 	int ret;
2236 
2237 	if (con->state != CEPH_CON_S_V2_AUTH_SIGNATURE) {
2238 		con->error_msg = "protocol error, unexpected auth_signature";
2239 		return -EINVAL;
2240 	}
2241 
2242 	ret = hmac_sha256(con, con->v2.out_sign_kvecs,
2243 			  con->v2.out_sign_kvec_cnt, hmac);
2244 	if (ret)
2245 		return ret;
2246 
2247 	ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad);
2248 	if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
2249 		con->error_msg = "integrity error, bad auth signature";
2250 		return -EBADMSG;
2251 	}
2252 
2253 	dout("%s con %p auth signature ok\n", __func__, con);
2254 
2255 	/* no reset_out_kvecs() as our auth_signature may still be pending */
2256 	if (!con->v2.server_cookie) {
2257 		ret = prepare_client_ident(con);
2258 		if (ret) {
2259 			pr_err("prepare_client_ident failed: %d\n", ret);
2260 			return ret;
2261 		}
2262 
2263 		con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2264 	} else {
2265 		ret = prepare_session_reconnect(con);
2266 		if (ret) {
2267 			pr_err("prepare_session_reconnect failed: %d\n", ret);
2268 			return ret;
2269 		}
2270 
2271 		con->state = CEPH_CON_S_V2_SESSION_RECONNECT;
2272 	}
2273 
2274 	return 0;
2275 
2276 bad:
2277 	pr_err("failed to decode auth_signature\n");
2278 	return -EINVAL;
2279 }
2280 
2281 static int process_server_ident(struct ceph_connection *con,
2282 				void *p, void *end)
2283 {
2284 	struct ceph_client *client = from_msgr(con->msgr);
2285 	u64 features, required_features;
2286 	struct ceph_entity_addr addr;
2287 	u64 global_seq;
2288 	u64 global_id;
2289 	u64 cookie;
2290 	u64 flags;
2291 	int ret;
2292 
2293 	if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2294 		con->error_msg = "protocol error, unexpected server_ident";
2295 		return -EINVAL;
2296 	}
2297 
2298 	ret = ceph_decode_entity_addrvec(&p, end, true, &addr);
2299 	if (ret) {
2300 		pr_err("failed to decode server addrs: %d\n", ret);
2301 		return ret;
2302 	}
2303 
2304 	ceph_decode_64_safe(&p, end, global_id, bad);
2305 	ceph_decode_64_safe(&p, end, global_seq, bad);
2306 	ceph_decode_64_safe(&p, end, features, bad);
2307 	ceph_decode_64_safe(&p, end, required_features, bad);
2308 	ceph_decode_64_safe(&p, end, flags, bad);
2309 	ceph_decode_64_safe(&p, end, cookie, bad);
2310 
2311 	dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n",
2312 	     __func__, con, ceph_pr_addr(&addr), le32_to_cpu(addr.nonce),
2313 	     global_id, global_seq, features, required_features, flags, cookie);
2314 
2315 	/* is this who we intended to talk to? */
2316 	if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) {
2317 		pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
2318 		       ceph_pr_addr(&con->peer_addr),
2319 		       le32_to_cpu(con->peer_addr.nonce),
2320 		       ceph_pr_addr(&addr), le32_to_cpu(addr.nonce));
2321 		con->error_msg = "wrong peer at address";
2322 		return -EINVAL;
2323 	}
2324 
2325 	if (client->required_features & ~features) {
2326 		pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2327 		       features, client->required_features & ~features);
2328 		con->error_msg = "missing required protocol features";
2329 		return -EINVAL;
2330 	}
2331 
2332 	/*
2333 	 * Both name->type and name->num are set in ceph_con_open() but
2334 	 * name->num may be bogus in the initial monmap.  name->type is
2335 	 * verified in handle_hello().
2336 	 */
2337 	WARN_ON(!con->peer_name.type);
2338 	con->peer_name.num = cpu_to_le64(global_id);
2339 	con->v2.peer_global_seq = global_seq;
2340 	con->peer_features = features;
2341 	WARN_ON(required_features & ~client->supported_features);
2342 	con->v2.server_cookie = cookie;
2343 
2344 	if (flags & CEPH_MSG_CONNECT_LOSSY) {
2345 		ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
2346 		WARN_ON(con->v2.server_cookie);
2347 	} else {
2348 		WARN_ON(!con->v2.server_cookie);
2349 	}
2350 
2351 	clear_in_sign_kvecs(con);
2352 	clear_out_sign_kvecs(con);
2353 	free_conn_bufs(con);
2354 	con->delay = 0;  /* reset backoff memory */
2355 
2356 	con->state = CEPH_CON_S_OPEN;
2357 	con->v2.out_state = OUT_S_GET_NEXT;
2358 	return 0;
2359 
2360 bad:
2361 	pr_err("failed to decode server_ident\n");
2362 	return -EINVAL;
2363 }
2364 
2365 static int process_ident_missing_features(struct ceph_connection *con,
2366 					  void *p, void *end)
2367 {
2368 	struct ceph_client *client = from_msgr(con->msgr);
2369 	u64 missing_features;
2370 
2371 	if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2372 		con->error_msg = "protocol error, unexpected ident_missing_features";
2373 		return -EINVAL;
2374 	}
2375 
2376 	ceph_decode_64_safe(&p, end, missing_features, bad);
2377 	pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2378 	       client->supported_features, missing_features);
2379 	con->error_msg = "missing required protocol features";
2380 	return -EINVAL;
2381 
2382 bad:
2383 	pr_err("failed to decode ident_missing_features\n");
2384 	return -EINVAL;
2385 }
2386 
2387 static int process_session_reconnect_ok(struct ceph_connection *con,
2388 					void *p, void *end)
2389 {
2390 	u64 seq;
2391 
2392 	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2393 		con->error_msg = "protocol error, unexpected session_reconnect_ok";
2394 		return -EINVAL;
2395 	}
2396 
2397 	ceph_decode_64_safe(&p, end, seq, bad);
2398 
2399 	dout("%s con %p seq %llu\n", __func__, con, seq);
2400 	ceph_con_discard_requeued(con, seq);
2401 
2402 	clear_in_sign_kvecs(con);
2403 	clear_out_sign_kvecs(con);
2404 	free_conn_bufs(con);
2405 	con->delay = 0;  /* reset backoff memory */
2406 
2407 	con->state = CEPH_CON_S_OPEN;
2408 	con->v2.out_state = OUT_S_GET_NEXT;
2409 	return 0;
2410 
2411 bad:
2412 	pr_err("failed to decode session_reconnect_ok\n");
2413 	return -EINVAL;
2414 }
2415 
2416 static int process_session_retry(struct ceph_connection *con,
2417 				 void *p, void *end)
2418 {
2419 	u64 connect_seq;
2420 	int ret;
2421 
2422 	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2423 		con->error_msg = "protocol error, unexpected session_retry";
2424 		return -EINVAL;
2425 	}
2426 
2427 	ceph_decode_64_safe(&p, end, connect_seq, bad);
2428 
2429 	dout("%s con %p connect_seq %llu\n", __func__, con, connect_seq);
2430 	WARN_ON(connect_seq <= con->v2.connect_seq);
2431 	con->v2.connect_seq = connect_seq + 1;
2432 
2433 	free_conn_bufs(con);
2434 
2435 	reset_out_kvecs(con);
2436 	ret = prepare_session_reconnect(con);
2437 	if (ret) {
2438 		pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret);
2439 		return ret;
2440 	}
2441 
2442 	return 0;
2443 
2444 bad:
2445 	pr_err("failed to decode session_retry\n");
2446 	return -EINVAL;
2447 }
2448 
2449 static int process_session_retry_global(struct ceph_connection *con,
2450 					void *p, void *end)
2451 {
2452 	u64 global_seq;
2453 	int ret;
2454 
2455 	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2456 		con->error_msg = "protocol error, unexpected session_retry_global";
2457 		return -EINVAL;
2458 	}
2459 
2460 	ceph_decode_64_safe(&p, end, global_seq, bad);
2461 
2462 	dout("%s con %p global_seq %llu\n", __func__, con, global_seq);
2463 	WARN_ON(global_seq <= con->v2.global_seq);
2464 	con->v2.global_seq = ceph_get_global_seq(con->msgr, global_seq);
2465 
2466 	free_conn_bufs(con);
2467 
2468 	reset_out_kvecs(con);
2469 	ret = prepare_session_reconnect(con);
2470 	if (ret) {
2471 		pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret);
2472 		return ret;
2473 	}
2474 
2475 	return 0;
2476 
2477 bad:
2478 	pr_err("failed to decode session_retry_global\n");
2479 	return -EINVAL;
2480 }
2481 
2482 static int process_session_reset(struct ceph_connection *con,
2483 				 void *p, void *end)
2484 {
2485 	bool full;
2486 	int ret;
2487 
2488 	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2489 		con->error_msg = "protocol error, unexpected session_reset";
2490 		return -EINVAL;
2491 	}
2492 
2493 	ceph_decode_8_safe(&p, end, full, bad);
2494 	if (!full) {
2495 		con->error_msg = "protocol error, bad session_reset";
2496 		return -EINVAL;
2497 	}
2498 
2499 	pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name),
2500 		ceph_pr_addr(&con->peer_addr));
2501 	ceph_con_reset_session(con);
2502 
2503 	mutex_unlock(&con->mutex);
2504 	if (con->ops->peer_reset)
2505 		con->ops->peer_reset(con);
2506 	mutex_lock(&con->mutex);
2507 	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2508 		dout("%s con %p state changed to %d\n", __func__, con,
2509 		     con->state);
2510 		return -EAGAIN;
2511 	}
2512 
2513 	free_conn_bufs(con);
2514 
2515 	reset_out_kvecs(con);
2516 	ret = prepare_client_ident(con);
2517 	if (ret) {
2518 		pr_err("prepare_client_ident (rst) failed: %d\n", ret);
2519 		return ret;
2520 	}
2521 
2522 	con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2523 	return 0;
2524 
2525 bad:
2526 	pr_err("failed to decode session_reset\n");
2527 	return -EINVAL;
2528 }
2529 
2530 static int process_keepalive2_ack(struct ceph_connection *con,
2531 				  void *p, void *end)
2532 {
2533 	if (con->state != CEPH_CON_S_OPEN) {
2534 		con->error_msg = "protocol error, unexpected keepalive2_ack";
2535 		return -EINVAL;
2536 	}
2537 
2538 	ceph_decode_need(&p, end, sizeof(struct ceph_timespec), bad);
2539 	ceph_decode_timespec64(&con->last_keepalive_ack, p);
2540 
2541 	dout("%s con %p timestamp %lld.%09ld\n", __func__, con,
2542 	     con->last_keepalive_ack.tv_sec, con->last_keepalive_ack.tv_nsec);
2543 
2544 	return 0;
2545 
2546 bad:
2547 	pr_err("failed to decode keepalive2_ack\n");
2548 	return -EINVAL;
2549 }
2550 
2551 static int process_ack(struct ceph_connection *con, void *p, void *end)
2552 {
2553 	u64 seq;
2554 
2555 	if (con->state != CEPH_CON_S_OPEN) {
2556 		con->error_msg = "protocol error, unexpected ack";
2557 		return -EINVAL;
2558 	}
2559 
2560 	ceph_decode_64_safe(&p, end, seq, bad);
2561 
2562 	dout("%s con %p seq %llu\n", __func__, con, seq);
2563 	ceph_con_discard_sent(con, seq);
2564 	return 0;
2565 
2566 bad:
2567 	pr_err("failed to decode ack\n");
2568 	return -EINVAL;
2569 }
2570 
2571 static int process_control(struct ceph_connection *con, void *p, void *end)
2572 {
2573 	int tag = con->v2.in_desc.fd_tag;
2574 	int ret;
2575 
2576 	dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p));
2577 
2578 	switch (tag) {
2579 	case FRAME_TAG_HELLO:
2580 		ret = process_hello(con, p, end);
2581 		break;
2582 	case FRAME_TAG_AUTH_BAD_METHOD:
2583 		ret = process_auth_bad_method(con, p, end);
2584 		break;
2585 	case FRAME_TAG_AUTH_REPLY_MORE:
2586 		ret = process_auth_reply_more(con, p, end);
2587 		break;
2588 	case FRAME_TAG_AUTH_DONE:
2589 		ret = process_auth_done(con, p, end);
2590 		break;
2591 	case FRAME_TAG_AUTH_SIGNATURE:
2592 		ret = process_auth_signature(con, p, end);
2593 		break;
2594 	case FRAME_TAG_SERVER_IDENT:
2595 		ret = process_server_ident(con, p, end);
2596 		break;
2597 	case FRAME_TAG_IDENT_MISSING_FEATURES:
2598 		ret = process_ident_missing_features(con, p, end);
2599 		break;
2600 	case FRAME_TAG_SESSION_RECONNECT_OK:
2601 		ret = process_session_reconnect_ok(con, p, end);
2602 		break;
2603 	case FRAME_TAG_SESSION_RETRY:
2604 		ret = process_session_retry(con, p, end);
2605 		break;
2606 	case FRAME_TAG_SESSION_RETRY_GLOBAL:
2607 		ret = process_session_retry_global(con, p, end);
2608 		break;
2609 	case FRAME_TAG_SESSION_RESET:
2610 		ret = process_session_reset(con, p, end);
2611 		break;
2612 	case FRAME_TAG_KEEPALIVE2_ACK:
2613 		ret = process_keepalive2_ack(con, p, end);
2614 		break;
2615 	case FRAME_TAG_ACK:
2616 		ret = process_ack(con, p, end);
2617 		break;
2618 	default:
2619 		pr_err("bad tag %d\n", tag);
2620 		con->error_msg = "protocol error, bad tag";
2621 		return -EINVAL;
2622 	}
2623 	if (ret) {
2624 		dout("%s con %p error %d\n", __func__, con, ret);
2625 		return ret;
2626 	}
2627 
2628 	prepare_read_preamble(con);
2629 	return 0;
2630 }
2631 
2632 /*
2633  * Return:
2634  *   1 - con->in_msg set, read message
2635  *   0 - skip message
2636  *  <0 - error
2637  */
2638 static int process_message_header(struct ceph_connection *con,
2639 				  void *p, void *end)
2640 {
2641 	struct ceph_frame_desc *desc = &con->v2.in_desc;
2642 	struct ceph_msg_header2 *hdr2 = p;
2643 	struct ceph_msg_header hdr;
2644 	int skip;
2645 	int ret;
2646 	u64 seq;
2647 
2648 	/* verify seq# */
2649 	seq = le64_to_cpu(hdr2->seq);
2650 	if ((s64)seq - (s64)con->in_seq < 1) {
2651 		pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n",
2652 			ENTITY_NAME(con->peer_name),
2653 			ceph_pr_addr(&con->peer_addr),
2654 			seq, con->in_seq + 1);
2655 		return 0;
2656 	}
2657 	if ((s64)seq - (s64)con->in_seq > 1) {
2658 		pr_err("bad seq %llu, expected %llu\n", seq, con->in_seq + 1);
2659 		con->error_msg = "bad message sequence # for incoming message";
2660 		return -EBADE;
2661 	}
2662 
2663 	ceph_con_discard_sent(con, le64_to_cpu(hdr2->ack_seq));
2664 
2665 	fill_header(&hdr, hdr2, desc->fd_lens[1], desc->fd_lens[2],
2666 		    desc->fd_lens[3], &con->peer_name);
2667 	ret = ceph_con_in_msg_alloc(con, &hdr, &skip);
2668 	if (ret)
2669 		return ret;
2670 
2671 	WARN_ON(!con->in_msg ^ skip);
2672 	if (skip)
2673 		return 0;
2674 
2675 	WARN_ON(!con->in_msg);
2676 	WARN_ON(con->in_msg->con != con);
2677 	return 1;
2678 }
2679 
2680 static int process_message(struct ceph_connection *con)
2681 {
2682 	ceph_con_process_message(con);
2683 
2684 	/*
2685 	 * We could have been closed by ceph_con_close() because
2686 	 * ceph_con_process_message() temporarily drops con->mutex.
2687 	 */
2688 	if (con->state != CEPH_CON_S_OPEN) {
2689 		dout("%s con %p state changed to %d\n", __func__, con,
2690 		     con->state);
2691 		return -EAGAIN;
2692 	}
2693 
2694 	prepare_read_preamble(con);
2695 	return 0;
2696 }
2697 
2698 static int __handle_control(struct ceph_connection *con, void *p)
2699 {
2700 	void *end = p + con->v2.in_desc.fd_lens[0];
2701 	struct ceph_msg *msg;
2702 	int ret;
2703 
2704 	if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE)
2705 		return process_control(con, p, end);
2706 
2707 	ret = process_message_header(con, p, end);
2708 	if (ret < 0)
2709 		return ret;
2710 	if (ret == 0) {
2711 		prepare_skip_message(con);
2712 		return 0;
2713 	}
2714 
2715 	msg = con->in_msg;  /* set in process_message_header() */
2716 	if (front_len(msg)) {
2717 		WARN_ON(front_len(msg) > msg->front_alloc_len);
2718 		msg->front.iov_len = front_len(msg);
2719 	} else {
2720 		msg->front.iov_len = 0;
2721 	}
2722 	if (middle_len(msg)) {
2723 		WARN_ON(middle_len(msg) > msg->middle->alloc_len);
2724 		msg->middle->vec.iov_len = middle_len(msg);
2725 	} else if (msg->middle) {
2726 		msg->middle->vec.iov_len = 0;
2727 	}
2728 
2729 	if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
2730 		return process_message(con);
2731 
2732 	if (con_secure(con))
2733 		return prepare_read_tail_secure(con);
2734 
2735 	return prepare_read_tail_plain(con);
2736 }
2737 
2738 static int handle_preamble(struct ceph_connection *con)
2739 {
2740 	struct ceph_frame_desc *desc = &con->v2.in_desc;
2741 	int ret;
2742 
2743 	if (con_secure(con)) {
2744 		ret = decrypt_preamble(con);
2745 		if (ret) {
2746 			if (ret == -EBADMSG)
2747 				con->error_msg = "integrity error, bad preamble auth tag";
2748 			return ret;
2749 		}
2750 	}
2751 
2752 	ret = decode_preamble(con->v2.in_buf, desc);
2753 	if (ret) {
2754 		if (ret == -EBADMSG)
2755 			con->error_msg = "integrity error, bad crc";
2756 		else
2757 			con->error_msg = "protocol error, bad preamble";
2758 		return ret;
2759 	}
2760 
2761 	dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__,
2762 	     con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0],
2763 	     desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]);
2764 
2765 	if (!con_secure(con))
2766 		return prepare_read_control(con);
2767 
2768 	if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN)
2769 		return prepare_read_control_remainder(con);
2770 
2771 	return __handle_control(con, CTRL_BODY(con->v2.in_buf));
2772 }
2773 
2774 static int handle_control(struct ceph_connection *con)
2775 {
2776 	int ctrl_len = con->v2.in_desc.fd_lens[0];
2777 	void *buf;
2778 	int ret;
2779 
2780 	WARN_ON(con_secure(con));
2781 
2782 	ret = verify_control_crc(con);
2783 	if (ret) {
2784 		con->error_msg = "integrity error, bad crc";
2785 		return ret;
2786 	}
2787 
2788 	if (con->state == CEPH_CON_S_V2_AUTH) {
2789 		buf = alloc_conn_buf(con, ctrl_len);
2790 		if (!buf)
2791 			return -ENOMEM;
2792 
2793 		memcpy(buf, con->v2.in_kvecs[0].iov_base, ctrl_len);
2794 		return __handle_control(con, buf);
2795 	}
2796 
2797 	return __handle_control(con, con->v2.in_kvecs[0].iov_base);
2798 }
2799 
2800 static int handle_control_remainder(struct ceph_connection *con)
2801 {
2802 	int ret;
2803 
2804 	WARN_ON(!con_secure(con));
2805 
2806 	ret = decrypt_control_remainder(con);
2807 	if (ret) {
2808 		if (ret == -EBADMSG)
2809 			con->error_msg = "integrity error, bad control remainder auth tag";
2810 		return ret;
2811 	}
2812 
2813 	return __handle_control(con, con->v2.in_kvecs[0].iov_base -
2814 				     CEPH_PREAMBLE_INLINE_LEN);
2815 }
2816 
2817 static int handle_epilogue(struct ceph_connection *con)
2818 {
2819 	u32 front_crc, middle_crc, data_crc;
2820 	int ret;
2821 
2822 	if (con_secure(con)) {
2823 		ret = decrypt_tail(con);
2824 		if (ret) {
2825 			if (ret == -EBADMSG)
2826 				con->error_msg = "integrity error, bad epilogue auth tag";
2827 			return ret;
2828 		}
2829 
2830 		/* just late_status */
2831 		ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL);
2832 		if (ret) {
2833 			con->error_msg = "protocol error, bad epilogue";
2834 			return ret;
2835 		}
2836 	} else {
2837 		ret = decode_epilogue(con->v2.in_buf, &front_crc,
2838 				      &middle_crc, &data_crc);
2839 		if (ret) {
2840 			con->error_msg = "protocol error, bad epilogue";
2841 			return ret;
2842 		}
2843 
2844 		ret = verify_epilogue_crcs(con, front_crc, middle_crc,
2845 					   data_crc);
2846 		if (ret) {
2847 			con->error_msg = "integrity error, bad crc";
2848 			return ret;
2849 		}
2850 	}
2851 
2852 	return process_message(con);
2853 }
2854 
2855 static void finish_skip(struct ceph_connection *con)
2856 {
2857 	dout("%s con %p\n", __func__, con);
2858 
2859 	if (con_secure(con))
2860 		gcm_inc_nonce(&con->v2.in_gcm_nonce);
2861 
2862 	__finish_skip(con);
2863 }
2864 
2865 static int populate_in_iter(struct ceph_connection *con)
2866 {
2867 	int ret;
2868 
2869 	dout("%s con %p state %d in_state %d\n", __func__, con, con->state,
2870 	     con->v2.in_state);
2871 	WARN_ON(iov_iter_count(&con->v2.in_iter));
2872 
2873 	if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) {
2874 		ret = process_banner_prefix(con);
2875 	} else if (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) {
2876 		ret = process_banner_payload(con);
2877 	} else if ((con->state >= CEPH_CON_S_V2_HELLO &&
2878 		    con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) ||
2879 		   con->state == CEPH_CON_S_OPEN) {
2880 		switch (con->v2.in_state) {
2881 		case IN_S_HANDLE_PREAMBLE:
2882 			ret = handle_preamble(con);
2883 			break;
2884 		case IN_S_HANDLE_CONTROL:
2885 			ret = handle_control(con);
2886 			break;
2887 		case IN_S_HANDLE_CONTROL_REMAINDER:
2888 			ret = handle_control_remainder(con);
2889 			break;
2890 		case IN_S_PREPARE_READ_DATA:
2891 			ret = prepare_read_data(con);
2892 			break;
2893 		case IN_S_PREPARE_READ_DATA_CONT:
2894 			prepare_read_data_cont(con);
2895 			ret = 0;
2896 			break;
2897 		case IN_S_PREPARE_READ_ENC_PAGE:
2898 			prepare_read_enc_page(con);
2899 			ret = 0;
2900 			break;
2901 		case IN_S_HANDLE_EPILOGUE:
2902 			ret = handle_epilogue(con);
2903 			break;
2904 		case IN_S_FINISH_SKIP:
2905 			finish_skip(con);
2906 			ret = 0;
2907 			break;
2908 		default:
2909 			WARN(1, "bad in_state %d", con->v2.in_state);
2910 			return -EINVAL;
2911 		}
2912 	} else {
2913 		WARN(1, "bad state %d", con->state);
2914 		return -EINVAL;
2915 	}
2916 	if (ret) {
2917 		dout("%s con %p error %d\n", __func__, con, ret);
2918 		return ret;
2919 	}
2920 
2921 	if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
2922 		return -ENODATA;
2923 	dout("%s con %p populated %zu\n", __func__, con,
2924 	     iov_iter_count(&con->v2.in_iter));
2925 	return 1;
2926 }
2927 
2928 int ceph_con_v2_try_read(struct ceph_connection *con)
2929 {
2930 	int ret;
2931 
2932 	dout("%s con %p state %d need %zu\n", __func__, con, con->state,
2933 	     iov_iter_count(&con->v2.in_iter));
2934 
2935 	if (con->state == CEPH_CON_S_PREOPEN)
2936 		return 0;
2937 
2938 	/*
2939 	 * We should always have something pending here.  If not,
2940 	 * avoid calling populate_in_iter() as if we read something
2941 	 * (ceph_tcp_recv() would immediately return 1).
2942 	 */
2943 	if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
2944 		return -ENODATA;
2945 
2946 	for (;;) {
2947 		ret = ceph_tcp_recv(con);
2948 		if (ret <= 0)
2949 			return ret;
2950 
2951 		ret = populate_in_iter(con);
2952 		if (ret <= 0) {
2953 			if (ret && ret != -EAGAIN && !con->error_msg)
2954 				con->error_msg = "read processing error";
2955 			return ret;
2956 		}
2957 	}
2958 }
2959 
2960 static void queue_data(struct ceph_connection *con)
2961 {
2962 	struct bio_vec bv;
2963 
2964 	con->v2.out_epil.data_crc = -1;
2965 	ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg,
2966 				  data_len(con->out_msg));
2967 
2968 	get_bvec_at(&con->v2.out_cursor, &bv);
2969 	set_out_bvec(con, &bv, true);
2970 	con->v2.out_state = OUT_S_QUEUE_DATA_CONT;
2971 }
2972 
2973 static void queue_data_cont(struct ceph_connection *con)
2974 {
2975 	struct bio_vec bv;
2976 
2977 	con->v2.out_epil.data_crc = ceph_crc32c_page(
2978 		con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
2979 		con->v2.out_bvec.bv_offset, con->v2.out_bvec.bv_len);
2980 
2981 	ceph_msg_data_advance(&con->v2.out_cursor, con->v2.out_bvec.bv_len);
2982 	if (con->v2.out_cursor.total_resid) {
2983 		get_bvec_at(&con->v2.out_cursor, &bv);
2984 		set_out_bvec(con, &bv, true);
2985 		WARN_ON(con->v2.out_state != OUT_S_QUEUE_DATA_CONT);
2986 		return;
2987 	}
2988 
2989 	/*
2990 	 * We've written all data.  Queue epilogue.  Once it's written,
2991 	 * we are done.
2992 	 */
2993 	reset_out_kvecs(con);
2994 	prepare_epilogue_plain(con, false);
2995 	con->v2.out_state = OUT_S_FINISH_MESSAGE;
2996 }
2997 
2998 static void queue_enc_page(struct ceph_connection *con)
2999 {
3000 	struct bio_vec bv;
3001 
3002 	dout("%s con %p i %d resid %d\n", __func__, con, con->v2.out_enc_i,
3003 	     con->v2.out_enc_resid);
3004 	WARN_ON(!con->v2.out_enc_resid);
3005 
3006 	bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i],
3007 		      min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0);
3008 
3009 	set_out_bvec(con, &bv, false);
3010 	con->v2.out_enc_i++;
3011 	con->v2.out_enc_resid -= bv.bv_len;
3012 
3013 	if (con->v2.out_enc_resid) {
3014 		WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE);
3015 		return;
3016 	}
3017 
3018 	/*
3019 	 * We've queued the last piece of ciphertext (ending with
3020 	 * epilogue) + auth tag.  Once it's written, we are done.
3021 	 */
3022 	WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt);
3023 	con->v2.out_state = OUT_S_FINISH_MESSAGE;
3024 }
3025 
3026 static void queue_zeros(struct ceph_connection *con)
3027 {
3028 	dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero);
3029 
3030 	if (con->v2.out_zero) {
3031 		set_out_bvec_zero(con);
3032 		con->v2.out_zero -= con->v2.out_bvec.bv_len;
3033 		con->v2.out_state = OUT_S_QUEUE_ZEROS;
3034 		return;
3035 	}
3036 
3037 	/*
3038 	 * We've zero-filled everything up to epilogue.  Queue epilogue
3039 	 * with late_status set to ABORTED and crcs adjusted for zeros.
3040 	 * Once it's written, we are done patching up for the revoke.
3041 	 */
3042 	reset_out_kvecs(con);
3043 	prepare_epilogue_plain(con, true);
3044 	con->v2.out_state = OUT_S_FINISH_MESSAGE;
3045 }
3046 
3047 static void finish_message(struct ceph_connection *con)
3048 {
3049 	dout("%s con %p msg %p\n", __func__, con, con->out_msg);
3050 
3051 	/* we end up here both plain and secure modes */
3052 	if (con->v2.out_enc_pages) {
3053 		WARN_ON(!con->v2.out_enc_page_cnt);
3054 		ceph_release_page_vector(con->v2.out_enc_pages,
3055 					 con->v2.out_enc_page_cnt);
3056 		con->v2.out_enc_pages = NULL;
3057 		con->v2.out_enc_page_cnt = 0;
3058 	}
3059 	/* message may have been revoked */
3060 	if (con->out_msg) {
3061 		ceph_msg_put(con->out_msg);
3062 		con->out_msg = NULL;
3063 	}
3064 
3065 	con->v2.out_state = OUT_S_GET_NEXT;
3066 }
3067 
3068 static int populate_out_iter(struct ceph_connection *con)
3069 {
3070 	int ret;
3071 
3072 	dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
3073 	     con->v2.out_state);
3074 	WARN_ON(iov_iter_count(&con->v2.out_iter));
3075 
3076 	if (con->state != CEPH_CON_S_OPEN) {
3077 		WARN_ON(con->state < CEPH_CON_S_V2_BANNER_PREFIX ||
3078 			con->state > CEPH_CON_S_V2_SESSION_RECONNECT);
3079 		goto nothing_pending;
3080 	}
3081 
3082 	switch (con->v2.out_state) {
3083 	case OUT_S_QUEUE_DATA:
3084 		WARN_ON(!con->out_msg);
3085 		queue_data(con);
3086 		goto populated;
3087 	case OUT_S_QUEUE_DATA_CONT:
3088 		WARN_ON(!con->out_msg);
3089 		queue_data_cont(con);
3090 		goto populated;
3091 	case OUT_S_QUEUE_ENC_PAGE:
3092 		queue_enc_page(con);
3093 		goto populated;
3094 	case OUT_S_QUEUE_ZEROS:
3095 		WARN_ON(con->out_msg);  /* revoked */
3096 		queue_zeros(con);
3097 		goto populated;
3098 	case OUT_S_FINISH_MESSAGE:
3099 		finish_message(con);
3100 		break;
3101 	case OUT_S_GET_NEXT:
3102 		break;
3103 	default:
3104 		WARN(1, "bad out_state %d", con->v2.out_state);
3105 		return -EINVAL;
3106 	}
3107 
3108 	WARN_ON(con->v2.out_state != OUT_S_GET_NEXT);
3109 	if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) {
3110 		ret = prepare_keepalive2(con);
3111 		if (ret) {
3112 			pr_err("prepare_keepalive2 failed: %d\n", ret);
3113 			return ret;
3114 		}
3115 	} else if (!list_empty(&con->out_queue)) {
3116 		ceph_con_get_out_msg(con);
3117 		ret = prepare_message(con);
3118 		if (ret) {
3119 			pr_err("prepare_message failed: %d\n", ret);
3120 			return ret;
3121 		}
3122 	} else if (con->in_seq > con->in_seq_acked) {
3123 		ret = prepare_ack(con);
3124 		if (ret) {
3125 			pr_err("prepare_ack failed: %d\n", ret);
3126 			return ret;
3127 		}
3128 	} else {
3129 		goto nothing_pending;
3130 	}
3131 
3132 populated:
3133 	if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
3134 		return -ENODATA;
3135 	dout("%s con %p populated %zu\n", __func__, con,
3136 	     iov_iter_count(&con->v2.out_iter));
3137 	return 1;
3138 
3139 nothing_pending:
3140 	WARN_ON(iov_iter_count(&con->v2.out_iter));
3141 	dout("%s con %p nothing pending\n", __func__, con);
3142 	ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
3143 	return 0;
3144 }
3145 
3146 int ceph_con_v2_try_write(struct ceph_connection *con)
3147 {
3148 	int ret;
3149 
3150 	dout("%s con %p state %d have %zu\n", __func__, con, con->state,
3151 	     iov_iter_count(&con->v2.out_iter));
3152 
3153 	/* open the socket first? */
3154 	if (con->state == CEPH_CON_S_PREOPEN) {
3155 		WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2);
3156 
3157 		/*
3158 		 * Always bump global_seq.  Bump connect_seq only if
3159 		 * there is a session (i.e. we are reconnecting and will
3160 		 * send session_reconnect instead of client_ident).
3161 		 */
3162 		con->v2.global_seq = ceph_get_global_seq(con->msgr, 0);
3163 		if (con->v2.server_cookie)
3164 			con->v2.connect_seq++;
3165 
3166 		ret = prepare_read_banner_prefix(con);
3167 		if (ret) {
3168 			pr_err("prepare_read_banner_prefix failed: %d\n", ret);
3169 			con->error_msg = "connect error";
3170 			return ret;
3171 		}
3172 
3173 		reset_out_kvecs(con);
3174 		ret = prepare_banner(con);
3175 		if (ret) {
3176 			pr_err("prepare_banner failed: %d\n", ret);
3177 			con->error_msg = "connect error";
3178 			return ret;
3179 		}
3180 
3181 		ret = ceph_tcp_connect(con);
3182 		if (ret) {
3183 			pr_err("ceph_tcp_connect failed: %d\n", ret);
3184 			con->error_msg = "connect error";
3185 			return ret;
3186 		}
3187 	}
3188 
3189 	if (!iov_iter_count(&con->v2.out_iter)) {
3190 		ret = populate_out_iter(con);
3191 		if (ret <= 0) {
3192 			if (ret && ret != -EAGAIN && !con->error_msg)
3193 				con->error_msg = "write processing error";
3194 			return ret;
3195 		}
3196 	}
3197 
3198 	tcp_sock_set_cork(con->sock->sk, true);
3199 	for (;;) {
3200 		ret = ceph_tcp_send(con);
3201 		if (ret <= 0)
3202 			break;
3203 
3204 		ret = populate_out_iter(con);
3205 		if (ret <= 0) {
3206 			if (ret && ret != -EAGAIN && !con->error_msg)
3207 				con->error_msg = "write processing error";
3208 			break;
3209 		}
3210 	}
3211 
3212 	tcp_sock_set_cork(con->sock->sk, false);
3213 	return ret;
3214 }
3215 
3216 static u32 crc32c_zeros(u32 crc, int zero_len)
3217 {
3218 	int len;
3219 
3220 	while (zero_len) {
3221 		len = min(zero_len, (int)PAGE_SIZE);
3222 		crc = crc32c(crc, page_address(ceph_zero_page), len);
3223 		zero_len -= len;
3224 	}
3225 
3226 	return crc;
3227 }
3228 
3229 static void prepare_zero_front(struct ceph_connection *con, int resid)
3230 {
3231 	int sent;
3232 
3233 	WARN_ON(!resid || resid > front_len(con->out_msg));
3234 	sent = front_len(con->out_msg) - resid;
3235 	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3236 
3237 	if (sent) {
3238 		con->v2.out_epil.front_crc =
3239 			crc32c(-1, con->out_msg->front.iov_base, sent);
3240 		con->v2.out_epil.front_crc =
3241 			crc32c_zeros(con->v2.out_epil.front_crc, resid);
3242 	} else {
3243 		con->v2.out_epil.front_crc = crc32c_zeros(-1, resid);
3244 	}
3245 
3246 	con->v2.out_iter.count -= resid;
3247 	out_zero_add(con, resid);
3248 }
3249 
3250 static void prepare_zero_middle(struct ceph_connection *con, int resid)
3251 {
3252 	int sent;
3253 
3254 	WARN_ON(!resid || resid > middle_len(con->out_msg));
3255 	sent = middle_len(con->out_msg) - resid;
3256 	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3257 
3258 	if (sent) {
3259 		con->v2.out_epil.middle_crc =
3260 			crc32c(-1, con->out_msg->middle->vec.iov_base, sent);
3261 		con->v2.out_epil.middle_crc =
3262 			crc32c_zeros(con->v2.out_epil.middle_crc, resid);
3263 	} else {
3264 		con->v2.out_epil.middle_crc = crc32c_zeros(-1, resid);
3265 	}
3266 
3267 	con->v2.out_iter.count -= resid;
3268 	out_zero_add(con, resid);
3269 }
3270 
3271 static void prepare_zero_data(struct ceph_connection *con)
3272 {
3273 	dout("%s con %p\n", __func__, con);
3274 	con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg));
3275 	out_zero_add(con, data_len(con->out_msg));
3276 }
3277 
3278 static void revoke_at_queue_data(struct ceph_connection *con)
3279 {
3280 	int boundary;
3281 	int resid;
3282 
3283 	WARN_ON(!data_len(con->out_msg));
3284 	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3285 	resid = iov_iter_count(&con->v2.out_iter);
3286 
3287 	boundary = front_len(con->out_msg) + middle_len(con->out_msg);
3288 	if (resid > boundary) {
3289 		resid -= boundary;
3290 		WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3291 		dout("%s con %p was sending head\n", __func__, con);
3292 		if (front_len(con->out_msg))
3293 			prepare_zero_front(con, front_len(con->out_msg));
3294 		if (middle_len(con->out_msg))
3295 			prepare_zero_middle(con, middle_len(con->out_msg));
3296 		prepare_zero_data(con);
3297 		WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3298 		con->v2.out_state = OUT_S_QUEUE_ZEROS;
3299 		return;
3300 	}
3301 
3302 	boundary = middle_len(con->out_msg);
3303 	if (resid > boundary) {
3304 		resid -= boundary;
3305 		dout("%s con %p was sending front\n", __func__, con);
3306 		prepare_zero_front(con, resid);
3307 		if (middle_len(con->out_msg))
3308 			prepare_zero_middle(con, middle_len(con->out_msg));
3309 		prepare_zero_data(con);
3310 		queue_zeros(con);
3311 		return;
3312 	}
3313 
3314 	WARN_ON(!resid);
3315 	dout("%s con %p was sending middle\n", __func__, con);
3316 	prepare_zero_middle(con, resid);
3317 	prepare_zero_data(con);
3318 	queue_zeros(con);
3319 }
3320 
3321 static void revoke_at_queue_data_cont(struct ceph_connection *con)
3322 {
3323 	int sent, resid;  /* current piece of data */
3324 
3325 	WARN_ON(!data_len(con->out_msg));
3326 	WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter));
3327 	resid = iov_iter_count(&con->v2.out_iter);
3328 	WARN_ON(!resid || resid > con->v2.out_bvec.bv_len);
3329 	sent = con->v2.out_bvec.bv_len - resid;
3330 	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3331 
3332 	if (sent) {
3333 		con->v2.out_epil.data_crc = ceph_crc32c_page(
3334 			con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3335 			con->v2.out_bvec.bv_offset, sent);
3336 		ceph_msg_data_advance(&con->v2.out_cursor, sent);
3337 	}
3338 	WARN_ON(resid > con->v2.out_cursor.total_resid);
3339 	con->v2.out_epil.data_crc = crc32c_zeros(con->v2.out_epil.data_crc,
3340 						con->v2.out_cursor.total_resid);
3341 
3342 	con->v2.out_iter.count -= resid;
3343 	out_zero_add(con, con->v2.out_cursor.total_resid);
3344 	queue_zeros(con);
3345 }
3346 
3347 static void revoke_at_finish_message(struct ceph_connection *con)
3348 {
3349 	int boundary;
3350 	int resid;
3351 
3352 	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
3353 	resid = iov_iter_count(&con->v2.out_iter);
3354 
3355 	if (!front_len(con->out_msg) && !middle_len(con->out_msg) &&
3356 	    !data_len(con->out_msg)) {
3357 		WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN);
3358 		dout("%s con %p was sending head (empty message) - noop\n",
3359 		     __func__, con);
3360 		return;
3361 	}
3362 
3363 	boundary = front_len(con->out_msg) + middle_len(con->out_msg) +
3364 		   CEPH_EPILOGUE_PLAIN_LEN;
3365 	if (resid > boundary) {
3366 		resid -= boundary;
3367 		WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
3368 		dout("%s con %p was sending head\n", __func__, con);
3369 		if (front_len(con->out_msg))
3370 			prepare_zero_front(con, front_len(con->out_msg));
3371 		if (middle_len(con->out_msg))
3372 			prepare_zero_middle(con, middle_len(con->out_msg));
3373 		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3374 		WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
3375 		con->v2.out_state = OUT_S_QUEUE_ZEROS;
3376 		return;
3377 	}
3378 
3379 	boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3380 	if (resid > boundary) {
3381 		resid -= boundary;
3382 		dout("%s con %p was sending front\n", __func__, con);
3383 		prepare_zero_front(con, resid);
3384 		if (middle_len(con->out_msg))
3385 			prepare_zero_middle(con, middle_len(con->out_msg));
3386 		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3387 		queue_zeros(con);
3388 		return;
3389 	}
3390 
3391 	boundary = CEPH_EPILOGUE_PLAIN_LEN;
3392 	if (resid > boundary) {
3393 		resid -= boundary;
3394 		dout("%s con %p was sending middle\n", __func__, con);
3395 		prepare_zero_middle(con, resid);
3396 		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
3397 		queue_zeros(con);
3398 		return;
3399 	}
3400 
3401 	WARN_ON(!resid);
3402 	dout("%s con %p was sending epilogue - noop\n", __func__, con);
3403 }
3404 
3405 void ceph_con_v2_revoke(struct ceph_connection *con)
3406 {
3407 	WARN_ON(con->v2.out_zero);
3408 
3409 	if (con_secure(con)) {
3410 		WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE &&
3411 			con->v2.out_state != OUT_S_FINISH_MESSAGE);
3412 		dout("%s con %p secure - noop\n", __func__, con);
3413 		return;
3414 	}
3415 
3416 	switch (con->v2.out_state) {
3417 	case OUT_S_QUEUE_DATA:
3418 		revoke_at_queue_data(con);
3419 		break;
3420 	case OUT_S_QUEUE_DATA_CONT:
3421 		revoke_at_queue_data_cont(con);
3422 		break;
3423 	case OUT_S_FINISH_MESSAGE:
3424 		revoke_at_finish_message(con);
3425 		break;
3426 	default:
3427 		WARN(1, "bad out_state %d", con->v2.out_state);
3428 		break;
3429 	}
3430 }
3431 
3432 static void revoke_at_prepare_read_data(struct ceph_connection *con)
3433 {
3434 	int remaining;
3435 	int resid;
3436 
3437 	WARN_ON(con_secure(con));
3438 	WARN_ON(!data_len(con->in_msg));
3439 	WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
3440 	resid = iov_iter_count(&con->v2.in_iter);
3441 	WARN_ON(!resid);
3442 
3443 	remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3444 	dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
3445 	     remaining);
3446 	con->v2.in_iter.count -= resid;
3447 	set_in_skip(con, resid + remaining);
3448 	con->v2.in_state = IN_S_FINISH_SKIP;
3449 }
3450 
3451 static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
3452 {
3453 	int recved, resid;  /* current piece of data */
3454 	int remaining;
3455 
3456 	WARN_ON(con_secure(con));
3457 	WARN_ON(!data_len(con->in_msg));
3458 	WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3459 	resid = iov_iter_count(&con->v2.in_iter);
3460 	WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3461 	recved = con->v2.in_bvec.bv_len - resid;
3462 	dout("%s con %p recved %d resid %d\n", __func__, con, recved, resid);
3463 
3464 	if (recved)
3465 		ceph_msg_data_advance(&con->v2.in_cursor, recved);
3466 	WARN_ON(resid > con->v2.in_cursor.total_resid);
3467 
3468 	remaining = CEPH_EPILOGUE_PLAIN_LEN;
3469 	dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
3470 	     con->v2.in_cursor.total_resid, remaining);
3471 	con->v2.in_iter.count -= resid;
3472 	set_in_skip(con, con->v2.in_cursor.total_resid + remaining);
3473 	con->v2.in_state = IN_S_FINISH_SKIP;
3474 }
3475 
3476 static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
3477 {
3478 	int resid;  /* current enc page (not necessarily data) */
3479 
3480 	WARN_ON(!con_secure(con));
3481 	WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3482 	resid = iov_iter_count(&con->v2.in_iter);
3483 	WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3484 
3485 	dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
3486 	     con->v2.in_enc_resid);
3487 	con->v2.in_iter.count -= resid;
3488 	set_in_skip(con, resid + con->v2.in_enc_resid);
3489 	con->v2.in_state = IN_S_FINISH_SKIP;
3490 }
3491 
3492 static void revoke_at_handle_epilogue(struct ceph_connection *con)
3493 {
3494 	int resid;
3495 
3496 	resid = iov_iter_count(&con->v2.in_iter);
3497 	WARN_ON(!resid);
3498 
3499 	dout("%s con %p resid %d\n", __func__, con, resid);
3500 	con->v2.in_iter.count -= resid;
3501 	set_in_skip(con, resid);
3502 	con->v2.in_state = IN_S_FINISH_SKIP;
3503 }
3504 
3505 void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
3506 {
3507 	switch (con->v2.in_state) {
3508 	case IN_S_PREPARE_READ_DATA:
3509 		revoke_at_prepare_read_data(con);
3510 		break;
3511 	case IN_S_PREPARE_READ_DATA_CONT:
3512 		revoke_at_prepare_read_data_cont(con);
3513 		break;
3514 	case IN_S_PREPARE_READ_ENC_PAGE:
3515 		revoke_at_prepare_read_enc_page(con);
3516 		break;
3517 	case IN_S_HANDLE_EPILOGUE:
3518 		revoke_at_handle_epilogue(con);
3519 		break;
3520 	default:
3521 		WARN(1, "bad in_state %d", con->v2.in_state);
3522 		break;
3523 	}
3524 }
3525 
3526 bool ceph_con_v2_opened(struct ceph_connection *con)
3527 {
3528 	return con->v2.peer_global_seq;
3529 }
3530 
3531 void ceph_con_v2_reset_session(struct ceph_connection *con)
3532 {
3533 	con->v2.client_cookie = 0;
3534 	con->v2.server_cookie = 0;
3535 	con->v2.global_seq = 0;
3536 	con->v2.connect_seq = 0;
3537 	con->v2.peer_global_seq = 0;
3538 }
3539 
3540 void ceph_con_v2_reset_protocol(struct ceph_connection *con)
3541 {
3542 	iov_iter_truncate(&con->v2.in_iter, 0);
3543 	iov_iter_truncate(&con->v2.out_iter, 0);
3544 	con->v2.out_zero = 0;
3545 
3546 	clear_in_sign_kvecs(con);
3547 	clear_out_sign_kvecs(con);
3548 	free_conn_bufs(con);
3549 
3550 	if (con->v2.in_enc_pages) {
3551 		WARN_ON(!con->v2.in_enc_page_cnt);
3552 		ceph_release_page_vector(con->v2.in_enc_pages,
3553 					 con->v2.in_enc_page_cnt);
3554 		con->v2.in_enc_pages = NULL;
3555 		con->v2.in_enc_page_cnt = 0;
3556 	}
3557 	if (con->v2.out_enc_pages) {
3558 		WARN_ON(!con->v2.out_enc_page_cnt);
3559 		ceph_release_page_vector(con->v2.out_enc_pages,
3560 					 con->v2.out_enc_page_cnt);
3561 		con->v2.out_enc_pages = NULL;
3562 		con->v2.out_enc_page_cnt = 0;
3563 	}
3564 
3565 	con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
3566 	memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
3567 	memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
3568 
3569 	if (con->v2.hmac_tfm) {
3570 		crypto_free_shash(con->v2.hmac_tfm);
3571 		con->v2.hmac_tfm = NULL;
3572 	}
3573 	if (con->v2.gcm_req) {
3574 		aead_request_free(con->v2.gcm_req);
3575 		con->v2.gcm_req = NULL;
3576 	}
3577 	if (con->v2.gcm_tfm) {
3578 		crypto_free_aead(con->v2.gcm_tfm);
3579 		con->v2.gcm_tfm = NULL;
3580 	}
3581 }
3582