1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Ceph msgr2 protocol implementation
4   *
5   * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
6   */
7  
8  #include <linux/ceph/ceph_debug.h>
9  
10  #include <crypto/aead.h>
11  #include <crypto/hash.h>
12  #include <crypto/sha2.h>
13  #include <crypto/utils.h>
14  #include <linux/bvec.h>
15  #include <linux/crc32c.h>
16  #include <linux/net.h>
17  #include <linux/scatterlist.h>
18  #include <linux/socket.h>
19  #include <linux/sched/mm.h>
20  #include <net/sock.h>
21  #include <net/tcp.h>
22  
23  #include <linux/ceph/ceph_features.h>
24  #include <linux/ceph/decode.h>
25  #include <linux/ceph/libceph.h>
26  #include <linux/ceph/messenger.h>
27  
28  #include "crypto.h"  /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */
29  
/*
 * Frame tags.
 *
 * NOTE(review): presumably the values carried in the tag byte that
 * encode_preamble()/decode_preamble() read and write as fd_tag; the code
 * that dispatches on them is not in this part of the file -- confirm
 * against the msgr2 protocol description.
 */
30  #define FRAME_TAG_HELLO			1
31  #define FRAME_TAG_AUTH_REQUEST		2
32  #define FRAME_TAG_AUTH_BAD_METHOD	3
33  #define FRAME_TAG_AUTH_REPLY_MORE	4
34  #define FRAME_TAG_AUTH_REQUEST_MORE	5
35  #define FRAME_TAG_AUTH_DONE		6
36  #define FRAME_TAG_AUTH_SIGNATURE	7
37  #define FRAME_TAG_CLIENT_IDENT		8
38  #define FRAME_TAG_SERVER_IDENT		9
39  #define FRAME_TAG_IDENT_MISSING_FEATURES 10
40  #define FRAME_TAG_SESSION_RECONNECT	11
41  #define FRAME_TAG_SESSION_RESET		12
42  #define FRAME_TAG_SESSION_RETRY		13
43  #define FRAME_TAG_SESSION_RETRY_GLOBAL	14
44  #define FRAME_TAG_SESSION_RECONNECT_OK	15
45  #define FRAME_TAG_WAIT			16
46  #define FRAME_TAG_MESSAGE		17
47  #define FRAME_TAG_KEEPALIVE2		18
48  #define FRAME_TAG_KEEPALIVE2_ACK	19
49  #define FRAME_TAG_ACK			20
50  
/*
 * Values of the epilogue's late_status byte.  The encode_epilogue_*()
 * helpers store either ABORTED or COMPLETE; decode_epilogue() applies
 * ABORTED_MASK to the received byte and accepts only COMPLETE.
 */
51  #define FRAME_LATE_STATUS_ABORTED	0x1
52  #define FRAME_LATE_STATUS_COMPLETE	0xe
53  #define FRAME_LATE_STATUS_ABORTED_MASK	0xf
54  
/*
 * NOTE(review): judging by the names, these are the states of the
 * receive-side state machine; their users are outside this part of the
 * file -- confirm before relying on this description.
 */
55  #define IN_S_HANDLE_PREAMBLE			1
56  #define IN_S_HANDLE_CONTROL			2
57  #define IN_S_HANDLE_CONTROL_REMAINDER		3
58  #define IN_S_PREPARE_READ_DATA			4
59  #define IN_S_PREPARE_READ_DATA_CONT		5
60  #define IN_S_PREPARE_READ_ENC_PAGE		6
61  #define IN_S_PREPARE_SPARSE_DATA		7
62  #define IN_S_PREPARE_SPARSE_DATA_CONT		8
63  #define IN_S_HANDLE_EPILOGUE			9
64  #define IN_S_FINISH_SKIP			10
65  
/*
 * NOTE(review): judging by the names, these are the states of the
 * send-side state machine; their users are outside this part of the
 * file -- confirm before relying on this description.
 */
66  #define OUT_S_QUEUE_DATA		1
67  #define OUT_S_QUEUE_DATA_CONT		2
68  #define OUT_S_QUEUE_ENC_PAGE		3
69  #define OUT_S_QUEUE_ZEROS		4
70  #define OUT_S_FINISH_MESSAGE		5
71  #define OUT_S_GET_NEXT			6
72  
/*
 * Pointer helpers for locating areas inside a buffer starting at @p.
 *
 * CTRL_BODY(p)  - @p advanced by CEPH_PREAMBLE_LEN bytes
 * FRONT_PAD(p)  - @p advanced by CEPH_EPILOGUE_SECURE_LEN bytes
 * MIDDLE_PAD(p) - CEPH_GCM_BLOCK_LEN bytes past FRONT_PAD(p)
 * DATA_PAD(p)   - CEPH_GCM_BLOCK_LEN bytes past MIDDLE_PAD(p)
 *
 * All of them do byte arithmetic on void *, which is a GNU C extension.
 */
73  #define CTRL_BODY(p)	((void *)(p) + CEPH_PREAMBLE_LEN)
74  #define FRONT_PAD(p)	((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
75  #define MIDDLE_PAD(p)	(FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
76  #define DATA_PAD(p)	(MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)
77  
/*
 * msghdr flags that do_recvmsg(), do_sendmsg() and do_try_sendpage()
 * start from: non-blocking socket I/O that does not raise SIGPIPE.
 */
78  #define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
79  
do_recvmsg(struct socket * sock,struct iov_iter * it)80  static int do_recvmsg(struct socket *sock, struct iov_iter *it)
81  {
82  	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
83  	int ret;
84  
85  	msg.msg_iter = *it;
86  	while (iov_iter_count(it)) {
87  		ret = sock_recvmsg(sock, &msg, msg.msg_flags);
88  		if (ret <= 0) {
89  			if (ret == -EAGAIN)
90  				ret = 0;
91  			return ret;
92  		}
93  
94  		iov_iter_advance(it, ret);
95  	}
96  
97  	WARN_ON(msg_data_left(&msg));
98  	return 1;
99  }
100  
101  /*
102   * Read as much as possible.
103   *
104   * Return:
105   *   1 - done, nothing (else) to read
106   *   0 - socket is empty, need to wait
107   *  <0 - error
108   */
ceph_tcp_recv(struct ceph_connection * con)109  static int ceph_tcp_recv(struct ceph_connection *con)
110  {
111  	int ret;
112  
113  	dout("%s con %p %s %zu\n", __func__, con,
114  	     iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
115  	     iov_iter_count(&con->v2.in_iter));
116  	ret = do_recvmsg(con->sock, &con->v2.in_iter);
117  	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
118  	     iov_iter_count(&con->v2.in_iter));
119  	return ret;
120  }
121  
do_sendmsg(struct socket * sock,struct iov_iter * it)122  static int do_sendmsg(struct socket *sock, struct iov_iter *it)
123  {
124  	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
125  	int ret;
126  
127  	msg.msg_iter = *it;
128  	while (iov_iter_count(it)) {
129  		ret = sock_sendmsg(sock, &msg);
130  		if (ret <= 0) {
131  			if (ret == -EAGAIN)
132  				ret = 0;
133  			return ret;
134  		}
135  
136  		iov_iter_advance(it, ret);
137  	}
138  
139  	WARN_ON(msg_data_left(&msg));
140  	return 1;
141  }
142  
do_try_sendpage(struct socket * sock,struct iov_iter * it)143  static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
144  {
145  	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
146  	struct bio_vec bv;
147  	int ret;
148  
149  	if (WARN_ON(!iov_iter_is_bvec(it)))
150  		return -EINVAL;
151  
152  	while (iov_iter_count(it)) {
153  		/* iov_iter_iovec() for ITER_BVEC */
154  		bvec_set_page(&bv, it->bvec->bv_page,
155  			      min(iov_iter_count(it),
156  				  it->bvec->bv_len - it->iov_offset),
157  			      it->bvec->bv_offset + it->iov_offset);
158  
159  		/*
160  		 * MSG_SPLICE_PAGES cannot properly handle pages with
161  		 * page_count == 0, we need to fall back to sendmsg if
162  		 * that's the case.
163  		 *
164  		 * Same goes for slab pages: skb_can_coalesce() allows
165  		 * coalescing neighboring slab objects into a single frag
166  		 * which triggers one of hardened usercopy checks.
167  		 */
168  		if (sendpage_ok(bv.bv_page))
169  			msg.msg_flags |= MSG_SPLICE_PAGES;
170  		else
171  			msg.msg_flags &= ~MSG_SPLICE_PAGES;
172  
173  		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
174  		ret = sock_sendmsg(sock, &msg);
175  		if (ret <= 0) {
176  			if (ret == -EAGAIN)
177  				ret = 0;
178  			return ret;
179  		}
180  
181  		iov_iter_advance(it, ret);
182  	}
183  
184  	return 1;
185  }
186  
187  /*
188   * Write as much as possible.  The socket is expected to be corked,
189   * so we don't bother with MSG_MORE here.
190   *
191   * Return:
192   *   1 - done, nothing (else) to write
193   *   0 - socket is full, need to wait
194   *  <0 - error
195   */
ceph_tcp_send(struct ceph_connection * con)196  static int ceph_tcp_send(struct ceph_connection *con)
197  {
198  	int ret;
199  
200  	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
201  	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
202  	if (con->v2.out_iter_sendpage)
203  		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
204  	else
205  		ret = do_sendmsg(con->sock, &con->v2.out_iter);
206  	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
207  	     iov_iter_count(&con->v2.out_iter));
208  	return ret;
209  }
210  
add_in_kvec(struct ceph_connection * con,void * buf,int len)211  static void add_in_kvec(struct ceph_connection *con, void *buf, int len)
212  {
213  	BUG_ON(con->v2.in_kvec_cnt >= ARRAY_SIZE(con->v2.in_kvecs));
214  	WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
215  
216  	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_base = buf;
217  	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_len = len;
218  	con->v2.in_kvec_cnt++;
219  
220  	con->v2.in_iter.nr_segs++;
221  	con->v2.in_iter.count += len;
222  }
223  
reset_in_kvecs(struct ceph_connection * con)224  static void reset_in_kvecs(struct ceph_connection *con)
225  {
226  	WARN_ON(iov_iter_count(&con->v2.in_iter));
227  
228  	con->v2.in_kvec_cnt = 0;
229  	iov_iter_kvec(&con->v2.in_iter, ITER_DEST, con->v2.in_kvecs, 0, 0);
230  }
231  
set_in_bvec(struct ceph_connection * con,const struct bio_vec * bv)232  static void set_in_bvec(struct ceph_connection *con, const struct bio_vec *bv)
233  {
234  	WARN_ON(iov_iter_count(&con->v2.in_iter));
235  
236  	con->v2.in_bvec = *bv;
237  	iov_iter_bvec(&con->v2.in_iter, ITER_DEST, &con->v2.in_bvec, 1, bv->bv_len);
238  }
239  
set_in_skip(struct ceph_connection * con,int len)240  static void set_in_skip(struct ceph_connection *con, int len)
241  {
242  	WARN_ON(iov_iter_count(&con->v2.in_iter));
243  
244  	dout("%s con %p len %d\n", __func__, con, len);
245  	iov_iter_discard(&con->v2.in_iter, ITER_DEST, len);
246  }
247  
add_out_kvec(struct ceph_connection * con,void * buf,int len)248  static void add_out_kvec(struct ceph_connection *con, void *buf, int len)
249  {
250  	BUG_ON(con->v2.out_kvec_cnt >= ARRAY_SIZE(con->v2.out_kvecs));
251  	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
252  	WARN_ON(con->v2.out_zero);
253  
254  	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_base = buf;
255  	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_len = len;
256  	con->v2.out_kvec_cnt++;
257  
258  	con->v2.out_iter.nr_segs++;
259  	con->v2.out_iter.count += len;
260  }
261  
reset_out_kvecs(struct ceph_connection * con)262  static void reset_out_kvecs(struct ceph_connection *con)
263  {
264  	WARN_ON(iov_iter_count(&con->v2.out_iter));
265  	WARN_ON(con->v2.out_zero);
266  
267  	con->v2.out_kvec_cnt = 0;
268  
269  	iov_iter_kvec(&con->v2.out_iter, ITER_SOURCE, con->v2.out_kvecs, 0, 0);
270  	con->v2.out_iter_sendpage = false;
271  }
272  
/*
 * Copy @bv into con->v2.out_bvec and point con->v2.out_iter at that copy
 * as a single-segment source.  @zerocopy is recorded in
 * out_iter_sendpage, which ceph_tcp_send() uses to pick the send path.
 * Warns if the previous out_iter still had bytes outstanding or out_zero
 * is non-zero.
 */
static void set_out_bvec(struct ceph_connection *con, const struct bio_vec *bv,
			 bool zerocopy)
{
	struct iov_iter *out = &con->v2.out_iter;
	struct bio_vec *dst = &con->v2.out_bvec;

	WARN_ON(iov_iter_count(out));
	WARN_ON(con->v2.out_zero);

	*dst = *bv;
	con->v2.out_iter_sendpage = zerocopy;
	iov_iter_bvec(out, ITER_SOURCE, dst, 1, dst->bv_len);
}
284  
set_out_bvec_zero(struct ceph_connection * con)285  static void set_out_bvec_zero(struct ceph_connection *con)
286  {
287  	WARN_ON(iov_iter_count(&con->v2.out_iter));
288  	WARN_ON(!con->v2.out_zero);
289  
290  	bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
291  		      min(con->v2.out_zero, (int)PAGE_SIZE), 0);
292  	con->v2.out_iter_sendpage = true;
293  	iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
294  		      con->v2.out_bvec.bv_len);
295  }
296  
out_zero_add(struct ceph_connection * con,int len)297  static void out_zero_add(struct ceph_connection *con, int len)
298  {
299  	dout("%s con %p len %d\n", __func__, con, len);
300  	con->v2.out_zero += len;
301  }
302  
alloc_conn_buf(struct ceph_connection * con,int len)303  static void *alloc_conn_buf(struct ceph_connection *con, int len)
304  {
305  	void *buf;
306  
307  	dout("%s con %p len %d\n", __func__, con, len);
308  
309  	if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
310  		return NULL;
311  
312  	buf = kvmalloc(len, GFP_NOIO);
313  	if (!buf)
314  		return NULL;
315  
316  	con->v2.conn_bufs[con->v2.conn_buf_cnt++] = buf;
317  	return buf;
318  }
319  
free_conn_bufs(struct ceph_connection * con)320  static void free_conn_bufs(struct ceph_connection *con)
321  {
322  	while (con->v2.conn_buf_cnt)
323  		kvfree(con->v2.conn_bufs[--con->v2.conn_buf_cnt]);
324  }
325  
add_in_sign_kvec(struct ceph_connection * con,void * buf,int len)326  static void add_in_sign_kvec(struct ceph_connection *con, void *buf, int len)
327  {
328  	BUG_ON(con->v2.in_sign_kvec_cnt >= ARRAY_SIZE(con->v2.in_sign_kvecs));
329  
330  	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_base = buf;
331  	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_len = len;
332  	con->v2.in_sign_kvec_cnt++;
333  }
334  
clear_in_sign_kvecs(struct ceph_connection * con)335  static void clear_in_sign_kvecs(struct ceph_connection *con)
336  {
337  	con->v2.in_sign_kvec_cnt = 0;
338  }
339  
add_out_sign_kvec(struct ceph_connection * con,void * buf,int len)340  static void add_out_sign_kvec(struct ceph_connection *con, void *buf, int len)
341  {
342  	BUG_ON(con->v2.out_sign_kvec_cnt >= ARRAY_SIZE(con->v2.out_sign_kvecs));
343  
344  	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_base = buf;
345  	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_len = len;
346  	con->v2.out_sign_kvec_cnt++;
347  }
348  
clear_out_sign_kvecs(struct ceph_connection * con)349  static void clear_out_sign_kvecs(struct ceph_connection *con)
350  {
351  	con->v2.out_sign_kvec_cnt = 0;
352  }
353  
con_secure(struct ceph_connection * con)354  static bool con_secure(struct ceph_connection *con)
355  {
356  	return con->v2.con_mode == CEPH_CON_MODE_SECURE;
357  }
358  
front_len(const struct ceph_msg * msg)359  static int front_len(const struct ceph_msg *msg)
360  {
361  	return le32_to_cpu(msg->hdr.front_len);
362  }
363  
middle_len(const struct ceph_msg * msg)364  static int middle_len(const struct ceph_msg *msg)
365  {
366  	return le32_to_cpu(msg->hdr.middle_len);
367  }
368  
data_len(const struct ceph_msg * msg)369  static int data_len(const struct ceph_msg *msg)
370  {
371  	return le32_to_cpu(msg->hdr.data_len);
372  }
373  
need_padding(int len)374  static bool need_padding(int len)
375  {
376  	return !IS_ALIGNED(len, CEPH_GCM_BLOCK_LEN);
377  }
378  
padded_len(int len)379  static int padded_len(int len)
380  {
381  	return ALIGN(len, CEPH_GCM_BLOCK_LEN);
382  }
383  
/* Number of bytes needed to bring @len up to padded_len(@len). */
static int padding_len(int len)
{
	int aligned = padded_len(len);

	return aligned - len;
}
388  
389  /* preamble + control segment */
head_onwire_len(int ctrl_len,bool secure)390  static int head_onwire_len(int ctrl_len, bool secure)
391  {
392  	int head_len;
393  	int rem_len;
394  
395  	BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
396  
397  	if (secure) {
398  		head_len = CEPH_PREAMBLE_SECURE_LEN;
399  		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
400  			rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
401  			head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
402  		}
403  	} else {
404  		head_len = CEPH_PREAMBLE_PLAIN_LEN;
405  		if (ctrl_len)
406  			head_len += ctrl_len + CEPH_CRC_LEN;
407  	}
408  	return head_len;
409  }
410  
411  /* front, middle and data segments + epilogue */
__tail_onwire_len(int front_len,int middle_len,int data_len,bool secure)412  static int __tail_onwire_len(int front_len, int middle_len, int data_len,
413  			     bool secure)
414  {
415  	BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
416  	       middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
417  	       data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
418  
419  	if (!front_len && !middle_len && !data_len)
420  		return 0;
421  
422  	if (!secure)
423  		return front_len + middle_len + data_len +
424  		       CEPH_EPILOGUE_PLAIN_LEN;
425  
426  	return padded_len(front_len) + padded_len(middle_len) +
427  	       padded_len(data_len) + CEPH_EPILOGUE_SECURE_LEN;
428  }
429  
/* __tail_onwire_len() applied to the segment lengths in @msg's header. */
static int tail_onwire_len(const struct ceph_msg *msg, bool secure)
{
	int front = front_len(msg);
	int middle = middle_len(msg);
	int data = data_len(msg);

	return __tail_onwire_len(front, middle, data, secure);
}
435  
/*
 * Plain-mode head length for a control segment holding one
 * struct ceph_msg_header2: preamble + header + control crc.  Spelled out
 * as a constant expression; it matches the plain-mode branch of
 * head_onwire_len() for the call quoted below.
 */
436  /* head_onwire_len(sizeof(struct ceph_msg_header2), false) */
437  #define MESSAGE_HEAD_PLAIN_LEN	(CEPH_PREAMBLE_PLAIN_LEN +		\
438  				 sizeof(struct ceph_msg_header2) +	\
439  				 CEPH_CRC_LEN)
440  
441  static const int frame_aligns[] = {
442  	sizeof(void *),
443  	sizeof(void *),
444  	sizeof(void *),
445  	PAGE_SIZE
446  };
447  
/*
 * Number of segments to put in a frame for the @len_cnt lengths in @lens:
 * trailing empty segments are not counted, but the result is never less
 * than 1 because a frame always has at least one (possibly empty)
 * segment.
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int cnt = len_cnt;

	/* walk back over the empty tail */
	while (cnt > 0 && !lens[cnt - 1])
		cnt--;

	return cnt > 0 ? cnt : 1;
}
463  
init_frame_desc(struct ceph_frame_desc * desc,int tag,const int * lens,int len_cnt)464  static void init_frame_desc(struct ceph_frame_desc *desc, int tag,
465  			    const int *lens, int len_cnt)
466  {
467  	int i;
468  
469  	memset(desc, 0, sizeof(*desc));
470  
471  	desc->fd_tag = tag;
472  	desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
473  	BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT);
474  	for (i = 0; i < desc->fd_seg_cnt; i++) {
475  		desc->fd_lens[i] = lens[i];
476  		desc->fd_aligns[i] = frame_aligns[i];
477  	}
478  }
479  
480  /*
481   * Preamble crc covers everything up to itself (28 bytes) and
482   * is calculated and verified irrespective of the connection mode
483   * (i.e. even if the frame is encrypted).
484   */
encode_preamble(const struct ceph_frame_desc * desc,void * p)485  static void encode_preamble(const struct ceph_frame_desc *desc, void *p)
486  {
487  	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
488  	void *start = p;
489  	int i;
490  
491  	memset(p, 0, CEPH_PREAMBLE_LEN);
492  
493  	ceph_encode_8(&p, desc->fd_tag);
494  	ceph_encode_8(&p, desc->fd_seg_cnt);
495  	for (i = 0; i < desc->fd_seg_cnt; i++) {
496  		ceph_encode_32(&p, desc->fd_lens[i]);
497  		ceph_encode_16(&p, desc->fd_aligns[i]);
498  	}
499  
500  	put_unaligned_le32(crc32c(0, start, crcp - start), crcp);
501  }
502  
decode_preamble(void * p,struct ceph_frame_desc * desc)503  static int decode_preamble(void *p, struct ceph_frame_desc *desc)
504  {
505  	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
506  	u32 crc, expected_crc;
507  	int i;
508  
509  	crc = crc32c(0, p, crcp - p);
510  	expected_crc = get_unaligned_le32(crcp);
511  	if (crc != expected_crc) {
512  		pr_err("bad preamble crc, calculated %u, expected %u\n",
513  		       crc, expected_crc);
514  		return -EBADMSG;
515  	}
516  
517  	memset(desc, 0, sizeof(*desc));
518  
519  	desc->fd_tag = ceph_decode_8(&p);
520  	desc->fd_seg_cnt = ceph_decode_8(&p);
521  	if (desc->fd_seg_cnt < 1 ||
522  	    desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT) {
523  		pr_err("bad segment count %d\n", desc->fd_seg_cnt);
524  		return -EINVAL;
525  	}
526  	for (i = 0; i < desc->fd_seg_cnt; i++) {
527  		desc->fd_lens[i] = ceph_decode_32(&p);
528  		desc->fd_aligns[i] = ceph_decode_16(&p);
529  	}
530  
531  	if (desc->fd_lens[0] < 0 ||
532  	    desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
533  		pr_err("bad control segment length %d\n", desc->fd_lens[0]);
534  		return -EINVAL;
535  	}
536  	if (desc->fd_lens[1] < 0 ||
537  	    desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
538  		pr_err("bad front segment length %d\n", desc->fd_lens[1]);
539  		return -EINVAL;
540  	}
541  	if (desc->fd_lens[2] < 0 ||
542  	    desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
543  		pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
544  		return -EINVAL;
545  	}
546  	if (desc->fd_lens[3] < 0 ||
547  	    desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
548  		pr_err("bad data segment length %d\n", desc->fd_lens[3]);
549  		return -EINVAL;
550  	}
551  
552  	/*
553  	 * This would fire for FRAME_TAG_WAIT (it has one empty
554  	 * segment), but we should never get it as client.
555  	 */
556  	if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
557  		pr_err("last segment empty, segment count %d\n",
558  		       desc->fd_seg_cnt);
559  		return -EINVAL;
560  	}
561  
562  	return 0;
563  }
564  
/*
 * Finalize con->v2.out_epil for plain mode: set late_status according to
 * @aborted and convert the three crc fields, already stored in CPU byte
 * order, to little-endian in place.
 */
static void encode_epilogue_plain(struct ceph_connection *con, bool aborted)
{
	if (aborted)
		con->v2.out_epil.late_status = FRAME_LATE_STATUS_ABORTED;
	else
		con->v2.out_epil.late_status = FRAME_LATE_STATUS_COMPLETE;

	cpu_to_le32s(&con->v2.out_epil.front_crc);
	cpu_to_le32s(&con->v2.out_epil.middle_crc);
	cpu_to_le32s(&con->v2.out_epil.data_crc);
}
573  
/*
 * Finalize con->v2.out_epil for secure mode: everything is zeroed except
 * late_status, which is set according to @aborted.
 */
static void encode_epilogue_secure(struct ceph_connection *con, bool aborted)
{
	memset(&con->v2.out_epil, 0, sizeof(con->v2.out_epil));

	if (aborted)
		con->v2.out_epil.late_status = FRAME_LATE_STATUS_ABORTED;
	else
		con->v2.out_epil.late_status = FRAME_LATE_STATUS_COMPLETE;
}
580  
/*
 * Decode the epilogue at @p.
 *
 * The late_status byte must read FRAME_LATE_STATUS_COMPLETE under
 * FRAME_LATE_STATUS_ABORTED_MASK.  The three crcs that follow are decoded
 * only when all of @front_crc, @middle_crc and @data_crc are non-NULL.
 *
 * Return: 0 on success, -EINVAL on an unexpected late_status.
 */
static int decode_epilogue(void *p, u32 *front_crc, u32 *middle_crc,
			   u32 *data_crc)
{
	u8 status = ceph_decode_8(&p);

	if ((status & FRAME_LATE_STATUS_ABORTED_MASK) !=
			FRAME_LATE_STATUS_COMPLETE) {
		/* we should never get an aborted message as client */
		pr_err("bad late_status 0x%x\n", status);
		return -EINVAL;
	}

	if (!front_crc || !middle_crc || !data_crc)
		return 0;

	*front_crc = ceph_decode_32(&p);
	*middle_crc = ceph_decode_32(&p);
	*data_crc = ceph_decode_32(&p);
	return 0;
}
602  
fill_header(struct ceph_msg_header * hdr,const struct ceph_msg_header2 * hdr2,int front_len,int middle_len,int data_len,const struct ceph_entity_name * peer_name)603  static void fill_header(struct ceph_msg_header *hdr,
604  			const struct ceph_msg_header2 *hdr2,
605  			int front_len, int middle_len, int data_len,
606  			const struct ceph_entity_name *peer_name)
607  {
608  	hdr->seq = hdr2->seq;
609  	hdr->tid = hdr2->tid;
610  	hdr->type = hdr2->type;
611  	hdr->priority = hdr2->priority;
612  	hdr->version = hdr2->version;
613  	hdr->front_len = cpu_to_le32(front_len);
614  	hdr->middle_len = cpu_to_le32(middle_len);
615  	hdr->data_len = cpu_to_le32(data_len);
616  	hdr->data_off = hdr2->data_off;
617  	hdr->src = *peer_name;
618  	hdr->compat_version = hdr2->compat_version;
619  	hdr->reserved = 0;
620  	hdr->crc = 0;
621  }
622  
/*
 * Populate @hdr2 from @hdr and @ack_seq.  Fields shared with @hdr are
 * copied as is, ack_seq is stored little-endian, and
 * data_pre_padding_len, flags and reserved are set to 0.
 */
static void fill_header2(struct ceph_msg_header2 *hdr2,
			 const struct ceph_msg_header *hdr, u64 ack_seq)
{
	/* carried over from the original header */
	hdr2->seq = hdr->seq;
	hdr2->tid = hdr->tid;
	hdr2->type = hdr->type;
	hdr2->priority = hdr->priority;
	hdr2->version = hdr->version;
	hdr2->compat_version = hdr->compat_version;
	hdr2->data_off = hdr->data_off;

	hdr2->ack_seq = cpu_to_le64(ack_seq);

	hdr2->data_pre_padding_len = 0;
	hdr2->flags = 0;
	hdr2->reserved = 0;
}
638  
verify_control_crc(struct ceph_connection * con)639  static int verify_control_crc(struct ceph_connection *con)
640  {
641  	int ctrl_len = con->v2.in_desc.fd_lens[0];
642  	u32 crc, expected_crc;
643  
644  	WARN_ON(con->v2.in_kvecs[0].iov_len != ctrl_len);
645  	WARN_ON(con->v2.in_kvecs[1].iov_len != CEPH_CRC_LEN);
646  
647  	crc = crc32c(-1, con->v2.in_kvecs[0].iov_base, ctrl_len);
648  	expected_crc = get_unaligned_le32(con->v2.in_kvecs[1].iov_base);
649  	if (crc != expected_crc) {
650  		pr_err("bad control crc, calculated %u, expected %u\n",
651  		       crc, expected_crc);
652  		return -EBADMSG;
653  	}
654  
655  	return 0;
656  }
657  
/*
 * Compare the crcs received in a plain-mode epilogue with the ones
 * computed locally for con->in_msg.
 *
 * The front and middle crcs are computed here.  An empty front segment
 * gets crc -1; an empty middle segment gets -1 if there is data and 0
 * otherwise.  in_data_crc is only reset to 0 when there is no data; for
 * a non-empty data segment it is used as found.
 *
 * Return: 0 if all three crcs match, -EBADMSG on the first mismatch.
 */
static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
				u32 middle_crc, u32 data_crc)
{
	struct ceph_msg *msg = con->in_msg;
	int flen = front_len(msg);
	int mlen = middle_len(msg);
	int dlen = data_len(msg);

	if (flen) {
		con->in_front_crc = crc32c(-1, msg->front.iov_base, flen);
	} else {
		/* a tail with no segments at all is not expected here */
		WARN_ON(!mlen && !dlen);
		con->in_front_crc = -1;
	}

	if (mlen)
		con->in_middle_crc = crc32c(-1, msg->middle->vec.iov_base,
					    mlen);
	else
		con->in_middle_crc = dlen ? -1 : 0;

	if (!dlen)
		con->in_data_crc = 0;

	dout("%s con %p msg %p crcs %u %u %u\n", __func__, con, msg,
	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);

	if (front_crc != con->in_front_crc) {
		pr_err("bad front crc, calculated %u, expected %u\n",
		       con->in_front_crc, front_crc);
		return -EBADMSG;
	}

	if (middle_crc != con->in_middle_crc) {
		pr_err("bad middle crc, calculated %u, expected %u\n",
		       con->in_middle_crc, middle_crc);
		return -EBADMSG;
	}

	if (data_crc != con->in_data_crc) {
		pr_err("bad data crc, calculated %u, expected %u\n",
		       con->in_data_crc, data_crc);
		return -EBADMSG;
	}

	return 0;
}
702  
setup_crypto(struct ceph_connection * con,const u8 * session_key,int session_key_len,const u8 * con_secret,int con_secret_len)703  static int setup_crypto(struct ceph_connection *con,
704  			const u8 *session_key, int session_key_len,
705  			const u8 *con_secret, int con_secret_len)
706  {
707  	unsigned int noio_flag;
708  	int ret;
709  
710  	dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
711  	     __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
712  	WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req);
713  
714  	if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
715  	    con->v2.con_mode != CEPH_CON_MODE_SECURE) {
716  		pr_err("bad con_mode %d\n", con->v2.con_mode);
717  		return -EINVAL;
718  	}
719  
720  	if (!session_key_len) {
721  		WARN_ON(con->v2.con_mode != CEPH_CON_MODE_CRC);
722  		WARN_ON(con_secret_len);
723  		return 0;  /* auth_none */
724  	}
725  
726  	noio_flag = memalloc_noio_save();
727  	con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
728  	memalloc_noio_restore(noio_flag);
729  	if (IS_ERR(con->v2.hmac_tfm)) {
730  		ret = PTR_ERR(con->v2.hmac_tfm);
731  		con->v2.hmac_tfm = NULL;
732  		pr_err("failed to allocate hmac tfm context: %d\n", ret);
733  		return ret;
734  	}
735  
736  	ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
737  				  session_key_len);
738  	if (ret) {
739  		pr_err("failed to set hmac key: %d\n", ret);
740  		return ret;
741  	}
742  
743  	if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
744  		WARN_ON(con_secret_len);
745  		return 0;  /* auth_x, plain mode */
746  	}
747  
748  	if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
749  		pr_err("con_secret too small %d\n", con_secret_len);
750  		return -EINVAL;
751  	}
752  
753  	noio_flag = memalloc_noio_save();
754  	con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
755  	memalloc_noio_restore(noio_flag);
756  	if (IS_ERR(con->v2.gcm_tfm)) {
757  		ret = PTR_ERR(con->v2.gcm_tfm);
758  		con->v2.gcm_tfm = NULL;
759  		pr_err("failed to allocate gcm tfm context: %d\n", ret);
760  		return ret;
761  	}
762  
763  	WARN_ON((unsigned long)con_secret &
764  		crypto_aead_alignmask(con->v2.gcm_tfm));
765  	ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
766  	if (ret) {
767  		pr_err("failed to set gcm key: %d\n", ret);
768  		return ret;
769  	}
770  
771  	WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
772  	ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
773  	if (ret) {
774  		pr_err("failed to set gcm tag size: %d\n", ret);
775  		return ret;
776  	}
777  
778  	con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO);
779  	if (!con->v2.gcm_req) {
780  		pr_err("failed to allocate gcm request\n");
781  		return -ENOMEM;
782  	}
783  
784  	crypto_init_wait(&con->v2.gcm_wait);
785  	aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
786  				  crypto_req_done, &con->v2.gcm_wait);
787  
788  	memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
789  	       CEPH_GCM_IV_LEN);
790  	memcpy(&con->v2.out_gcm_nonce,
791  	       con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
792  	       CEPH_GCM_IV_LEN);
793  	return 0;  /* auth_x, secure mode */
794  }
795  
/*
 * Compute the HMAC-SHA256 of the concatenation of @kvec_cnt buffers in
 * @kvecs using the connection's hmac transform and store it in @hmac.
 *
 * Without an hmac transform (auth_none) @hmac is filled with
 * SHA256_DIGEST_SIZE zero bytes instead.  The on-stack descriptor is
 * wiped before returning.
 *
 * Return: 0 on success or the error from the failing crypto_shash_*()
 * call.
 */
static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
		       int kvec_cnt, u8 *hmac)
{
	SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm);  /* tfm arg is ignored */
	int err;
	int idx;

	dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con,
	     con->v2.hmac_tfm, kvec_cnt);

	if (!con->v2.hmac_tfm) {
		memset(hmac, 0, SHA256_DIGEST_SIZE);
		return 0;  /* auth_none */
	}

	desc->tfm = con->v2.hmac_tfm;
	err = crypto_shash_init(desc);

	/* stop feeding data at the first failure */
	for (idx = 0; !err && idx < kvec_cnt; idx++)
		err = crypto_shash_update(desc, kvecs[idx].iov_base,
					  kvecs[idx].iov_len);

	if (!err)
		err = crypto_shash_final(desc, hmac);

	shash_desc_zero(desc);
	return err;  /* auth_x, both plain and secure modes */
}
829  
gcm_inc_nonce(struct ceph_gcm_nonce * nonce)830  static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
831  {
832  	u64 counter;
833  
834  	counter = le64_to_cpu(nonce->counter);
835  	nonce->counter = cpu_to_le64(counter + 1);
836  }
837  
/*
 * Encrypt or decrypt @src_len bytes from @src into @dst with the
 * connection's GCM request, waiting for completion.  The out nonce is
 * used for encryption and the in nonce for decryption; no AAD is set.
 * The nonce that was used is incremented only on success.
 *
 * Return: 0 on success or the error from the crypto operation.
 */
static int gcm_crypt(struct ceph_connection *con, bool encrypt,
		     struct scatterlist *src, struct scatterlist *dst,
		     int src_len)
{
	struct aead_request *req = con->v2.gcm_req;
	struct ceph_gcm_nonce *nonce;
	int err;

	if (encrypt)
		nonce = &con->v2.out_gcm_nonce;
	else
		nonce = &con->v2.in_gcm_nonce;

	aead_request_set_ad(req, 0);  /* no AAD */
	aead_request_set_crypt(req, src, dst, src_len, (u8 *)nonce);

	if (encrypt)
		err = crypto_aead_encrypt(req);
	else
		err = crypto_aead_decrypt(req);

	err = crypto_wait_req(err, &con->v2.gcm_wait);
	if (err)
		return err;

	gcm_inc_nonce(nonce);
	return 0;
}
858  
get_bvec_at(struct ceph_msg_data_cursor * cursor,struct bio_vec * bv)859  static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
860  			struct bio_vec *bv)
861  {
862  	struct page *page;
863  	size_t off, len;
864  
865  	WARN_ON(!cursor->total_resid);
866  
867  	/* skip zero-length data items */
868  	while (!cursor->resid)
869  		ceph_msg_data_advance(cursor, 0);
870  
871  	/* get a piece of data, cursor isn't advanced */
872  	page = ceph_msg_data_next(cursor, &off, &len);
873  	bvec_set_page(bv, page, len, off);
874  }
875  
calc_sg_cnt(void * buf,int buf_len)876  static int calc_sg_cnt(void *buf, int buf_len)
877  {
878  	int sg_cnt;
879  
880  	if (!buf_len)
881  		return 0;
882  
883  	sg_cnt = need_padding(buf_len) ? 1 : 0;
884  	if (is_vmalloc_addr(buf)) {
885  		WARN_ON(offset_in_page(buf));
886  		sg_cnt += PAGE_ALIGN(buf_len) >> PAGE_SHIFT;
887  	} else {
888  		sg_cnt++;
889  	}
890  
891  	return sg_cnt;
892  }
893  
calc_sg_cnt_cursor(struct ceph_msg_data_cursor * cursor)894  static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor *cursor)
895  {
896  	int data_len = cursor->total_resid;
897  	struct bio_vec bv;
898  	int sg_cnt;
899  
900  	if (!data_len)
901  		return 0;
902  
903  	sg_cnt = need_padding(data_len) ? 1 : 0;
904  	do {
905  		get_bvec_at(cursor, &bv);
906  		sg_cnt++;
907  
908  		ceph_msg_data_advance(cursor, bv.bv_len);
909  	} while (cursor->total_resid);
910  
911  	return sg_cnt;
912  }
913  
/*
 * Add @buf/@buf_len to the scatterlist at *@sg, advancing *@sg past the
 * entries used.  A vmalloc'ed buffer is added page by page, anything
 * else as a single entry.  When @buf_len needs padding, padding_len()
 * bytes of @pad are added as one more entry.  An empty buffer adds
 * nothing.
 */
static void init_sgs(struct scatterlist **sg, void *buf, int buf_len, u8 *pad)
{
	void *const end = buf + buf_len;
	void *cur;

	if (!buf_len)
		return;

	if (!is_vmalloc_addr(buf)) {
		sg_set_buf(*sg, buf, buf_len);
		*sg = sg_next(*sg);
	} else {
		int chunk;

		/* buf_len != 0, so this runs at least once */
		for (cur = buf; cur != end; cur += chunk) {
			struct page *page = vmalloc_to_page(cur);

			chunk = min_t(int, end - cur, PAGE_SIZE);
			WARN_ON(!page || !chunk || offset_in_page(cur));
			sg_set_page(*sg, page, chunk, 0);
			*sg = sg_next(*sg);
		}
	}

	if (need_padding(buf_len)) {
		sg_set_buf(*sg, pad, padding_len(buf_len));
		*sg = sg_next(*sg);
	}
}
944  
/*
 * Add the data left at @cursor to the scatterlist at *@sg, one entry per
 * piece returned by get_bvec_at(), advancing both *@sg and the cursor.
 * When the total needs padding, padding_len() bytes of @pad are added as
 * one more entry.  A cursor with nothing left adds nothing.
 */
static void init_sgs_cursor(struct scatterlist **sg,
			    struct ceph_msg_data_cursor *cursor, u8 *pad)
{
	int total = cursor->total_resid;
	struct bio_vec piece;

	if (!total)
		return;

	/* total != 0 here, so there is at least one piece to add */
	while (cursor->total_resid) {
		get_bvec_at(cursor, &piece);
		sg_set_page(*sg, piece.bv_page, piece.bv_len,
			    piece.bv_offset);
		*sg = sg_next(*sg);

		ceph_msg_data_advance(cursor, piece.bv_len);
	}

	if (need_padding(total)) {
		sg_set_buf(*sg, pad, padding_len(total));
		*sg = sg_next(*sg);
	}
}
967  
968  /**
969   * init_sgs_pages: set up scatterlist on an array of page pointers
970   * @sg:		scatterlist to populate
971   * @pages:	pointer to page array
972   * @dpos:	position in the array to start (bytes)
973   * @dlen:	len to add to sg (bytes)
974   * @pad:	pointer to pad destination (if any)
975   *
976   * Populate the scatterlist from the page array, starting at an arbitrary
977   * byte in the array and running for a specified length.
978   */
init_sgs_pages(struct scatterlist ** sg,struct page ** pages,int dpos,int dlen,u8 * pad)979  static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
980  			   int dpos, int dlen, u8 *pad)
981  {
982  	int idx = dpos >> PAGE_SHIFT;
983  	int off = offset_in_page(dpos);
984  	int resid = dlen;
985  
986  	do {
987  		int len = min(resid, (int)PAGE_SIZE - off);
988  
989  		sg_set_page(*sg, pages[idx], len, off);
990  		*sg = sg_next(*sg);
991  		off = 0;
992  		++idx;
993  		resid -= len;
994  	} while (resid);
995  
996  	if (need_padding(dlen)) {
997  		sg_set_buf(*sg, pad, padding_len(dlen));
998  		*sg = sg_next(*sg);
999  	}
1000  }
1001  
/*
 * Build in @sgt the scatterlist for a message tail: front, middle and
 * data, each followed by a padding entry when need_padding() requires
 * one, and finally @epilogue as the last entry.
 *
 * @front_pad, @middle_pad, @data_pad: buffers backing the padding entries
 * @epilogue: buffer backing the last entry, which covers
 *            CEPH_GCM_BLOCK_LEN bytes plus CEPH_GCM_TAG_LEN more if
 *            @add_tag is set
 * @pages, @dpos: if @pages is non-NULL, the data entries describe this
 *            page array starting at byte @dpos; otherwise they are
 *            produced by walking a data cursor initialized on @msg
 *
 * If @msg has no front, middle or data, @sgt is left untouched and 0
 * is returned.  Otherwise returns 0 with @sgt allocated, or the error
 * from sg_alloc_table().
 */
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
			     u8 *front_pad, u8 *middle_pad, u8 *data_pad,
			     void *epilogue, struct page **pages, int dpos,
			     bool add_tag)
{
	struct ceph_msg_data_cursor cursor;
	struct scatterlist *sg;
	int flen = front_len(msg);
	int mlen = middle_len(msg);
	int dlen = data_len(msg);
	int epil_len = CEPH_GCM_BLOCK_LEN;
	int nents = 1;  /* epilogue + [auth tag] */
	int ret;

	if (!(flen || mlen || dlen))
		return 0;

	/* first pass: count the entries */
	if (flen)
		nents += calc_sg_cnt(msg->front.iov_base, flen);
	if (mlen)
		nents += calc_sg_cnt(msg->middle->vec.iov_base, mlen);
	if (dlen && pages) {
		nents += calc_pages_for(dpos, dlen);
		if (need_padding(dlen))
			nents++;
	} else if (dlen) {
		ceph_msg_data_cursor_init(&cursor, msg, dlen);
		nents += calc_sg_cnt_cursor(&cursor);
	}

	ret = sg_alloc_table(sgt, nents, GFP_NOIO);
	if (ret)
		return ret;

	/* second pass: fill them in, in the same order */
	sg = sgt->sgl;
	if (flen)
		init_sgs(&sg, msg->front.iov_base, flen, front_pad);
	if (mlen)
		init_sgs(&sg, msg->middle->vec.iov_base, mlen, middle_pad);
	if (dlen && pages) {
		init_sgs_pages(&sg, pages, dpos, dlen, data_pad);
	} else if (dlen) {
		/* the counting pass used up the cursor, start over */
		ceph_msg_data_cursor_init(&cursor, msg, dlen);
		init_sgs_cursor(&sg, &cursor, data_pad);
	}

	/* exactly one entry must be left, for the epilogue */
	WARN_ON(!sg_is_last(sg));
	if (add_tag)
		epil_len += CEPH_GCM_TAG_LEN;
	sg_set_buf(sg, epilogue, epil_len);
	return 0;
}
1059  
decrypt_preamble(struct ceph_connection * con)1060  static int decrypt_preamble(struct ceph_connection *con)
1061  {
1062  	struct scatterlist sg;
1063  
1064  	sg_init_one(&sg, con->v2.in_buf, CEPH_PREAMBLE_SECURE_LEN);
1065  	return gcm_crypt(con, false, &sg, &sg, CEPH_PREAMBLE_SECURE_LEN);
1066  }
1067  
decrypt_control_remainder(struct ceph_connection * con)1068  static int decrypt_control_remainder(struct ceph_connection *con)
1069  {
1070  	int ctrl_len = con->v2.in_desc.fd_lens[0];
1071  	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1072  	int pt_len = padding_len(rem_len) + CEPH_GCM_TAG_LEN;
1073  	struct scatterlist sgs[2];
1074  
1075  	WARN_ON(con->v2.in_kvecs[0].iov_len != rem_len);
1076  	WARN_ON(con->v2.in_kvecs[1].iov_len != pt_len);
1077  
1078  	sg_init_table(sgs, 2);
1079  	sg_set_buf(&sgs[0], con->v2.in_kvecs[0].iov_base, rem_len);
1080  	sg_set_buf(&sgs[1], con->v2.in_buf, pt_len);
1081  
1082  	return gcm_crypt(con, false, sgs, sgs,
1083  			 padded_len(rem_len) + CEPH_GCM_TAG_LEN);
1084  }
1085  
1086  /* Process sparse read data that lives in a buffer */
process_v2_sparse_read(struct ceph_connection * con,struct page ** pages,int spos)1087  static int process_v2_sparse_read(struct ceph_connection *con,
1088  				  struct page **pages, int spos)
1089  {
1090  	struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
1091  	int ret;
1092  
1093  	for (;;) {
1094  		char *buf = NULL;
1095  
1096  		ret = con->ops->sparse_read(con, cursor, &buf);
1097  		if (ret <= 0)
1098  			return ret;
1099  
1100  		dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
1101  
1102  		do {
1103  			int idx = spos >> PAGE_SHIFT;
1104  			int soff = offset_in_page(spos);
1105  			struct page *spage = con->v2.in_enc_pages[idx];
1106  			int len = min_t(int, ret, PAGE_SIZE - soff);
1107  
1108  			if (buf) {
1109  				memcpy_from_page(buf, spage, soff, len);
1110  				buf += len;
1111  			} else {
1112  				struct bio_vec bv;
1113  
1114  				get_bvec_at(cursor, &bv);
1115  				len = min_t(int, len, bv.bv_len);
1116  				memcpy_page(bv.bv_page, bv.bv_offset,
1117  					    spage, soff, len);
1118  				ceph_msg_data_advance(cursor, len);
1119  			}
1120  			spos += len;
1121  			ret -= len;
1122  		} while (ret);
1123  	}
1124  }
1125  
decrypt_tail(struct ceph_connection * con)1126  static int decrypt_tail(struct ceph_connection *con)
1127  {
1128  	struct sg_table enc_sgt = {};
1129  	struct sg_table sgt = {};
1130  	struct page **pages = NULL;
1131  	bool sparse = !!con->in_msg->sparse_read_total;
1132  	int dpos = 0;
1133  	int tail_len;
1134  	int ret;
1135  
1136  	tail_len = tail_onwire_len(con->in_msg, true);
1137  	ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
1138  					con->v2.in_enc_page_cnt, 0, tail_len,
1139  					GFP_NOIO);
1140  	if (ret)
1141  		goto out;
1142  
1143  	if (sparse) {
1144  		dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
1145  		pages = con->v2.in_enc_pages;
1146  	}
1147  
1148  	ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
1149  				MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
1150  				con->v2.in_buf, pages, dpos, true);
1151  	if (ret)
1152  		goto out;
1153  
1154  	dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
1155  	     con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
1156  	ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
1157  	if (ret)
1158  		goto out;
1159  
1160  	if (sparse && data_len(con->in_msg)) {
1161  		ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
1162  		if (ret)
1163  			goto out;
1164  	}
1165  
1166  	WARN_ON(!con->v2.in_enc_page_cnt);
1167  	ceph_release_page_vector(con->v2.in_enc_pages,
1168  				 con->v2.in_enc_page_cnt);
1169  	con->v2.in_enc_pages = NULL;
1170  	con->v2.in_enc_page_cnt = 0;
1171  
1172  out:
1173  	sg_free_table(&sgt);
1174  	sg_free_table(&enc_sgt);
1175  	return ret;
1176  }
1177  
prepare_banner(struct ceph_connection * con)1178  static int prepare_banner(struct ceph_connection *con)
1179  {
1180  	int buf_len = CEPH_BANNER_V2_LEN + 2 + 8 + 8;
1181  	void *buf, *p;
1182  
1183  	buf = alloc_conn_buf(con, buf_len);
1184  	if (!buf)
1185  		return -ENOMEM;
1186  
1187  	p = buf;
1188  	ceph_encode_copy(&p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN);
1189  	ceph_encode_16(&p, sizeof(u64) + sizeof(u64));
1190  	ceph_encode_64(&p, CEPH_MSGR2_SUPPORTED_FEATURES);
1191  	ceph_encode_64(&p, CEPH_MSGR2_REQUIRED_FEATURES);
1192  	WARN_ON(p != buf + buf_len);
1193  
1194  	add_out_kvec(con, buf, buf_len);
1195  	add_out_sign_kvec(con, buf, buf_len);
1196  	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1197  	return 0;
1198  }
1199  
1200  /*
1201   * base:
1202   *   preamble
1203   *   control body (ctrl_len bytes)
1204   *   space for control crc
1205   *
1206   * extdata (optional):
1207   *   control body (extdata_len bytes)
1208   *
1209   * Compute control crc and gather base and extdata into:
1210   *
1211   *   preamble
1212   *   control body (ctrl_len + extdata_len bytes)
1213   *   control crc
1214   *
1215   * Preamble should already be encoded at the start of base.
1216   */
prepare_head_plain(struct ceph_connection * con,void * base,int ctrl_len,void * extdata,int extdata_len,bool to_be_signed)1217  static void prepare_head_plain(struct ceph_connection *con, void *base,
1218  			       int ctrl_len, void *extdata, int extdata_len,
1219  			       bool to_be_signed)
1220  {
1221  	int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN;
1222  	void *crcp = base + base_len - CEPH_CRC_LEN;
1223  	u32 crc;
1224  
1225  	crc = crc32c(-1, CTRL_BODY(base), ctrl_len);
1226  	if (extdata_len)
1227  		crc = crc32c(crc, extdata, extdata_len);
1228  	put_unaligned_le32(crc, crcp);
1229  
1230  	if (!extdata_len) {
1231  		add_out_kvec(con, base, base_len);
1232  		if (to_be_signed)
1233  			add_out_sign_kvec(con, base, base_len);
1234  		return;
1235  	}
1236  
1237  	add_out_kvec(con, base, crcp - base);
1238  	add_out_kvec(con, extdata, extdata_len);
1239  	add_out_kvec(con, crcp, CEPH_CRC_LEN);
1240  	if (to_be_signed) {
1241  		add_out_sign_kvec(con, base, crcp - base);
1242  		add_out_sign_kvec(con, extdata, extdata_len);
1243  		add_out_sign_kvec(con, crcp, CEPH_CRC_LEN);
1244  	}
1245  }
1246  
prepare_head_secure_small(struct ceph_connection * con,void * base,int ctrl_len)1247  static int prepare_head_secure_small(struct ceph_connection *con,
1248  				     void *base, int ctrl_len)
1249  {
1250  	struct scatterlist sg;
1251  	int ret;
1252  
1253  	/* inline buffer padding? */
1254  	if (ctrl_len < CEPH_PREAMBLE_INLINE_LEN)
1255  		memset(CTRL_BODY(base) + ctrl_len, 0,
1256  		       CEPH_PREAMBLE_INLINE_LEN - ctrl_len);
1257  
1258  	sg_init_one(&sg, base, CEPH_PREAMBLE_SECURE_LEN);
1259  	ret = gcm_crypt(con, true, &sg, &sg,
1260  			CEPH_PREAMBLE_SECURE_LEN - CEPH_GCM_TAG_LEN);
1261  	if (ret)
1262  		return ret;
1263  
1264  	add_out_kvec(con, base, CEPH_PREAMBLE_SECURE_LEN);
1265  	return 0;
1266  }
1267  
1268  /*
1269   * base:
1270   *   preamble
1271   *   control body (ctrl_len bytes)
1272   *   space for padding, if needed
1273   *   space for control remainder auth tag
1274   *   space for preamble auth tag
1275   *
1276   * Encrypt preamble and the inline portion, then encrypt the remainder
1277   * and gather into:
1278   *
1279   *   preamble
1280   *   control body (48 bytes)
1281   *   preamble auth tag
1282   *   control body (ctrl_len - 48 bytes)
1283   *   zero padding, if needed
1284   *   control remainder auth tag
1285   *
1286   * Preamble should already be encoded at the start of base.
1287   */
prepare_head_secure_big(struct ceph_connection * con,void * base,int ctrl_len)1288  static int prepare_head_secure_big(struct ceph_connection *con,
1289  				   void *base, int ctrl_len)
1290  {
1291  	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1292  	void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN;
1293  	void *rem_tag = rem + padded_len(rem_len);
1294  	void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN;
1295  	struct scatterlist sgs[2];
1296  	int ret;
1297  
1298  	sg_init_table(sgs, 2);
1299  	sg_set_buf(&sgs[0], base, rem - base);
1300  	sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
1301  	ret = gcm_crypt(con, true, sgs, sgs, rem - base);
1302  	if (ret)
1303  		return ret;
1304  
1305  	/* control remainder padding? */
1306  	if (need_padding(rem_len))
1307  		memset(rem + rem_len, 0, padding_len(rem_len));
1308  
1309  	sg_init_one(&sgs[0], rem, pmbl_tag - rem);
1310  	ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem);
1311  	if (ret)
1312  		return ret;
1313  
1314  	add_out_kvec(con, base, rem - base);
1315  	add_out_kvec(con, pmbl_tag, CEPH_GCM_TAG_LEN);
1316  	add_out_kvec(con, rem, pmbl_tag - rem);
1317  	return 0;
1318  }
1319  
/*
 * Encode the preamble for a single-segment frame with tag @tag into
 * @base and prepare the head for sending, then set
 * CEPH_CON_F_WRITE_PENDING.
 *
 * @base: buffer with the control body already encoded after the room
 *        for the preamble; must not be vmalloc'ed
 * @ctrl_len: length of the control body in @base, must be non-zero
 * @extdata, @extdata_len: optional continuation of the control body
 * @to_be_signed: also add the head to the out sign kvecs
 *
 * On a secure connection neither extdata nor signing is accepted.
 *
 * Returns 0, -EINVAL for invalid arguments or the error from the
 * secure head preparation.
 */
static int __prepare_control(struct ceph_connection *con, int tag,
			     void *base, int ctrl_len, void *extdata,
			     int extdata_len, bool to_be_signed)
{
	int total_len = ctrl_len + extdata_len;
	struct ceph_frame_desc desc;
	int ret;

	dout("%s con %p tag %d len %d (%d+%d)\n", __func__, con, tag,
	     total_len, ctrl_len, extdata_len);

	/* extdata may be vmalloc'ed but not base */
	if (WARN_ON(is_vmalloc_addr(base) || !ctrl_len))
		return -EINVAL;

	init_frame_desc(&desc, tag, &total_len, 1);
	encode_preamble(&desc, base);

	if (!con_secure(con)) {
		prepare_head_plain(con, base, ctrl_len, extdata, extdata_len,
				   to_be_signed);
	} else {
		if (WARN_ON(extdata_len || to_be_signed))
			return -EINVAL;

		/*
		 * big: partially inlined, inline buffer is full
		 * small: fully inlined, inline buffer may need padding
		 */
		ret = ctrl_len > CEPH_PREAMBLE_INLINE_LEN ?
			prepare_head_secure_big(con, base, ctrl_len) :
			prepare_head_secure_small(con, base, ctrl_len);
		if (ret)
			return ret;
	}

	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
	return 0;
}
1358  
prepare_control(struct ceph_connection * con,int tag,void * base,int ctrl_len)1359  static int prepare_control(struct ceph_connection *con, int tag,
1360  			   void *base, int ctrl_len)
1361  {
1362  	return __prepare_control(con, tag, base, ctrl_len, NULL, 0, false);
1363  }
1364  
prepare_hello(struct ceph_connection * con)1365  static int prepare_hello(struct ceph_connection *con)
1366  {
1367  	void *buf, *p;
1368  	int ctrl_len;
1369  
1370  	ctrl_len = 1 + ceph_entity_addr_encoding_len(&con->peer_addr);
1371  	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1372  	if (!buf)
1373  		return -ENOMEM;
1374  
1375  	p = CTRL_BODY(buf);
1376  	ceph_encode_8(&p, CEPH_ENTITY_TYPE_CLIENT);
1377  	ceph_encode_entity_addr(&p, &con->peer_addr);
1378  	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1379  
1380  	return __prepare_control(con, FRAME_TAG_HELLO, buf, ctrl_len,
1381  				 NULL, 0, true);
1382  }
1383  
/*
 * Control body size chosen so that head_onwire_len(AUTH_BUF_LEN, false)
 * is 512.
 */
#define AUTH_BUF_LEN	(512 - CEPH_PREAMBLE_PLAIN_LEN - CEPH_CRC_LEN)
1386  
prepare_auth_request(struct ceph_connection * con)1387  static int prepare_auth_request(struct ceph_connection *con)
1388  {
1389  	void *authorizer, *authorizer_copy;
1390  	int ctrl_len, authorizer_len;
1391  	void *buf;
1392  	int ret;
1393  
1394  	ctrl_len = AUTH_BUF_LEN;
1395  	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1396  	if (!buf)
1397  		return -ENOMEM;
1398  
1399  	mutex_unlock(&con->mutex);
1400  	ret = con->ops->get_auth_request(con, CTRL_BODY(buf), &ctrl_len,
1401  					 &authorizer, &authorizer_len);
1402  	mutex_lock(&con->mutex);
1403  	if (con->state != CEPH_CON_S_V2_HELLO) {
1404  		dout("%s con %p state changed to %d\n", __func__, con,
1405  		     con->state);
1406  		return -EAGAIN;
1407  	}
1408  
1409  	dout("%s con %p get_auth_request ret %d\n", __func__, con, ret);
1410  	if (ret)
1411  		return ret;
1412  
1413  	authorizer_copy = alloc_conn_buf(con, authorizer_len);
1414  	if (!authorizer_copy)
1415  		return -ENOMEM;
1416  
1417  	memcpy(authorizer_copy, authorizer, authorizer_len);
1418  
1419  	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST, buf, ctrl_len,
1420  				 authorizer_copy, authorizer_len, true);
1421  }
1422  
prepare_auth_request_more(struct ceph_connection * con,void * reply,int reply_len)1423  static int prepare_auth_request_more(struct ceph_connection *con,
1424  				     void *reply, int reply_len)
1425  {
1426  	int ctrl_len, authorizer_len;
1427  	void *authorizer;
1428  	void *buf;
1429  	int ret;
1430  
1431  	ctrl_len = AUTH_BUF_LEN;
1432  	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
1433  	if (!buf)
1434  		return -ENOMEM;
1435  
1436  	mutex_unlock(&con->mutex);
1437  	ret = con->ops->handle_auth_reply_more(con, reply, reply_len,
1438  					       CTRL_BODY(buf), &ctrl_len,
1439  					       &authorizer, &authorizer_len);
1440  	mutex_lock(&con->mutex);
1441  	if (con->state != CEPH_CON_S_V2_AUTH) {
1442  		dout("%s con %p state changed to %d\n", __func__, con,
1443  		     con->state);
1444  		return -EAGAIN;
1445  	}
1446  
1447  	dout("%s con %p handle_auth_reply_more ret %d\n", __func__, con, ret);
1448  	if (ret)
1449  		return ret;
1450  
1451  	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST_MORE, buf,
1452  				 ctrl_len, authorizer, authorizer_len, true);
1453  }
1454  
prepare_auth_signature(struct ceph_connection * con)1455  static int prepare_auth_signature(struct ceph_connection *con)
1456  {
1457  	void *buf;
1458  	int ret;
1459  
1460  	buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
1461  						  con_secure(con)));
1462  	if (!buf)
1463  		return -ENOMEM;
1464  
1465  	ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
1466  			  CTRL_BODY(buf));
1467  	if (ret)
1468  		return ret;
1469  
1470  	return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
1471  			       SHA256_DIGEST_SIZE);
1472  }
1473  
prepare_client_ident(struct ceph_connection * con)1474  static int prepare_client_ident(struct ceph_connection *con)
1475  {
1476  	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1477  	struct ceph_client *client = from_msgr(con->msgr);
1478  	u64 global_id = ceph_client_gid(client);
1479  	void *buf, *p;
1480  	int ctrl_len;
1481  
1482  	WARN_ON(con->v2.server_cookie);
1483  	WARN_ON(con->v2.connect_seq);
1484  	WARN_ON(con->v2.peer_global_seq);
1485  
1486  	if (!con->v2.client_cookie) {
1487  		do {
1488  			get_random_bytes(&con->v2.client_cookie,
1489  					 sizeof(con->v2.client_cookie));
1490  		} while (!con->v2.client_cookie);
1491  		dout("%s con %p generated cookie 0x%llx\n", __func__, con,
1492  		     con->v2.client_cookie);
1493  	} else {
1494  		dout("%s con %p cookie already set 0x%llx\n", __func__, con,
1495  		     con->v2.client_cookie);
1496  	}
1497  
1498  	dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
1499  	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1500  	     ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce),
1501  	     global_id, con->v2.global_seq, client->supported_features,
1502  	     client->required_features, con->v2.client_cookie);
1503  
1504  	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) +
1505  		   ceph_entity_addr_encoding_len(&con->peer_addr) + 6 * 8;
1506  	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1507  	if (!buf)
1508  		return -ENOMEM;
1509  
1510  	p = CTRL_BODY(buf);
1511  	ceph_encode_8(&p, 2);  /* addrvec marker */
1512  	ceph_encode_32(&p, 1);  /* addr_cnt */
1513  	ceph_encode_entity_addr(&p, my_addr);
1514  	ceph_encode_entity_addr(&p, &con->peer_addr);
1515  	ceph_encode_64(&p, global_id);
1516  	ceph_encode_64(&p, con->v2.global_seq);
1517  	ceph_encode_64(&p, client->supported_features);
1518  	ceph_encode_64(&p, client->required_features);
1519  	ceph_encode_64(&p, 0);  /* flags */
1520  	ceph_encode_64(&p, con->v2.client_cookie);
1521  	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1522  
1523  	return prepare_control(con, FRAME_TAG_CLIENT_IDENT, buf, ctrl_len);
1524  }
1525  
prepare_session_reconnect(struct ceph_connection * con)1526  static int prepare_session_reconnect(struct ceph_connection *con)
1527  {
1528  	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
1529  	void *buf, *p;
1530  	int ctrl_len;
1531  
1532  	WARN_ON(!con->v2.client_cookie);
1533  	WARN_ON(!con->v2.server_cookie);
1534  	WARN_ON(!con->v2.connect_seq);
1535  	WARN_ON(!con->v2.peer_global_seq);
1536  
1537  	dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
1538  	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
1539  	     con->v2.client_cookie, con->v2.server_cookie, con->v2.global_seq,
1540  	     con->v2.connect_seq, con->in_seq);
1541  
1542  	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) + 5 * 8;
1543  	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
1544  	if (!buf)
1545  		return -ENOMEM;
1546  
1547  	p = CTRL_BODY(buf);
1548  	ceph_encode_8(&p, 2);  /* entity_addrvec_t marker */
1549  	ceph_encode_32(&p, 1);  /* my_addrs len */
1550  	ceph_encode_entity_addr(&p, my_addr);
1551  	ceph_encode_64(&p, con->v2.client_cookie);
1552  	ceph_encode_64(&p, con->v2.server_cookie);
1553  	ceph_encode_64(&p, con->v2.global_seq);
1554  	ceph_encode_64(&p, con->v2.connect_seq);
1555  	ceph_encode_64(&p, con->in_seq);
1556  	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);
1557  
1558  	return prepare_control(con, FRAME_TAG_SESSION_RECONNECT, buf, ctrl_len);
1559  }
1560  
prepare_keepalive2(struct ceph_connection * con)1561  static int prepare_keepalive2(struct ceph_connection *con)
1562  {
1563  	struct ceph_timespec *ts = CTRL_BODY(con->v2.out_buf);
1564  	struct timespec64 now;
1565  
1566  	ktime_get_real_ts64(&now);
1567  	dout("%s con %p timestamp %lld.%09ld\n", __func__, con, now.tv_sec,
1568  	     now.tv_nsec);
1569  
1570  	ceph_encode_timespec64(ts, &now);
1571  
1572  	reset_out_kvecs(con);
1573  	return prepare_control(con, FRAME_TAG_KEEPALIVE2, con->v2.out_buf,
1574  			       sizeof(struct ceph_timespec));
1575  }
1576  
prepare_ack(struct ceph_connection * con)1577  static int prepare_ack(struct ceph_connection *con)
1578  {
1579  	void *p;
1580  
1581  	dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1582  	     con->in_seq_acked, con->in_seq);
1583  	con->in_seq_acked = con->in_seq;
1584  
1585  	p = CTRL_BODY(con->v2.out_buf);
1586  	ceph_encode_64(&p, con->in_seq_acked);
1587  
1588  	reset_out_kvecs(con);
1589  	return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
1590  }
1591  
/*
 * Encode the plain epilogue via encode_epilogue_plain() and add the
 * CEPH_EPILOGUE_PLAIN_LEN bytes of con->v2.out_epil to the out kvecs.
 * The three crcs in con->v2.out_epil are logged as they are on entry.
 */
static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
{
	void *epil = &con->v2.out_epil;

	dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
	     con->out_msg, aborted, con->v2.out_epil.front_crc,
	     con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);

	encode_epilogue_plain(con, aborted);
	add_out_kvec(con, epil, CEPH_EPILOGUE_PLAIN_LEN);
}
1601  
1602  /*
1603   * For "used" empty segments, crc is -1.  For unused (trailing)
1604   * segments, crc is 0.
1605   */
prepare_message_plain(struct ceph_connection * con)1606  static void prepare_message_plain(struct ceph_connection *con)
1607  {
1608  	struct ceph_msg *msg = con->out_msg;
1609  
1610  	prepare_head_plain(con, con->v2.out_buf,
1611  			   sizeof(struct ceph_msg_header2), NULL, 0, false);
1612  
1613  	if (!front_len(msg) && !middle_len(msg)) {
1614  		if (!data_len(msg)) {
1615  			/*
1616  			 * Empty message: once the head is written,
1617  			 * we are done -- there is no epilogue.
1618  			 */
1619  			con->v2.out_state = OUT_S_FINISH_MESSAGE;
1620  			return;
1621  		}
1622  
1623  		con->v2.out_epil.front_crc = -1;
1624  		con->v2.out_epil.middle_crc = -1;
1625  		con->v2.out_state = OUT_S_QUEUE_DATA;
1626  		return;
1627  	}
1628  
1629  	if (front_len(msg)) {
1630  		con->v2.out_epil.front_crc = crc32c(-1, msg->front.iov_base,
1631  						    front_len(msg));
1632  		add_out_kvec(con, msg->front.iov_base, front_len(msg));
1633  	} else {
1634  		/* middle (at least) is there, checked above */
1635  		con->v2.out_epil.front_crc = -1;
1636  	}
1637  
1638  	if (middle_len(msg)) {
1639  		con->v2.out_epil.middle_crc =
1640  			crc32c(-1, msg->middle->vec.iov_base, middle_len(msg));
1641  		add_out_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
1642  	} else {
1643  		con->v2.out_epil.middle_crc = data_len(msg) ? -1 : 0;
1644  	}
1645  
1646  	if (data_len(msg)) {
1647  		con->v2.out_state = OUT_S_QUEUE_DATA;
1648  	} else {
1649  		con->v2.out_epil.data_crc = 0;
1650  		prepare_epilogue_plain(con, false);
1651  		con->v2.out_state = OUT_S_FINISH_MESSAGE;
1652  	}
1653  }
1654  
1655  /*
1656   * Unfortunately the kernel crypto API doesn't support streaming
1657   * (piecewise) operation for AEAD algorithms, so we can't get away
1658   * with a fixed size buffer and a couple sgs.  Instead, we have to
1659   * allocate pages for the entire tail of the message (currently up
1660   * to ~32M) and two sgs arrays (up to ~256K each)...
1661   */
prepare_message_secure(struct ceph_connection * con)1662  static int prepare_message_secure(struct ceph_connection *con)
1663  {
1664  	void *zerop = page_address(ceph_zero_page);
1665  	struct sg_table enc_sgt = {};
1666  	struct sg_table sgt = {};
1667  	struct page **enc_pages;
1668  	int enc_page_cnt;
1669  	int tail_len;
1670  	int ret;
1671  
1672  	ret = prepare_head_secure_small(con, con->v2.out_buf,
1673  					sizeof(struct ceph_msg_header2));
1674  	if (ret)
1675  		return ret;
1676  
1677  	tail_len = tail_onwire_len(con->out_msg, true);
1678  	if (!tail_len) {
1679  		/*
1680  		 * Empty message: once the head is written,
1681  		 * we are done -- there is no epilogue.
1682  		 */
1683  		con->v2.out_state = OUT_S_FINISH_MESSAGE;
1684  		return 0;
1685  	}
1686  
1687  	encode_epilogue_secure(con, false);
1688  	ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
1689  				&con->v2.out_epil, NULL, 0, false);
1690  	if (ret)
1691  		goto out;
1692  
1693  	enc_page_cnt = calc_pages_for(0, tail_len);
1694  	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
1695  	if (IS_ERR(enc_pages)) {
1696  		ret = PTR_ERR(enc_pages);
1697  		goto out;
1698  	}
1699  
1700  	WARN_ON(con->v2.out_enc_pages || con->v2.out_enc_page_cnt);
1701  	con->v2.out_enc_pages = enc_pages;
1702  	con->v2.out_enc_page_cnt = enc_page_cnt;
1703  	con->v2.out_enc_resid = tail_len;
1704  	con->v2.out_enc_i = 0;
1705  
1706  	ret = sg_alloc_table_from_pages(&enc_sgt, enc_pages, enc_page_cnt,
1707  					0, tail_len, GFP_NOIO);
1708  	if (ret)
1709  		goto out;
1710  
1711  	ret = gcm_crypt(con, true, sgt.sgl, enc_sgt.sgl,
1712  			tail_len - CEPH_GCM_TAG_LEN);
1713  	if (ret)
1714  		goto out;
1715  
1716  	dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
1717  	     con->out_msg, sgt.orig_nents, enc_page_cnt);
1718  	con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;
1719  
1720  out:
1721  	sg_free_table(&sgt);
1722  	sg_free_table(&enc_sgt);
1723  	return ret;
1724  }
1725  
prepare_message(struct ceph_connection * con)1726  static int prepare_message(struct ceph_connection *con)
1727  {
1728  	int lens[] = {
1729  		sizeof(struct ceph_msg_header2),
1730  		front_len(con->out_msg),
1731  		middle_len(con->out_msg),
1732  		data_len(con->out_msg)
1733  	};
1734  	struct ceph_frame_desc desc;
1735  	int ret;
1736  
1737  	dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
1738  	     con->out_msg, lens[0], lens[1], lens[2], lens[3]);
1739  
1740  	if (con->in_seq > con->in_seq_acked) {
1741  		dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
1742  		     con->in_seq_acked, con->in_seq);
1743  		con->in_seq_acked = con->in_seq;
1744  	}
1745  
1746  	reset_out_kvecs(con);
1747  	init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
1748  	encode_preamble(&desc, con->v2.out_buf);
1749  	fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr,
1750  		     con->in_seq_acked);
1751  
1752  	if (con_secure(con)) {
1753  		ret = prepare_message_secure(con);
1754  		if (ret)
1755  			return ret;
1756  	} else {
1757  		prepare_message_plain(con);
1758  	}
1759  
1760  	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
1761  	return 0;
1762  }
1763  
prepare_read_banner_prefix(struct ceph_connection * con)1764  static int prepare_read_banner_prefix(struct ceph_connection *con)
1765  {
1766  	void *buf;
1767  
1768  	buf = alloc_conn_buf(con, CEPH_BANNER_V2_PREFIX_LEN);
1769  	if (!buf)
1770  		return -ENOMEM;
1771  
1772  	reset_in_kvecs(con);
1773  	add_in_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1774  	add_in_sign_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
1775  	con->state = CEPH_CON_S_V2_BANNER_PREFIX;
1776  	return 0;
1777  }
1778  
prepare_read_banner_payload(struct ceph_connection * con,int payload_len)1779  static int prepare_read_banner_payload(struct ceph_connection *con,
1780  				       int payload_len)
1781  {
1782  	void *buf;
1783  
1784  	buf = alloc_conn_buf(con, payload_len);
1785  	if (!buf)
1786  		return -ENOMEM;
1787  
1788  	reset_in_kvecs(con);
1789  	add_in_kvec(con, buf, payload_len);
1790  	add_in_sign_kvec(con, buf, payload_len);
1791  	con->state = CEPH_CON_S_V2_BANNER_PAYLOAD;
1792  	return 0;
1793  }
1794  
prepare_read_preamble(struct ceph_connection * con)1795  static void prepare_read_preamble(struct ceph_connection *con)
1796  {
1797  	reset_in_kvecs(con);
1798  	add_in_kvec(con, con->v2.in_buf,
1799  		    con_secure(con) ? CEPH_PREAMBLE_SECURE_LEN :
1800  				      CEPH_PREAMBLE_PLAIN_LEN);
1801  	con->v2.in_state = IN_S_HANDLE_PREAMBLE;
1802  }
1803  
prepare_read_control(struct ceph_connection * con)1804  static int prepare_read_control(struct ceph_connection *con)
1805  {
1806  	int ctrl_len = con->v2.in_desc.fd_lens[0];
1807  	int head_len;
1808  	void *buf;
1809  
1810  	reset_in_kvecs(con);
1811  	if (con->state == CEPH_CON_S_V2_HELLO ||
1812  	    con->state == CEPH_CON_S_V2_AUTH) {
1813  		head_len = head_onwire_len(ctrl_len, false);
1814  		buf = alloc_conn_buf(con, head_len);
1815  		if (!buf)
1816  			return -ENOMEM;
1817  
1818  		/* preserve preamble */
1819  		memcpy(buf, con->v2.in_buf, CEPH_PREAMBLE_LEN);
1820  
1821  		add_in_kvec(con, CTRL_BODY(buf), ctrl_len);
1822  		add_in_kvec(con, CTRL_BODY(buf) + ctrl_len, CEPH_CRC_LEN);
1823  		add_in_sign_kvec(con, buf, head_len);
1824  	} else {
1825  		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
1826  			buf = alloc_conn_buf(con, ctrl_len);
1827  			if (!buf)
1828  				return -ENOMEM;
1829  
1830  			add_in_kvec(con, buf, ctrl_len);
1831  		} else {
1832  			add_in_kvec(con, CTRL_BODY(con->v2.in_buf), ctrl_len);
1833  		}
1834  		add_in_kvec(con, con->v2.in_buf, CEPH_CRC_LEN);
1835  	}
1836  	con->v2.in_state = IN_S_HANDLE_CONTROL;
1837  	return 0;
1838  }
1839  
prepare_read_control_remainder(struct ceph_connection * con)1840  static int prepare_read_control_remainder(struct ceph_connection *con)
1841  {
1842  	int ctrl_len = con->v2.in_desc.fd_lens[0];
1843  	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
1844  	void *buf;
1845  
1846  	buf = alloc_conn_buf(con, ctrl_len);
1847  	if (!buf)
1848  		return -ENOMEM;
1849  
1850  	memcpy(buf, CTRL_BODY(con->v2.in_buf), CEPH_PREAMBLE_INLINE_LEN);
1851  
1852  	reset_in_kvecs(con);
1853  	add_in_kvec(con, buf + CEPH_PREAMBLE_INLINE_LEN, rem_len);
1854  	add_in_kvec(con, con->v2.in_buf,
1855  		    padding_len(rem_len) + CEPH_GCM_TAG_LEN);
1856  	con->v2.in_state = IN_S_HANDLE_CONTROL_REMAINDER;
1857  	return 0;
1858  }
1859  
prepare_read_data(struct ceph_connection * con)1860  static int prepare_read_data(struct ceph_connection *con)
1861  {
1862  	struct bio_vec bv;
1863  
1864  	con->in_data_crc = -1;
1865  	ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
1866  				  data_len(con->in_msg));
1867  
1868  	get_bvec_at(&con->v2.in_cursor, &bv);
1869  	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1870  		if (unlikely(!con->bounce_page)) {
1871  			con->bounce_page = alloc_page(GFP_NOIO);
1872  			if (!con->bounce_page) {
1873  				pr_err("failed to allocate bounce page\n");
1874  				return -ENOMEM;
1875  			}
1876  		}
1877  
1878  		bv.bv_page = con->bounce_page;
1879  		bv.bv_offset = 0;
1880  	}
1881  	set_in_bvec(con, &bv);
1882  	con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
1883  	return 0;
1884  }
1885  
prepare_read_data_cont(struct ceph_connection * con)1886  static void prepare_read_data_cont(struct ceph_connection *con)
1887  {
1888  	struct bio_vec bv;
1889  
1890  	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1891  		con->in_data_crc = crc32c(con->in_data_crc,
1892  					  page_address(con->bounce_page),
1893  					  con->v2.in_bvec.bv_len);
1894  
1895  		get_bvec_at(&con->v2.in_cursor, &bv);
1896  		memcpy_to_page(bv.bv_page, bv.bv_offset,
1897  			       page_address(con->bounce_page),
1898  			       con->v2.in_bvec.bv_len);
1899  	} else {
1900  		con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1901  						    con->v2.in_bvec.bv_page,
1902  						    con->v2.in_bvec.bv_offset,
1903  						    con->v2.in_bvec.bv_len);
1904  	}
1905  
1906  	ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
1907  	if (con->v2.in_cursor.total_resid) {
1908  		get_bvec_at(&con->v2.in_cursor, &bv);
1909  		if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1910  			bv.bv_page = con->bounce_page;
1911  			bv.bv_offset = 0;
1912  		}
1913  		set_in_bvec(con, &bv);
1914  		WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
1915  		return;
1916  	}
1917  
1918  	/*
1919  	 * We've read all data.  Prepare to read epilogue.
1920  	 */
1921  	reset_in_kvecs(con);
1922  	add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1923  	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1924  }
1925  
prepare_sparse_read_cont(struct ceph_connection * con)1926  static int prepare_sparse_read_cont(struct ceph_connection *con)
1927  {
1928  	int ret;
1929  	struct bio_vec bv;
1930  	char *buf = NULL;
1931  	struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
1932  
1933  	WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
1934  
1935  	if (iov_iter_is_bvec(&con->v2.in_iter)) {
1936  		if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1937  			con->in_data_crc = crc32c(con->in_data_crc,
1938  						  page_address(con->bounce_page),
1939  						  con->v2.in_bvec.bv_len);
1940  			get_bvec_at(cursor, &bv);
1941  			memcpy_to_page(bv.bv_page, bv.bv_offset,
1942  				       page_address(con->bounce_page),
1943  				       con->v2.in_bvec.bv_len);
1944  		} else {
1945  			con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
1946  							    con->v2.in_bvec.bv_page,
1947  							    con->v2.in_bvec.bv_offset,
1948  							    con->v2.in_bvec.bv_len);
1949  		}
1950  
1951  		ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
1952  		cursor->sr_resid -= con->v2.in_bvec.bv_len;
1953  		dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
1954  		     con->v2.in_bvec.bv_len, cursor->sr_resid);
1955  		WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
1956  		if (cursor->sr_resid) {
1957  			get_bvec_at(cursor, &bv);
1958  			if (bv.bv_len > cursor->sr_resid)
1959  				bv.bv_len = cursor->sr_resid;
1960  			if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
1961  				bv.bv_page = con->bounce_page;
1962  				bv.bv_offset = 0;
1963  			}
1964  			set_in_bvec(con, &bv);
1965  			con->v2.data_len_remain -= bv.bv_len;
1966  			return 0;
1967  		}
1968  	} else if (iov_iter_is_kvec(&con->v2.in_iter)) {
1969  		/* On first call, we have no kvec so don't compute crc */
1970  		if (con->v2.in_kvec_cnt) {
1971  			WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
1972  			con->in_data_crc = crc32c(con->in_data_crc,
1973  						  con->v2.in_kvecs[0].iov_base,
1974  						  con->v2.in_kvecs[0].iov_len);
1975  		}
1976  	} else {
1977  		return -EIO;
1978  	}
1979  
1980  	/* get next extent */
1981  	ret = con->ops->sparse_read(con, cursor, &buf);
1982  	if (ret <= 0) {
1983  		if (ret < 0)
1984  			return ret;
1985  
1986  		reset_in_kvecs(con);
1987  		add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
1988  		con->v2.in_state = IN_S_HANDLE_EPILOGUE;
1989  		return 0;
1990  	}
1991  
1992  	if (buf) {
1993  		/* receive into buffer */
1994  		reset_in_kvecs(con);
1995  		add_in_kvec(con, buf, ret);
1996  		con->v2.data_len_remain -= ret;
1997  		return 0;
1998  	}
1999  
2000  	if (ret > cursor->total_resid) {
2001  		pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
2002  			__func__, ret, cursor->total_resid, cursor->resid);
2003  		return -EIO;
2004  	}
2005  	get_bvec_at(cursor, &bv);
2006  	if (bv.bv_len > cursor->sr_resid)
2007  		bv.bv_len = cursor->sr_resid;
2008  	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
2009  		if (unlikely(!con->bounce_page)) {
2010  			con->bounce_page = alloc_page(GFP_NOIO);
2011  			if (!con->bounce_page) {
2012  				pr_err("failed to allocate bounce page\n");
2013  				return -ENOMEM;
2014  			}
2015  		}
2016  
2017  		bv.bv_page = con->bounce_page;
2018  		bv.bv_offset = 0;
2019  	}
2020  	set_in_bvec(con, &bv);
2021  	con->v2.data_len_remain -= ret;
2022  	return ret;
2023  }
2024  
prepare_sparse_read_data(struct ceph_connection * con)2025  static int prepare_sparse_read_data(struct ceph_connection *con)
2026  {
2027  	struct ceph_msg *msg = con->in_msg;
2028  
2029  	dout("%s: starting sparse read\n", __func__);
2030  
2031  	if (WARN_ON_ONCE(!con->ops->sparse_read))
2032  		return -EOPNOTSUPP;
2033  
2034  	if (!con_secure(con))
2035  		con->in_data_crc = -1;
2036  
2037  	ceph_msg_data_cursor_init(&con->v2.in_cursor, msg,
2038  				  msg->sparse_read_total);
2039  
2040  	reset_in_kvecs(con);
2041  	con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
2042  	con->v2.data_len_remain = data_len(msg);
2043  	return prepare_sparse_read_cont(con);
2044  }
2045  
prepare_read_tail_plain(struct ceph_connection * con)2046  static int prepare_read_tail_plain(struct ceph_connection *con)
2047  {
2048  	struct ceph_msg *msg = con->in_msg;
2049  
2050  	if (!front_len(msg) && !middle_len(msg)) {
2051  		WARN_ON(!data_len(msg));
2052  		return prepare_read_data(con);
2053  	}
2054  
2055  	reset_in_kvecs(con);
2056  	if (front_len(msg)) {
2057  		add_in_kvec(con, msg->front.iov_base, front_len(msg));
2058  		WARN_ON(msg->front.iov_len != front_len(msg));
2059  	}
2060  	if (middle_len(msg)) {
2061  		add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
2062  		WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
2063  	}
2064  
2065  	if (data_len(msg)) {
2066  		if (msg->sparse_read_total)
2067  			con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
2068  		else
2069  			con->v2.in_state = IN_S_PREPARE_READ_DATA;
2070  	} else {
2071  		add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
2072  		con->v2.in_state = IN_S_HANDLE_EPILOGUE;
2073  	}
2074  	return 0;
2075  }
2076  
prepare_read_enc_page(struct ceph_connection * con)2077  static void prepare_read_enc_page(struct ceph_connection *con)
2078  {
2079  	struct bio_vec bv;
2080  
2081  	dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
2082  	     con->v2.in_enc_resid);
2083  	WARN_ON(!con->v2.in_enc_resid);
2084  
2085  	bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
2086  		      min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);
2087  
2088  	set_in_bvec(con, &bv);
2089  	con->v2.in_enc_i++;
2090  	con->v2.in_enc_resid -= bv.bv_len;
2091  
2092  	if (con->v2.in_enc_resid) {
2093  		con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
2094  		return;
2095  	}
2096  
2097  	/*
2098  	 * We are set to read the last piece of ciphertext (ending
2099  	 * with epilogue) + auth tag.
2100  	 */
2101  	WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
2102  	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
2103  }
2104  
prepare_read_tail_secure(struct ceph_connection * con)2105  static int prepare_read_tail_secure(struct ceph_connection *con)
2106  {
2107  	struct page **enc_pages;
2108  	int enc_page_cnt;
2109  	int tail_len;
2110  
2111  	tail_len = tail_onwire_len(con->in_msg, true);
2112  	WARN_ON(!tail_len);
2113  
2114  	enc_page_cnt = calc_pages_for(0, tail_len);
2115  	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
2116  	if (IS_ERR(enc_pages))
2117  		return PTR_ERR(enc_pages);
2118  
2119  	WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
2120  	con->v2.in_enc_pages = enc_pages;
2121  	con->v2.in_enc_page_cnt = enc_page_cnt;
2122  	con->v2.in_enc_resid = tail_len;
2123  	con->v2.in_enc_i = 0;
2124  
2125  	prepare_read_enc_page(con);
2126  	return 0;
2127  }
2128  
__finish_skip(struct ceph_connection * con)2129  static void __finish_skip(struct ceph_connection *con)
2130  {
2131  	con->in_seq++;
2132  	prepare_read_preamble(con);
2133  }
2134  
prepare_skip_message(struct ceph_connection * con)2135  static void prepare_skip_message(struct ceph_connection *con)
2136  {
2137  	struct ceph_frame_desc *desc = &con->v2.in_desc;
2138  	int tail_len;
2139  
2140  	dout("%s con %p %d+%d+%d\n", __func__, con, desc->fd_lens[1],
2141  	     desc->fd_lens[2], desc->fd_lens[3]);
2142  
2143  	tail_len = __tail_onwire_len(desc->fd_lens[1], desc->fd_lens[2],
2144  				     desc->fd_lens[3], con_secure(con));
2145  	if (!tail_len) {
2146  		__finish_skip(con);
2147  	} else {
2148  		set_in_skip(con, tail_len);
2149  		con->v2.in_state = IN_S_FINISH_SKIP;
2150  	}
2151  }
2152  
process_banner_prefix(struct ceph_connection * con)2153  static int process_banner_prefix(struct ceph_connection *con)
2154  {
2155  	int payload_len;
2156  	void *p;
2157  
2158  	WARN_ON(con->v2.in_kvecs[0].iov_len != CEPH_BANNER_V2_PREFIX_LEN);
2159  
2160  	p = con->v2.in_kvecs[0].iov_base;
2161  	if (memcmp(p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN)) {
2162  		if (!memcmp(p, CEPH_BANNER, CEPH_BANNER_LEN))
2163  			con->error_msg = "server is speaking msgr1 protocol";
2164  		else
2165  			con->error_msg = "protocol error, bad banner";
2166  		return -EINVAL;
2167  	}
2168  
2169  	p += CEPH_BANNER_V2_LEN;
2170  	payload_len = ceph_decode_16(&p);
2171  	dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2172  
2173  	return prepare_read_banner_payload(con, payload_len);
2174  }
2175  
process_banner_payload(struct ceph_connection * con)2176  static int process_banner_payload(struct ceph_connection *con)
2177  {
2178  	void *end = con->v2.in_kvecs[0].iov_base + con->v2.in_kvecs[0].iov_len;
2179  	u64 feat = CEPH_MSGR2_SUPPORTED_FEATURES;
2180  	u64 req_feat = CEPH_MSGR2_REQUIRED_FEATURES;
2181  	u64 server_feat, server_req_feat;
2182  	void *p;
2183  	int ret;
2184  
2185  	p = con->v2.in_kvecs[0].iov_base;
2186  	ceph_decode_64_safe(&p, end, server_feat, bad);
2187  	ceph_decode_64_safe(&p, end, server_req_feat, bad);
2188  
2189  	dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n",
2190  	     __func__, con, server_feat, server_req_feat);
2191  
2192  	if (req_feat & ~server_feat) {
2193  		pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2194  		       server_feat, req_feat & ~server_feat);
2195  		con->error_msg = "missing required protocol features";
2196  		return -EINVAL;
2197  	}
2198  	if (server_req_feat & ~feat) {
2199  		pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2200  		       feat, server_req_feat & ~feat);
2201  		con->error_msg = "missing required protocol features";
2202  		return -EINVAL;
2203  	}
2204  
2205  	/* no reset_out_kvecs() as our banner may still be pending */
2206  	ret = prepare_hello(con);
2207  	if (ret) {
2208  		pr_err("prepare_hello failed: %d\n", ret);
2209  		return ret;
2210  	}
2211  
2212  	con->state = CEPH_CON_S_V2_HELLO;
2213  	prepare_read_preamble(con);
2214  	return 0;
2215  
2216  bad:
2217  	pr_err("failed to decode banner payload\n");
2218  	return -EINVAL;
2219  }
2220  
process_hello(struct ceph_connection * con,void * p,void * end)2221  static int process_hello(struct ceph_connection *con, void *p, void *end)
2222  {
2223  	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
2224  	struct ceph_entity_addr addr_for_me;
2225  	u8 entity_type;
2226  	int ret;
2227  
2228  	if (con->state != CEPH_CON_S_V2_HELLO) {
2229  		con->error_msg = "protocol error, unexpected hello";
2230  		return -EINVAL;
2231  	}
2232  
2233  	ceph_decode_8_safe(&p, end, entity_type, bad);
2234  	ret = ceph_decode_entity_addr(&p, end, &addr_for_me);
2235  	if (ret) {
2236  		pr_err("failed to decode addr_for_me: %d\n", ret);
2237  		return ret;
2238  	}
2239  
2240  	dout("%s con %p entity_type %d addr_for_me %s\n", __func__, con,
2241  	     entity_type, ceph_pr_addr(&addr_for_me));
2242  
2243  	if (entity_type != con->peer_name.type) {
2244  		pr_err("bad peer type, want %d, got %d\n",
2245  		       con->peer_name.type, entity_type);
2246  		con->error_msg = "wrong peer at address";
2247  		return -EINVAL;
2248  	}
2249  
2250  	/*
2251  	 * Set our address to the address our first peer (i.e. monitor)
2252  	 * sees that we are connecting from.  If we are behind some sort
2253  	 * of NAT and want to be identified by some private (not NATed)
2254  	 * address, ip option should be used.
2255  	 */
2256  	if (ceph_addr_is_blank(my_addr)) {
2257  		memcpy(&my_addr->in_addr, &addr_for_me.in_addr,
2258  		       sizeof(my_addr->in_addr));
2259  		ceph_addr_set_port(my_addr, 0);
2260  		dout("%s con %p set my addr %s, as seen by peer %s\n",
2261  		     __func__, con, ceph_pr_addr(my_addr),
2262  		     ceph_pr_addr(&con->peer_addr));
2263  	} else {
2264  		dout("%s con %p my addr already set %s\n",
2265  		     __func__, con, ceph_pr_addr(my_addr));
2266  	}
2267  
2268  	WARN_ON(ceph_addr_is_blank(my_addr) || ceph_addr_port(my_addr));
2269  	WARN_ON(my_addr->type != CEPH_ENTITY_ADDR_TYPE_ANY);
2270  	WARN_ON(!my_addr->nonce);
2271  
2272  	/* no reset_out_kvecs() as our hello may still be pending */
2273  	ret = prepare_auth_request(con);
2274  	if (ret) {
2275  		if (ret != -EAGAIN)
2276  			pr_err("prepare_auth_request failed: %d\n", ret);
2277  		return ret;
2278  	}
2279  
2280  	con->state = CEPH_CON_S_V2_AUTH;
2281  	return 0;
2282  
2283  bad:
2284  	pr_err("failed to decode hello\n");
2285  	return -EINVAL;
2286  }
2287  
process_auth_bad_method(struct ceph_connection * con,void * p,void * end)2288  static int process_auth_bad_method(struct ceph_connection *con,
2289  				   void *p, void *end)
2290  {
2291  	int allowed_protos[8], allowed_modes[8];
2292  	int allowed_proto_cnt, allowed_mode_cnt;
2293  	int used_proto, result;
2294  	int ret;
2295  	int i;
2296  
2297  	if (con->state != CEPH_CON_S_V2_AUTH) {
2298  		con->error_msg = "protocol error, unexpected auth_bad_method";
2299  		return -EINVAL;
2300  	}
2301  
2302  	ceph_decode_32_safe(&p, end, used_proto, bad);
2303  	ceph_decode_32_safe(&p, end, result, bad);
2304  	dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto,
2305  	     result);
2306  
2307  	ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad);
2308  	if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) {
2309  		pr_err("allowed_protos too big %d\n", allowed_proto_cnt);
2310  		return -EINVAL;
2311  	}
2312  	for (i = 0; i < allowed_proto_cnt; i++) {
2313  		ceph_decode_32_safe(&p, end, allowed_protos[i], bad);
2314  		dout("%s con %p allowed_protos[%d] %d\n", __func__, con,
2315  		     i, allowed_protos[i]);
2316  	}
2317  
2318  	ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad);
2319  	if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) {
2320  		pr_err("allowed_modes too big %d\n", allowed_mode_cnt);
2321  		return -EINVAL;
2322  	}
2323  	for (i = 0; i < allowed_mode_cnt; i++) {
2324  		ceph_decode_32_safe(&p, end, allowed_modes[i], bad);
2325  		dout("%s con %p allowed_modes[%d] %d\n", __func__, con,
2326  		     i, allowed_modes[i]);
2327  	}
2328  
2329  	mutex_unlock(&con->mutex);
2330  	ret = con->ops->handle_auth_bad_method(con, used_proto, result,
2331  					       allowed_protos,
2332  					       allowed_proto_cnt,
2333  					       allowed_modes,
2334  					       allowed_mode_cnt);
2335  	mutex_lock(&con->mutex);
2336  	if (con->state != CEPH_CON_S_V2_AUTH) {
2337  		dout("%s con %p state changed to %d\n", __func__, con,
2338  		     con->state);
2339  		return -EAGAIN;
2340  	}
2341  
2342  	dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret);
2343  	return ret;
2344  
2345  bad:
2346  	pr_err("failed to decode auth_bad_method\n");
2347  	return -EINVAL;
2348  }
2349  
process_auth_reply_more(struct ceph_connection * con,void * p,void * end)2350  static int process_auth_reply_more(struct ceph_connection *con,
2351  				   void *p, void *end)
2352  {
2353  	int payload_len;
2354  	int ret;
2355  
2356  	if (con->state != CEPH_CON_S_V2_AUTH) {
2357  		con->error_msg = "protocol error, unexpected auth_reply_more";
2358  		return -EINVAL;
2359  	}
2360  
2361  	ceph_decode_32_safe(&p, end, payload_len, bad);
2362  	ceph_decode_need(&p, end, payload_len, bad);
2363  
2364  	dout("%s con %p payload_len %d\n", __func__, con, payload_len);
2365  
2366  	reset_out_kvecs(con);
2367  	ret = prepare_auth_request_more(con, p, payload_len);
2368  	if (ret) {
2369  		if (ret != -EAGAIN)
2370  			pr_err("prepare_auth_request_more failed: %d\n", ret);
2371  		return ret;
2372  	}
2373  
2374  	return 0;
2375  
2376  bad:
2377  	pr_err("failed to decode auth_reply_more\n");
2378  	return -EINVAL;
2379  }
2380  
2381  /*
2382   * Align session_key and con_secret to avoid GFP_ATOMIC allocation
2383   * inside crypto_shash_setkey() and crypto_aead_setkey() called from
2384   * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
2385   * objects, so do it by hand.
2386   */
process_auth_done(struct ceph_connection * con,void * p,void * end)2387  static int process_auth_done(struct ceph_connection *con, void *p, void *end)
2388  {
2389  	u8 session_key_buf[CEPH_KEY_LEN + 16];
2390  	u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
2391  	u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
2392  	u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
2393  	int session_key_len, con_secret_len;
2394  	int payload_len;
2395  	u64 global_id;
2396  	int ret;
2397  
2398  	if (con->state != CEPH_CON_S_V2_AUTH) {
2399  		con->error_msg = "protocol error, unexpected auth_done";
2400  		return -EINVAL;
2401  	}
2402  
2403  	ceph_decode_64_safe(&p, end, global_id, bad);
2404  	ceph_decode_32_safe(&p, end, con->v2.con_mode, bad);
2405  	ceph_decode_32_safe(&p, end, payload_len, bad);
2406  
2407  	dout("%s con %p global_id %llu con_mode %d payload_len %d\n",
2408  	     __func__, con, global_id, con->v2.con_mode, payload_len);
2409  
2410  	mutex_unlock(&con->mutex);
2411  	session_key_len = 0;
2412  	con_secret_len = 0;
2413  	ret = con->ops->handle_auth_done(con, global_id, p, payload_len,
2414  					 session_key, &session_key_len,
2415  					 con_secret, &con_secret_len);
2416  	mutex_lock(&con->mutex);
2417  	if (con->state != CEPH_CON_S_V2_AUTH) {
2418  		dout("%s con %p state changed to %d\n", __func__, con,
2419  		     con->state);
2420  		ret = -EAGAIN;
2421  		goto out;
2422  	}
2423  
2424  	dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret);
2425  	if (ret)
2426  		goto out;
2427  
2428  	ret = setup_crypto(con, session_key, session_key_len, con_secret,
2429  			   con_secret_len);
2430  	if (ret)
2431  		goto out;
2432  
2433  	reset_out_kvecs(con);
2434  	ret = prepare_auth_signature(con);
2435  	if (ret) {
2436  		pr_err("prepare_auth_signature failed: %d\n", ret);
2437  		goto out;
2438  	}
2439  
2440  	con->state = CEPH_CON_S_V2_AUTH_SIGNATURE;
2441  
2442  out:
2443  	memzero_explicit(session_key_buf, sizeof(session_key_buf));
2444  	memzero_explicit(con_secret_buf, sizeof(con_secret_buf));
2445  	return ret;
2446  
2447  bad:
2448  	pr_err("failed to decode auth_done\n");
2449  	return -EINVAL;
2450  }
2451  
process_auth_signature(struct ceph_connection * con,void * p,void * end)2452  static int process_auth_signature(struct ceph_connection *con,
2453  				  void *p, void *end)
2454  {
2455  	u8 hmac[SHA256_DIGEST_SIZE];
2456  	int ret;
2457  
2458  	if (con->state != CEPH_CON_S_V2_AUTH_SIGNATURE) {
2459  		con->error_msg = "protocol error, unexpected auth_signature";
2460  		return -EINVAL;
2461  	}
2462  
2463  	ret = hmac_sha256(con, con->v2.out_sign_kvecs,
2464  			  con->v2.out_sign_kvec_cnt, hmac);
2465  	if (ret)
2466  		return ret;
2467  
2468  	ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad);
2469  	if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
2470  		con->error_msg = "integrity error, bad auth signature";
2471  		return -EBADMSG;
2472  	}
2473  
2474  	dout("%s con %p auth signature ok\n", __func__, con);
2475  
2476  	/* no reset_out_kvecs() as our auth_signature may still be pending */
2477  	if (!con->v2.server_cookie) {
2478  		ret = prepare_client_ident(con);
2479  		if (ret) {
2480  			pr_err("prepare_client_ident failed: %d\n", ret);
2481  			return ret;
2482  		}
2483  
2484  		con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2485  	} else {
2486  		ret = prepare_session_reconnect(con);
2487  		if (ret) {
2488  			pr_err("prepare_session_reconnect failed: %d\n", ret);
2489  			return ret;
2490  		}
2491  
2492  		con->state = CEPH_CON_S_V2_SESSION_RECONNECT;
2493  	}
2494  
2495  	return 0;
2496  
2497  bad:
2498  	pr_err("failed to decode auth_signature\n");
2499  	return -EINVAL;
2500  }
2501  
process_server_ident(struct ceph_connection * con,void * p,void * end)2502  static int process_server_ident(struct ceph_connection *con,
2503  				void *p, void *end)
2504  {
2505  	struct ceph_client *client = from_msgr(con->msgr);
2506  	u64 features, required_features;
2507  	struct ceph_entity_addr addr;
2508  	u64 global_seq;
2509  	u64 global_id;
2510  	u64 cookie;
2511  	u64 flags;
2512  	int ret;
2513  
2514  	if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2515  		con->error_msg = "protocol error, unexpected server_ident";
2516  		return -EINVAL;
2517  	}
2518  
2519  	ret = ceph_decode_entity_addrvec(&p, end, true, &addr);
2520  	if (ret) {
2521  		pr_err("failed to decode server addrs: %d\n", ret);
2522  		return ret;
2523  	}
2524  
2525  	ceph_decode_64_safe(&p, end, global_id, bad);
2526  	ceph_decode_64_safe(&p, end, global_seq, bad);
2527  	ceph_decode_64_safe(&p, end, features, bad);
2528  	ceph_decode_64_safe(&p, end, required_features, bad);
2529  	ceph_decode_64_safe(&p, end, flags, bad);
2530  	ceph_decode_64_safe(&p, end, cookie, bad);
2531  
2532  	dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n",
2533  	     __func__, con, ceph_pr_addr(&addr), le32_to_cpu(addr.nonce),
2534  	     global_id, global_seq, features, required_features, flags, cookie);
2535  
2536  	/* is this who we intended to talk to? */
2537  	if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) {
2538  		pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
2539  		       ceph_pr_addr(&con->peer_addr),
2540  		       le32_to_cpu(con->peer_addr.nonce),
2541  		       ceph_pr_addr(&addr), le32_to_cpu(addr.nonce));
2542  		con->error_msg = "wrong peer at address";
2543  		return -EINVAL;
2544  	}
2545  
2546  	if (client->required_features & ~features) {
2547  		pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
2548  		       features, client->required_features & ~features);
2549  		con->error_msg = "missing required protocol features";
2550  		return -EINVAL;
2551  	}
2552  
2553  	/*
2554  	 * Both name->type and name->num are set in ceph_con_open() but
2555  	 * name->num may be bogus in the initial monmap.  name->type is
2556  	 * verified in handle_hello().
2557  	 */
2558  	WARN_ON(!con->peer_name.type);
2559  	con->peer_name.num = cpu_to_le64(global_id);
2560  	con->v2.peer_global_seq = global_seq;
2561  	con->peer_features = features;
2562  	WARN_ON(required_features & ~client->supported_features);
2563  	con->v2.server_cookie = cookie;
2564  
2565  	if (flags & CEPH_MSG_CONNECT_LOSSY) {
2566  		ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
2567  		WARN_ON(con->v2.server_cookie);
2568  	} else {
2569  		WARN_ON(!con->v2.server_cookie);
2570  	}
2571  
2572  	clear_in_sign_kvecs(con);
2573  	clear_out_sign_kvecs(con);
2574  	free_conn_bufs(con);
2575  	con->delay = 0;  /* reset backoff memory */
2576  
2577  	con->state = CEPH_CON_S_OPEN;
2578  	con->v2.out_state = OUT_S_GET_NEXT;
2579  	return 0;
2580  
2581  bad:
2582  	pr_err("failed to decode server_ident\n");
2583  	return -EINVAL;
2584  }
2585  
process_ident_missing_features(struct ceph_connection * con,void * p,void * end)2586  static int process_ident_missing_features(struct ceph_connection *con,
2587  					  void *p, void *end)
2588  {
2589  	struct ceph_client *client = from_msgr(con->msgr);
2590  	u64 missing_features;
2591  
2592  	if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) {
2593  		con->error_msg = "protocol error, unexpected ident_missing_features";
2594  		return -EINVAL;
2595  	}
2596  
2597  	ceph_decode_64_safe(&p, end, missing_features, bad);
2598  	pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
2599  	       client->supported_features, missing_features);
2600  	con->error_msg = "missing required protocol features";
2601  	return -EINVAL;
2602  
2603  bad:
2604  	pr_err("failed to decode ident_missing_features\n");
2605  	return -EINVAL;
2606  }
2607  
process_session_reconnect_ok(struct ceph_connection * con,void * p,void * end)2608  static int process_session_reconnect_ok(struct ceph_connection *con,
2609  					void *p, void *end)
2610  {
2611  	u64 seq;
2612  
2613  	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2614  		con->error_msg = "protocol error, unexpected session_reconnect_ok";
2615  		return -EINVAL;
2616  	}
2617  
2618  	ceph_decode_64_safe(&p, end, seq, bad);
2619  
2620  	dout("%s con %p seq %llu\n", __func__, con, seq);
2621  	ceph_con_discard_requeued(con, seq);
2622  
2623  	clear_in_sign_kvecs(con);
2624  	clear_out_sign_kvecs(con);
2625  	free_conn_bufs(con);
2626  	con->delay = 0;  /* reset backoff memory */
2627  
2628  	con->state = CEPH_CON_S_OPEN;
2629  	con->v2.out_state = OUT_S_GET_NEXT;
2630  	return 0;
2631  
2632  bad:
2633  	pr_err("failed to decode session_reconnect_ok\n");
2634  	return -EINVAL;
2635  }
2636  
process_session_retry(struct ceph_connection * con,void * p,void * end)2637  static int process_session_retry(struct ceph_connection *con,
2638  				 void *p, void *end)
2639  {
2640  	u64 connect_seq;
2641  	int ret;
2642  
2643  	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2644  		con->error_msg = "protocol error, unexpected session_retry";
2645  		return -EINVAL;
2646  	}
2647  
2648  	ceph_decode_64_safe(&p, end, connect_seq, bad);
2649  
2650  	dout("%s con %p connect_seq %llu\n", __func__, con, connect_seq);
2651  	WARN_ON(connect_seq <= con->v2.connect_seq);
2652  	con->v2.connect_seq = connect_seq + 1;
2653  
2654  	free_conn_bufs(con);
2655  
2656  	reset_out_kvecs(con);
2657  	ret = prepare_session_reconnect(con);
2658  	if (ret) {
2659  		pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret);
2660  		return ret;
2661  	}
2662  
2663  	return 0;
2664  
2665  bad:
2666  	pr_err("failed to decode session_retry\n");
2667  	return -EINVAL;
2668  }
2669  
process_session_retry_global(struct ceph_connection * con,void * p,void * end)2670  static int process_session_retry_global(struct ceph_connection *con,
2671  					void *p, void *end)
2672  {
2673  	u64 global_seq;
2674  	int ret;
2675  
2676  	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2677  		con->error_msg = "protocol error, unexpected session_retry_global";
2678  		return -EINVAL;
2679  	}
2680  
2681  	ceph_decode_64_safe(&p, end, global_seq, bad);
2682  
2683  	dout("%s con %p global_seq %llu\n", __func__, con, global_seq);
2684  	WARN_ON(global_seq <= con->v2.global_seq);
2685  	con->v2.global_seq = ceph_get_global_seq(con->msgr, global_seq);
2686  
2687  	free_conn_bufs(con);
2688  
2689  	reset_out_kvecs(con);
2690  	ret = prepare_session_reconnect(con);
2691  	if (ret) {
2692  		pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret);
2693  		return ret;
2694  	}
2695  
2696  	return 0;
2697  
2698  bad:
2699  	pr_err("failed to decode session_retry_global\n");
2700  	return -EINVAL;
2701  }
2702  
process_session_reset(struct ceph_connection * con,void * p,void * end)2703  static int process_session_reset(struct ceph_connection *con,
2704  				 void *p, void *end)
2705  {
2706  	bool full;
2707  	int ret;
2708  
2709  	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2710  		con->error_msg = "protocol error, unexpected session_reset";
2711  		return -EINVAL;
2712  	}
2713  
2714  	ceph_decode_8_safe(&p, end, full, bad);
2715  	if (!full) {
2716  		con->error_msg = "protocol error, bad session_reset";
2717  		return -EINVAL;
2718  	}
2719  
2720  	pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name),
2721  		ceph_pr_addr(&con->peer_addr));
2722  	ceph_con_reset_session(con);
2723  
2724  	mutex_unlock(&con->mutex);
2725  	if (con->ops->peer_reset)
2726  		con->ops->peer_reset(con);
2727  	mutex_lock(&con->mutex);
2728  	if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
2729  		dout("%s con %p state changed to %d\n", __func__, con,
2730  		     con->state);
2731  		return -EAGAIN;
2732  	}
2733  
2734  	free_conn_bufs(con);
2735  
2736  	reset_out_kvecs(con);
2737  	ret = prepare_client_ident(con);
2738  	if (ret) {
2739  		pr_err("prepare_client_ident (rst) failed: %d\n", ret);
2740  		return ret;
2741  	}
2742  
2743  	con->state = CEPH_CON_S_V2_SESSION_CONNECT;
2744  	return 0;
2745  
2746  bad:
2747  	pr_err("failed to decode session_reset\n");
2748  	return -EINVAL;
2749  }
2750  
process_keepalive2_ack(struct ceph_connection * con,void * p,void * end)2751  static int process_keepalive2_ack(struct ceph_connection *con,
2752  				  void *p, void *end)
2753  {
2754  	if (con->state != CEPH_CON_S_OPEN) {
2755  		con->error_msg = "protocol error, unexpected keepalive2_ack";
2756  		return -EINVAL;
2757  	}
2758  
2759  	ceph_decode_need(&p, end, sizeof(struct ceph_timespec), bad);
2760  	ceph_decode_timespec64(&con->last_keepalive_ack, p);
2761  
2762  	dout("%s con %p timestamp %lld.%09ld\n", __func__, con,
2763  	     con->last_keepalive_ack.tv_sec, con->last_keepalive_ack.tv_nsec);
2764  
2765  	return 0;
2766  
2767  bad:
2768  	pr_err("failed to decode keepalive2_ack\n");
2769  	return -EINVAL;
2770  }
2771  
process_ack(struct ceph_connection * con,void * p,void * end)2772  static int process_ack(struct ceph_connection *con, void *p, void *end)
2773  {
2774  	u64 seq;
2775  
2776  	if (con->state != CEPH_CON_S_OPEN) {
2777  		con->error_msg = "protocol error, unexpected ack";
2778  		return -EINVAL;
2779  	}
2780  
2781  	ceph_decode_64_safe(&p, end, seq, bad);
2782  
2783  	dout("%s con %p seq %llu\n", __func__, con, seq);
2784  	ceph_con_discard_sent(con, seq);
2785  	return 0;
2786  
2787  bad:
2788  	pr_err("failed to decode ack\n");
2789  	return -EINVAL;
2790  }
2791  
process_control(struct ceph_connection * con,void * p,void * end)2792  static int process_control(struct ceph_connection *con, void *p, void *end)
2793  {
2794  	int tag = con->v2.in_desc.fd_tag;
2795  	int ret;
2796  
2797  	dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p));
2798  
2799  	switch (tag) {
2800  	case FRAME_TAG_HELLO:
2801  		ret = process_hello(con, p, end);
2802  		break;
2803  	case FRAME_TAG_AUTH_BAD_METHOD:
2804  		ret = process_auth_bad_method(con, p, end);
2805  		break;
2806  	case FRAME_TAG_AUTH_REPLY_MORE:
2807  		ret = process_auth_reply_more(con, p, end);
2808  		break;
2809  	case FRAME_TAG_AUTH_DONE:
2810  		ret = process_auth_done(con, p, end);
2811  		break;
2812  	case FRAME_TAG_AUTH_SIGNATURE:
2813  		ret = process_auth_signature(con, p, end);
2814  		break;
2815  	case FRAME_TAG_SERVER_IDENT:
2816  		ret = process_server_ident(con, p, end);
2817  		break;
2818  	case FRAME_TAG_IDENT_MISSING_FEATURES:
2819  		ret = process_ident_missing_features(con, p, end);
2820  		break;
2821  	case FRAME_TAG_SESSION_RECONNECT_OK:
2822  		ret = process_session_reconnect_ok(con, p, end);
2823  		break;
2824  	case FRAME_TAG_SESSION_RETRY:
2825  		ret = process_session_retry(con, p, end);
2826  		break;
2827  	case FRAME_TAG_SESSION_RETRY_GLOBAL:
2828  		ret = process_session_retry_global(con, p, end);
2829  		break;
2830  	case FRAME_TAG_SESSION_RESET:
2831  		ret = process_session_reset(con, p, end);
2832  		break;
2833  	case FRAME_TAG_KEEPALIVE2_ACK:
2834  		ret = process_keepalive2_ack(con, p, end);
2835  		break;
2836  	case FRAME_TAG_ACK:
2837  		ret = process_ack(con, p, end);
2838  		break;
2839  	default:
2840  		pr_err("bad tag %d\n", tag);
2841  		con->error_msg = "protocol error, bad tag";
2842  		return -EINVAL;
2843  	}
2844  	if (ret) {
2845  		dout("%s con %p error %d\n", __func__, con, ret);
2846  		return ret;
2847  	}
2848  
2849  	prepare_read_preamble(con);
2850  	return 0;
2851  }
2852  
2853  /*
2854   * Return:
2855   *   1 - con->in_msg set, read message
2856   *   0 - skip message
2857   *  <0 - error
2858   */
process_message_header(struct ceph_connection * con,void * p,void * end)2859  static int process_message_header(struct ceph_connection *con,
2860  				  void *p, void *end)
2861  {
2862  	struct ceph_frame_desc *desc = &con->v2.in_desc;
2863  	struct ceph_msg_header2 *hdr2 = p;
2864  	struct ceph_msg_header hdr;
2865  	int skip;
2866  	int ret;
2867  	u64 seq;
2868  
2869  	/* verify seq# */
2870  	seq = le64_to_cpu(hdr2->seq);
2871  	if ((s64)seq - (s64)con->in_seq < 1) {
2872  		pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n",
2873  			ENTITY_NAME(con->peer_name),
2874  			ceph_pr_addr(&con->peer_addr),
2875  			seq, con->in_seq + 1);
2876  		return 0;
2877  	}
2878  	if ((s64)seq - (s64)con->in_seq > 1) {
2879  		pr_err("bad seq %llu, expected %llu\n", seq, con->in_seq + 1);
2880  		con->error_msg = "bad message sequence # for incoming message";
2881  		return -EBADE;
2882  	}
2883  
2884  	ceph_con_discard_sent(con, le64_to_cpu(hdr2->ack_seq));
2885  
2886  	fill_header(&hdr, hdr2, desc->fd_lens[1], desc->fd_lens[2],
2887  		    desc->fd_lens[3], &con->peer_name);
2888  	ret = ceph_con_in_msg_alloc(con, &hdr, &skip);
2889  	if (ret)
2890  		return ret;
2891  
2892  	WARN_ON(!con->in_msg ^ skip);
2893  	if (skip)
2894  		return 0;
2895  
2896  	WARN_ON(!con->in_msg);
2897  	WARN_ON(con->in_msg->con != con);
2898  	return 1;
2899  }
2900  
process_message(struct ceph_connection * con)2901  static int process_message(struct ceph_connection *con)
2902  {
2903  	ceph_con_process_message(con);
2904  
2905  	/*
2906  	 * We could have been closed by ceph_con_close() because
2907  	 * ceph_con_process_message() temporarily drops con->mutex.
2908  	 */
2909  	if (con->state != CEPH_CON_S_OPEN) {
2910  		dout("%s con %p state changed to %d\n", __func__, con,
2911  		     con->state);
2912  		return -EAGAIN;
2913  	}
2914  
2915  	prepare_read_preamble(con);
2916  	return 0;
2917  }
2918  
__handle_control(struct ceph_connection * con,void * p)2919  static int __handle_control(struct ceph_connection *con, void *p)
2920  {
2921  	void *end = p + con->v2.in_desc.fd_lens[0];
2922  	struct ceph_msg *msg;
2923  	int ret;
2924  
2925  	if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE)
2926  		return process_control(con, p, end);
2927  
2928  	ret = process_message_header(con, p, end);
2929  	if (ret < 0)
2930  		return ret;
2931  	if (ret == 0) {
2932  		prepare_skip_message(con);
2933  		return 0;
2934  	}
2935  
2936  	msg = con->in_msg;  /* set in process_message_header() */
2937  	if (front_len(msg)) {
2938  		WARN_ON(front_len(msg) > msg->front_alloc_len);
2939  		msg->front.iov_len = front_len(msg);
2940  	} else {
2941  		msg->front.iov_len = 0;
2942  	}
2943  	if (middle_len(msg)) {
2944  		WARN_ON(middle_len(msg) > msg->middle->alloc_len);
2945  		msg->middle->vec.iov_len = middle_len(msg);
2946  	} else if (msg->middle) {
2947  		msg->middle->vec.iov_len = 0;
2948  	}
2949  
2950  	if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
2951  		return process_message(con);
2952  
2953  	if (con_secure(con))
2954  		return prepare_read_tail_secure(con);
2955  
2956  	return prepare_read_tail_plain(con);
2957  }
2958  
handle_preamble(struct ceph_connection * con)2959  static int handle_preamble(struct ceph_connection *con)
2960  {
2961  	struct ceph_frame_desc *desc = &con->v2.in_desc;
2962  	int ret;
2963  
2964  	if (con_secure(con)) {
2965  		ret = decrypt_preamble(con);
2966  		if (ret) {
2967  			if (ret == -EBADMSG)
2968  				con->error_msg = "integrity error, bad preamble auth tag";
2969  			return ret;
2970  		}
2971  	}
2972  
2973  	ret = decode_preamble(con->v2.in_buf, desc);
2974  	if (ret) {
2975  		if (ret == -EBADMSG)
2976  			con->error_msg = "integrity error, bad crc";
2977  		else
2978  			con->error_msg = "protocol error, bad preamble";
2979  		return ret;
2980  	}
2981  
2982  	dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__,
2983  	     con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0],
2984  	     desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]);
2985  
2986  	if (!con_secure(con))
2987  		return prepare_read_control(con);
2988  
2989  	if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN)
2990  		return prepare_read_control_remainder(con);
2991  
2992  	return __handle_control(con, CTRL_BODY(con->v2.in_buf));
2993  }
2994  
handle_control(struct ceph_connection * con)2995  static int handle_control(struct ceph_connection *con)
2996  {
2997  	int ctrl_len = con->v2.in_desc.fd_lens[0];
2998  	void *buf;
2999  	int ret;
3000  
3001  	WARN_ON(con_secure(con));
3002  
3003  	ret = verify_control_crc(con);
3004  	if (ret) {
3005  		con->error_msg = "integrity error, bad crc";
3006  		return ret;
3007  	}
3008  
3009  	if (con->state == CEPH_CON_S_V2_AUTH) {
3010  		buf = alloc_conn_buf(con, ctrl_len);
3011  		if (!buf)
3012  			return -ENOMEM;
3013  
3014  		memcpy(buf, con->v2.in_kvecs[0].iov_base, ctrl_len);
3015  		return __handle_control(con, buf);
3016  	}
3017  
3018  	return __handle_control(con, con->v2.in_kvecs[0].iov_base);
3019  }
3020  
handle_control_remainder(struct ceph_connection * con)3021  static int handle_control_remainder(struct ceph_connection *con)
3022  {
3023  	int ret;
3024  
3025  	WARN_ON(!con_secure(con));
3026  
3027  	ret = decrypt_control_remainder(con);
3028  	if (ret) {
3029  		if (ret == -EBADMSG)
3030  			con->error_msg = "integrity error, bad control remainder auth tag";
3031  		return ret;
3032  	}
3033  
3034  	return __handle_control(con, con->v2.in_kvecs[0].iov_base -
3035  				     CEPH_PREAMBLE_INLINE_LEN);
3036  }
3037  
handle_epilogue(struct ceph_connection * con)3038  static int handle_epilogue(struct ceph_connection *con)
3039  {
3040  	u32 front_crc, middle_crc, data_crc;
3041  	int ret;
3042  
3043  	if (con_secure(con)) {
3044  		ret = decrypt_tail(con);
3045  		if (ret) {
3046  			if (ret == -EBADMSG)
3047  				con->error_msg = "integrity error, bad epilogue auth tag";
3048  			return ret;
3049  		}
3050  
3051  		/* just late_status */
3052  		ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL);
3053  		if (ret) {
3054  			con->error_msg = "protocol error, bad epilogue";
3055  			return ret;
3056  		}
3057  	} else {
3058  		ret = decode_epilogue(con->v2.in_buf, &front_crc,
3059  				      &middle_crc, &data_crc);
3060  		if (ret) {
3061  			con->error_msg = "protocol error, bad epilogue";
3062  			return ret;
3063  		}
3064  
3065  		ret = verify_epilogue_crcs(con, front_crc, middle_crc,
3066  					   data_crc);
3067  		if (ret) {
3068  			con->error_msg = "integrity error, bad crc";
3069  			return ret;
3070  		}
3071  	}
3072  
3073  	return process_message(con);
3074  }
3075  
finish_skip(struct ceph_connection * con)3076  static void finish_skip(struct ceph_connection *con)
3077  {
3078  	dout("%s con %p\n", __func__, con);
3079  
3080  	if (con_secure(con))
3081  		gcm_inc_nonce(&con->v2.in_gcm_nonce);
3082  
3083  	__finish_skip(con);
3084  }
3085  
populate_in_iter(struct ceph_connection * con)3086  static int populate_in_iter(struct ceph_connection *con)
3087  {
3088  	int ret;
3089  
3090  	dout("%s con %p state %d in_state %d\n", __func__, con, con->state,
3091  	     con->v2.in_state);
3092  	WARN_ON(iov_iter_count(&con->v2.in_iter));
3093  
3094  	if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) {
3095  		ret = process_banner_prefix(con);
3096  	} else if (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) {
3097  		ret = process_banner_payload(con);
3098  	} else if ((con->state >= CEPH_CON_S_V2_HELLO &&
3099  		    con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) ||
3100  		   con->state == CEPH_CON_S_OPEN) {
3101  		switch (con->v2.in_state) {
3102  		case IN_S_HANDLE_PREAMBLE:
3103  			ret = handle_preamble(con);
3104  			break;
3105  		case IN_S_HANDLE_CONTROL:
3106  			ret = handle_control(con);
3107  			break;
3108  		case IN_S_HANDLE_CONTROL_REMAINDER:
3109  			ret = handle_control_remainder(con);
3110  			break;
3111  		case IN_S_PREPARE_READ_DATA:
3112  			ret = prepare_read_data(con);
3113  			break;
3114  		case IN_S_PREPARE_READ_DATA_CONT:
3115  			prepare_read_data_cont(con);
3116  			ret = 0;
3117  			break;
3118  		case IN_S_PREPARE_READ_ENC_PAGE:
3119  			prepare_read_enc_page(con);
3120  			ret = 0;
3121  			break;
3122  		case IN_S_PREPARE_SPARSE_DATA:
3123  			ret = prepare_sparse_read_data(con);
3124  			break;
3125  		case IN_S_PREPARE_SPARSE_DATA_CONT:
3126  			ret = prepare_sparse_read_cont(con);
3127  			break;
3128  		case IN_S_HANDLE_EPILOGUE:
3129  			ret = handle_epilogue(con);
3130  			break;
3131  		case IN_S_FINISH_SKIP:
3132  			finish_skip(con);
3133  			ret = 0;
3134  			break;
3135  		default:
3136  			WARN(1, "bad in_state %d", con->v2.in_state);
3137  			return -EINVAL;
3138  		}
3139  	} else {
3140  		WARN(1, "bad state %d", con->state);
3141  		return -EINVAL;
3142  	}
3143  	if (ret) {
3144  		dout("%s con %p error %d\n", __func__, con, ret);
3145  		return ret;
3146  	}
3147  
3148  	if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
3149  		return -ENODATA;
3150  	dout("%s con %p populated %zu\n", __func__, con,
3151  	     iov_iter_count(&con->v2.in_iter));
3152  	return 1;
3153  }
3154  
ceph_con_v2_try_read(struct ceph_connection * con)3155  int ceph_con_v2_try_read(struct ceph_connection *con)
3156  {
3157  	int ret;
3158  
3159  	dout("%s con %p state %d need %zu\n", __func__, con, con->state,
3160  	     iov_iter_count(&con->v2.in_iter));
3161  
3162  	if (con->state == CEPH_CON_S_PREOPEN)
3163  		return 0;
3164  
3165  	/*
3166  	 * We should always have something pending here.  If not,
3167  	 * avoid calling populate_in_iter() as if we read something
3168  	 * (ceph_tcp_recv() would immediately return 1).
3169  	 */
3170  	if (WARN_ON(!iov_iter_count(&con->v2.in_iter)))
3171  		return -ENODATA;
3172  
3173  	for (;;) {
3174  		ret = ceph_tcp_recv(con);
3175  		if (ret <= 0)
3176  			return ret;
3177  
3178  		ret = populate_in_iter(con);
3179  		if (ret <= 0) {
3180  			if (ret && ret != -EAGAIN && !con->error_msg)
3181  				con->error_msg = "read processing error";
3182  			return ret;
3183  		}
3184  	}
3185  }
3186  
queue_data(struct ceph_connection * con)3187  static void queue_data(struct ceph_connection *con)
3188  {
3189  	struct bio_vec bv;
3190  
3191  	con->v2.out_epil.data_crc = -1;
3192  	ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg,
3193  				  data_len(con->out_msg));
3194  
3195  	get_bvec_at(&con->v2.out_cursor, &bv);
3196  	set_out_bvec(con, &bv, true);
3197  	con->v2.out_state = OUT_S_QUEUE_DATA_CONT;
3198  }
3199  
queue_data_cont(struct ceph_connection * con)3200  static void queue_data_cont(struct ceph_connection *con)
3201  {
3202  	struct bio_vec bv;
3203  
3204  	con->v2.out_epil.data_crc = ceph_crc32c_page(
3205  		con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3206  		con->v2.out_bvec.bv_offset, con->v2.out_bvec.bv_len);
3207  
3208  	ceph_msg_data_advance(&con->v2.out_cursor, con->v2.out_bvec.bv_len);
3209  	if (con->v2.out_cursor.total_resid) {
3210  		get_bvec_at(&con->v2.out_cursor, &bv);
3211  		set_out_bvec(con, &bv, true);
3212  		WARN_ON(con->v2.out_state != OUT_S_QUEUE_DATA_CONT);
3213  		return;
3214  	}
3215  
3216  	/*
3217  	 * We've written all data.  Queue epilogue.  Once it's written,
3218  	 * we are done.
3219  	 */
3220  	reset_out_kvecs(con);
3221  	prepare_epilogue_plain(con, false);
3222  	con->v2.out_state = OUT_S_FINISH_MESSAGE;
3223  }
3224  
queue_enc_page(struct ceph_connection * con)3225  static void queue_enc_page(struct ceph_connection *con)
3226  {
3227  	struct bio_vec bv;
3228  
3229  	dout("%s con %p i %d resid %d\n", __func__, con, con->v2.out_enc_i,
3230  	     con->v2.out_enc_resid);
3231  	WARN_ON(!con->v2.out_enc_resid);
3232  
3233  	bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i],
3234  		      min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0);
3235  
3236  	set_out_bvec(con, &bv, false);
3237  	con->v2.out_enc_i++;
3238  	con->v2.out_enc_resid -= bv.bv_len;
3239  
3240  	if (con->v2.out_enc_resid) {
3241  		WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE);
3242  		return;
3243  	}
3244  
3245  	/*
3246  	 * We've queued the last piece of ciphertext (ending with
3247  	 * epilogue) + auth tag.  Once it's written, we are done.
3248  	 */
3249  	WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt);
3250  	con->v2.out_state = OUT_S_FINISH_MESSAGE;
3251  }
3252  
queue_zeros(struct ceph_connection * con)3253  static void queue_zeros(struct ceph_connection *con)
3254  {
3255  	dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero);
3256  
3257  	if (con->v2.out_zero) {
3258  		set_out_bvec_zero(con);
3259  		con->v2.out_zero -= con->v2.out_bvec.bv_len;
3260  		con->v2.out_state = OUT_S_QUEUE_ZEROS;
3261  		return;
3262  	}
3263  
3264  	/*
3265  	 * We've zero-filled everything up to epilogue.  Queue epilogue
3266  	 * with late_status set to ABORTED and crcs adjusted for zeros.
3267  	 * Once it's written, we are done patching up for the revoke.
3268  	 */
3269  	reset_out_kvecs(con);
3270  	prepare_epilogue_plain(con, true);
3271  	con->v2.out_state = OUT_S_FINISH_MESSAGE;
3272  }
3273  
finish_message(struct ceph_connection * con)3274  static void finish_message(struct ceph_connection *con)
3275  {
3276  	dout("%s con %p msg %p\n", __func__, con, con->out_msg);
3277  
3278  	/* we end up here both plain and secure modes */
3279  	if (con->v2.out_enc_pages) {
3280  		WARN_ON(!con->v2.out_enc_page_cnt);
3281  		ceph_release_page_vector(con->v2.out_enc_pages,
3282  					 con->v2.out_enc_page_cnt);
3283  		con->v2.out_enc_pages = NULL;
3284  		con->v2.out_enc_page_cnt = 0;
3285  	}
3286  	/* message may have been revoked */
3287  	if (con->out_msg) {
3288  		ceph_msg_put(con->out_msg);
3289  		con->out_msg = NULL;
3290  	}
3291  
3292  	con->v2.out_state = OUT_S_GET_NEXT;
3293  }
3294  
/*
 * Run one step of the send state machine after con->v2.out_iter has
 * been fully consumed.
 *
 * If a message is in flight, con->v2.out_state selects the step that
 * queues its next part.  Otherwise (OUT_S_GET_NEXT, possibly right
 * after finishing a message) the next thing to send is picked in this
 * order: a pending keepalive, the next queued message, an ack for
 * received messages that have not been acked yet.
 *
 * Return: 1 if out_iter was populated, 0 if there is nothing to send
 * (CEPH_CON_F_WRITE_PENDING is cleared), negative error code otherwise.
 */
static int populate_out_iter(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
	     con->v2.out_state);
	WARN_ON(iov_iter_count(&con->v2.out_iter));

	/* outside of OPEN nothing is generated here */
	if (con->state != CEPH_CON_S_OPEN) {
		WARN_ON(con->state < CEPH_CON_S_V2_BANNER_PREFIX ||
			con->state > CEPH_CON_S_V2_SESSION_RECONNECT);
		goto nothing_pending;
	}

	switch (con->v2.out_state) {
	case OUT_S_QUEUE_DATA:
		WARN_ON(!con->out_msg);
		queue_data(con);
		goto populated;
	case OUT_S_QUEUE_DATA_CONT:
		WARN_ON(!con->out_msg);
		queue_data_cont(con);
		goto populated;
	case OUT_S_QUEUE_ENC_PAGE:
		queue_enc_page(con);
		goto populated;
	case OUT_S_QUEUE_ZEROS:
		WARN_ON(con->out_msg);  /* revoked */
		queue_zeros(con);
		goto populated;
	case OUT_S_FINISH_MESSAGE:
		/* leaves out_state at OUT_S_GET_NEXT; pick what's next below */
		finish_message(con);
		break;
	case OUT_S_GET_NEXT:
		break;
	default:
		WARN(1, "bad out_state %d", con->v2.out_state);
		return -EINVAL;
	}

	WARN_ON(con->v2.out_state != OUT_S_GET_NEXT);
	if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) {
		ret = prepare_keepalive2(con);
		if (ret) {
			pr_err("prepare_keepalive2 failed: %d\n", ret);
			return ret;
		}
	} else if (!list_empty(&con->out_queue)) {
		ceph_con_get_out_msg(con);
		ret = prepare_message(con);
		if (ret) {
			pr_err("prepare_message failed: %d\n", ret);
			return ret;
		}
	} else if (con->in_seq > con->in_seq_acked) {
		ret = prepare_ack(con);
		if (ret) {
			pr_err("prepare_ack failed: %d\n", ret);
			return ret;
		}
	} else {
		goto nothing_pending;
	}

populated:
	/* every path leading here must have queued something to send */
	if (WARN_ON(!iov_iter_count(&con->v2.out_iter)))
		return -ENODATA;
	dout("%s con %p populated %zu\n", __func__, con,
	     iov_iter_count(&con->v2.out_iter));
	return 1;

nothing_pending:
	WARN_ON(iov_iter_count(&con->v2.out_iter));
	dout("%s con %p nothing pending\n", __func__, con);
	ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
	return 0;
}
3372  
/*
 * Send as much as possible.
 *
 * In CEPH_CON_S_PREOPEN the connection attempt is started first:
 * sequence numbers are bumped, reading of the banner prefix and
 * sending of our banner are prepared and ceph_tcp_connect() is called.
 *
 * Afterwards ceph_tcp_send() and populate_out_iter() alternate until
 * either of them returns <= 0.  The socket is corked for the duration
 * of that loop.
 *
 * Return: the <= 0 value that ended the sending, or a negative error
 * code from the connection setup.  con->error_msg is set to "connect
 * error" for setup failures and to a generic message if
 * populate_out_iter() failed with anything but -EAGAIN without
 * setting one.
 */
int ceph_con_v2_try_write(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p state %d have %zu\n", __func__, con, con->state,
	     iov_iter_count(&con->v2.out_iter));

	/* open the socket first? */
	if (con->state == CEPH_CON_S_PREOPEN) {
		WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2);

		/*
		 * Always bump global_seq.  Bump connect_seq only if
		 * there is a session (i.e. we are reconnecting and will
		 * send session_reconnect instead of client_ident).
		 */
		con->v2.global_seq = ceph_get_global_seq(con->msgr, 0);
		if (con->v2.server_cookie)
			con->v2.connect_seq++;

		ret = prepare_read_banner_prefix(con);
		if (ret) {
			pr_err("prepare_read_banner_prefix failed: %d\n", ret);
			con->error_msg = "connect error";
			return ret;
		}

		reset_out_kvecs(con);
		ret = prepare_banner(con);
		if (ret) {
			pr_err("prepare_banner failed: %d\n", ret);
			con->error_msg = "connect error";
			return ret;
		}

		ret = ceph_tcp_connect(con);
		if (ret) {
			pr_err("ceph_tcp_connect failed: %d\n", ret);
			con->error_msg = "connect error";
			return ret;
		}
	}

	/* nothing queued from a previous call: try to queue something */
	if (!iov_iter_count(&con->v2.out_iter)) {
		ret = populate_out_iter(con);
		if (ret <= 0) {
			if (ret && ret != -EAGAIN && !con->error_msg)
				con->error_msg = "write processing error";
			return ret;
		}
	}

	tcp_sock_set_cork(con->sock->sk, true);
	for (;;) {
		ret = ceph_tcp_send(con);
		if (ret <= 0)
			break;

		ret = populate_out_iter(con);
		if (ret <= 0) {
			if (ret && ret != -EAGAIN && !con->error_msg)
				con->error_msg = "write processing error";
			break;
		}
	}

	/* uncork on every exit from the loop, error or not */
	tcp_sock_set_cork(con->sock->sk, false);
	return ret;
}
3442  
/*
 * Extend @crc over @zero_len zero bytes, feeding crc32c() from
 * ceph_zero_page in chunks of at most PAGE_SIZE.
 *
 * Return: the updated crc (@crc itself if @zero_len is 0).
 */
static u32 crc32c_zeros(u32 crc, int zero_len)
{
	int chunk;

	for (; zero_len; zero_len -= chunk) {
		chunk = min(zero_len, (int)PAGE_SIZE);
		crc = crc32c(crc, page_address(ceph_zero_page), chunk);
	}

	return crc;
}
3455  
prepare_zero_front(struct ceph_connection * con,int resid)3456  static void prepare_zero_front(struct ceph_connection *con, int resid)
3457  {
3458  	int sent;
3459  
3460  	WARN_ON(!resid || resid > front_len(con->out_msg));
3461  	sent = front_len(con->out_msg) - resid;
3462  	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3463  
3464  	if (sent) {
3465  		con->v2.out_epil.front_crc =
3466  			crc32c(-1, con->out_msg->front.iov_base, sent);
3467  		con->v2.out_epil.front_crc =
3468  			crc32c_zeros(con->v2.out_epil.front_crc, resid);
3469  	} else {
3470  		con->v2.out_epil.front_crc = crc32c_zeros(-1, resid);
3471  	}
3472  
3473  	con->v2.out_iter.count -= resid;
3474  	out_zero_add(con, resid);
3475  }
3476  
prepare_zero_middle(struct ceph_connection * con,int resid)3477  static void prepare_zero_middle(struct ceph_connection *con, int resid)
3478  {
3479  	int sent;
3480  
3481  	WARN_ON(!resid || resid > middle_len(con->out_msg));
3482  	sent = middle_len(con->out_msg) - resid;
3483  	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3484  
3485  	if (sent) {
3486  		con->v2.out_epil.middle_crc =
3487  			crc32c(-1, con->out_msg->middle->vec.iov_base, sent);
3488  		con->v2.out_epil.middle_crc =
3489  			crc32c_zeros(con->v2.out_epil.middle_crc, resid);
3490  	} else {
3491  		con->v2.out_epil.middle_crc = crc32c_zeros(-1, resid);
3492  	}
3493  
3494  	con->v2.out_iter.count -= resid;
3495  	out_zero_add(con, resid);
3496  }
3497  
prepare_zero_data(struct ceph_connection * con)3498  static void prepare_zero_data(struct ceph_connection *con)
3499  {
3500  	dout("%s con %p\n", __func__, con);
3501  	con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg));
3502  	out_zero_add(con, data_len(con->out_msg));
3503  }
3504  
/*
 * Revoke con->out_msg while in OUT_S_QUEUE_DATA, i.e. the message has
 * data, and head + front + middle were queued as kvecs but sending of
 * data has not started.
 *
 * The number of bytes still in con->v2.out_iter tells which part was
 * being sent.  Whatever has not gone out of front and middle, plus all
 * of data, is replaced with zeros.
 */
static void revoke_at_queue_data(struct ceph_connection *con)
{
	int boundary;
	int resid;

	WARN_ON(!data_len(con->out_msg));
	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
	resid = iov_iter_count(&con->v2.out_iter);

	/* more left than front + middle: part of the head is unsent */
	boundary = front_len(con->out_msg) + middle_len(con->out_msg);
	if (resid > boundary) {
		resid -= boundary;
		WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
		dout("%s con %p was sending head\n", __func__, con);
		if (front_len(con->out_msg))
			prepare_zero_front(con, front_len(con->out_msg));
		if (middle_len(con->out_msg))
			prepare_zero_middle(con, middle_len(con->out_msg));
		prepare_zero_data(con);
		/*
		 * The rest of the head stays in out_iter and still has
		 * to be written, so only switch the state here instead
		 * of calling queue_zeros().
		 */
		WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
		con->v2.out_state = OUT_S_QUEUE_ZEROS;
		return;
	}

	/* more left than middle: part of the front is unsent */
	boundary = middle_len(con->out_msg);
	if (resid > boundary) {
		resid -= boundary;
		dout("%s con %p was sending front\n", __func__, con);
		prepare_zero_front(con, resid);
		if (middle_len(con->out_msg))
			prepare_zero_middle(con, middle_len(con->out_msg));
		prepare_zero_data(con);
		queue_zeros(con);
		return;
	}

	/* otherwise only (part of) the middle is unsent */
	WARN_ON(!resid);
	dout("%s con %p was sending middle\n", __func__, con);
	prepare_zero_middle(con, resid);
	prepare_zero_data(con);
	queue_zeros(con);
}
3547  
revoke_at_queue_data_cont(struct ceph_connection * con)3548  static void revoke_at_queue_data_cont(struct ceph_connection *con)
3549  {
3550  	int sent, resid;  /* current piece of data */
3551  
3552  	WARN_ON(!data_len(con->out_msg));
3553  	WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter));
3554  	resid = iov_iter_count(&con->v2.out_iter);
3555  	WARN_ON(!resid || resid > con->v2.out_bvec.bv_len);
3556  	sent = con->v2.out_bvec.bv_len - resid;
3557  	dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
3558  
3559  	if (sent) {
3560  		con->v2.out_epil.data_crc = ceph_crc32c_page(
3561  			con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page,
3562  			con->v2.out_bvec.bv_offset, sent);
3563  		ceph_msg_data_advance(&con->v2.out_cursor, sent);
3564  	}
3565  	WARN_ON(resid > con->v2.out_cursor.total_resid);
3566  	con->v2.out_epil.data_crc = crc32c_zeros(con->v2.out_epil.data_crc,
3567  						con->v2.out_cursor.total_resid);
3568  
3569  	con->v2.out_iter.count -= resid;
3570  	out_zero_add(con, con->v2.out_cursor.total_resid);
3571  	queue_zeros(con);
3572  }
3573  
/*
 * Revoke con->out_msg while in OUT_S_FINISH_MESSAGE with kvecs queued,
 * i.e. head + front + middle + epilogue of a message without data (or
 * just the head of an empty message, or just the epilogue) are being
 * sent.
 *
 * The number of bytes still in con->v2.out_iter tells which part was
 * being sent.  Unsent front and middle bytes are replaced with zeros
 * and the already queued epilogue is dropped from out_iter.  Nothing
 * is changed if only the head of an empty message or only the
 * epilogue is left.
 */
static void revoke_at_finish_message(struct ceph_connection *con)
{
	int boundary;
	int resid;

	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
	resid = iov_iter_count(&con->v2.out_iter);

	if (!front_len(con->out_msg) && !middle_len(con->out_msg) &&
	    !data_len(con->out_msg)) {
		WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN);
		dout("%s con %p was sending head (empty message) - noop\n",
		     __func__, con);
		return;
	}

	/* more left than front + middle + epilogue: head is unsent */
	boundary = front_len(con->out_msg) + middle_len(con->out_msg) +
		   CEPH_EPILOGUE_PLAIN_LEN;
	if (resid > boundary) {
		resid -= boundary;
		WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
		dout("%s con %p was sending head\n", __func__, con);
		if (front_len(con->out_msg))
			prepare_zero_front(con, front_len(con->out_msg));
		if (middle_len(con->out_msg))
			prepare_zero_middle(con, middle_len(con->out_msg));
		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
		/*
		 * The rest of the head stays in out_iter and still has
		 * to be written, so only switch the state here instead
		 * of calling queue_zeros().
		 */
		WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
		con->v2.out_state = OUT_S_QUEUE_ZEROS;
		return;
	}

	/* more left than middle + epilogue: part of front is unsent */
	boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN;
	if (resid > boundary) {
		resid -= boundary;
		dout("%s con %p was sending front\n", __func__, con);
		prepare_zero_front(con, resid);
		if (middle_len(con->out_msg))
			prepare_zero_middle(con, middle_len(con->out_msg));
		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
		queue_zeros(con);
		return;
	}

	/* more left than the epilogue: part of middle is unsent */
	boundary = CEPH_EPILOGUE_PLAIN_LEN;
	if (resid > boundary) {
		resid -= boundary;
		dout("%s con %p was sending middle\n", __func__, con);
		prepare_zero_middle(con, resid);
		con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
		queue_zeros(con);
		return;
	}

	WARN_ON(!resid);
	dout("%s con %p was sending epilogue - noop\n", __func__, con);
}
3631  
ceph_con_v2_revoke(struct ceph_connection * con)3632  void ceph_con_v2_revoke(struct ceph_connection *con)
3633  {
3634  	WARN_ON(con->v2.out_zero);
3635  
3636  	if (con_secure(con)) {
3637  		WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE &&
3638  			con->v2.out_state != OUT_S_FINISH_MESSAGE);
3639  		dout("%s con %p secure - noop\n", __func__, con);
3640  		return;
3641  	}
3642  
3643  	switch (con->v2.out_state) {
3644  	case OUT_S_QUEUE_DATA:
3645  		revoke_at_queue_data(con);
3646  		break;
3647  	case OUT_S_QUEUE_DATA_CONT:
3648  		revoke_at_queue_data_cont(con);
3649  		break;
3650  	case OUT_S_FINISH_MESSAGE:
3651  		revoke_at_finish_message(con);
3652  		break;
3653  	default:
3654  		WARN(1, "bad out_state %d", con->v2.out_state);
3655  		break;
3656  	}
3657  }
3658  
revoke_at_prepare_read_data(struct ceph_connection * con)3659  static void revoke_at_prepare_read_data(struct ceph_connection *con)
3660  {
3661  	int remaining;
3662  	int resid;
3663  
3664  	WARN_ON(con_secure(con));
3665  	WARN_ON(!data_len(con->in_msg));
3666  	WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
3667  	resid = iov_iter_count(&con->v2.in_iter);
3668  	WARN_ON(!resid);
3669  
3670  	remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
3671  	dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
3672  	     remaining);
3673  	con->v2.in_iter.count -= resid;
3674  	set_in_skip(con, resid + remaining);
3675  	con->v2.in_state = IN_S_FINISH_SKIP;
3676  }
3677  
revoke_at_prepare_read_data_cont(struct ceph_connection * con)3678  static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
3679  {
3680  	int recved, resid;  /* current piece of data */
3681  	int remaining;
3682  
3683  	WARN_ON(con_secure(con));
3684  	WARN_ON(!data_len(con->in_msg));
3685  	WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3686  	resid = iov_iter_count(&con->v2.in_iter);
3687  	WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3688  	recved = con->v2.in_bvec.bv_len - resid;
3689  	dout("%s con %p recved %d resid %d\n", __func__, con, recved, resid);
3690  
3691  	if (recved)
3692  		ceph_msg_data_advance(&con->v2.in_cursor, recved);
3693  	WARN_ON(resid > con->v2.in_cursor.total_resid);
3694  
3695  	remaining = CEPH_EPILOGUE_PLAIN_LEN;
3696  	dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
3697  	     con->v2.in_cursor.total_resid, remaining);
3698  	con->v2.in_iter.count -= resid;
3699  	set_in_skip(con, con->v2.in_cursor.total_resid + remaining);
3700  	con->v2.in_state = IN_S_FINISH_SKIP;
3701  }
3702  
revoke_at_prepare_read_enc_page(struct ceph_connection * con)3703  static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
3704  {
3705  	int resid;  /* current enc page (not necessarily data) */
3706  
3707  	WARN_ON(!con_secure(con));
3708  	WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3709  	resid = iov_iter_count(&con->v2.in_iter);
3710  	WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
3711  
3712  	dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
3713  	     con->v2.in_enc_resid);
3714  	con->v2.in_iter.count -= resid;
3715  	set_in_skip(con, resid + con->v2.in_enc_resid);
3716  	con->v2.in_state = IN_S_FINISH_SKIP;
3717  }
3718  
revoke_at_prepare_sparse_data(struct ceph_connection * con)3719  static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
3720  {
3721  	int resid;  /* current piece of data */
3722  	int remaining;
3723  
3724  	WARN_ON(con_secure(con));
3725  	WARN_ON(!data_len(con->in_msg));
3726  	WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
3727  	resid = iov_iter_count(&con->v2.in_iter);
3728  	dout("%s con %p resid %d\n", __func__, con, resid);
3729  
3730  	remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
3731  	con->v2.in_iter.count -= resid;
3732  	set_in_skip(con, resid + remaining);
3733  	con->v2.in_state = IN_S_FINISH_SKIP;
3734  }
3735  
revoke_at_handle_epilogue(struct ceph_connection * con)3736  static void revoke_at_handle_epilogue(struct ceph_connection *con)
3737  {
3738  	int resid;
3739  
3740  	resid = iov_iter_count(&con->v2.in_iter);
3741  	WARN_ON(!resid);
3742  
3743  	dout("%s con %p resid %d\n", __func__, con, resid);
3744  	con->v2.in_iter.count -= resid;
3745  	set_in_skip(con, resid);
3746  	con->v2.in_state = IN_S_FINISH_SKIP;
3747  }
3748  
ceph_con_v2_revoke_incoming(struct ceph_connection * con)3749  void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
3750  {
3751  	switch (con->v2.in_state) {
3752  	case IN_S_PREPARE_SPARSE_DATA:
3753  	case IN_S_PREPARE_READ_DATA:
3754  		revoke_at_prepare_read_data(con);
3755  		break;
3756  	case IN_S_PREPARE_READ_DATA_CONT:
3757  		revoke_at_prepare_read_data_cont(con);
3758  		break;
3759  	case IN_S_PREPARE_READ_ENC_PAGE:
3760  		revoke_at_prepare_read_enc_page(con);
3761  		break;
3762  	case IN_S_PREPARE_SPARSE_DATA_CONT:
3763  		revoke_at_prepare_sparse_data(con);
3764  		break;
3765  	case IN_S_HANDLE_EPILOGUE:
3766  		revoke_at_handle_epilogue(con);
3767  		break;
3768  	default:
3769  		WARN(1, "bad in_state %d", con->v2.in_state);
3770  		break;
3771  	}
3772  }
3773  
ceph_con_v2_opened(struct ceph_connection * con)3774  bool ceph_con_v2_opened(struct ceph_connection *con)
3775  {
3776  	return con->v2.peer_global_seq;
3777  }
3778  
ceph_con_v2_reset_session(struct ceph_connection * con)3779  void ceph_con_v2_reset_session(struct ceph_connection *con)
3780  {
3781  	con->v2.client_cookie = 0;
3782  	con->v2.server_cookie = 0;
3783  	con->v2.global_seq = 0;
3784  	con->v2.connect_seq = 0;
3785  	con->v2.peer_global_seq = 0;
3786  }
3787  
ceph_con_v2_reset_protocol(struct ceph_connection * con)3788  void ceph_con_v2_reset_protocol(struct ceph_connection *con)
3789  {
3790  	iov_iter_truncate(&con->v2.in_iter, 0);
3791  	iov_iter_truncate(&con->v2.out_iter, 0);
3792  	con->v2.out_zero = 0;
3793  
3794  	clear_in_sign_kvecs(con);
3795  	clear_out_sign_kvecs(con);
3796  	free_conn_bufs(con);
3797  
3798  	if (con->v2.in_enc_pages) {
3799  		WARN_ON(!con->v2.in_enc_page_cnt);
3800  		ceph_release_page_vector(con->v2.in_enc_pages,
3801  					 con->v2.in_enc_page_cnt);
3802  		con->v2.in_enc_pages = NULL;
3803  		con->v2.in_enc_page_cnt = 0;
3804  	}
3805  	if (con->v2.out_enc_pages) {
3806  		WARN_ON(!con->v2.out_enc_page_cnt);
3807  		ceph_release_page_vector(con->v2.out_enc_pages,
3808  					 con->v2.out_enc_page_cnt);
3809  		con->v2.out_enc_pages = NULL;
3810  		con->v2.out_enc_page_cnt = 0;
3811  	}
3812  
3813  	con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
3814  	memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
3815  	memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
3816  
3817  	if (con->v2.hmac_tfm) {
3818  		crypto_free_shash(con->v2.hmac_tfm);
3819  		con->v2.hmac_tfm = NULL;
3820  	}
3821  	if (con->v2.gcm_req) {
3822  		aead_request_free(con->v2.gcm_req);
3823  		con->v2.gcm_req = NULL;
3824  	}
3825  	if (con->v2.gcm_tfm) {
3826  		crypto_free_aead(con->v2.gcm_tfm);
3827  		con->v2.gcm_tfm = NULL;
3828  	}
3829  }
3830