xref: /openbmc/linux/net/vmw_vsock/vmci_transport.c (revision d021c344051af91f42c5ba9fdedc176740cbd238)
1*d021c344SAndy King /*
2*d021c344SAndy King  * VMware vSockets Driver
3*d021c344SAndy King  *
4*d021c344SAndy King  * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
5*d021c344SAndy King  *
6*d021c344SAndy King  * This program is free software; you can redistribute it and/or modify it
7*d021c344SAndy King  * under the terms of the GNU General Public License as published by the Free
8*d021c344SAndy King  * Software Foundation version 2 and no later version.
9*d021c344SAndy King  *
10*d021c344SAndy King  * This program is distributed in the hope that it will be useful, but WITHOUT
11*d021c344SAndy King  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12*d021c344SAndy King  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13*d021c344SAndy King  * more details.
14*d021c344SAndy King  */
15*d021c344SAndy King 
16*d021c344SAndy King #include <linux/types.h>
17*d021c344SAndy King 
18*d021c344SAndy King #define EXPORT_SYMTAB
19*d021c344SAndy King #include <linux/bitops.h>
20*d021c344SAndy King #include <linux/cred.h>
21*d021c344SAndy King #include <linux/init.h>
22*d021c344SAndy King #include <linux/io.h>
23*d021c344SAndy King #include <linux/kernel.h>
24*d021c344SAndy King #include <linux/kmod.h>
25*d021c344SAndy King #include <linux/list.h>
26*d021c344SAndy King #include <linux/miscdevice.h>
27*d021c344SAndy King #include <linux/module.h>
28*d021c344SAndy King #include <linux/mutex.h>
29*d021c344SAndy King #include <linux/net.h>
30*d021c344SAndy King #include <linux/poll.h>
31*d021c344SAndy King #include <linux/skbuff.h>
32*d021c344SAndy King #include <linux/smp.h>
33*d021c344SAndy King #include <linux/socket.h>
34*d021c344SAndy King #include <linux/stddef.h>
35*d021c344SAndy King #include <linux/unistd.h>
36*d021c344SAndy King #include <linux/wait.h>
37*d021c344SAndy King #include <linux/workqueue.h>
38*d021c344SAndy King #include <net/sock.h>
39*d021c344SAndy King 
40*d021c344SAndy King #include "af_vsock.h"
41*d021c344SAndy King #include "vmci_transport_notify.h"
42*d021c344SAndy King 
43*d021c344SAndy King static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
44*d021c344SAndy King static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
45*d021c344SAndy King static void vmci_transport_peer_attach_cb(u32 sub_id,
46*d021c344SAndy King 					  const struct vmci_event_data *ed,
47*d021c344SAndy King 					  void *client_data);
48*d021c344SAndy King static void vmci_transport_peer_detach_cb(u32 sub_id,
49*d021c344SAndy King 					  const struct vmci_event_data *ed,
50*d021c344SAndy King 					  void *client_data);
51*d021c344SAndy King static void vmci_transport_recv_pkt_work(struct work_struct *work);
52*d021c344SAndy King static int vmci_transport_recv_listen(struct sock *sk,
53*d021c344SAndy King 				      struct vmci_transport_packet *pkt);
54*d021c344SAndy King static int vmci_transport_recv_connecting_server(
55*d021c344SAndy King 					struct sock *sk,
56*d021c344SAndy King 					struct sock *pending,
57*d021c344SAndy King 					struct vmci_transport_packet *pkt);
58*d021c344SAndy King static int vmci_transport_recv_connecting_client(
59*d021c344SAndy King 					struct sock *sk,
60*d021c344SAndy King 					struct vmci_transport_packet *pkt);
61*d021c344SAndy King static int vmci_transport_recv_connecting_client_negotiate(
62*d021c344SAndy King 					struct sock *sk,
63*d021c344SAndy King 					struct vmci_transport_packet *pkt);
64*d021c344SAndy King static int vmci_transport_recv_connecting_client_invalid(
65*d021c344SAndy King 					struct sock *sk,
66*d021c344SAndy King 					struct vmci_transport_packet *pkt);
67*d021c344SAndy King static int vmci_transport_recv_connected(struct sock *sk,
68*d021c344SAndy King 					 struct vmci_transport_packet *pkt);
69*d021c344SAndy King static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
70*d021c344SAndy King static u16 vmci_transport_new_proto_supported_versions(void);
71*d021c344SAndy King static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
72*d021c344SAndy King 						  bool old_pkt_proto);
73*d021c344SAndy King 
/* Context for deferring the processing of a received control packet from
 * bottom-half context to process context (presumably consumed by
 * vmci_transport_recv_pkt_work(), declared above -- scheduling happens
 * elsewhere in this file).
 */
struct vmci_transport_recv_pkt_info {
	struct work_struct work;	/* work item for the packet work queue */
	struct sock *sk;		/* destination socket of the packet */
	struct vmci_transport_packet pkt;	/* copy of the received packet */
};
79*d021c344SAndy King 
/* Handle for the any-CID stream control datagram endpoint; invalid until
 * the transport is initialized.
 */
static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
							   VMCI_INVALID_ID };
/* Subscription id for queue-pair resume events; invalid until subscribed. */
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;

/* Module-level override of the negotiated protocol version; -1 means no
 * override is in effect.
 */
static int PROTOCOL_OVERRIDE = -1;

/* Minimum, default, and maximum sizes (bytes) for the stream queue pair. */
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144

/* The default peer timeout indicates how long we will wait for a peer response
 * to a control message.
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

/* Pseudo socket state used for listening sockets; chosen outside the range
 * of the standard SS_* socket states -- NOTE(review): confirm against the
 * af_vsock core's use of sk_socket->state.
 */
#define SS_LISTEN 255
96*d021c344SAndy King 
97*d021c344SAndy King /* Helper function to convert from a VMCI error code to a VSock error code. */
98*d021c344SAndy King 
99*d021c344SAndy King static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
100*d021c344SAndy King {
101*d021c344SAndy King 	int err;
102*d021c344SAndy King 
103*d021c344SAndy King 	switch (vmci_error) {
104*d021c344SAndy King 	case VMCI_ERROR_NO_MEM:
105*d021c344SAndy King 		err = ENOMEM;
106*d021c344SAndy King 		break;
107*d021c344SAndy King 	case VMCI_ERROR_DUPLICATE_ENTRY:
108*d021c344SAndy King 	case VMCI_ERROR_ALREADY_EXISTS:
109*d021c344SAndy King 		err = EADDRINUSE;
110*d021c344SAndy King 		break;
111*d021c344SAndy King 	case VMCI_ERROR_NO_ACCESS:
112*d021c344SAndy King 		err = EPERM;
113*d021c344SAndy King 		break;
114*d021c344SAndy King 	case VMCI_ERROR_NO_RESOURCES:
115*d021c344SAndy King 		err = ENOBUFS;
116*d021c344SAndy King 		break;
117*d021c344SAndy King 	case VMCI_ERROR_INVALID_RESOURCE:
118*d021c344SAndy King 		err = EHOSTUNREACH;
119*d021c344SAndy King 		break;
120*d021c344SAndy King 	case VMCI_ERROR_INVALID_ARGS:
121*d021c344SAndy King 	default:
122*d021c344SAndy King 		err = EINVAL;
123*d021c344SAndy King 	}
124*d021c344SAndy King 
125*d021c344SAndy King 	return err > 0 ? -err : err;
126*d021c344SAndy King }
127*d021c344SAndy King 
/* Initialize a control packet in place.  The union member that is filled
 * in (size, handle, mode, or wait) depends on the packet type; unused
 * union space and reserved fields are zeroed.
 *
 * @pkt:    packet to initialize (fully overwritten).
 * @src:    local address; only the port is carried in the packet.
 * @dst:    peer address; supplies both the destination CID and port.
 * @type:   one of VMCI_TRANSPORT_PACKET_TYPE_*.
 * @size:   queue-pair size, used by REQUEST*/NEGOTIATE* packets only.
 * @mode:   shutdown mode, used by SHUTDOWN packets only.
 * @wait:   waiting info, copied for WAITING_READ/WAITING_WRITE only.
 * @proto:  protocol version bits, used by REQUEST2/NEGOTIATE2 only.
 * @handle: queue-pair handle, used by OFFER/ATTACH only.
 */
static inline void
vmci_transport_packet_init(struct vmci_transport_packet *pkt,
			   struct sockaddr_vm *src,
			   struct sockaddr_vm *dst,
			   u8 type,
			   u64 size,
			   u64 mode,
			   struct vmci_transport_waiting_info *wait,
			   u16 proto,
			   struct vmci_handle handle)
{
	/* We register the stream control handler as an any cid handle so we
	 * must always send from a source address of VMADDR_CID_ANY
	 */
	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
				       VMCI_TRANSPORT_PACKET_RID);
	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
				       VMCI_TRANSPORT_PACKET_RID);
	/* Payload is everything in the packet beyond the datagram header. */
	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
	pkt->type = type;
	pkt->src_port = src->svm_port;
	pkt->dst_port = dst->svm_port;
	/* Zero the proto and reserved fields; only REQUEST2/NEGOTIATE2
	 * set proto below.
	 */
	memset(&pkt->proto, 0, sizeof(pkt->proto));
	memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));

	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
		pkt->u.size = 0;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
		pkt->u.size = size;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
		pkt->u.handle = handle;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
	case VMCI_TRANSPORT_PACKET_TYPE_READ:
	case VMCI_TRANSPORT_PACKET_TYPE_RST:
		/* These carry no payload; zero the union for determinism. */
		pkt->u.size = 0;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
		pkt->u.mode = mode;
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
		memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
		pkt->u.size = size;
		pkt->proto = proto;
		break;
	}
}
191*d021c344SAndy King 
192*d021c344SAndy King static inline void
193*d021c344SAndy King vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
194*d021c344SAndy King 				    struct sockaddr_vm *local,
195*d021c344SAndy King 				    struct sockaddr_vm *remote)
196*d021c344SAndy King {
197*d021c344SAndy King 	vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
198*d021c344SAndy King 	vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
199*d021c344SAndy King }
200*d021c344SAndy King 
201*d021c344SAndy King static int
202*d021c344SAndy King __vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
203*d021c344SAndy King 				  struct sockaddr_vm *src,
204*d021c344SAndy King 				  struct sockaddr_vm *dst,
205*d021c344SAndy King 				  enum vmci_transport_packet_type type,
206*d021c344SAndy King 				  u64 size,
207*d021c344SAndy King 				  u64 mode,
208*d021c344SAndy King 				  struct vmci_transport_waiting_info *wait,
209*d021c344SAndy King 				  u16 proto,
210*d021c344SAndy King 				  struct vmci_handle handle,
211*d021c344SAndy King 				  bool convert_error)
212*d021c344SAndy King {
213*d021c344SAndy King 	int err;
214*d021c344SAndy King 
215*d021c344SAndy King 	vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
216*d021c344SAndy King 				   proto, handle);
217*d021c344SAndy King 	err = vmci_datagram_send(&pkt->dg);
218*d021c344SAndy King 	if (convert_error && (err < 0))
219*d021c344SAndy King 		return vmci_transport_error_to_vsock_error(err);
220*d021c344SAndy King 
221*d021c344SAndy King 	return err;
222*d021c344SAndy King }
223*d021c344SAndy King 
224*d021c344SAndy King static int
225*d021c344SAndy King vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
226*d021c344SAndy King 				      enum vmci_transport_packet_type type,
227*d021c344SAndy King 				      u64 size,
228*d021c344SAndy King 				      u64 mode,
229*d021c344SAndy King 				      struct vmci_transport_waiting_info *wait,
230*d021c344SAndy King 				      struct vmci_handle handle)
231*d021c344SAndy King {
232*d021c344SAndy King 	struct vmci_transport_packet reply;
233*d021c344SAndy King 	struct sockaddr_vm src, dst;
234*d021c344SAndy King 
235*d021c344SAndy King 	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
236*d021c344SAndy King 		return 0;
237*d021c344SAndy King 	} else {
238*d021c344SAndy King 		vmci_transport_packet_get_addresses(pkt, &src, &dst);
239*d021c344SAndy King 		return __vmci_transport_send_control_pkt(&reply, &src, &dst,
240*d021c344SAndy King 							 type,
241*d021c344SAndy King 							 size, mode, wait,
242*d021c344SAndy King 							 VSOCK_PROTO_INVALID,
243*d021c344SAndy King 							 handle, true);
244*d021c344SAndy King 	}
245*d021c344SAndy King }
246*d021c344SAndy King 
/* Send a control packet from bottom-half (tasklet) context, where we must
 * not sleep and therefore cannot allocate the packet with GFP_KERNEL.
 * VMCI errors are returned raw (convert_error == false); callers in BH
 * context translate them if needed.
 */
static int
vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
				   struct sockaddr_vm *dst,
				   enum vmci_transport_packet_type type,
				   u64 size,
				   u64 mode,
				   struct vmci_transport_waiting_info *wait,
				   struct vmci_handle handle)
{
	/* Note that it is safe to use a single packet across all CPUs since
	 * two tasklets of the same type are guaranteed to not ever run
	 * simultaneously. If that ever changes, or VMCI stops using tasklets,
	 * we can use per-cpu packets.
	 */
	static struct vmci_transport_packet pkt;

	return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
						 size, mode, wait,
						 VSOCK_PROTO_INVALID, handle,
						 false);
}
268*d021c344SAndy King 
269*d021c344SAndy King static int
270*d021c344SAndy King vmci_transport_send_control_pkt(struct sock *sk,
271*d021c344SAndy King 				enum vmci_transport_packet_type type,
272*d021c344SAndy King 				u64 size,
273*d021c344SAndy King 				u64 mode,
274*d021c344SAndy King 				struct vmci_transport_waiting_info *wait,
275*d021c344SAndy King 				u16 proto,
276*d021c344SAndy King 				struct vmci_handle handle)
277*d021c344SAndy King {
278*d021c344SAndy King 	struct vmci_transport_packet *pkt;
279*d021c344SAndy King 	struct vsock_sock *vsk;
280*d021c344SAndy King 	int err;
281*d021c344SAndy King 
282*d021c344SAndy King 	vsk = vsock_sk(sk);
283*d021c344SAndy King 
284*d021c344SAndy King 	if (!vsock_addr_bound(&vsk->local_addr))
285*d021c344SAndy King 		return -EINVAL;
286*d021c344SAndy King 
287*d021c344SAndy King 	if (!vsock_addr_bound(&vsk->remote_addr))
288*d021c344SAndy King 		return -EINVAL;
289*d021c344SAndy King 
290*d021c344SAndy King 	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
291*d021c344SAndy King 	if (!pkt)
292*d021c344SAndy King 		return -ENOMEM;
293*d021c344SAndy King 
294*d021c344SAndy King 	err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
295*d021c344SAndy King 						&vsk->remote_addr, type, size,
296*d021c344SAndy King 						mode, wait, proto, handle,
297*d021c344SAndy King 						true);
298*d021c344SAndy King 	kfree(pkt);
299*d021c344SAndy King 
300*d021c344SAndy King 	return err;
301*d021c344SAndy King }
302*d021c344SAndy King 
303*d021c344SAndy King static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
304*d021c344SAndy King 					struct sockaddr_vm *src,
305*d021c344SAndy King 					struct vmci_transport_packet *pkt)
306*d021c344SAndy King {
307*d021c344SAndy King 	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
308*d021c344SAndy King 		return 0;
309*d021c344SAndy King 	return vmci_transport_send_control_pkt_bh(
310*d021c344SAndy King 					dst, src,
311*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
312*d021c344SAndy King 					0, NULL, VMCI_INVALID_HANDLE);
313*d021c344SAndy King }
314*d021c344SAndy King 
315*d021c344SAndy King static int vmci_transport_send_reset(struct sock *sk,
316*d021c344SAndy King 				     struct vmci_transport_packet *pkt)
317*d021c344SAndy King {
318*d021c344SAndy King 	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
319*d021c344SAndy King 		return 0;
320*d021c344SAndy King 	return vmci_transport_send_control_pkt(sk,
321*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_RST,
322*d021c344SAndy King 					0, 0, NULL, VSOCK_PROTO_INVALID,
323*d021c344SAndy King 					VMCI_INVALID_HANDLE);
324*d021c344SAndy King }
325*d021c344SAndy King 
326*d021c344SAndy King static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
327*d021c344SAndy King {
328*d021c344SAndy King 	return vmci_transport_send_control_pkt(
329*d021c344SAndy King 					sk,
330*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
331*d021c344SAndy King 					size, 0, NULL,
332*d021c344SAndy King 					VSOCK_PROTO_INVALID,
333*d021c344SAndy King 					VMCI_INVALID_HANDLE);
334*d021c344SAndy King }
335*d021c344SAndy King 
336*d021c344SAndy King static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
337*d021c344SAndy King 					  u16 version)
338*d021c344SAndy King {
339*d021c344SAndy King 	return vmci_transport_send_control_pkt(
340*d021c344SAndy King 					sk,
341*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
342*d021c344SAndy King 					size, 0, NULL, version,
343*d021c344SAndy King 					VMCI_INVALID_HANDLE);
344*d021c344SAndy King }
345*d021c344SAndy King 
346*d021c344SAndy King static int vmci_transport_send_qp_offer(struct sock *sk,
347*d021c344SAndy King 					struct vmci_handle handle)
348*d021c344SAndy King {
349*d021c344SAndy King 	return vmci_transport_send_control_pkt(
350*d021c344SAndy King 					sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
351*d021c344SAndy King 					0, NULL,
352*d021c344SAndy King 					VSOCK_PROTO_INVALID, handle);
353*d021c344SAndy King }
354*d021c344SAndy King 
355*d021c344SAndy King static int vmci_transport_send_attach(struct sock *sk,
356*d021c344SAndy King 				      struct vmci_handle handle)
357*d021c344SAndy King {
358*d021c344SAndy King 	return vmci_transport_send_control_pkt(
359*d021c344SAndy King 					sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
360*d021c344SAndy King 					0, 0, NULL, VSOCK_PROTO_INVALID,
361*d021c344SAndy King 					handle);
362*d021c344SAndy King }
363*d021c344SAndy King 
364*d021c344SAndy King static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
365*d021c344SAndy King {
366*d021c344SAndy King 	return vmci_transport_reply_control_pkt_fast(
367*d021c344SAndy King 						pkt,
368*d021c344SAndy King 						VMCI_TRANSPORT_PACKET_TYPE_RST,
369*d021c344SAndy King 						0, 0, NULL,
370*d021c344SAndy King 						VMCI_INVALID_HANDLE);
371*d021c344SAndy King }
372*d021c344SAndy King 
373*d021c344SAndy King static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
374*d021c344SAndy King 					  struct sockaddr_vm *src)
375*d021c344SAndy King {
376*d021c344SAndy King 	return vmci_transport_send_control_pkt_bh(
377*d021c344SAndy King 					dst, src,
378*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_INVALID,
379*d021c344SAndy King 					0, 0, NULL, VMCI_INVALID_HANDLE);
380*d021c344SAndy King }
381*d021c344SAndy King 
382*d021c344SAndy King int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
383*d021c344SAndy King 				 struct sockaddr_vm *src)
384*d021c344SAndy King {
385*d021c344SAndy King 	return vmci_transport_send_control_pkt_bh(
386*d021c344SAndy King 					dst, src,
387*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
388*d021c344SAndy King 					0, NULL, VMCI_INVALID_HANDLE);
389*d021c344SAndy King }
390*d021c344SAndy King 
391*d021c344SAndy King int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
392*d021c344SAndy King 				struct sockaddr_vm *src)
393*d021c344SAndy King {
394*d021c344SAndy King 	return vmci_transport_send_control_pkt_bh(
395*d021c344SAndy King 					dst, src,
396*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
397*d021c344SAndy King 					0, NULL, VMCI_INVALID_HANDLE);
398*d021c344SAndy King }
399*d021c344SAndy King 
400*d021c344SAndy King int vmci_transport_send_wrote(struct sock *sk)
401*d021c344SAndy King {
402*d021c344SAndy King 	return vmci_transport_send_control_pkt(
403*d021c344SAndy King 					sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
404*d021c344SAndy King 					0, NULL, VSOCK_PROTO_INVALID,
405*d021c344SAndy King 					VMCI_INVALID_HANDLE);
406*d021c344SAndy King }
407*d021c344SAndy King 
408*d021c344SAndy King int vmci_transport_send_read(struct sock *sk)
409*d021c344SAndy King {
410*d021c344SAndy King 	return vmci_transport_send_control_pkt(
411*d021c344SAndy King 					sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
412*d021c344SAndy King 					0, NULL, VSOCK_PROTO_INVALID,
413*d021c344SAndy King 					VMCI_INVALID_HANDLE);
414*d021c344SAndy King }
415*d021c344SAndy King 
416*d021c344SAndy King int vmci_transport_send_waiting_write(struct sock *sk,
417*d021c344SAndy King 				      struct vmci_transport_waiting_info *wait)
418*d021c344SAndy King {
419*d021c344SAndy King 	return vmci_transport_send_control_pkt(
420*d021c344SAndy King 				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
421*d021c344SAndy King 				0, 0, wait, VSOCK_PROTO_INVALID,
422*d021c344SAndy King 				VMCI_INVALID_HANDLE);
423*d021c344SAndy King }
424*d021c344SAndy King 
425*d021c344SAndy King int vmci_transport_send_waiting_read(struct sock *sk,
426*d021c344SAndy King 				     struct vmci_transport_waiting_info *wait)
427*d021c344SAndy King {
428*d021c344SAndy King 	return vmci_transport_send_control_pkt(
429*d021c344SAndy King 				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
430*d021c344SAndy King 				0, 0, wait, VSOCK_PROTO_INVALID,
431*d021c344SAndy King 				VMCI_INVALID_HANDLE);
432*d021c344SAndy King }
433*d021c344SAndy King 
434*d021c344SAndy King static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
435*d021c344SAndy King {
436*d021c344SAndy King 	return vmci_transport_send_control_pkt(
437*d021c344SAndy King 					&vsk->sk,
438*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
439*d021c344SAndy King 					0, mode, NULL,
440*d021c344SAndy King 					VSOCK_PROTO_INVALID,
441*d021c344SAndy King 					VMCI_INVALID_HANDLE);
442*d021c344SAndy King }
443*d021c344SAndy King 
444*d021c344SAndy King static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
445*d021c344SAndy King {
446*d021c344SAndy King 	return vmci_transport_send_control_pkt(sk,
447*d021c344SAndy King 					VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
448*d021c344SAndy King 					size, 0, NULL,
449*d021c344SAndy King 					VSOCK_PROTO_INVALID,
450*d021c344SAndy King 					VMCI_INVALID_HANDLE);
451*d021c344SAndy King }
452*d021c344SAndy King 
453*d021c344SAndy King static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
454*d021c344SAndy King 					     u16 version)
455*d021c344SAndy King {
456*d021c344SAndy King 	return vmci_transport_send_control_pkt(
457*d021c344SAndy King 					sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
458*d021c344SAndy King 					size, 0, NULL, version,
459*d021c344SAndy King 					VMCI_INVALID_HANDLE);
460*d021c344SAndy King }
461*d021c344SAndy King 
462*d021c344SAndy King static struct sock *vmci_transport_get_pending(
463*d021c344SAndy King 					struct sock *listener,
464*d021c344SAndy King 					struct vmci_transport_packet *pkt)
465*d021c344SAndy King {
466*d021c344SAndy King 	struct vsock_sock *vlistener;
467*d021c344SAndy King 	struct vsock_sock *vpending;
468*d021c344SAndy King 	struct sock *pending;
469*d021c344SAndy King 
470*d021c344SAndy King 	vlistener = vsock_sk(listener);
471*d021c344SAndy King 
472*d021c344SAndy King 	list_for_each_entry(vpending, &vlistener->pending_links,
473*d021c344SAndy King 			    pending_links) {
474*d021c344SAndy King 		struct sockaddr_vm src;
475*d021c344SAndy King 		struct sockaddr_vm dst;
476*d021c344SAndy King 
477*d021c344SAndy King 		vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
478*d021c344SAndy King 		vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
479*d021c344SAndy King 
480*d021c344SAndy King 		if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
481*d021c344SAndy King 		    vsock_addr_equals_addr(&dst, &vpending->local_addr)) {
482*d021c344SAndy King 			pending = sk_vsock(vpending);
483*d021c344SAndy King 			sock_hold(pending);
484*d021c344SAndy King 			goto found;
485*d021c344SAndy King 		}
486*d021c344SAndy King 	}
487*d021c344SAndy King 
488*d021c344SAndy King 	pending = NULL;
489*d021c344SAndy King found:
490*d021c344SAndy King 	return pending;
491*d021c344SAndy King 
492*d021c344SAndy King }
493*d021c344SAndy King 
/* Drop the reference on @pending taken by vmci_transport_get_pending(). */
static void vmci_transport_release_pending(struct sock *pending)
{
	sock_put(pending);
}
498*d021c344SAndy King 
499*d021c344SAndy King /* We allow two kinds of sockets to communicate with a restricted VM: 1)
500*d021c344SAndy King  * trusted sockets 2) sockets from applications running as the same user as the
501*d021c344SAndy King  * VM (this is only true for the host side and only when using hosted products)
502*d021c344SAndy King  */
503*d021c344SAndy King 
504*d021c344SAndy King static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
505*d021c344SAndy King {
506*d021c344SAndy King 	return vsock->trusted ||
507*d021c344SAndy King 	       vmci_is_context_owner(peer_cid, vsock->owner->uid);
508*d021c344SAndy King }
509*d021c344SAndy King 
510*d021c344SAndy King /* We allow sending datagrams to and receiving datagrams from a restricted VM
511*d021c344SAndy King  * only if it is trusted as described in vmci_transport_is_trusted.
512*d021c344SAndy King  */
513*d021c344SAndy King 
514*d021c344SAndy King static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
515*d021c344SAndy King {
516*d021c344SAndy King 	if (vsock->cached_peer != peer_cid) {
517*d021c344SAndy King 		vsock->cached_peer = peer_cid;
518*d021c344SAndy King 		if (!vmci_transport_is_trusted(vsock, peer_cid) &&
519*d021c344SAndy King 		    (vmci_context_get_priv_flags(peer_cid) &
520*d021c344SAndy King 		     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
521*d021c344SAndy King 			vsock->cached_peer_allow_dgram = false;
522*d021c344SAndy King 		} else {
523*d021c344SAndy King 			vsock->cached_peer_allow_dgram = true;
524*d021c344SAndy King 		}
525*d021c344SAndy King 	}
526*d021c344SAndy King 
527*d021c344SAndy King 	return vsock->cached_peer_allow_dgram;
528*d021c344SAndy King }
529*d021c344SAndy King 
/* Allocate (or attach to) the queue pair backing a stream socket.
 *
 * When @trusted is set we first try a privileged allocation (only
 * possible when vsock runs on the host); any result other than
 * VMCI_ERROR_NO_ACCESS is final.  On access denial -- or when @trusted
 * is not set -- we fall back to an unprivileged allocation.
 *
 * Returns 0 on success or a negative vsock errno on failure.
 */
static int
vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
				struct vmci_handle *handle,
				u64 produce_size,
				u64 consume_size,
				u32 peer, u32 flags, bool trusted)
{
	int err = 0;

	if (trusted) {
		/* Try to allocate our queue pair as trusted. This will only
		 * work if vsock is running in the host.
		 */

		err = vmci_qpair_alloc(qpair, handle, produce_size,
				       consume_size,
				       peer, flags,
				       VMCI_PRIVILEGE_FLAG_TRUSTED);
		if (err != VMCI_ERROR_NO_ACCESS)
			goto out;

	}

	err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
	if (err < 0) {
		pr_err("Could not attach to queue pair with %d\n",
		       err);
		/* Translate the VMCI status into a vsock errno for callers. */
		err = vmci_transport_error_to_vsock_error(err);
	}

	return err;
}
564*d021c344SAndy King 
565*d021c344SAndy King static int
566*d021c344SAndy King vmci_transport_datagram_create_hnd(u32 resource_id,
567*d021c344SAndy King 				   u32 flags,
568*d021c344SAndy King 				   vmci_datagram_recv_cb recv_cb,
569*d021c344SAndy King 				   void *client_data,
570*d021c344SAndy King 				   struct vmci_handle *out_handle)
571*d021c344SAndy King {
572*d021c344SAndy King 	int err = 0;
573*d021c344SAndy King 
574*d021c344SAndy King 	/* Try to allocate our datagram handler as trusted. This will only work
575*d021c344SAndy King 	 * if vsock is running in the host.
576*d021c344SAndy King 	 */
577*d021c344SAndy King 
578*d021c344SAndy King 	err = vmci_datagram_create_handle_priv(resource_id, flags,
579*d021c344SAndy King 					       VMCI_PRIVILEGE_FLAG_TRUSTED,
580*d021c344SAndy King 					       recv_cb,
581*d021c344SAndy King 					       client_data, out_handle);
582*d021c344SAndy King 
583*d021c344SAndy King 	if (err == VMCI_ERROR_NO_ACCESS)
584*d021c344SAndy King 		err = vmci_datagram_create_handle(resource_id, flags,
585*d021c344SAndy King 						  recv_cb, client_data,
586*d021c344SAndy King 						  out_handle);
587*d021c344SAndy King 
588*d021c344SAndy King 	return err;
589*d021c344SAndy King }
590*d021c344SAndy King 
591*d021c344SAndy King /* This is invoked as part of a tasklet that's scheduled when the VMCI
592*d021c344SAndy King  * interrupt fires.  This is run in bottom-half context and if it ever needs to
593*d021c344SAndy King  * sleep it should defer that work to a work queue.
594*d021c344SAndy King  */
595*d021c344SAndy King 
/* Datagram receive callback, invoked from a VMCI tasklet (bottom-half
 * context -- must not sleep).  Copies the incoming datagram into an
 * sk_buff and queues it on the destination socket.
 *
 * Returns VMCI_ERROR_NO_ACCESS if the peer is not allowed to talk to
 * this socket, VMCI_SUCCESS otherwise (including when the skb
 * allocation fails, in which case the datagram is silently dropped --
 * acceptable for unreliable datagram semantics).
 */
static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
{
	struct sock *sk;
	size_t size;
	struct sk_buff *skb;
	struct vsock_sock *vsk;

	/* client_data registered with the datagram handle is the socket. */
	sk = (struct sock *)data;

	/* This handler is privileged when this module is running on the host.
	 * We will get datagrams from all endpoints (even VMs that are in a
	 * restricted context). If we get one from a restricted context then
	 * the destination socket must be trusted.
	 *
	 * NOTE: We access the socket struct without holding the lock here.
	 * This is ok because the field we are interested is never modified
	 * outside of the create and destruct socket functions.
	 */
	vsk = vsock_sk(sk);
	if (!vmci_transport_allow_dgram(vsk, dg->src.context))
		return VMCI_ERROR_NO_ACCESS;

	/* Size of header plus payload -- the whole datagram is copied. */
	size = VMCI_DG_SIZE(dg);

	/* Attach the packet to the socket's receive queue as an sk_buff. */
	skb = alloc_skb(size, GFP_ATOMIC);
	if (skb) {
		/* sk_receive_skb() will do a sock_put(), so hold here. */
		sock_hold(sk);
		skb_put(skb, size);
		memcpy(skb->data, dg, size);
		sk_receive_skb(sk, skb, 0);
	}

	return VMCI_SUCCESS;
}
632*d021c344SAndy King 
633*d021c344SAndy King static bool vmci_transport_stream_allow(u32 cid, u32 port)
634*d021c344SAndy King {
635*d021c344SAndy King 	static const u32 non_socket_contexts[] = {
636*d021c344SAndy King 		VMADDR_CID_HYPERVISOR,
637*d021c344SAndy King 		VMADDR_CID_RESERVED,
638*d021c344SAndy King 	};
639*d021c344SAndy King 	int i;
640*d021c344SAndy King 
641*d021c344SAndy King 	BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
642*d021c344SAndy King 
643*d021c344SAndy King 	for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
644*d021c344SAndy King 		if (cid == non_socket_contexts[i])
645*d021c344SAndy King 			return false;
646*d021c344SAndy King 	}
647*d021c344SAndy King 
648*d021c344SAndy King 	return true;
649*d021c344SAndy King }
650*d021c344SAndy King 
651*d021c344SAndy King /* This is invoked as part of a tasklet that's scheduled when the VMCI
652*d021c344SAndy King  * interrupt fires.  This is run in bottom-half context but it defers most of
653*d021c344SAndy King  * its work to the packet handling work queue.
654*d021c344SAndy King  */
655*d021c344SAndy King 
656*d021c344SAndy King static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
657*d021c344SAndy King {
658*d021c344SAndy King 	struct sock *sk;
659*d021c344SAndy King 	struct sockaddr_vm dst;
660*d021c344SAndy King 	struct sockaddr_vm src;
661*d021c344SAndy King 	struct vmci_transport_packet *pkt;
662*d021c344SAndy King 	struct vsock_sock *vsk;
663*d021c344SAndy King 	bool bh_process_pkt;
664*d021c344SAndy King 	int err;
665*d021c344SAndy King 
666*d021c344SAndy King 	sk = NULL;
667*d021c344SAndy King 	err = VMCI_SUCCESS;
668*d021c344SAndy King 	bh_process_pkt = false;
669*d021c344SAndy King 
670*d021c344SAndy King 	/* Ignore incoming packets from contexts without sockets, or resources
671*d021c344SAndy King 	 * that aren't vsock implementations.
672*d021c344SAndy King 	 */
673*d021c344SAndy King 
674*d021c344SAndy King 	if (!vmci_transport_stream_allow(dg->src.context, -1)
675*d021c344SAndy King 	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
676*d021c344SAndy King 		return VMCI_ERROR_NO_ACCESS;
677*d021c344SAndy King 
678*d021c344SAndy King 	if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
679*d021c344SAndy King 		/* Drop datagrams that do not contain full VSock packets. */
680*d021c344SAndy King 		return VMCI_ERROR_INVALID_ARGS;
681*d021c344SAndy King 
682*d021c344SAndy King 	pkt = (struct vmci_transport_packet *)dg;
683*d021c344SAndy King 
684*d021c344SAndy King 	/* Find the socket that should handle this packet.  First we look for a
685*d021c344SAndy King 	 * connected socket and if there is none we look for a socket bound to
686*d021c344SAndy King 	 * the destintation address.
687*d021c344SAndy King 	 */
688*d021c344SAndy King 	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
689*d021c344SAndy King 	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
690*d021c344SAndy King 
691*d021c344SAndy King 	sk = vsock_find_connected_socket(&src, &dst);
692*d021c344SAndy King 	if (!sk) {
693*d021c344SAndy King 		sk = vsock_find_bound_socket(&dst);
694*d021c344SAndy King 		if (!sk) {
695*d021c344SAndy King 			/* We could not find a socket for this specified
696*d021c344SAndy King 			 * address.  If this packet is a RST, we just drop it.
697*d021c344SAndy King 			 * If it is another packet, we send a RST.  Note that
698*d021c344SAndy King 			 * we do not send a RST reply to RSTs so that we do not
699*d021c344SAndy King 			 * continually send RSTs between two endpoints.
700*d021c344SAndy King 			 *
701*d021c344SAndy King 			 * Note that since this is a reply, dst is src and src
702*d021c344SAndy King 			 * is dst.
703*d021c344SAndy King 			 */
704*d021c344SAndy King 			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
705*d021c344SAndy King 				pr_err("unable to send reset\n");
706*d021c344SAndy King 
707*d021c344SAndy King 			err = VMCI_ERROR_NOT_FOUND;
708*d021c344SAndy King 			goto out;
709*d021c344SAndy King 		}
710*d021c344SAndy King 	}
711*d021c344SAndy King 
712*d021c344SAndy King 	/* If the received packet type is beyond all types known to this
713*d021c344SAndy King 	 * implementation, reply with an invalid message.  Hopefully this will
714*d021c344SAndy King 	 * help when implementing backwards compatibility in the future.
715*d021c344SAndy King 	 */
716*d021c344SAndy King 	if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
717*d021c344SAndy King 		vmci_transport_send_invalid_bh(&dst, &src);
718*d021c344SAndy King 		err = VMCI_ERROR_INVALID_ARGS;
719*d021c344SAndy King 		goto out;
720*d021c344SAndy King 	}
721*d021c344SAndy King 
722*d021c344SAndy King 	/* This handler is privileged when this module is running on the host.
723*d021c344SAndy King 	 * We will get datagram connect requests from all endpoints (even VMs
724*d021c344SAndy King 	 * that are in a restricted context). If we get one from a restricted
725*d021c344SAndy King 	 * context then the destination socket must be trusted.
726*d021c344SAndy King 	 *
727*d021c344SAndy King 	 * NOTE: We access the socket struct without holding the lock here.
728*d021c344SAndy King 	 * This is ok because the field we are interested is never modified
729*d021c344SAndy King 	 * outside of the create and destruct socket functions.
730*d021c344SAndy King 	 */
731*d021c344SAndy King 	vsk = vsock_sk(sk);
732*d021c344SAndy King 	if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
733*d021c344SAndy King 		err = VMCI_ERROR_NO_ACCESS;
734*d021c344SAndy King 		goto out;
735*d021c344SAndy King 	}
736*d021c344SAndy King 
737*d021c344SAndy King 	/* We do most everything in a work queue, but let's fast path the
738*d021c344SAndy King 	 * notification of reads and writes to help data transfer performance.
739*d021c344SAndy King 	 * We can only do this if there is no process context code executing
740*d021c344SAndy King 	 * for this socket since that may change the state.
741*d021c344SAndy King 	 */
742*d021c344SAndy King 	bh_lock_sock(sk);
743*d021c344SAndy King 
744*d021c344SAndy King 	if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED)
745*d021c344SAndy King 		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
746*d021c344SAndy King 				sk, pkt, true, &dst, &src,
747*d021c344SAndy King 				&bh_process_pkt);
748*d021c344SAndy King 
749*d021c344SAndy King 	bh_unlock_sock(sk);
750*d021c344SAndy King 
751*d021c344SAndy King 	if (!bh_process_pkt) {
752*d021c344SAndy King 		struct vmci_transport_recv_pkt_info *recv_pkt_info;
753*d021c344SAndy King 
754*d021c344SAndy King 		recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
755*d021c344SAndy King 		if (!recv_pkt_info) {
756*d021c344SAndy King 			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
757*d021c344SAndy King 				pr_err("unable to send reset\n");
758*d021c344SAndy King 
759*d021c344SAndy King 			err = VMCI_ERROR_NO_MEM;
760*d021c344SAndy King 			goto out;
761*d021c344SAndy King 		}
762*d021c344SAndy King 
763*d021c344SAndy King 		recv_pkt_info->sk = sk;
764*d021c344SAndy King 		memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
765*d021c344SAndy King 		INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
766*d021c344SAndy King 
767*d021c344SAndy King 		schedule_work(&recv_pkt_info->work);
768*d021c344SAndy King 		/* Clear sk so that the reference count incremented by one of
769*d021c344SAndy King 		 * the Find functions above is not decremented below.  We need
770*d021c344SAndy King 		 * that reference count for the packet handler we've scheduled
771*d021c344SAndy King 		 * to run.
772*d021c344SAndy King 		 */
773*d021c344SAndy King 		sk = NULL;
774*d021c344SAndy King 	}
775*d021c344SAndy King 
776*d021c344SAndy King out:
777*d021c344SAndy King 	if (sk)
778*d021c344SAndy King 		sock_put(sk);
779*d021c344SAndy King 
780*d021c344SAndy King 	return err;
781*d021c344SAndy King }
782*d021c344SAndy King 
783*d021c344SAndy King static void vmci_transport_peer_attach_cb(u32 sub_id,
784*d021c344SAndy King 					  const struct vmci_event_data *e_data,
785*d021c344SAndy King 					  void *client_data)
786*d021c344SAndy King {
787*d021c344SAndy King 	struct sock *sk = client_data;
788*d021c344SAndy King 	const struct vmci_event_payload_qp *e_payload;
789*d021c344SAndy King 	struct vsock_sock *vsk;
790*d021c344SAndy King 
791*d021c344SAndy King 	e_payload = vmci_event_data_const_payload(e_data);
792*d021c344SAndy King 
793*d021c344SAndy King 	vsk = vsock_sk(sk);
794*d021c344SAndy King 
795*d021c344SAndy King 	/* We don't ask for delayed CBs when we subscribe to this event (we
796*d021c344SAndy King 	 * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
797*d021c344SAndy King 	 * guarantees in that case about what context we might be running in,
798*d021c344SAndy King 	 * so it could be BH or process, blockable or non-blockable.  So we
799*d021c344SAndy King 	 * need to account for all possible contexts here.
800*d021c344SAndy King 	 */
801*d021c344SAndy King 	local_bh_disable();
802*d021c344SAndy King 	bh_lock_sock(sk);
803*d021c344SAndy King 
804*d021c344SAndy King 	/* XXX This is lame, we should provide a way to lookup sockets by
805*d021c344SAndy King 	 * qp_handle.
806*d021c344SAndy King 	 */
807*d021c344SAndy King 	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
808*d021c344SAndy King 				 e_payload->handle)) {
809*d021c344SAndy King 		/* XXX This doesn't do anything, but in the future we may want
810*d021c344SAndy King 		 * to set a flag here to verify the attach really did occur and
811*d021c344SAndy King 		 * we weren't just sent a datagram claiming it was.
812*d021c344SAndy King 		 */
813*d021c344SAndy King 		goto out;
814*d021c344SAndy King 	}
815*d021c344SAndy King 
816*d021c344SAndy King out:
817*d021c344SAndy King 	bh_unlock_sock(sk);
818*d021c344SAndy King 	local_bh_enable();
819*d021c344SAndy King }
820*d021c344SAndy King 
821*d021c344SAndy King static void vmci_transport_handle_detach(struct sock *sk)
822*d021c344SAndy King {
823*d021c344SAndy King 	struct vsock_sock *vsk;
824*d021c344SAndy King 
825*d021c344SAndy King 	vsk = vsock_sk(sk);
826*d021c344SAndy King 	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
827*d021c344SAndy King 		sock_set_flag(sk, SOCK_DONE);
828*d021c344SAndy King 
829*d021c344SAndy King 		/* On a detach the peer will not be sending or receiving
830*d021c344SAndy King 		 * anymore.
831*d021c344SAndy King 		 */
832*d021c344SAndy King 		vsk->peer_shutdown = SHUTDOWN_MASK;
833*d021c344SAndy King 
834*d021c344SAndy King 		/* We should not be sending anymore since the peer won't be
835*d021c344SAndy King 		 * there to receive, but we can still receive if there is data
836*d021c344SAndy King 		 * left in our consume queue.
837*d021c344SAndy King 		 */
838*d021c344SAndy King 		if (vsock_stream_has_data(vsk) <= 0) {
839*d021c344SAndy King 			if (sk->sk_state == SS_CONNECTING) {
840*d021c344SAndy King 				/* The peer may detach from a queue pair while
841*d021c344SAndy King 				 * we are still in the connecting state, i.e.,
842*d021c344SAndy King 				 * if the peer VM is killed after attaching to
843*d021c344SAndy King 				 * a queue pair, but before we complete the
844*d021c344SAndy King 				 * handshake. In that case, we treat the detach
845*d021c344SAndy King 				 * event like a reset.
846*d021c344SAndy King 				 */
847*d021c344SAndy King 
848*d021c344SAndy King 				sk->sk_state = SS_UNCONNECTED;
849*d021c344SAndy King 				sk->sk_err = ECONNRESET;
850*d021c344SAndy King 				sk->sk_error_report(sk);
851*d021c344SAndy King 				return;
852*d021c344SAndy King 			}
853*d021c344SAndy King 			sk->sk_state = SS_UNCONNECTED;
854*d021c344SAndy King 		}
855*d021c344SAndy King 		sk->sk_state_change(sk);
856*d021c344SAndy King 	}
857*d021c344SAndy King }
858*d021c344SAndy King 
859*d021c344SAndy King static void vmci_transport_peer_detach_cb(u32 sub_id,
860*d021c344SAndy King 					  const struct vmci_event_data *e_data,
861*d021c344SAndy King 					  void *client_data)
862*d021c344SAndy King {
863*d021c344SAndy King 	struct sock *sk = client_data;
864*d021c344SAndy King 	const struct vmci_event_payload_qp *e_payload;
865*d021c344SAndy King 	struct vsock_sock *vsk;
866*d021c344SAndy King 
867*d021c344SAndy King 	e_payload = vmci_event_data_const_payload(e_data);
868*d021c344SAndy King 	vsk = vsock_sk(sk);
869*d021c344SAndy King 	if (vmci_handle_is_invalid(e_payload->handle))
870*d021c344SAndy King 		return;
871*d021c344SAndy King 
872*d021c344SAndy King 	/* Same rules for locking as for peer_attach_cb(). */
873*d021c344SAndy King 	local_bh_disable();
874*d021c344SAndy King 	bh_lock_sock(sk);
875*d021c344SAndy King 
876*d021c344SAndy King 	/* XXX This is lame, we should provide a way to lookup sockets by
877*d021c344SAndy King 	 * qp_handle.
878*d021c344SAndy King 	 */
879*d021c344SAndy King 	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
880*d021c344SAndy King 				 e_payload->handle))
881*d021c344SAndy King 		vmci_transport_handle_detach(sk);
882*d021c344SAndy King 
883*d021c344SAndy King 	bh_unlock_sock(sk);
884*d021c344SAndy King 	local_bh_enable();
885*d021c344SAndy King }
886*d021c344SAndy King 
887*d021c344SAndy King static void vmci_transport_qp_resumed_cb(u32 sub_id,
888*d021c344SAndy King 					 const struct vmci_event_data *e_data,
889*d021c344SAndy King 					 void *client_data)
890*d021c344SAndy King {
891*d021c344SAndy King 	vsock_for_each_connected_socket(vmci_transport_handle_detach);
892*d021c344SAndy King }
893*d021c344SAndy King 
894*d021c344SAndy King static void vmci_transport_recv_pkt_work(struct work_struct *work)
895*d021c344SAndy King {
896*d021c344SAndy King 	struct vmci_transport_recv_pkt_info *recv_pkt_info;
897*d021c344SAndy King 	struct vmci_transport_packet *pkt;
898*d021c344SAndy King 	struct sock *sk;
899*d021c344SAndy King 
900*d021c344SAndy King 	recv_pkt_info =
901*d021c344SAndy King 		container_of(work, struct vmci_transport_recv_pkt_info, work);
902*d021c344SAndy King 	sk = recv_pkt_info->sk;
903*d021c344SAndy King 	pkt = &recv_pkt_info->pkt;
904*d021c344SAndy King 
905*d021c344SAndy King 	lock_sock(sk);
906*d021c344SAndy King 
907*d021c344SAndy King 	switch (sk->sk_state) {
908*d021c344SAndy King 	case SS_LISTEN:
909*d021c344SAndy King 		vmci_transport_recv_listen(sk, pkt);
910*d021c344SAndy King 		break;
911*d021c344SAndy King 	case SS_CONNECTING:
912*d021c344SAndy King 		/* Processing of pending connections for servers goes through
913*d021c344SAndy King 		 * the listening socket, so see vmci_transport_recv_listen()
914*d021c344SAndy King 		 * for that path.
915*d021c344SAndy King 		 */
916*d021c344SAndy King 		vmci_transport_recv_connecting_client(sk, pkt);
917*d021c344SAndy King 		break;
918*d021c344SAndy King 	case SS_CONNECTED:
919*d021c344SAndy King 		vmci_transport_recv_connected(sk, pkt);
920*d021c344SAndy King 		break;
921*d021c344SAndy King 	default:
922*d021c344SAndy King 		/* Because this function does not run in the same context as
923*d021c344SAndy King 		 * vmci_transport_recv_stream_cb it is possible that the
924*d021c344SAndy King 		 * socket has closed. We need to let the other side know or it
925*d021c344SAndy King 		 * could be sitting in a connect and hang forever. Send a
926*d021c344SAndy King 		 * reset to prevent that.
927*d021c344SAndy King 		 */
928*d021c344SAndy King 		vmci_transport_send_reset(sk, pkt);
929*d021c344SAndy King 		goto out;
930*d021c344SAndy King 	}
931*d021c344SAndy King 
932*d021c344SAndy King out:
933*d021c344SAndy King 	release_sock(sk);
934*d021c344SAndy King 	kfree(recv_pkt_info);
935*d021c344SAndy King 	/* Release reference obtained in the stream callback when we fetched
936*d021c344SAndy King 	 * this socket out of the bound or connected list.
937*d021c344SAndy King 	 */
938*d021c344SAndy King 	sock_put(sk);
939*d021c344SAndy King }
940*d021c344SAndy King 
941*d021c344SAndy King static int vmci_transport_recv_listen(struct sock *sk,
942*d021c344SAndy King 				      struct vmci_transport_packet *pkt)
943*d021c344SAndy King {
944*d021c344SAndy King 	struct sock *pending;
945*d021c344SAndy King 	struct vsock_sock *vpending;
946*d021c344SAndy King 	int err;
947*d021c344SAndy King 	u64 qp_size;
948*d021c344SAndy King 	bool old_request = false;
949*d021c344SAndy King 	bool old_pkt_proto = false;
950*d021c344SAndy King 
951*d021c344SAndy King 	err = 0;
952*d021c344SAndy King 
953*d021c344SAndy King 	/* Because we are in the listen state, we could be receiving a packet
954*d021c344SAndy King 	 * for ourself or any previous connection requests that we received.
955*d021c344SAndy King 	 * If it's the latter, we try to find a socket in our list of pending
956*d021c344SAndy King 	 * connections and, if we do, call the appropriate handler for the
957*d021c344SAndy King 	 * state that that socket is in.  Otherwise we try to service the
958*d021c344SAndy King 	 * connection request.
959*d021c344SAndy King 	 */
960*d021c344SAndy King 	pending = vmci_transport_get_pending(sk, pkt);
961*d021c344SAndy King 	if (pending) {
962*d021c344SAndy King 		lock_sock(pending);
963*d021c344SAndy King 		switch (pending->sk_state) {
964*d021c344SAndy King 		case SS_CONNECTING:
965*d021c344SAndy King 			err = vmci_transport_recv_connecting_server(sk,
966*d021c344SAndy King 								    pending,
967*d021c344SAndy King 								    pkt);
968*d021c344SAndy King 			break;
969*d021c344SAndy King 		default:
970*d021c344SAndy King 			vmci_transport_send_reset(pending, pkt);
971*d021c344SAndy King 			err = -EINVAL;
972*d021c344SAndy King 		}
973*d021c344SAndy King 
974*d021c344SAndy King 		if (err < 0)
975*d021c344SAndy King 			vsock_remove_pending(sk, pending);
976*d021c344SAndy King 
977*d021c344SAndy King 		release_sock(pending);
978*d021c344SAndy King 		vmci_transport_release_pending(pending);
979*d021c344SAndy King 
980*d021c344SAndy King 		return err;
981*d021c344SAndy King 	}
982*d021c344SAndy King 
983*d021c344SAndy King 	/* The listen state only accepts connection requests.  Reply with a
984*d021c344SAndy King 	 * reset unless we received a reset.
985*d021c344SAndy King 	 */
986*d021c344SAndy King 
987*d021c344SAndy King 	if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
988*d021c344SAndy King 	      pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
989*d021c344SAndy King 		vmci_transport_reply_reset(pkt);
990*d021c344SAndy King 		return -EINVAL;
991*d021c344SAndy King 	}
992*d021c344SAndy King 
993*d021c344SAndy King 	if (pkt->u.size == 0) {
994*d021c344SAndy King 		vmci_transport_reply_reset(pkt);
995*d021c344SAndy King 		return -EINVAL;
996*d021c344SAndy King 	}
997*d021c344SAndy King 
998*d021c344SAndy King 	/* If this socket can't accommodate this connection request, we send a
999*d021c344SAndy King 	 * reset.  Otherwise we create and initialize a child socket and reply
1000*d021c344SAndy King 	 * with a connection negotiation.
1001*d021c344SAndy King 	 */
1002*d021c344SAndy King 	if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
1003*d021c344SAndy King 		vmci_transport_reply_reset(pkt);
1004*d021c344SAndy King 		return -ECONNREFUSED;
1005*d021c344SAndy King 	}
1006*d021c344SAndy King 
1007*d021c344SAndy King 	pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
1008*d021c344SAndy King 				 sk->sk_type);
1009*d021c344SAndy King 	if (!pending) {
1010*d021c344SAndy King 		vmci_transport_send_reset(sk, pkt);
1011*d021c344SAndy King 		return -ENOMEM;
1012*d021c344SAndy King 	}
1013*d021c344SAndy King 
1014*d021c344SAndy King 	vpending = vsock_sk(pending);
1015*d021c344SAndy King 
1016*d021c344SAndy King 	vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
1017*d021c344SAndy King 			pkt->dst_port);
1018*d021c344SAndy King 	vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
1019*d021c344SAndy King 			pkt->src_port);
1020*d021c344SAndy King 
1021*d021c344SAndy King 	/* If the proposed size fits within our min/max, accept it. Otherwise
1022*d021c344SAndy King 	 * propose our own size.
1023*d021c344SAndy King 	 */
1024*d021c344SAndy King 	if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
1025*d021c344SAndy King 	    pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
1026*d021c344SAndy King 		qp_size = pkt->u.size;
1027*d021c344SAndy King 	} else {
1028*d021c344SAndy King 		qp_size = vmci_trans(vpending)->queue_pair_size;
1029*d021c344SAndy King 	}
1030*d021c344SAndy King 
1031*d021c344SAndy King 	/* Figure out if we are using old or new requests based on the
1032*d021c344SAndy King 	 * overrides pkt types sent by our peer.
1033*d021c344SAndy King 	 */
1034*d021c344SAndy King 	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1035*d021c344SAndy King 		old_request = old_pkt_proto;
1036*d021c344SAndy King 	} else {
1037*d021c344SAndy King 		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
1038*d021c344SAndy King 			old_request = true;
1039*d021c344SAndy King 		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
1040*d021c344SAndy King 			old_request = false;
1041*d021c344SAndy King 
1042*d021c344SAndy King 	}
1043*d021c344SAndy King 
1044*d021c344SAndy King 	if (old_request) {
1045*d021c344SAndy King 		/* Handle a REQUEST (or override) */
1046*d021c344SAndy King 		u16 version = VSOCK_PROTO_INVALID;
1047*d021c344SAndy King 		if (vmci_transport_proto_to_notify_struct(
1048*d021c344SAndy King 			pending, &version, true))
1049*d021c344SAndy King 			err = vmci_transport_send_negotiate(pending, qp_size);
1050*d021c344SAndy King 		else
1051*d021c344SAndy King 			err = -EINVAL;
1052*d021c344SAndy King 
1053*d021c344SAndy King 	} else {
1054*d021c344SAndy King 		/* Handle a REQUEST2 (or override) */
1055*d021c344SAndy King 		int proto_int = pkt->proto;
1056*d021c344SAndy King 		int pos;
1057*d021c344SAndy King 		u16 active_proto_version = 0;
1058*d021c344SAndy King 
1059*d021c344SAndy King 		/* The list of possible protocols is the intersection of all
1060*d021c344SAndy King 		 * protocols the client supports ... plus all the protocols we
1061*d021c344SAndy King 		 * support.
1062*d021c344SAndy King 		 */
1063*d021c344SAndy King 		proto_int &= vmci_transport_new_proto_supported_versions();
1064*d021c344SAndy King 
1065*d021c344SAndy King 		/* We choose the highest possible protocol version and use that
1066*d021c344SAndy King 		 * one.
1067*d021c344SAndy King 		 */
1068*d021c344SAndy King 		pos = fls(proto_int);
1069*d021c344SAndy King 		if (pos) {
1070*d021c344SAndy King 			active_proto_version = (1 << (pos - 1));
1071*d021c344SAndy King 			if (vmci_transport_proto_to_notify_struct(
1072*d021c344SAndy King 				pending, &active_proto_version, false))
1073*d021c344SAndy King 				err = vmci_transport_send_negotiate2(pending,
1074*d021c344SAndy King 							qp_size,
1075*d021c344SAndy King 							active_proto_version);
1076*d021c344SAndy King 			else
1077*d021c344SAndy King 				err = -EINVAL;
1078*d021c344SAndy King 
1079*d021c344SAndy King 		} else {
1080*d021c344SAndy King 			err = -EINVAL;
1081*d021c344SAndy King 		}
1082*d021c344SAndy King 	}
1083*d021c344SAndy King 
1084*d021c344SAndy King 	if (err < 0) {
1085*d021c344SAndy King 		vmci_transport_send_reset(sk, pkt);
1086*d021c344SAndy King 		sock_put(pending);
1087*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1088*d021c344SAndy King 		goto out;
1089*d021c344SAndy King 	}
1090*d021c344SAndy King 
1091*d021c344SAndy King 	vsock_add_pending(sk, pending);
1092*d021c344SAndy King 	sk->sk_ack_backlog++;
1093*d021c344SAndy King 
1094*d021c344SAndy King 	pending->sk_state = SS_CONNECTING;
1095*d021c344SAndy King 	vmci_trans(vpending)->produce_size =
1096*d021c344SAndy King 		vmci_trans(vpending)->consume_size = qp_size;
1097*d021c344SAndy King 	vmci_trans(vpending)->queue_pair_size = qp_size;
1098*d021c344SAndy King 
1099*d021c344SAndy King 	vmci_trans(vpending)->notify_ops->process_request(pending);
1100*d021c344SAndy King 
1101*d021c344SAndy King 	/* We might never receive another message for this socket and it's not
1102*d021c344SAndy King 	 * connected to any process, so we have to ensure it gets cleaned up
1103*d021c344SAndy King 	 * ourself.  Our delayed work function will take care of that.  Note
1104*d021c344SAndy King 	 * that we do not ever cancel this function since we have few
1105*d021c344SAndy King 	 * guarantees about its state when calling cancel_delayed_work().
1106*d021c344SAndy King 	 * Instead we hold a reference on the socket for that function and make
1107*d021c344SAndy King 	 * it capable of handling cases where it needs to do nothing but
1108*d021c344SAndy King 	 * release that reference.
1109*d021c344SAndy King 	 */
1110*d021c344SAndy King 	vpending->listener = sk;
1111*d021c344SAndy King 	sock_hold(sk);
1112*d021c344SAndy King 	sock_hold(pending);
1113*d021c344SAndy King 	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
1114*d021c344SAndy King 	schedule_delayed_work(&vpending->dwork, HZ);
1115*d021c344SAndy King 
1116*d021c344SAndy King out:
1117*d021c344SAndy King 	return err;
1118*d021c344SAndy King }
1119*d021c344SAndy King 
1120*d021c344SAndy King static int
1121*d021c344SAndy King vmci_transport_recv_connecting_server(struct sock *listener,
1122*d021c344SAndy King 				      struct sock *pending,
1123*d021c344SAndy King 				      struct vmci_transport_packet *pkt)
1124*d021c344SAndy King {
1125*d021c344SAndy King 	struct vsock_sock *vpending;
1126*d021c344SAndy King 	struct vmci_handle handle;
1127*d021c344SAndy King 	struct vmci_qp *qpair;
1128*d021c344SAndy King 	bool is_local;
1129*d021c344SAndy King 	u32 flags;
1130*d021c344SAndy King 	u32 detach_sub_id;
1131*d021c344SAndy King 	int err;
1132*d021c344SAndy King 	int skerr;
1133*d021c344SAndy King 
1134*d021c344SAndy King 	vpending = vsock_sk(pending);
1135*d021c344SAndy King 	detach_sub_id = VMCI_INVALID_ID;
1136*d021c344SAndy King 
1137*d021c344SAndy King 	switch (pkt->type) {
1138*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
1139*d021c344SAndy King 		if (vmci_handle_is_invalid(pkt->u.handle)) {
1140*d021c344SAndy King 			vmci_transport_send_reset(pending, pkt);
1141*d021c344SAndy King 			skerr = EPROTO;
1142*d021c344SAndy King 			err = -EINVAL;
1143*d021c344SAndy King 			goto destroy;
1144*d021c344SAndy King 		}
1145*d021c344SAndy King 		break;
1146*d021c344SAndy King 	default:
1147*d021c344SAndy King 		/* Close and cleanup the connection. */
1148*d021c344SAndy King 		vmci_transport_send_reset(pending, pkt);
1149*d021c344SAndy King 		skerr = EPROTO;
1150*d021c344SAndy King 		err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
1151*d021c344SAndy King 		goto destroy;
1152*d021c344SAndy King 	}
1153*d021c344SAndy King 
1154*d021c344SAndy King 	/* In order to complete the connection we need to attach to the offered
1155*d021c344SAndy King 	 * queue pair and send an attach notification.  We also subscribe to the
1156*d021c344SAndy King 	 * detach event so we know when our peer goes away, and we do that
1157*d021c344SAndy King 	 * before attaching so we don't miss an event.  If all this succeeds,
1158*d021c344SAndy King 	 * we update our state and wakeup anything waiting in accept() for a
1159*d021c344SAndy King 	 * connection.
1160*d021c344SAndy King 	 */
1161*d021c344SAndy King 
1162*d021c344SAndy King 	/* We don't care about attach since we ensure the other side has
1163*d021c344SAndy King 	 * attached by specifying the ATTACH_ONLY flag below.
1164*d021c344SAndy King 	 */
1165*d021c344SAndy King 	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1166*d021c344SAndy King 				   vmci_transport_peer_detach_cb,
1167*d021c344SAndy King 				   pending, &detach_sub_id);
1168*d021c344SAndy King 	if (err < VMCI_SUCCESS) {
1169*d021c344SAndy King 		vmci_transport_send_reset(pending, pkt);
1170*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1171*d021c344SAndy King 		skerr = -err;
1172*d021c344SAndy King 		goto destroy;
1173*d021c344SAndy King 	}
1174*d021c344SAndy King 
1175*d021c344SAndy King 	vmci_trans(vpending)->detach_sub_id = detach_sub_id;
1176*d021c344SAndy King 
1177*d021c344SAndy King 	/* Now attach to the queue pair the client created. */
1178*d021c344SAndy King 	handle = pkt->u.handle;
1179*d021c344SAndy King 
1180*d021c344SAndy King 	/* vpending->local_addr always has a context id so we do not need to
1181*d021c344SAndy King 	 * worry about VMADDR_CID_ANY in this case.
1182*d021c344SAndy King 	 */
1183*d021c344SAndy King 	is_local =
1184*d021c344SAndy King 	    vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
1185*d021c344SAndy King 	flags = VMCI_QPFLAG_ATTACH_ONLY;
1186*d021c344SAndy King 	flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
1187*d021c344SAndy King 
1188*d021c344SAndy King 	err = vmci_transport_queue_pair_alloc(
1189*d021c344SAndy King 					&qpair,
1190*d021c344SAndy King 					&handle,
1191*d021c344SAndy King 					vmci_trans(vpending)->produce_size,
1192*d021c344SAndy King 					vmci_trans(vpending)->consume_size,
1193*d021c344SAndy King 					pkt->dg.src.context,
1194*d021c344SAndy King 					flags,
1195*d021c344SAndy King 					vmci_transport_is_trusted(
1196*d021c344SAndy King 						vpending,
1197*d021c344SAndy King 						vpending->remote_addr.svm_cid));
1198*d021c344SAndy King 	if (err < 0) {
1199*d021c344SAndy King 		vmci_transport_send_reset(pending, pkt);
1200*d021c344SAndy King 		skerr = -err;
1201*d021c344SAndy King 		goto destroy;
1202*d021c344SAndy King 	}
1203*d021c344SAndy King 
1204*d021c344SAndy King 	vmci_trans(vpending)->qp_handle = handle;
1205*d021c344SAndy King 	vmci_trans(vpending)->qpair = qpair;
1206*d021c344SAndy King 
1207*d021c344SAndy King 	/* When we send the attach message, we must be ready to handle incoming
1208*d021c344SAndy King 	 * control messages on the newly connected socket. So we move the
1209*d021c344SAndy King 	 * pending socket to the connected state before sending the attach
1210*d021c344SAndy King 	 * message. Otherwise, an incoming packet triggered by the attach being
1211*d021c344SAndy King 	 * received by the peer may be processed concurrently with what happens
1212*d021c344SAndy King 	 * below after sending the attach message, and that incoming packet
1213*d021c344SAndy King 	 * will find the listening socket instead of the (currently) pending
1214*d021c344SAndy King 	 * socket. Note that enqueueing the socket increments the reference
1215*d021c344SAndy King 	 * count, so even if a reset comes before the connection is accepted,
1216*d021c344SAndy King 	 * the socket will be valid until it is removed from the queue.
1217*d021c344SAndy King 	 *
1218*d021c344SAndy King 	 * If we fail sending the attach below, we remove the socket from the
1219*d021c344SAndy King 	 * connected list and move the socket to SS_UNCONNECTED before
1220*d021c344SAndy King 	 * releasing the lock, so a pending slow path processing of an incoming
1221*d021c344SAndy King 	 * packet will not see the socket in the connected state in that case.
1222*d021c344SAndy King 	 */
1223*d021c344SAndy King 	pending->sk_state = SS_CONNECTED;
1224*d021c344SAndy King 
1225*d021c344SAndy King 	vsock_insert_connected(vpending);
1226*d021c344SAndy King 
1227*d021c344SAndy King 	/* Notify our peer of our attach. */
1228*d021c344SAndy King 	err = vmci_transport_send_attach(pending, handle);
1229*d021c344SAndy King 	if (err < 0) {
1230*d021c344SAndy King 		vsock_remove_connected(vpending);
1231*d021c344SAndy King 		pr_err("Could not send attach\n");
1232*d021c344SAndy King 		vmci_transport_send_reset(pending, pkt);
1233*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1234*d021c344SAndy King 		skerr = -err;
1235*d021c344SAndy King 		goto destroy;
1236*d021c344SAndy King 	}
1237*d021c344SAndy King 
1238*d021c344SAndy King 	/* We have a connection. Move the now connected socket from the
1239*d021c344SAndy King 	 * listener's pending list to the accept queue so callers of accept()
1240*d021c344SAndy King 	 * can find it.
1241*d021c344SAndy King 	 */
1242*d021c344SAndy King 	vsock_remove_pending(listener, pending);
1243*d021c344SAndy King 	vsock_enqueue_accept(listener, pending);
1244*d021c344SAndy King 
1245*d021c344SAndy King 	/* Callers of accept() will be be waiting on the listening socket, not
1246*d021c344SAndy King 	 * the pending socket.
1247*d021c344SAndy King 	 */
1248*d021c344SAndy King 	listener->sk_state_change(listener);
1249*d021c344SAndy King 
1250*d021c344SAndy King 	return 0;
1251*d021c344SAndy King 
1252*d021c344SAndy King destroy:
1253*d021c344SAndy King 	pending->sk_err = skerr;
1254*d021c344SAndy King 	pending->sk_state = SS_UNCONNECTED;
1255*d021c344SAndy King 	/* As long as we drop our reference, all necessary cleanup will handle
1256*d021c344SAndy King 	 * when the cleanup function drops its reference and our destruct
1257*d021c344SAndy King 	 * implementation is called.  Note that since the listen handler will
1258*d021c344SAndy King 	 * remove pending from the pending list upon our failure, the cleanup
1259*d021c344SAndy King 	 * function won't drop the additional reference, which is why we do it
1260*d021c344SAndy King 	 * here.
1261*d021c344SAndy King 	 */
1262*d021c344SAndy King 	sock_put(pending);
1263*d021c344SAndy King 
1264*d021c344SAndy King 	return err;
1265*d021c344SAndy King }
1266*d021c344SAndy King 
1267*d021c344SAndy King static int
1268*d021c344SAndy King vmci_transport_recv_connecting_client(struct sock *sk,
1269*d021c344SAndy King 				      struct vmci_transport_packet *pkt)
1270*d021c344SAndy King {
1271*d021c344SAndy King 	struct vsock_sock *vsk;
1272*d021c344SAndy King 	int err;
1273*d021c344SAndy King 	int skerr;
1274*d021c344SAndy King 
1275*d021c344SAndy King 	vsk = vsock_sk(sk);
1276*d021c344SAndy King 
1277*d021c344SAndy King 	switch (pkt->type) {
1278*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
1279*d021c344SAndy King 		if (vmci_handle_is_invalid(pkt->u.handle) ||
1280*d021c344SAndy King 		    !vmci_handle_is_equal(pkt->u.handle,
1281*d021c344SAndy King 					  vmci_trans(vsk)->qp_handle)) {
1282*d021c344SAndy King 			skerr = EPROTO;
1283*d021c344SAndy King 			err = -EINVAL;
1284*d021c344SAndy King 			goto destroy;
1285*d021c344SAndy King 		}
1286*d021c344SAndy King 
1287*d021c344SAndy King 		/* Signify the socket is connected and wakeup the waiter in
1288*d021c344SAndy King 		 * connect(). Also place the socket in the connected table for
1289*d021c344SAndy King 		 * accounting (it can already be found since it's in the bound
1290*d021c344SAndy King 		 * table).
1291*d021c344SAndy King 		 */
1292*d021c344SAndy King 		sk->sk_state = SS_CONNECTED;
1293*d021c344SAndy King 		sk->sk_socket->state = SS_CONNECTED;
1294*d021c344SAndy King 		vsock_insert_connected(vsk);
1295*d021c344SAndy King 		sk->sk_state_change(sk);
1296*d021c344SAndy King 
1297*d021c344SAndy King 		break;
1298*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
1299*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
1300*d021c344SAndy King 		if (pkt->u.size == 0
1301*d021c344SAndy King 		    || pkt->dg.src.context != vsk->remote_addr.svm_cid
1302*d021c344SAndy King 		    || pkt->src_port != vsk->remote_addr.svm_port
1303*d021c344SAndy King 		    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
1304*d021c344SAndy King 		    || vmci_trans(vsk)->qpair
1305*d021c344SAndy King 		    || vmci_trans(vsk)->produce_size != 0
1306*d021c344SAndy King 		    || vmci_trans(vsk)->consume_size != 0
1307*d021c344SAndy King 		    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
1308*d021c344SAndy King 		    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1309*d021c344SAndy King 			skerr = EPROTO;
1310*d021c344SAndy King 			err = -EINVAL;
1311*d021c344SAndy King 
1312*d021c344SAndy King 			goto destroy;
1313*d021c344SAndy King 		}
1314*d021c344SAndy King 
1315*d021c344SAndy King 		err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
1316*d021c344SAndy King 		if (err) {
1317*d021c344SAndy King 			skerr = -err;
1318*d021c344SAndy King 			goto destroy;
1319*d021c344SAndy King 		}
1320*d021c344SAndy King 
1321*d021c344SAndy King 		break;
1322*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
1323*d021c344SAndy King 		err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
1324*d021c344SAndy King 		if (err) {
1325*d021c344SAndy King 			skerr = -err;
1326*d021c344SAndy King 			goto destroy;
1327*d021c344SAndy King 		}
1328*d021c344SAndy King 
1329*d021c344SAndy King 		break;
1330*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_RST:
1331*d021c344SAndy King 		/* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
1332*d021c344SAndy King 		 * continue processing here after they sent an INVALID packet.
1333*d021c344SAndy King 		 * This meant that we got a RST after the INVALID. We ignore a
1334*d021c344SAndy King 		 * RST after an INVALID. The common code doesn't send the RST
1335*d021c344SAndy King 		 * ... so we can hang if an old version of the common code
1336*d021c344SAndy King 		 * fails between getting a REQUEST and sending an OFFER back.
1337*d021c344SAndy King 		 * Not much we can do about it... except hope that it doesn't
1338*d021c344SAndy King 		 * happen.
1339*d021c344SAndy King 		 */
1340*d021c344SAndy King 		if (vsk->ignore_connecting_rst) {
1341*d021c344SAndy King 			vsk->ignore_connecting_rst = false;
1342*d021c344SAndy King 		} else {
1343*d021c344SAndy King 			skerr = ECONNRESET;
1344*d021c344SAndy King 			err = 0;
1345*d021c344SAndy King 			goto destroy;
1346*d021c344SAndy King 		}
1347*d021c344SAndy King 
1348*d021c344SAndy King 		break;
1349*d021c344SAndy King 	default:
1350*d021c344SAndy King 		/* Close and cleanup the connection. */
1351*d021c344SAndy King 		skerr = EPROTO;
1352*d021c344SAndy King 		err = -EINVAL;
1353*d021c344SAndy King 		goto destroy;
1354*d021c344SAndy King 	}
1355*d021c344SAndy King 
1356*d021c344SAndy King 	return 0;
1357*d021c344SAndy King 
1358*d021c344SAndy King destroy:
1359*d021c344SAndy King 	vmci_transport_send_reset(sk, pkt);
1360*d021c344SAndy King 
1361*d021c344SAndy King 	sk->sk_state = SS_UNCONNECTED;
1362*d021c344SAndy King 	sk->sk_err = skerr;
1363*d021c344SAndy King 	sk->sk_error_report(sk);
1364*d021c344SAndy King 	return err;
1365*d021c344SAndy King }
1366*d021c344SAndy King 
1367*d021c344SAndy King static int vmci_transport_recv_connecting_client_negotiate(
1368*d021c344SAndy King 					struct sock *sk,
1369*d021c344SAndy King 					struct vmci_transport_packet *pkt)
1370*d021c344SAndy King {
1371*d021c344SAndy King 	int err;
1372*d021c344SAndy King 	struct vsock_sock *vsk;
1373*d021c344SAndy King 	struct vmci_handle handle;
1374*d021c344SAndy King 	struct vmci_qp *qpair;
1375*d021c344SAndy King 	u32 attach_sub_id;
1376*d021c344SAndy King 	u32 detach_sub_id;
1377*d021c344SAndy King 	bool is_local;
1378*d021c344SAndy King 	u32 flags;
1379*d021c344SAndy King 	bool old_proto = true;
1380*d021c344SAndy King 	bool old_pkt_proto;
1381*d021c344SAndy King 	u16 version;
1382*d021c344SAndy King 
1383*d021c344SAndy King 	vsk = vsock_sk(sk);
1384*d021c344SAndy King 	handle = VMCI_INVALID_HANDLE;
1385*d021c344SAndy King 	attach_sub_id = VMCI_INVALID_ID;
1386*d021c344SAndy King 	detach_sub_id = VMCI_INVALID_ID;
1387*d021c344SAndy King 
1388*d021c344SAndy King 	/* If we have gotten here then we should be past the point where old
1389*d021c344SAndy King 	 * linux vsock could have sent the bogus rst.
1390*d021c344SAndy King 	 */
1391*d021c344SAndy King 	vsk->sent_request = false;
1392*d021c344SAndy King 	vsk->ignore_connecting_rst = false;
1393*d021c344SAndy King 
1394*d021c344SAndy King 	/* Verify that we're OK with the proposed queue pair size */
1395*d021c344SAndy King 	if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
1396*d021c344SAndy King 	    pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
1397*d021c344SAndy King 		err = -EINVAL;
1398*d021c344SAndy King 		goto destroy;
1399*d021c344SAndy King 	}
1400*d021c344SAndy King 
1401*d021c344SAndy King 	/* At this point we know the CID the peer is using to talk to us. */
1402*d021c344SAndy King 
1403*d021c344SAndy King 	if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
1404*d021c344SAndy King 		vsk->local_addr.svm_cid = pkt->dg.dst.context;
1405*d021c344SAndy King 
1406*d021c344SAndy King 	/* Setup the notify ops to be the highest supported version that both
1407*d021c344SAndy King 	 * the server and the client support.
1408*d021c344SAndy King 	 */
1409*d021c344SAndy King 
1410*d021c344SAndy King 	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1411*d021c344SAndy King 		old_proto = old_pkt_proto;
1412*d021c344SAndy King 	} else {
1413*d021c344SAndy King 		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
1414*d021c344SAndy King 			old_proto = true;
1415*d021c344SAndy King 		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
1416*d021c344SAndy King 			old_proto = false;
1417*d021c344SAndy King 
1418*d021c344SAndy King 	}
1419*d021c344SAndy King 
1420*d021c344SAndy King 	if (old_proto)
1421*d021c344SAndy King 		version = VSOCK_PROTO_INVALID;
1422*d021c344SAndy King 	else
1423*d021c344SAndy King 		version = pkt->proto;
1424*d021c344SAndy King 
1425*d021c344SAndy King 	if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
1426*d021c344SAndy King 		err = -EINVAL;
1427*d021c344SAndy King 		goto destroy;
1428*d021c344SAndy King 	}
1429*d021c344SAndy King 
1430*d021c344SAndy King 	/* Subscribe to attach and detach events first.
1431*d021c344SAndy King 	 *
1432*d021c344SAndy King 	 * XXX We attach once for each queue pair created for now so it is easy
1433*d021c344SAndy King 	 * to find the socket (it's provided), but later we should only
1434*d021c344SAndy King 	 * subscribe once and add a way to lookup sockets by queue pair handle.
1435*d021c344SAndy King 	 */
1436*d021c344SAndy King 	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
1437*d021c344SAndy King 				   vmci_transport_peer_attach_cb,
1438*d021c344SAndy King 				   sk, &attach_sub_id);
1439*d021c344SAndy King 	if (err < VMCI_SUCCESS) {
1440*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1441*d021c344SAndy King 		goto destroy;
1442*d021c344SAndy King 	}
1443*d021c344SAndy King 
1444*d021c344SAndy King 	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1445*d021c344SAndy King 				   vmci_transport_peer_detach_cb,
1446*d021c344SAndy King 				   sk, &detach_sub_id);
1447*d021c344SAndy King 	if (err < VMCI_SUCCESS) {
1448*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1449*d021c344SAndy King 		goto destroy;
1450*d021c344SAndy King 	}
1451*d021c344SAndy King 
1452*d021c344SAndy King 	/* Make VMCI select the handle for us. */
1453*d021c344SAndy King 	handle = VMCI_INVALID_HANDLE;
1454*d021c344SAndy King 	is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
1455*d021c344SAndy King 	flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
1456*d021c344SAndy King 
1457*d021c344SAndy King 	err = vmci_transport_queue_pair_alloc(&qpair,
1458*d021c344SAndy King 					      &handle,
1459*d021c344SAndy King 					      pkt->u.size,
1460*d021c344SAndy King 					      pkt->u.size,
1461*d021c344SAndy King 					      vsk->remote_addr.svm_cid,
1462*d021c344SAndy King 					      flags,
1463*d021c344SAndy King 					      vmci_transport_is_trusted(
1464*d021c344SAndy King 						  vsk,
1465*d021c344SAndy King 						  vsk->
1466*d021c344SAndy King 						  remote_addr.svm_cid));
1467*d021c344SAndy King 	if (err < 0)
1468*d021c344SAndy King 		goto destroy;
1469*d021c344SAndy King 
1470*d021c344SAndy King 	err = vmci_transport_send_qp_offer(sk, handle);
1471*d021c344SAndy King 	if (err < 0) {
1472*d021c344SAndy King 		err = vmci_transport_error_to_vsock_error(err);
1473*d021c344SAndy King 		goto destroy;
1474*d021c344SAndy King 	}
1475*d021c344SAndy King 
1476*d021c344SAndy King 	vmci_trans(vsk)->qp_handle = handle;
1477*d021c344SAndy King 	vmci_trans(vsk)->qpair = qpair;
1478*d021c344SAndy King 
1479*d021c344SAndy King 	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1480*d021c344SAndy King 		pkt->u.size;
1481*d021c344SAndy King 
1482*d021c344SAndy King 	vmci_trans(vsk)->attach_sub_id = attach_sub_id;
1483*d021c344SAndy King 	vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1484*d021c344SAndy King 
1485*d021c344SAndy King 	vmci_trans(vsk)->notify_ops->process_negotiate(sk);
1486*d021c344SAndy King 
1487*d021c344SAndy King 	return 0;
1488*d021c344SAndy King 
1489*d021c344SAndy King destroy:
1490*d021c344SAndy King 	if (attach_sub_id != VMCI_INVALID_ID)
1491*d021c344SAndy King 		vmci_event_unsubscribe(attach_sub_id);
1492*d021c344SAndy King 
1493*d021c344SAndy King 	if (detach_sub_id != VMCI_INVALID_ID)
1494*d021c344SAndy King 		vmci_event_unsubscribe(detach_sub_id);
1495*d021c344SAndy King 
1496*d021c344SAndy King 	if (!vmci_handle_is_invalid(handle))
1497*d021c344SAndy King 		vmci_qpair_detach(&qpair);
1498*d021c344SAndy King 
1499*d021c344SAndy King 	return err;
1500*d021c344SAndy King }
1501*d021c344SAndy King 
1502*d021c344SAndy King static int
1503*d021c344SAndy King vmci_transport_recv_connecting_client_invalid(struct sock *sk,
1504*d021c344SAndy King 					      struct vmci_transport_packet *pkt)
1505*d021c344SAndy King {
1506*d021c344SAndy King 	int err = 0;
1507*d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
1508*d021c344SAndy King 
1509*d021c344SAndy King 	if (vsk->sent_request) {
1510*d021c344SAndy King 		vsk->sent_request = false;
1511*d021c344SAndy King 		vsk->ignore_connecting_rst = true;
1512*d021c344SAndy King 
1513*d021c344SAndy King 		err = vmci_transport_send_conn_request(
1514*d021c344SAndy King 			sk, vmci_trans(vsk)->queue_pair_size);
1515*d021c344SAndy King 		if (err < 0)
1516*d021c344SAndy King 			err = vmci_transport_error_to_vsock_error(err);
1517*d021c344SAndy King 		else
1518*d021c344SAndy King 			err = 0;
1519*d021c344SAndy King 
1520*d021c344SAndy King 	}
1521*d021c344SAndy King 
1522*d021c344SAndy King 	return err;
1523*d021c344SAndy King }
1524*d021c344SAndy King 
1525*d021c344SAndy King static int vmci_transport_recv_connected(struct sock *sk,
1526*d021c344SAndy King 					 struct vmci_transport_packet *pkt)
1527*d021c344SAndy King {
1528*d021c344SAndy King 	struct vsock_sock *vsk;
1529*d021c344SAndy King 	bool pkt_processed = false;
1530*d021c344SAndy King 
1531*d021c344SAndy King 	/* In cases where we are closing the connection, it's sufficient to
1532*d021c344SAndy King 	 * mark the state change (and maybe error) and wake up any waiting
1533*d021c344SAndy King 	 * threads. Since this is a connected socket, it's owned by a user
1534*d021c344SAndy King 	 * process and will be cleaned up when the failure is passed back on
1535*d021c344SAndy King 	 * the current or next system call.  Our system call implementations
1536*d021c344SAndy King 	 * must therefore check for error and state changes on entry and when
1537*d021c344SAndy King 	 * being awoken.
1538*d021c344SAndy King 	 */
1539*d021c344SAndy King 	switch (pkt->type) {
1540*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
1541*d021c344SAndy King 		if (pkt->u.mode) {
1542*d021c344SAndy King 			vsk = vsock_sk(sk);
1543*d021c344SAndy King 
1544*d021c344SAndy King 			vsk->peer_shutdown |= pkt->u.mode;
1545*d021c344SAndy King 			sk->sk_state_change(sk);
1546*d021c344SAndy King 		}
1547*d021c344SAndy King 		break;
1548*d021c344SAndy King 
1549*d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_RST:
1550*d021c344SAndy King 		vsk = vsock_sk(sk);
1551*d021c344SAndy King 		/* It is possible that we sent our peer a message (e.g a
1552*d021c344SAndy King 		 * WAITING_READ) right before we got notified that the peer had
1553*d021c344SAndy King 		 * detached. If that happens then we can get a RST pkt back
1554*d021c344SAndy King 		 * from our peer even though there is data available for us to
1555*d021c344SAndy King 		 * read. In that case, don't shutdown the socket completely but
1556*d021c344SAndy King 		 * instead allow the local client to finish reading data off
1557*d021c344SAndy King 		 * the queuepair. Always treat a RST pkt in connected mode like
1558*d021c344SAndy King 		 * a clean shutdown.
1559*d021c344SAndy King 		 */
1560*d021c344SAndy King 		sock_set_flag(sk, SOCK_DONE);
1561*d021c344SAndy King 		vsk->peer_shutdown = SHUTDOWN_MASK;
1562*d021c344SAndy King 		if (vsock_stream_has_data(vsk) <= 0)
1563*d021c344SAndy King 			sk->sk_state = SS_DISCONNECTING;
1564*d021c344SAndy King 
1565*d021c344SAndy King 		sk->sk_state_change(sk);
1566*d021c344SAndy King 		break;
1567*d021c344SAndy King 
1568*d021c344SAndy King 	default:
1569*d021c344SAndy King 		vsk = vsock_sk(sk);
1570*d021c344SAndy King 		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
1571*d021c344SAndy King 				sk, pkt, false, NULL, NULL,
1572*d021c344SAndy King 				&pkt_processed);
1573*d021c344SAndy King 		if (!pkt_processed)
1574*d021c344SAndy King 			return -EINVAL;
1575*d021c344SAndy King 
1576*d021c344SAndy King 		break;
1577*d021c344SAndy King 	}
1578*d021c344SAndy King 
1579*d021c344SAndy King 	return 0;
1580*d021c344SAndy King }
1581*d021c344SAndy King 
1582*d021c344SAndy King static int vmci_transport_socket_init(struct vsock_sock *vsk,
1583*d021c344SAndy King 				      struct vsock_sock *psk)
1584*d021c344SAndy King {
1585*d021c344SAndy King 	vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
1586*d021c344SAndy King 	if (!vsk->trans)
1587*d021c344SAndy King 		return -ENOMEM;
1588*d021c344SAndy King 
1589*d021c344SAndy King 	vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1590*d021c344SAndy King 	vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1591*d021c344SAndy King 	vmci_trans(vsk)->qpair = NULL;
1592*d021c344SAndy King 	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1593*d021c344SAndy King 	vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
1594*d021c344SAndy King 		VMCI_INVALID_ID;
1595*d021c344SAndy King 	vmci_trans(vsk)->notify_ops = NULL;
1596*d021c344SAndy King 	if (psk) {
1597*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_size =
1598*d021c344SAndy King 			vmci_trans(psk)->queue_pair_size;
1599*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_min_size =
1600*d021c344SAndy King 			vmci_trans(psk)->queue_pair_min_size;
1601*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_max_size =
1602*d021c344SAndy King 			vmci_trans(psk)->queue_pair_max_size;
1603*d021c344SAndy King 	} else {
1604*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_size =
1605*d021c344SAndy King 			VMCI_TRANSPORT_DEFAULT_QP_SIZE;
1606*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_min_size =
1607*d021c344SAndy King 			 VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
1608*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_max_size =
1609*d021c344SAndy King 			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
1610*d021c344SAndy King 	}
1611*d021c344SAndy King 
1612*d021c344SAndy King 	return 0;
1613*d021c344SAndy King }
1614*d021c344SAndy King 
1615*d021c344SAndy King static void vmci_transport_destruct(struct vsock_sock *vsk)
1616*d021c344SAndy King {
1617*d021c344SAndy King 	if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
1618*d021c344SAndy King 		vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
1619*d021c344SAndy King 		vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
1620*d021c344SAndy King 	}
1621*d021c344SAndy King 
1622*d021c344SAndy King 	if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1623*d021c344SAndy King 		vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
1624*d021c344SAndy King 		vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1625*d021c344SAndy King 	}
1626*d021c344SAndy King 
1627*d021c344SAndy King 	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
1628*d021c344SAndy King 		vmci_qpair_detach(&vmci_trans(vsk)->qpair);
1629*d021c344SAndy King 		vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1630*d021c344SAndy King 		vmci_trans(vsk)->produce_size = 0;
1631*d021c344SAndy King 		vmci_trans(vsk)->consume_size = 0;
1632*d021c344SAndy King 	}
1633*d021c344SAndy King 
1634*d021c344SAndy King 	if (vmci_trans(vsk)->notify_ops)
1635*d021c344SAndy King 		vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1636*d021c344SAndy King 
1637*d021c344SAndy King 	kfree(vsk->trans);
1638*d021c344SAndy King 	vsk->trans = NULL;
1639*d021c344SAndy King }
1640*d021c344SAndy King 
1641*d021c344SAndy King static void vmci_transport_release(struct vsock_sock *vsk)
1642*d021c344SAndy King {
1643*d021c344SAndy King 	if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
1644*d021c344SAndy King 		vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
1645*d021c344SAndy King 		vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1646*d021c344SAndy King 	}
1647*d021c344SAndy King }
1648*d021c344SAndy King 
1649*d021c344SAndy King static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
1650*d021c344SAndy King 				     struct sockaddr_vm *addr)
1651*d021c344SAndy King {
1652*d021c344SAndy King 	u32 port;
1653*d021c344SAndy King 	u32 flags;
1654*d021c344SAndy King 	int err;
1655*d021c344SAndy King 
1656*d021c344SAndy King 	/* VMCI will select a resource ID for us if we provide
1657*d021c344SAndy King 	 * VMCI_INVALID_ID.
1658*d021c344SAndy King 	 */
1659*d021c344SAndy King 	port = addr->svm_port == VMADDR_PORT_ANY ?
1660*d021c344SAndy King 			VMCI_INVALID_ID : addr->svm_port;
1661*d021c344SAndy King 
1662*d021c344SAndy King 	if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
1663*d021c344SAndy King 		return -EACCES;
1664*d021c344SAndy King 
1665*d021c344SAndy King 	flags = addr->svm_cid == VMADDR_CID_ANY ?
1666*d021c344SAndy King 				VMCI_FLAG_ANYCID_DG_HND : 0;
1667*d021c344SAndy King 
1668*d021c344SAndy King 	err = vmci_transport_datagram_create_hnd(port, flags,
1669*d021c344SAndy King 						 vmci_transport_recv_dgram_cb,
1670*d021c344SAndy King 						 &vsk->sk,
1671*d021c344SAndy King 						 &vmci_trans(vsk)->dg_handle);
1672*d021c344SAndy King 	if (err < VMCI_SUCCESS)
1673*d021c344SAndy King 		return vmci_transport_error_to_vsock_error(err);
1674*d021c344SAndy King 	vsock_addr_init(&vsk->local_addr, addr->svm_cid,
1675*d021c344SAndy King 			vmci_trans(vsk)->dg_handle.resource);
1676*d021c344SAndy King 
1677*d021c344SAndy King 	return 0;
1678*d021c344SAndy King }
1679*d021c344SAndy King 
1680*d021c344SAndy King static int vmci_transport_dgram_enqueue(
1681*d021c344SAndy King 	struct vsock_sock *vsk,
1682*d021c344SAndy King 	struct sockaddr_vm *remote_addr,
1683*d021c344SAndy King 	struct iovec *iov,
1684*d021c344SAndy King 	size_t len)
1685*d021c344SAndy King {
1686*d021c344SAndy King 	int err;
1687*d021c344SAndy King 	struct vmci_datagram *dg;
1688*d021c344SAndy King 
1689*d021c344SAndy King 	if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
1690*d021c344SAndy King 		return -EMSGSIZE;
1691*d021c344SAndy King 
1692*d021c344SAndy King 	if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
1693*d021c344SAndy King 		return -EPERM;
1694*d021c344SAndy King 
1695*d021c344SAndy King 	/* Allocate a buffer for the user's message and our packet header. */
1696*d021c344SAndy King 	dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
1697*d021c344SAndy King 	if (!dg)
1698*d021c344SAndy King 		return -ENOMEM;
1699*d021c344SAndy King 
1700*d021c344SAndy King 	memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);
1701*d021c344SAndy King 
1702*d021c344SAndy King 	dg->dst = vmci_make_handle(remote_addr->svm_cid,
1703*d021c344SAndy King 				   remote_addr->svm_port);
1704*d021c344SAndy King 	dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
1705*d021c344SAndy King 				   vsk->local_addr.svm_port);
1706*d021c344SAndy King 	dg->payload_size = len;
1707*d021c344SAndy King 
1708*d021c344SAndy King 	err = vmci_datagram_send(dg);
1709*d021c344SAndy King 	kfree(dg);
1710*d021c344SAndy King 	if (err < 0)
1711*d021c344SAndy King 		return vmci_transport_error_to_vsock_error(err);
1712*d021c344SAndy King 
1713*d021c344SAndy King 	return err - sizeof(*dg);
1714*d021c344SAndy King }
1715*d021c344SAndy King 
1716*d021c344SAndy King static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
1717*d021c344SAndy King 					struct vsock_sock *vsk,
1718*d021c344SAndy King 					struct msghdr *msg, size_t len,
1719*d021c344SAndy King 					int flags)
1720*d021c344SAndy King {
1721*d021c344SAndy King 	int err;
1722*d021c344SAndy King 	int noblock;
1723*d021c344SAndy King 	struct vmci_datagram *dg;
1724*d021c344SAndy King 	size_t payload_len;
1725*d021c344SAndy King 	struct sk_buff *skb;
1726*d021c344SAndy King 
1727*d021c344SAndy King 	noblock = flags & MSG_DONTWAIT;
1728*d021c344SAndy King 
1729*d021c344SAndy King 	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
1730*d021c344SAndy King 		return -EOPNOTSUPP;
1731*d021c344SAndy King 
1732*d021c344SAndy King 	/* Retrieve the head sk_buff from the socket's receive queue. */
1733*d021c344SAndy King 	err = 0;
1734*d021c344SAndy King 	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
1735*d021c344SAndy King 	if (err)
1736*d021c344SAndy King 		return err;
1737*d021c344SAndy King 
1738*d021c344SAndy King 	if (!skb)
1739*d021c344SAndy King 		return -EAGAIN;
1740*d021c344SAndy King 
1741*d021c344SAndy King 	dg = (struct vmci_datagram *)skb->data;
1742*d021c344SAndy King 	if (!dg)
1743*d021c344SAndy King 		/* err is 0, meaning we read zero bytes. */
1744*d021c344SAndy King 		goto out;
1745*d021c344SAndy King 
1746*d021c344SAndy King 	payload_len = dg->payload_size;
1747*d021c344SAndy King 	/* Ensure the sk_buff matches the payload size claimed in the packet. */
1748*d021c344SAndy King 	if (payload_len != skb->len - sizeof(*dg)) {
1749*d021c344SAndy King 		err = -EINVAL;
1750*d021c344SAndy King 		goto out;
1751*d021c344SAndy King 	}
1752*d021c344SAndy King 
1753*d021c344SAndy King 	if (payload_len > len) {
1754*d021c344SAndy King 		payload_len = len;
1755*d021c344SAndy King 		msg->msg_flags |= MSG_TRUNC;
1756*d021c344SAndy King 	}
1757*d021c344SAndy King 
1758*d021c344SAndy King 	/* Place the datagram payload in the user's iovec. */
1759*d021c344SAndy King 	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
1760*d021c344SAndy King 		payload_len);
1761*d021c344SAndy King 	if (err)
1762*d021c344SAndy King 		goto out;
1763*d021c344SAndy King 
1764*d021c344SAndy King 	msg->msg_namelen = 0;
1765*d021c344SAndy King 	if (msg->msg_name) {
1766*d021c344SAndy King 		struct sockaddr_vm *vm_addr;
1767*d021c344SAndy King 
1768*d021c344SAndy King 		/* Provide the address of the sender. */
1769*d021c344SAndy King 		vm_addr = (struct sockaddr_vm *)msg->msg_name;
1770*d021c344SAndy King 		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
1771*d021c344SAndy King 		msg->msg_namelen = sizeof(*vm_addr);
1772*d021c344SAndy King 	}
1773*d021c344SAndy King 	err = payload_len;
1774*d021c344SAndy King 
1775*d021c344SAndy King out:
1776*d021c344SAndy King 	skb_free_datagram(&vsk->sk, skb);
1777*d021c344SAndy King 	return err;
1778*d021c344SAndy King }
1779*d021c344SAndy King 
1780*d021c344SAndy King static bool vmci_transport_dgram_allow(u32 cid, u32 port)
1781*d021c344SAndy King {
1782*d021c344SAndy King 	if (cid == VMADDR_CID_HYPERVISOR) {
1783*d021c344SAndy King 		/* Registrations of PBRPC Servers do not modify VMX/Hypervisor
1784*d021c344SAndy King 		 * state and are allowed.
1785*d021c344SAndy King 		 */
1786*d021c344SAndy King 		return port == VMCI_UNITY_PBRPC_REGISTER;
1787*d021c344SAndy King 	}
1788*d021c344SAndy King 
1789*d021c344SAndy King 	return true;
1790*d021c344SAndy King }
1791*d021c344SAndy King 
1792*d021c344SAndy King static int vmci_transport_connect(struct vsock_sock *vsk)
1793*d021c344SAndy King {
1794*d021c344SAndy King 	int err;
1795*d021c344SAndy King 	bool old_pkt_proto = false;
1796*d021c344SAndy King 	struct sock *sk = &vsk->sk;
1797*d021c344SAndy King 
1798*d021c344SAndy King 	if (vmci_transport_old_proto_override(&old_pkt_proto) &&
1799*d021c344SAndy King 		old_pkt_proto) {
1800*d021c344SAndy King 		err = vmci_transport_send_conn_request(
1801*d021c344SAndy King 			sk, vmci_trans(vsk)->queue_pair_size);
1802*d021c344SAndy King 		if (err < 0) {
1803*d021c344SAndy King 			sk->sk_state = SS_UNCONNECTED;
1804*d021c344SAndy King 			return err;
1805*d021c344SAndy King 		}
1806*d021c344SAndy King 	} else {
1807*d021c344SAndy King 		int supported_proto_versions =
1808*d021c344SAndy King 			vmci_transport_new_proto_supported_versions();
1809*d021c344SAndy King 		err = vmci_transport_send_conn_request2(
1810*d021c344SAndy King 				sk, vmci_trans(vsk)->queue_pair_size,
1811*d021c344SAndy King 				supported_proto_versions);
1812*d021c344SAndy King 		if (err < 0) {
1813*d021c344SAndy King 			sk->sk_state = SS_UNCONNECTED;
1814*d021c344SAndy King 			return err;
1815*d021c344SAndy King 		}
1816*d021c344SAndy King 
1817*d021c344SAndy King 		vsk->sent_request = true;
1818*d021c344SAndy King 	}
1819*d021c344SAndy King 
1820*d021c344SAndy King 	return err;
1821*d021c344SAndy King }
1822*d021c344SAndy King 
1823*d021c344SAndy King static ssize_t vmci_transport_stream_dequeue(
1824*d021c344SAndy King 	struct vsock_sock *vsk,
1825*d021c344SAndy King 	struct iovec *iov,
1826*d021c344SAndy King 	size_t len,
1827*d021c344SAndy King 	int flags)
1828*d021c344SAndy King {
1829*d021c344SAndy King 	if (flags & MSG_PEEK)
1830*d021c344SAndy King 		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
1831*d021c344SAndy King 	else
1832*d021c344SAndy King 		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
1833*d021c344SAndy King }
1834*d021c344SAndy King 
1835*d021c344SAndy King static ssize_t vmci_transport_stream_enqueue(
1836*d021c344SAndy King 	struct vsock_sock *vsk,
1837*d021c344SAndy King 	struct iovec *iov,
1838*d021c344SAndy King 	size_t len)
1839*d021c344SAndy King {
1840*d021c344SAndy King 	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
1841*d021c344SAndy King }
1842*d021c344SAndy King 
1843*d021c344SAndy King static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
1844*d021c344SAndy King {
1845*d021c344SAndy King 	return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
1846*d021c344SAndy King }
1847*d021c344SAndy King 
1848*d021c344SAndy King static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
1849*d021c344SAndy King {
1850*d021c344SAndy King 	return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
1851*d021c344SAndy King }
1852*d021c344SAndy King 
1853*d021c344SAndy King static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
1854*d021c344SAndy King {
1855*d021c344SAndy King 	return vmci_trans(vsk)->consume_size;
1856*d021c344SAndy King }
1857*d021c344SAndy King 
1858*d021c344SAndy King static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
1859*d021c344SAndy King {
1860*d021c344SAndy King 	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
1861*d021c344SAndy King }
1862*d021c344SAndy King 
1863*d021c344SAndy King static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
1864*d021c344SAndy King {
1865*d021c344SAndy King 	return vmci_trans(vsk)->queue_pair_size;
1866*d021c344SAndy King }
1867*d021c344SAndy King 
1868*d021c344SAndy King static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
1869*d021c344SAndy King {
1870*d021c344SAndy King 	return vmci_trans(vsk)->queue_pair_min_size;
1871*d021c344SAndy King }
1872*d021c344SAndy King 
1873*d021c344SAndy King static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
1874*d021c344SAndy King {
1875*d021c344SAndy King 	return vmci_trans(vsk)->queue_pair_max_size;
1876*d021c344SAndy King }
1877*d021c344SAndy King 
1878*d021c344SAndy King static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
1879*d021c344SAndy King {
1880*d021c344SAndy King 	if (val < vmci_trans(vsk)->queue_pair_min_size)
1881*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_min_size = val;
1882*d021c344SAndy King 	if (val > vmci_trans(vsk)->queue_pair_max_size)
1883*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_max_size = val;
1884*d021c344SAndy King 	vmci_trans(vsk)->queue_pair_size = val;
1885*d021c344SAndy King }
1886*d021c344SAndy King 
1887*d021c344SAndy King static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
1888*d021c344SAndy King 					       u64 val)
1889*d021c344SAndy King {
1890*d021c344SAndy King 	if (val > vmci_trans(vsk)->queue_pair_size)
1891*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_size = val;
1892*d021c344SAndy King 	vmci_trans(vsk)->queue_pair_min_size = val;
1893*d021c344SAndy King }
1894*d021c344SAndy King 
1895*d021c344SAndy King static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
1896*d021c344SAndy King 					       u64 val)
1897*d021c344SAndy King {
1898*d021c344SAndy King 	if (val < vmci_trans(vsk)->queue_pair_size)
1899*d021c344SAndy King 		vmci_trans(vsk)->queue_pair_size = val;
1900*d021c344SAndy King 	vmci_trans(vsk)->queue_pair_max_size = val;
1901*d021c344SAndy King }
1902*d021c344SAndy King 
1903*d021c344SAndy King static int vmci_transport_notify_poll_in(
1904*d021c344SAndy King 	struct vsock_sock *vsk,
1905*d021c344SAndy King 	size_t target,
1906*d021c344SAndy King 	bool *data_ready_now)
1907*d021c344SAndy King {
1908*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->poll_in(
1909*d021c344SAndy King 			&vsk->sk, target, data_ready_now);
1910*d021c344SAndy King }
1911*d021c344SAndy King 
1912*d021c344SAndy King static int vmci_transport_notify_poll_out(
1913*d021c344SAndy King 	struct vsock_sock *vsk,
1914*d021c344SAndy King 	size_t target,
1915*d021c344SAndy King 	bool *space_available_now)
1916*d021c344SAndy King {
1917*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->poll_out(
1918*d021c344SAndy King 			&vsk->sk, target, space_available_now);
1919*d021c344SAndy King }
1920*d021c344SAndy King 
1921*d021c344SAndy King static int vmci_transport_notify_recv_init(
1922*d021c344SAndy King 	struct vsock_sock *vsk,
1923*d021c344SAndy King 	size_t target,
1924*d021c344SAndy King 	struct vsock_transport_recv_notify_data *data)
1925*d021c344SAndy King {
1926*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->recv_init(
1927*d021c344SAndy King 			&vsk->sk, target,
1928*d021c344SAndy King 			(struct vmci_transport_recv_notify_data *)data);
1929*d021c344SAndy King }
1930*d021c344SAndy King 
1931*d021c344SAndy King static int vmci_transport_notify_recv_pre_block(
1932*d021c344SAndy King 	struct vsock_sock *vsk,
1933*d021c344SAndy King 	size_t target,
1934*d021c344SAndy King 	struct vsock_transport_recv_notify_data *data)
1935*d021c344SAndy King {
1936*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->recv_pre_block(
1937*d021c344SAndy King 			&vsk->sk, target,
1938*d021c344SAndy King 			(struct vmci_transport_recv_notify_data *)data);
1939*d021c344SAndy King }
1940*d021c344SAndy King 
1941*d021c344SAndy King static int vmci_transport_notify_recv_pre_dequeue(
1942*d021c344SAndy King 	struct vsock_sock *vsk,
1943*d021c344SAndy King 	size_t target,
1944*d021c344SAndy King 	struct vsock_transport_recv_notify_data *data)
1945*d021c344SAndy King {
1946*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
1947*d021c344SAndy King 			&vsk->sk, target,
1948*d021c344SAndy King 			(struct vmci_transport_recv_notify_data *)data);
1949*d021c344SAndy King }
1950*d021c344SAndy King 
1951*d021c344SAndy King static int vmci_transport_notify_recv_post_dequeue(
1952*d021c344SAndy King 	struct vsock_sock *vsk,
1953*d021c344SAndy King 	size_t target,
1954*d021c344SAndy King 	ssize_t copied,
1955*d021c344SAndy King 	bool data_read,
1956*d021c344SAndy King 	struct vsock_transport_recv_notify_data *data)
1957*d021c344SAndy King {
1958*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
1959*d021c344SAndy King 			&vsk->sk, target, copied, data_read,
1960*d021c344SAndy King 			(struct vmci_transport_recv_notify_data *)data);
1961*d021c344SAndy King }
1962*d021c344SAndy King 
1963*d021c344SAndy King static int vmci_transport_notify_send_init(
1964*d021c344SAndy King 	struct vsock_sock *vsk,
1965*d021c344SAndy King 	struct vsock_transport_send_notify_data *data)
1966*d021c344SAndy King {
1967*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->send_init(
1968*d021c344SAndy King 			&vsk->sk,
1969*d021c344SAndy King 			(struct vmci_transport_send_notify_data *)data);
1970*d021c344SAndy King }
1971*d021c344SAndy King 
1972*d021c344SAndy King static int vmci_transport_notify_send_pre_block(
1973*d021c344SAndy King 	struct vsock_sock *vsk,
1974*d021c344SAndy King 	struct vsock_transport_send_notify_data *data)
1975*d021c344SAndy King {
1976*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->send_pre_block(
1977*d021c344SAndy King 			&vsk->sk,
1978*d021c344SAndy King 			(struct vmci_transport_send_notify_data *)data);
1979*d021c344SAndy King }
1980*d021c344SAndy King 
1981*d021c344SAndy King static int vmci_transport_notify_send_pre_enqueue(
1982*d021c344SAndy King 	struct vsock_sock *vsk,
1983*d021c344SAndy King 	struct vsock_transport_send_notify_data *data)
1984*d021c344SAndy King {
1985*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
1986*d021c344SAndy King 			&vsk->sk,
1987*d021c344SAndy King 			(struct vmci_transport_send_notify_data *)data);
1988*d021c344SAndy King }
1989*d021c344SAndy King 
1990*d021c344SAndy King static int vmci_transport_notify_send_post_enqueue(
1991*d021c344SAndy King 	struct vsock_sock *vsk,
1992*d021c344SAndy King 	ssize_t written,
1993*d021c344SAndy King 	struct vsock_transport_send_notify_data *data)
1994*d021c344SAndy King {
1995*d021c344SAndy King 	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
1996*d021c344SAndy King 			&vsk->sk, written,
1997*d021c344SAndy King 			(struct vmci_transport_send_notify_data *)data);
1998*d021c344SAndy King }
1999*d021c344SAndy King 
2000*d021c344SAndy King static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
2001*d021c344SAndy King {
2002*d021c344SAndy King 	if (PROTOCOL_OVERRIDE != -1) {
2003*d021c344SAndy King 		if (PROTOCOL_OVERRIDE == 0)
2004*d021c344SAndy King 			*old_pkt_proto = true;
2005*d021c344SAndy King 		else
2006*d021c344SAndy King 			*old_pkt_proto = false;
2007*d021c344SAndy King 
2008*d021c344SAndy King 		pr_info("Proto override in use\n");
2009*d021c344SAndy King 		return true;
2010*d021c344SAndy King 	}
2011*d021c344SAndy King 
2012*d021c344SAndy King 	return false;
2013*d021c344SAndy King }
2014*d021c344SAndy King 
2015*d021c344SAndy King static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
2016*d021c344SAndy King 						  u16 *proto,
2017*d021c344SAndy King 						  bool old_pkt_proto)
2018*d021c344SAndy King {
2019*d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
2020*d021c344SAndy King 
2021*d021c344SAndy King 	if (old_pkt_proto) {
2022*d021c344SAndy King 		if (*proto != VSOCK_PROTO_INVALID) {
2023*d021c344SAndy King 			pr_err("Can't set both an old and new protocol\n");
2024*d021c344SAndy King 			return false;
2025*d021c344SAndy King 		}
2026*d021c344SAndy King 		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
2027*d021c344SAndy King 		goto exit;
2028*d021c344SAndy King 	}
2029*d021c344SAndy King 
2030*d021c344SAndy King 	switch (*proto) {
2031*d021c344SAndy King 	case VSOCK_PROTO_PKT_ON_NOTIFY:
2032*d021c344SAndy King 		vmci_trans(vsk)->notify_ops =
2033*d021c344SAndy King 			&vmci_transport_notify_pkt_q_state_ops;
2034*d021c344SAndy King 		break;
2035*d021c344SAndy King 	default:
2036*d021c344SAndy King 		pr_err("Unknown notify protocol version\n");
2037*d021c344SAndy King 		return false;
2038*d021c344SAndy King 	}
2039*d021c344SAndy King 
2040*d021c344SAndy King exit:
2041*d021c344SAndy King 	vmci_trans(vsk)->notify_ops->socket_init(sk);
2042*d021c344SAndy King 	return true;
2043*d021c344SAndy King }
2044*d021c344SAndy King 
2045*d021c344SAndy King static u16 vmci_transport_new_proto_supported_versions(void)
2046*d021c344SAndy King {
2047*d021c344SAndy King 	if (PROTOCOL_OVERRIDE != -1)
2048*d021c344SAndy King 		return PROTOCOL_OVERRIDE;
2049*d021c344SAndy King 
2050*d021c344SAndy King 	return VSOCK_PROTO_ALL_SUPPORTED;
2051*d021c344SAndy King }
2052*d021c344SAndy King 
2053*d021c344SAndy King static u32 vmci_transport_get_local_cid(void)
2054*d021c344SAndy King {
2055*d021c344SAndy King 	return vmci_get_context_id();
2056*d021c344SAndy King }
2057*d021c344SAndy King 
/* Transport operations handed to the AF_VSOCK core via vsock_core_init();
 * the core dispatches every socket operation for this transport through
 * this table.
 */
static struct vsock_transport vmci_transport = {
	/* Socket lifecycle. */
	.init = vmci_transport_socket_init,
	.destruct = vmci_transport_destruct,
	.release = vmci_transport_release,
	.connect = vmci_transport_connect,
	/* Datagram (SOCK_DGRAM) operations. */
	.dgram_bind = vmci_transport_dgram_bind,
	.dgram_dequeue = vmci_transport_dgram_dequeue,
	.dgram_enqueue = vmci_transport_dgram_enqueue,
	.dgram_allow = vmci_transport_dgram_allow,
	/* Stream (SOCK_STREAM) operations. */
	.stream_dequeue = vmci_transport_stream_dequeue,
	.stream_enqueue = vmci_transport_stream_enqueue,
	.stream_has_data = vmci_transport_stream_has_data,
	.stream_has_space = vmci_transport_stream_has_space,
	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
	.stream_is_active = vmci_transport_stream_is_active,
	.stream_allow = vmci_transport_stream_allow,
	/* Notification hooks, forwarded to the negotiated notify_ops. */
	.notify_poll_in = vmci_transport_notify_poll_in,
	.notify_poll_out = vmci_transport_notify_poll_out,
	.notify_recv_init = vmci_transport_notify_recv_init,
	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
	.notify_send_init = vmci_transport_notify_send_init,
	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
	.shutdown = vmci_transport_shutdown,
	/* Queue pair buffer sizing. */
	.set_buffer_size = vmci_transport_set_buffer_size,
	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
	.get_buffer_size = vmci_transport_get_buffer_size,
	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
	.get_local_cid = vmci_transport_get_local_cid,
};
2093*d021c344SAndy King 
/* Module init: create the VMCI resources shared by all vSockets in this
 * context, then register the transport with the AF_VSOCK core.  Returns 0
 * on success or a negative vSock error; on failure, resources created so
 * far are torn down via the goto chain at the bottom.
 */
static int __init vmci_transport_init(void)
{
	int err;

	/* Create the datagram handle that we will use to send and receive all
	 * VSocket control messages for this context.
	 */
	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
						 VMCI_FLAG_ANYCID_DG_HND,
						 vmci_transport_recv_stream_cb,
						 NULL,
						 &vmci_transport_stream_handle);
	/* VMCI calls report success as err >= VMCI_SUCCESS, hence '<'. */
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to create datagram handle. (%d)\n", err);
		return vmci_transport_error_to_vsock_error(err);
	}

	/* Subscribe to queue-pair-resumed events; presumably these fire when
	 * the guest resumes after suspend/migration -- confirm against the
	 * vmci_transport_qp_resumed_cb implementation.
	 */
	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
				   vmci_transport_qp_resumed_cb,
				   NULL, &vmci_transport_qp_resumed_sub_id);
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
		err = vmci_transport_error_to_vsock_error(err);
		/* Mark the subscription invalid so module exit will not try
		 * to unsubscribe it.
		 */
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
		goto err_destroy_stream_handle;
	}

	/* Register our transport ops with the AF_VSOCK core. */
	err = vsock_core_init(&vmci_transport);
	if (err < 0)
		goto err_unsubscribe;

	return 0;

err_unsubscribe:
	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
err_destroy_stream_handle:
	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
	return err;
}
module_init(vmci_transport_init);
2134*d021c344SAndy King 
/* Module exit: release the resources acquired in vmci_transport_init() in
 * reverse order, then unregister from the AF_VSOCK core.  Each global is
 * reset to its invalid sentinel after teardown.
 */
static void __exit vmci_transport_exit(void)
{
	/* Destroy the control-message datagram handle, if it was created. */
	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
		if (vmci_datagram_destroy_handle(
			vmci_transport_stream_handle) != VMCI_SUCCESS)
			pr_err("Couldn't destroy datagram handle\n");
		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
	}

	/* Drop the queue-pair-resumed event subscription, if active. */
	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
	}

	vsock_core_exit();
}
module_exit(vmci_transport_exit);
2152*d021c344SAndy King 
2153*d021c344SAndy King MODULE_AUTHOR("VMware, Inc.");
2154*d021c344SAndy King MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2155*d021c344SAndy King MODULE_LICENSE("GPL v2");
2156*d021c344SAndy King MODULE_ALIAS("vmware_vsock");
2157*d021c344SAndy King MODULE_ALIAS_NETPROTO(PF_VSOCK);
2158