xref: /openbmc/linux/net/vmw_vsock/vmci_transport_notify.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 */
7d021c344SAndy King 
8d021c344SAndy King #include <linux/types.h>
9d021c344SAndy King #include <linux/socket.h>
10d021c344SAndy King #include <linux/stddef.h>
11d021c344SAndy King #include <net/sock.h>
12d021c344SAndy King 
13d021c344SAndy King #include "vmci_transport_notify.h"
14d021c344SAndy King 
/* Shorthand for one member of a socket's packet-based notify state: expands
 * to member @member of the notify.pkt structure reached through
 * vmci_trans(vsk).  The expansion is an lvalue, so it can be read or assigned.
 */
#define PKT_FIELD(vsk, member) (vmci_trans(vsk)->notify.pkt.member)
16d021c344SAndy King 
vmci_transport_notify_waiting_write(struct vsock_sock * vsk)17d021c344SAndy King static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
18d021c344SAndy King {
19d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
20d021c344SAndy King 	bool retval;
21d021c344SAndy King 	u64 notify_limit;
22d021c344SAndy King 
23d021c344SAndy King 	if (!PKT_FIELD(vsk, peer_waiting_write))
24d021c344SAndy King 		return false;
25d021c344SAndy King 
26d021c344SAndy King #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
27d021c344SAndy King 	/* When the sender blocks, we take that as a sign that the sender is
28d021c344SAndy King 	 * faster than the receiver. To reduce the transmit rate of the sender,
29d021c344SAndy King 	 * we delay the sending of the read notification by decreasing the
30d021c344SAndy King 	 * write_notify_window. The notification is delayed until the number of
31d021c344SAndy King 	 * bytes used in the queue drops below the write_notify_window.
32d021c344SAndy King 	 */
33d021c344SAndy King 
34d021c344SAndy King 	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
35d021c344SAndy King 		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
36d021c344SAndy King 		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
37d021c344SAndy King 			PKT_FIELD(vsk, write_notify_window) =
38d021c344SAndy King 			    PKT_FIELD(vsk, write_notify_min_window);
39d021c344SAndy King 		} else {
40d021c344SAndy King 			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
41d021c344SAndy King 			if (PKT_FIELD(vsk, write_notify_window) <
42d021c344SAndy King 			    PKT_FIELD(vsk, write_notify_min_window))
43d021c344SAndy King 				PKT_FIELD(vsk, write_notify_window) =
44d021c344SAndy King 				    PKT_FIELD(vsk, write_notify_min_window);
45d021c344SAndy King 
46d021c344SAndy King 		}
47d021c344SAndy King 	}
48d021c344SAndy King 	notify_limit = vmci_trans(vsk)->consume_size -
49d021c344SAndy King 		PKT_FIELD(vsk, write_notify_window);
50d021c344SAndy King #else
51d021c344SAndy King 	notify_limit = 0;
52d021c344SAndy King #endif
53d021c344SAndy King 
54d021c344SAndy King 	/* For now we ignore the wait information and just see if the free
55d021c344SAndy King 	 * space exceeds the notify limit.  Note that improving this function
56d021c344SAndy King 	 * to be more intelligent will not require a protocol change and will
57d021c344SAndy King 	 * retain compatibility between endpoints with mixed versions of this
58d021c344SAndy King 	 * function.
59d021c344SAndy King 	 *
60d021c344SAndy King 	 * The notify_limit is used to delay notifications in the case where
61d021c344SAndy King 	 * flow control is enabled. Below the test is expressed in terms of
62d021c344SAndy King 	 * free space in the queue: if free_space > ConsumeSize -
63d021c344SAndy King 	 * write_notify_window then notify An alternate way of expressing this
64d021c344SAndy King 	 * is to rewrite the expression to use the data ready in the receive
65d021c344SAndy King 	 * queue: if write_notify_window > bufferReady then notify as
66d021c344SAndy King 	 * free_space == ConsumeSize - bufferReady.
67d021c344SAndy King 	 */
68d021c344SAndy King 	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
69d021c344SAndy King 		notify_limit;
70d021c344SAndy King #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
71d021c344SAndy King 	if (retval) {
72d021c344SAndy King 		/*
73d021c344SAndy King 		 * Once we notify the peer, we reset the detected flag so the
74d021c344SAndy King 		 * next wait will again cause a decrease in the window size.
75d021c344SAndy King 		 */
76d021c344SAndy King 
77d021c344SAndy King 		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
78d021c344SAndy King 	}
79d021c344SAndy King #endif
80d021c344SAndy King 	return retval;
81d021c344SAndy King #else
82d021c344SAndy King 	return true;
83d021c344SAndy King #endif
84d021c344SAndy King }
85d021c344SAndy King 
vmci_transport_notify_waiting_read(struct vsock_sock * vsk)86d021c344SAndy King static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
87d021c344SAndy King {
88d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
89d021c344SAndy King 	if (!PKT_FIELD(vsk, peer_waiting_read))
90d021c344SAndy King 		return false;
91d021c344SAndy King 
92d021c344SAndy King 	/* For now we ignore the wait information and just see if there is any
93d021c344SAndy King 	 * data for our peer to read.  Note that improving this function to be
94d021c344SAndy King 	 * more intelligent will not require a protocol change and will retain
95d021c344SAndy King 	 * compatibility between endpoints with mixed versions of this
96d021c344SAndy King 	 * function.
97d021c344SAndy King 	 */
98d021c344SAndy King 	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
99d021c344SAndy King #else
100d021c344SAndy King 	return true;
101d021c344SAndy King #endif
102d021c344SAndy King }
103d021c344SAndy King 
104d021c344SAndy King static void
vmci_transport_handle_waiting_read(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)105d021c344SAndy King vmci_transport_handle_waiting_read(struct sock *sk,
106d021c344SAndy King 				   struct vmci_transport_packet *pkt,
107d021c344SAndy King 				   bool bottom_half,
108d021c344SAndy King 				   struct sockaddr_vm *dst,
109d021c344SAndy King 				   struct sockaddr_vm *src)
110d021c344SAndy King {
111d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
112d021c344SAndy King 	struct vsock_sock *vsk;
113d021c344SAndy King 
114d021c344SAndy King 	vsk = vsock_sk(sk);
115d021c344SAndy King 
116d021c344SAndy King 	PKT_FIELD(vsk, peer_waiting_read) = true;
117d021c344SAndy King 	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
118d021c344SAndy King 	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
119d021c344SAndy King 
120d021c344SAndy King 	if (vmci_transport_notify_waiting_read(vsk)) {
121d021c344SAndy King 		bool sent;
122d021c344SAndy King 
123d021c344SAndy King 		if (bottom_half)
124d021c344SAndy King 			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
125d021c344SAndy King 		else
126d021c344SAndy King 			sent = vmci_transport_send_wrote(sk) > 0;
127d021c344SAndy King 
128d021c344SAndy King 		if (sent)
129d021c344SAndy King 			PKT_FIELD(vsk, peer_waiting_read) = false;
130d021c344SAndy King 	}
131d021c344SAndy King #endif
132d021c344SAndy King }
133d021c344SAndy King 
134d021c344SAndy King static void
vmci_transport_handle_waiting_write(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)135d021c344SAndy King vmci_transport_handle_waiting_write(struct sock *sk,
136d021c344SAndy King 				    struct vmci_transport_packet *pkt,
137d021c344SAndy King 				    bool bottom_half,
138d021c344SAndy King 				    struct sockaddr_vm *dst,
139d021c344SAndy King 				    struct sockaddr_vm *src)
140d021c344SAndy King {
141d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
142d021c344SAndy King 	struct vsock_sock *vsk;
143d021c344SAndy King 
144d021c344SAndy King 	vsk = vsock_sk(sk);
145d021c344SAndy King 
146d021c344SAndy King 	PKT_FIELD(vsk, peer_waiting_write) = true;
147d021c344SAndy King 	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
148d021c344SAndy King 	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
149d021c344SAndy King 
150d021c344SAndy King 	if (vmci_transport_notify_waiting_write(vsk)) {
151d021c344SAndy King 		bool sent;
152d021c344SAndy King 
153d021c344SAndy King 		if (bottom_half)
154d021c344SAndy King 			sent = vmci_transport_send_read_bh(dst, src) > 0;
155d021c344SAndy King 		else
156d021c344SAndy King 			sent = vmci_transport_send_read(sk) > 0;
157d021c344SAndy King 
158d021c344SAndy King 		if (sent)
159d021c344SAndy King 			PKT_FIELD(vsk, peer_waiting_write) = false;
160d021c344SAndy King 	}
161d021c344SAndy King #endif
162d021c344SAndy King }
163d021c344SAndy King 
164d021c344SAndy King static void
vmci_transport_handle_read(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)165d021c344SAndy King vmci_transport_handle_read(struct sock *sk,
166d021c344SAndy King 			   struct vmci_transport_packet *pkt,
167d021c344SAndy King 			   bool bottom_half,
168d021c344SAndy King 			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
169d021c344SAndy King {
170d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
171d021c344SAndy King 	struct vsock_sock *vsk;
172d021c344SAndy King 
173d021c344SAndy King 	vsk = vsock_sk(sk);
174d021c344SAndy King 	PKT_FIELD(vsk, sent_waiting_write) = false;
175d021c344SAndy King #endif
176d021c344SAndy King 
177d021c344SAndy King 	sk->sk_write_space(sk);
178d021c344SAndy King }
179d021c344SAndy King 
send_waiting_read(struct sock * sk,u64 room_needed)180d021c344SAndy King static bool send_waiting_read(struct sock *sk, u64 room_needed)
181d021c344SAndy King {
182d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
183d021c344SAndy King 	struct vsock_sock *vsk;
184d021c344SAndy King 	struct vmci_transport_waiting_info waiting_info;
185d021c344SAndy King 	u64 tail;
186d021c344SAndy King 	u64 head;
187d021c344SAndy King 	u64 room_left;
188d021c344SAndy King 	bool ret;
189d021c344SAndy King 
190d021c344SAndy King 	vsk = vsock_sk(sk);
191d021c344SAndy King 
192d021c344SAndy King 	if (PKT_FIELD(vsk, sent_waiting_read))
193d021c344SAndy King 		return true;
194d021c344SAndy King 
195d021c344SAndy King 	if (PKT_FIELD(vsk, write_notify_window) <
196d021c344SAndy King 			vmci_trans(vsk)->consume_size)
197d021c344SAndy King 		PKT_FIELD(vsk, write_notify_window) =
198d021c344SAndy King 		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
199d021c344SAndy King 			vmci_trans(vsk)->consume_size);
200d021c344SAndy King 
201d021c344SAndy King 	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
202d021c344SAndy King 	room_left = vmci_trans(vsk)->consume_size - head;
203d021c344SAndy King 	if (room_needed >= room_left) {
204d021c344SAndy King 		waiting_info.offset = room_needed - room_left;
205d021c344SAndy King 		waiting_info.generation =
206d021c344SAndy King 		    PKT_FIELD(vsk, consume_q_generation) + 1;
207d021c344SAndy King 	} else {
208d021c344SAndy King 		waiting_info.offset = head + room_needed;
209d021c344SAndy King 		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
210d021c344SAndy King 	}
211d021c344SAndy King 
212d021c344SAndy King 	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
213d021c344SAndy King 	if (ret)
214d021c344SAndy King 		PKT_FIELD(vsk, sent_waiting_read) = true;
215d021c344SAndy King 
216d021c344SAndy King 	return ret;
217d021c344SAndy King #else
218d021c344SAndy King 	return true;
219d021c344SAndy King #endif
220d021c344SAndy King }
221d021c344SAndy King 
send_waiting_write(struct sock * sk,u64 room_needed)222d021c344SAndy King static bool send_waiting_write(struct sock *sk, u64 room_needed)
223d021c344SAndy King {
224d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
225d021c344SAndy King 	struct vsock_sock *vsk;
226d021c344SAndy King 	struct vmci_transport_waiting_info waiting_info;
227d021c344SAndy King 	u64 tail;
228d021c344SAndy King 	u64 head;
229d021c344SAndy King 	u64 room_left;
230d021c344SAndy King 	bool ret;
231d021c344SAndy King 
232d021c344SAndy King 	vsk = vsock_sk(sk);
233d021c344SAndy King 
234d021c344SAndy King 	if (PKT_FIELD(vsk, sent_waiting_write))
235d021c344SAndy King 		return true;
236d021c344SAndy King 
237d021c344SAndy King 	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
238d021c344SAndy King 	room_left = vmci_trans(vsk)->produce_size - tail;
239d021c344SAndy King 	if (room_needed + 1 >= room_left) {
240d021c344SAndy King 		/* Wraps around to current generation. */
241d021c344SAndy King 		waiting_info.offset = room_needed + 1 - room_left;
242d021c344SAndy King 		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
243d021c344SAndy King 	} else {
244d021c344SAndy King 		waiting_info.offset = tail + room_needed + 1;
245d021c344SAndy King 		waiting_info.generation =
246d021c344SAndy King 		    PKT_FIELD(vsk, produce_q_generation) - 1;
247d021c344SAndy King 	}
248d021c344SAndy King 
249d021c344SAndy King 	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
250d021c344SAndy King 	if (ret)
251d021c344SAndy King 		PKT_FIELD(vsk, sent_waiting_write) = true;
252d021c344SAndy King 
253d021c344SAndy King 	return ret;
254d021c344SAndy King #else
255d021c344SAndy King 	return true;
256d021c344SAndy King #endif
257d021c344SAndy King }
258d021c344SAndy King 
vmci_transport_send_read_notification(struct sock * sk)259d021c344SAndy King static int vmci_transport_send_read_notification(struct sock *sk)
260d021c344SAndy King {
261d021c344SAndy King 	struct vsock_sock *vsk;
262d021c344SAndy King 	bool sent_read;
263d021c344SAndy King 	unsigned int retries;
264d021c344SAndy King 	int err;
265d021c344SAndy King 
266d021c344SAndy King 	vsk = vsock_sk(sk);
267d021c344SAndy King 	sent_read = false;
268d021c344SAndy King 	retries = 0;
269d021c344SAndy King 	err = 0;
270d021c344SAndy King 
271d021c344SAndy King 	if (vmci_transport_notify_waiting_write(vsk)) {
272d021c344SAndy King 		/* Notify the peer that we have read, retrying the send on
273d021c344SAndy King 		 * failure up to our maximum value.  XXX For now we just log
274d021c344SAndy King 		 * the failure, but later we should schedule a work item to
275d021c344SAndy King 		 * handle the resend until it succeeds.  That would require
276d021c344SAndy King 		 * keeping track of work items in the vsk and cleaning them up
277d021c344SAndy King 		 * upon socket close.
278d021c344SAndy King 		 */
279d021c344SAndy King 		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
280d021c344SAndy King 		       !sent_read &&
281d021c344SAndy King 		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
282d021c344SAndy King 			err = vmci_transport_send_read(sk);
283d021c344SAndy King 			if (err >= 0)
284d021c344SAndy King 				sent_read = true;
285d021c344SAndy King 
286d021c344SAndy King 			retries++;
287d021c344SAndy King 		}
288d021c344SAndy King 
289d021c344SAndy King 		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
290d021c344SAndy King 			pr_err("%p unable to send read notify to peer\n", sk);
291d021c344SAndy King 		else
292d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
293d021c344SAndy King 			PKT_FIELD(vsk, peer_waiting_write) = false;
294d021c344SAndy King #endif
295d021c344SAndy King 
296d021c344SAndy King 	}
297d021c344SAndy King 	return err;
298d021c344SAndy King }
299d021c344SAndy King 
300d021c344SAndy King static void
vmci_transport_handle_wrote(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src)301d021c344SAndy King vmci_transport_handle_wrote(struct sock *sk,
302d021c344SAndy King 			    struct vmci_transport_packet *pkt,
303d021c344SAndy King 			    bool bottom_half,
304d021c344SAndy King 			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
305d021c344SAndy King {
306d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
307d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
308d021c344SAndy King 	PKT_FIELD(vsk, sent_waiting_read) = false;
309d021c344SAndy King #endif
310*e061aed9SArseniy Krasnov 	vsock_data_ready(sk);
311d021c344SAndy King }
312d021c344SAndy King 
vmci_transport_notify_pkt_socket_init(struct sock * sk)313d021c344SAndy King static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
314d021c344SAndy King {
315d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
316d021c344SAndy King 
317d021c344SAndy King 	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
318d021c344SAndy King 	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
319d021c344SAndy King 	PKT_FIELD(vsk, peer_waiting_read) = false;
320d021c344SAndy King 	PKT_FIELD(vsk, peer_waiting_write) = false;
321d021c344SAndy King 	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
322d021c344SAndy King 	PKT_FIELD(vsk, sent_waiting_read) = false;
323d021c344SAndy King 	PKT_FIELD(vsk, sent_waiting_write) = false;
324d021c344SAndy King 	PKT_FIELD(vsk, produce_q_generation) = 0;
325d021c344SAndy King 	PKT_FIELD(vsk, consume_q_generation) = 0;
326d021c344SAndy King 
327d021c344SAndy King 	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
328d021c344SAndy King 	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
329d021c344SAndy King 	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
330d021c344SAndy King 	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
331d021c344SAndy King }
332d021c344SAndy King 
/* Socket destruct hook for the packet-based notify scheme. */
static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
	/* Intentionally empty: this function releases nothing. */
}
336d021c344SAndy King 
337d021c344SAndy King static int
vmci_transport_notify_pkt_poll_in(struct sock * sk,size_t target,bool * data_ready_now)338d021c344SAndy King vmci_transport_notify_pkt_poll_in(struct sock *sk,
339d021c344SAndy King 				  size_t target, bool *data_ready_now)
340d021c344SAndy King {
341d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
342d021c344SAndy King 
343a274f6ffSArseniy Krasnov 	if (vsock_stream_has_data(vsk) >= target) {
344d021c344SAndy King 		*data_ready_now = true;
345d021c344SAndy King 	} else {
346a274f6ffSArseniy Krasnov 		/* We can't read right now because there is not enough data
347a274f6ffSArseniy Krasnov 		 * in the queue. Ask for notifications when there is something
348a274f6ffSArseniy Krasnov 		 * to read.
349d021c344SAndy King 		 */
3503b4477d2SStefan Hajnoczi 		if (sk->sk_state == TCP_ESTABLISHED) {
351d021c344SAndy King 			if (!send_waiting_read(sk, 1))
352d021c344SAndy King 				return -1;
353d021c344SAndy King 
354d021c344SAndy King 		}
355d021c344SAndy King 		*data_ready_now = false;
356d021c344SAndy King 	}
357d021c344SAndy King 
358d021c344SAndy King 	return 0;
359d021c344SAndy King }
360d021c344SAndy King 
361d021c344SAndy King static int
vmci_transport_notify_pkt_poll_out(struct sock * sk,size_t target,bool * space_avail_now)362d021c344SAndy King vmci_transport_notify_pkt_poll_out(struct sock *sk,
363d021c344SAndy King 				   size_t target, bool *space_avail_now)
364d021c344SAndy King {
365d021c344SAndy King 	s64 produce_q_free_space;
366d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
367d021c344SAndy King 
368d021c344SAndy King 	produce_q_free_space = vsock_stream_has_space(vsk);
369d021c344SAndy King 	if (produce_q_free_space > 0) {
370d021c344SAndy King 		*space_avail_now = true;
371d021c344SAndy King 		return 0;
372d021c344SAndy King 	} else if (produce_q_free_space == 0) {
373d021c344SAndy King 		/* This is a connected socket but we can't currently send data.
374d021c344SAndy King 		 * Notify the peer that we are waiting if the queue is full. We
375d021c344SAndy King 		 * only send a waiting write if the queue is full because
376d021c344SAndy King 		 * otherwise we end up in an infinite WAITING_WRITE, READ,
377d021c344SAndy King 		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
378d021c344SAndy King 		 * notification as a socket error, passing that back through
379d021c344SAndy King 		 * the mask.
380d021c344SAndy King 		 */
381d021c344SAndy King 		if (!send_waiting_write(sk, 1))
382d021c344SAndy King 			return -1;
383d021c344SAndy King 
384d021c344SAndy King 		*space_avail_now = false;
385d021c344SAndy King 	}
386d021c344SAndy King 
387d021c344SAndy King 	return 0;
388d021c344SAndy King }
389d021c344SAndy King 
390d021c344SAndy King static int
vmci_transport_notify_pkt_recv_init(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)391d021c344SAndy King vmci_transport_notify_pkt_recv_init(
392d021c344SAndy King 			struct sock *sk,
393d021c344SAndy King 			size_t target,
394d021c344SAndy King 			struct vmci_transport_recv_notify_data *data)
395d021c344SAndy King {
396d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
397d021c344SAndy King 
398d021c344SAndy King #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
399d021c344SAndy King 	data->consume_head = 0;
400d021c344SAndy King 	data->produce_tail = 0;
401d021c344SAndy King #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
402d021c344SAndy King 	data->notify_on_block = false;
403d021c344SAndy King 
404d021c344SAndy King 	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
405d021c344SAndy King 		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
406d021c344SAndy King 		if (PKT_FIELD(vsk, write_notify_window) <
407d021c344SAndy King 		    PKT_FIELD(vsk, write_notify_min_window)) {
408d021c344SAndy King 			/* If the current window is smaller than the new
409d021c344SAndy King 			 * minimal window size, we need to reevaluate whether
410d021c344SAndy King 			 * we need to notify the sender. If the number of ready
411d021c344SAndy King 			 * bytes are smaller than the new window, we need to
412d021c344SAndy King 			 * send a notification to the sender before we block.
413d021c344SAndy King 			 */
414d021c344SAndy King 
415d021c344SAndy King 			PKT_FIELD(vsk, write_notify_window) =
416d021c344SAndy King 			    PKT_FIELD(vsk, write_notify_min_window);
417d021c344SAndy King 			data->notify_on_block = true;
418d021c344SAndy King 		}
419d021c344SAndy King 	}
420d021c344SAndy King #endif
421d021c344SAndy King #endif
422d021c344SAndy King 
423d021c344SAndy King 	return 0;
424d021c344SAndy King }
425d021c344SAndy King 
426d021c344SAndy King static int
vmci_transport_notify_pkt_recv_pre_block(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)427d021c344SAndy King vmci_transport_notify_pkt_recv_pre_block(
428d021c344SAndy King 				struct sock *sk,
429d021c344SAndy King 				size_t target,
430d021c344SAndy King 				struct vmci_transport_recv_notify_data *data)
431d021c344SAndy King {
432d021c344SAndy King 	int err = 0;
433d021c344SAndy King 
434d021c344SAndy King 	/* Notify our peer that we are waiting for data to read. */
435d021c344SAndy King 	if (!send_waiting_read(sk, target)) {
436d021c344SAndy King 		err = -EHOSTUNREACH;
437d021c344SAndy King 		return err;
438d021c344SAndy King 	}
439d021c344SAndy King #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
440d021c344SAndy King 	if (data->notify_on_block) {
441d021c344SAndy King 		err = vmci_transport_send_read_notification(sk);
442d021c344SAndy King 		if (err < 0)
443d021c344SAndy King 			return err;
444d021c344SAndy King 
445d021c344SAndy King 		data->notify_on_block = false;
446d021c344SAndy King 	}
447d021c344SAndy King #endif
448d021c344SAndy King 
449d021c344SAndy King 	return err;
450d021c344SAndy King }
451d021c344SAndy King 
452d021c344SAndy King static int
vmci_transport_notify_pkt_recv_pre_dequeue(struct sock * sk,size_t target,struct vmci_transport_recv_notify_data * data)453d021c344SAndy King vmci_transport_notify_pkt_recv_pre_dequeue(
454d021c344SAndy King 				struct sock *sk,
455d021c344SAndy King 				size_t target,
456d021c344SAndy King 				struct vmci_transport_recv_notify_data *data)
457d021c344SAndy King {
458d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
459d021c344SAndy King 
460d021c344SAndy King 	/* Now consume up to len bytes from the queue.  Note that since we have
461d021c344SAndy King 	 * the socket locked we should copy at least ready bytes.
462d021c344SAndy King 	 */
463d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
464d021c344SAndy King 	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
465d021c344SAndy King 				       &data->produce_tail,
466d021c344SAndy King 				       &data->consume_head);
467d021c344SAndy King #endif
468d021c344SAndy King 
469d021c344SAndy King 	return 0;
470d021c344SAndy King }
471d021c344SAndy King 
472d021c344SAndy King static int
vmci_transport_notify_pkt_recv_post_dequeue(struct sock * sk,size_t target,ssize_t copied,bool data_read,struct vmci_transport_recv_notify_data * data)473d021c344SAndy King vmci_transport_notify_pkt_recv_post_dequeue(
474d021c344SAndy King 				struct sock *sk,
475d021c344SAndy King 				size_t target,
476d021c344SAndy King 				ssize_t copied,
477d021c344SAndy King 				bool data_read,
478d021c344SAndy King 				struct vmci_transport_recv_notify_data *data)
479d021c344SAndy King {
480d021c344SAndy King 	struct vsock_sock *vsk;
481d021c344SAndy King 	int err;
482d021c344SAndy King 
483d021c344SAndy King 	vsk = vsock_sk(sk);
484d021c344SAndy King 	err = 0;
485d021c344SAndy King 
486d021c344SAndy King 	if (data_read) {
487d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
488d021c344SAndy King 		/* Detect a wrap-around to maintain queue generation.  Note
489d021c344SAndy King 		 * that this is safe since we hold the socket lock across the
490d021c344SAndy King 		 * two queue pair operations.
491d021c344SAndy King 		 */
492d021c344SAndy King 		if (copied >=
493d021c344SAndy King 			vmci_trans(vsk)->consume_size - data->consume_head)
494d021c344SAndy King 			PKT_FIELD(vsk, consume_q_generation)++;
495d021c344SAndy King #endif
496d021c344SAndy King 
497d021c344SAndy King 		err = vmci_transport_send_read_notification(sk);
498d021c344SAndy King 		if (err < 0)
499d021c344SAndy King 			return err;
500d021c344SAndy King 
501d021c344SAndy King 	}
502d021c344SAndy King 	return err;
503d021c344SAndy King }
504d021c344SAndy King 
/* Prepare @data for a send operation by resetting its index snapshot (only
 * when waiting notifications are compiled in).  @sk is not used.
 *
 * Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->produce_tail = 0;
	data->consume_head = 0;
#endif

	return 0;
}
517d021c344SAndy King 
518d021c344SAndy King static int
vmci_transport_notify_pkt_send_pre_block(struct sock * sk,struct vmci_transport_send_notify_data * data)519d021c344SAndy King vmci_transport_notify_pkt_send_pre_block(
520d021c344SAndy King 				struct sock *sk,
521d021c344SAndy King 				struct vmci_transport_send_notify_data *data)
522d021c344SAndy King {
523d021c344SAndy King 	/* Notify our peer that we are waiting for room to write. */
524d021c344SAndy King 	if (!send_waiting_write(sk, 1))
525d021c344SAndy King 		return -EHOSTUNREACH;
526d021c344SAndy King 
527d021c344SAndy King 	return 0;
528d021c344SAndy King }
529d021c344SAndy King 
/* Called before data is enqueued for a send.
 *
 * When waiting notifications are compiled in, snapshots the produce queue
 * indexes into data->produce_tail and data->consume_head so that the
 * post-enqueue step can detect a wrap-around.
 *
 * Always returns 0.
 */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_produce_indexes(vmci_trans(vsock_sk(sk))->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}
545d021c344SAndy King 
546d021c344SAndy King static int
vmci_transport_notify_pkt_send_post_enqueue(struct sock * sk,ssize_t written,struct vmci_transport_send_notify_data * data)547d021c344SAndy King vmci_transport_notify_pkt_send_post_enqueue(
548d021c344SAndy King 				struct sock *sk,
549d021c344SAndy King 				ssize_t written,
550d021c344SAndy King 				struct vmci_transport_send_notify_data *data)
551d021c344SAndy King {
552d021c344SAndy King 	int err = 0;
553d021c344SAndy King 	struct vsock_sock *vsk;
554d021c344SAndy King 	bool sent_wrote = false;
555d021c344SAndy King 	int retries = 0;
556d021c344SAndy King 
557d021c344SAndy King 	vsk = vsock_sk(sk);
558d021c344SAndy King 
559d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
560d021c344SAndy King 	/* Detect a wrap-around to maintain queue generation.  Note that this
561d021c344SAndy King 	 * is safe since we hold the socket lock across the two queue pair
562d021c344SAndy King 	 * operations.
563d021c344SAndy King 	 */
564d021c344SAndy King 	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
565d021c344SAndy King 		PKT_FIELD(vsk, produce_q_generation)++;
566d021c344SAndy King 
567d021c344SAndy King #endif
568d021c344SAndy King 
569d021c344SAndy King 	if (vmci_transport_notify_waiting_read(vsk)) {
570d021c344SAndy King 		/* Notify the peer that we have written, retrying the send on
571d021c344SAndy King 		 * failure up to our maximum value. See the XXX comment for the
572d021c344SAndy King 		 * corresponding piece of code in StreamRecvmsg() for potential
573d021c344SAndy King 		 * improvements.
574d021c344SAndy King 		 */
575d021c344SAndy King 		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
576d021c344SAndy King 		       !sent_wrote &&
577d021c344SAndy King 		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
578d021c344SAndy King 			err = vmci_transport_send_wrote(sk);
579d021c344SAndy King 			if (err >= 0)
580d021c344SAndy King 				sent_wrote = true;
581d021c344SAndy King 
582d021c344SAndy King 			retries++;
583d021c344SAndy King 		}
584d021c344SAndy King 
585d021c344SAndy King 		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
586d021c344SAndy King 			pr_err("%p unable to send wrote notify to peer\n", sk);
587d021c344SAndy King 			return err;
588d021c344SAndy King 		} else {
589d021c344SAndy King #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
590d021c344SAndy King 			PKT_FIELD(vsk, peer_waiting_read) = false;
591d021c344SAndy King #endif
592d021c344SAndy King 		}
593d021c344SAndy King 	}
594d021c344SAndy King 	return err;
595d021c344SAndy King }
596d021c344SAndy King 
597d021c344SAndy King static void
vmci_transport_notify_pkt_handle_pkt(struct sock * sk,struct vmci_transport_packet * pkt,bool bottom_half,struct sockaddr_vm * dst,struct sockaddr_vm * src,bool * pkt_processed)598d021c344SAndy King vmci_transport_notify_pkt_handle_pkt(
599d021c344SAndy King 			struct sock *sk,
600d021c344SAndy King 			struct vmci_transport_packet *pkt,
601d021c344SAndy King 			bool bottom_half,
602d021c344SAndy King 			struct sockaddr_vm *dst,
603d021c344SAndy King 			struct sockaddr_vm *src, bool *pkt_processed)
604d021c344SAndy King {
605d021c344SAndy King 	bool processed = false;
606d021c344SAndy King 
607d021c344SAndy King 	switch (pkt->type) {
608d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
609d021c344SAndy King 		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
610d021c344SAndy King 		processed = true;
611d021c344SAndy King 		break;
612d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_READ:
613d021c344SAndy King 		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
614d021c344SAndy King 		processed = true;
615d021c344SAndy King 		break;
616d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
617d021c344SAndy King 		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
618d021c344SAndy King 						    dst, src);
619d021c344SAndy King 		processed = true;
620d021c344SAndy King 		break;
621d021c344SAndy King 
622d021c344SAndy King 	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
623d021c344SAndy King 		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
624d021c344SAndy King 						   dst, src);
625d021c344SAndy King 		processed = true;
626d021c344SAndy King 		break;
627d021c344SAndy King 	}
628d021c344SAndy King 
629d021c344SAndy King 	if (pkt_processed)
630d021c344SAndy King 		*pkt_processed = processed;
631d021c344SAndy King }
632d021c344SAndy King 
vmci_transport_notify_pkt_process_request(struct sock * sk)633d021c344SAndy King static void vmci_transport_notify_pkt_process_request(struct sock *sk)
634d021c344SAndy King {
635d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
636d021c344SAndy King 
637d021c344SAndy King 	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
638d021c344SAndy King 	if (vmci_trans(vsk)->consume_size <
639d021c344SAndy King 		PKT_FIELD(vsk, write_notify_min_window))
640d021c344SAndy King 		PKT_FIELD(vsk, write_notify_min_window) =
641d021c344SAndy King 			vmci_trans(vsk)->consume_size;
642d021c344SAndy King }
643d021c344SAndy King 
vmci_transport_notify_pkt_process_negotiate(struct sock * sk)644d021c344SAndy King static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
645d021c344SAndy King {
646d021c344SAndy King 	struct vsock_sock *vsk = vsock_sk(sk);
647d021c344SAndy King 
648d021c344SAndy King 	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
649d021c344SAndy King 	if (vmci_trans(vsk)->consume_size <
650d021c344SAndy King 		PKT_FIELD(vsk, write_notify_min_window))
651d021c344SAndy King 		PKT_FIELD(vsk, write_notify_min_window) =
652d021c344SAndy King 			vmci_trans(vsk)->consume_size;
653d021c344SAndy King }
654d021c344SAndy King 
655d021c344SAndy King /* Socket control packet based operations. */
6563b22dae3SJulia Lawall const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
65799a5e178SKees Cook 	.socket_init = vmci_transport_notify_pkt_socket_init,
65899a5e178SKees Cook 	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
65999a5e178SKees Cook 	.poll_in = vmci_transport_notify_pkt_poll_in,
66099a5e178SKees Cook 	.poll_out = vmci_transport_notify_pkt_poll_out,
66199a5e178SKees Cook 	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
66299a5e178SKees Cook 	.recv_init = vmci_transport_notify_pkt_recv_init,
66399a5e178SKees Cook 	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
66499a5e178SKees Cook 	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
66599a5e178SKees Cook 	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
66699a5e178SKees Cook 	.send_init = vmci_transport_notify_pkt_send_init,
66799a5e178SKees Cook 	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
66899a5e178SKees Cook 	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
66999a5e178SKees Cook 	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
67099a5e178SKees Cook 	.process_request = vmci_transport_notify_pkt_process_request,
67199a5e178SKees Cook 	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
672d021c344SAndy King };
673