xref: /openbmc/linux/net/rxrpc/output.c (revision 680ef72a)
1 /* RxRPC packet transmission
2  *
3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 
14 #include <linux/net.h>
15 #include <linux/gfp.h>
16 #include <linux/skbuff.h>
17 #include <linux/export.h>
18 #include <net/sock.h>
19 #include <net/af_rxrpc.h>
20 #include "ar-internal.h"
21 
/* Assembly buffer for an outgoing ACK packet: wire header, ACK header, up to
 * 255 soft-ACK/NACK bytes, then the trailing ACK info.  The ackinfo trailer
 * is transmitted as a separate iov so that it can directly follow a variable
 * number of ack bytes (see rxrpc_send_ack_packet()).
 */
struct rxrpc_ack_buffer {
	struct rxrpc_wire_header whdr;
	struct rxrpc_ackpacket ack;
	u8 acks[255];	/* one RXRPC_ACK_TYPE_* byte per packet in the Rx window */
	u8 pad[3];	/* room for the 3 zero bytes rxrpc_fill_out_ack() appends */
	struct rxrpc_ackinfo ackinfo;
};
29 
/* Assembly buffer for an outgoing ABORT packet: wire header plus the
 * network-order abort code (see rxrpc_send_abort_packet()).
 */
struct rxrpc_abort_buffer {
	struct rxrpc_wire_header whdr;
	__be32 abort_code;
};
34 
/*
 * Fill out an ACK packet.
 *
 * Snapshot the call's receive window, fill in the ACK header, build the
 * soft-ACK/NACK table in pkt->acks and fill in the ackinfo trailer.  The
 * hard-ack point and rx_top values used are passed back through *_hard_ack
 * and *_top for the caller's post-transmission bookkeeping.
 *
 * Returns the number of bytes used in pkt->acks, including the three
 * trailing padding bytes.
 *
 * Called under call->lock by rxrpc_send_ack_packet().
 */
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
				 struct rxrpc_call *call,
				 struct rxrpc_ack_buffer *pkt,
				 rxrpc_seq_t *_hard_ack,
				 rxrpc_seq_t *_top,
				 u8 reason)
{
	rxrpc_serial_t serial;
	rxrpc_seq_t hard_ack, top, seq;
	int ix;
	u32 mtu, jmax;
	u8 *ackp = pkt->acks;

	/* Barrier against rxrpc_input_data(). */
	serial = call->ackr_serial;
	hard_ack = READ_ONCE(call->rx_hard_ack);
	top = smp_load_acquire(&call->rx_top);
	*_hard_ack = hard_ack;
	*_top = top;

	pkt->ack.bufferSpace	= htons(8);
	pkt->ack.maxSkew	= htons(call->ackr_skew);
	pkt->ack.firstPacket	= htonl(hard_ack + 1);
	pkt->ack.previousPacket	= htonl(call->ackr_prev_seq);
	pkt->ack.serial		= htonl(serial);
	pkt->ack.reason		= reason;
	pkt->ack.nAcks		= top - hard_ack;

	/* A PING ACK solicits an immediate reply from the peer. */
	if (reason == RXRPC_ACK_PING)
		pkt->whdr.flags |= RXRPC_REQUEST_ACK;

	/* Emit one soft-ACK/NACK byte per sequence number between the
	 * hard-ack point and the top of the Rx window, according to whether
	 * the corresponding Rx ring slot currently holds a packet.
	 */
	if (after(top, hard_ack)) {
		seq = hard_ack + 1;
		do {
			ix = seq & RXRPC_RXTX_BUFF_MASK;
			if (call->rxtx_buffer[ix])
				*ackp++ = RXRPC_ACK_TYPE_ACK;
			else
				*ackp++ = RXRPC_ACK_TYPE_NACK;
			seq++;
		} while (before_eq(seq, top));
	}

	/* Advertise our receive parameters in the ackinfo trailer. */
	mtu = conn->params.peer->if_mtu;
	mtu -= conn->params.peer->hdrsize;
	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
	pkt->ackinfo.maxMTU	= htonl(mtu);
	pkt->ackinfo.rwind	= htonl(call->rx_winsize);
	pkt->ackinfo.jumbo_max	= htonl(jmax);

	/* Three bytes of zero padding after the ack table; these are counted
	 * in the returned length.
	 */
	*ackp++ = 0;
	*ackp++ = 0;
	*ackp++ = 0;
	return top - hard_ack + 3;
}
94 
/*
 * Send an ACK call packet.
 * @call: The call to send the ACK on
 * @ping: True to send a PING ACK (requesting an immediate response)
 *
 * Builds an ACK packet reflecting the call's current receive state and
 * transmits it.  A non-ping ACK consumes the pending call->ackr_reason; if
 * no reason is pending, nothing is sent and 0 is returned.
 *
 * Returns 0 on success or if there was nothing to send, -ECONNRESET if the
 * connection is gone, -ENOMEM, or an error from kernel_sendmsg().
 */
int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
{
	struct rxrpc_connection *conn = NULL;
	struct rxrpc_ack_buffer *pkt;
	struct msghdr msg;
	struct kvec iov[2];
	rxrpc_serial_t serial;
	rxrpc_seq_t hard_ack, top;
	size_t len, n;
	int ret;
	u8 reason;

	/* Pin the connection so it can't vanish under us. */
	spin_lock_bh(&call->lock);
	if (call->conn)
		conn = rxrpc_get_connection_maybe(call->conn);
	spin_unlock_bh(&call->lock);
	if (!conn)
		return -ECONNRESET;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt) {
		rxrpc_put_connection(conn);
		return -ENOMEM;
	}

	msg.msg_name	= &call->peer->srx.transport;
	msg.msg_namelen	= call->peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	/* Fill in the wire header before taking the lock;
	 * rxrpc_fill_out_ack() may OR RXRPC_REQUEST_ACK into whdr.flags.
	 */
	pkt->whdr.epoch		= htonl(conn->proto.epoch);
	pkt->whdr.cid		= htonl(call->cid);
	pkt->whdr.callNumber	= htonl(call->call_id);
	pkt->whdr.seq		= 0;
	pkt->whdr.type		= RXRPC_PACKET_TYPE_ACK;
	pkt->whdr.flags		= RXRPC_SLOW_START_OK | conn->out_clientflag;
	pkt->whdr.userStatus	= 0;
	pkt->whdr.securityIndex	= call->security_ix;
	pkt->whdr._rsvd		= 0;
	pkt->whdr.serviceId	= htons(call->service_id);

	/* Pick the ACK reason and build the packet under the call lock; a
	 * non-ping ACK consumes the pending ackr_reason.
	 */
	spin_lock_bh(&call->lock);
	if (ping) {
		reason = RXRPC_ACK_PING;
	} else {
		reason = call->ackr_reason;
		if (!call->ackr_reason) {
			spin_unlock_bh(&call->lock);
			ret = 0;
			goto out;
		}
		call->ackr_reason = 0;
	}
	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);

	spin_unlock_bh(&call->lock);

	/* iov[0] covers the header, ACK header and soft-ACK table; iov[1]
	 * appends the ackinfo trailer after the variable-length ack bytes.
	 */
	iov[0].iov_base	= pkt;
	iov[0].iov_len	= sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
	iov[1].iov_base = &pkt->ackinfo;
	iov[1].iov_len	= sizeof(pkt->ackinfo);
	len = iov[0].iov_len + iov[1].iov_len;

	serial = atomic_inc_return(&conn->serial);
	pkt->whdr.serial = htonl(serial);
	trace_rxrpc_tx_ack(call, serial,
			   ntohl(pkt->ack.firstPacket),
			   ntohl(pkt->ack.serial),
			   pkt->ack.reason, pkt->ack.nAcks);

	if (ping) {
		call->ping_serial = serial;
		smp_wmb();
		/* We need to stick a time in before we send the packet in case
		 * the reply gets back before kernel_sendmsg() completes - but
		 * asking UDP to send the packet can take a relatively long
		 * time, so we update the time after, on the assumption that
		 * the packet transmission is more likely to happen towards the
		 * end of the kernel_sendmsg() call.
		 */
		call->ping_time = ktime_get_real();
		set_bit(RXRPC_CALL_PINGING, &call->flags);
		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
	}

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
	if (ping)
		call->ping_time = ktime_get_real();

	if (call->state < RXRPC_CALL_COMPLETE) {
		if (ret < 0) {
			/* Transmission failed: undo the ping state and ask for
			 * the ACK to be reproposed so it gets retried.
			 */
			if (ping)
				clear_bit(RXRPC_CALL_PINGING, &call->flags);
			rxrpc_propose_ACK(call, pkt->ack.reason,
					  ntohs(pkt->ack.maxSkew),
					  ntohl(pkt->ack.serial),
					  true, true,
					  rxrpc_propose_ack_retry_tx);
		} else {
			/* Record how much of the window the peer has now been
			 * told about.
			 */
			spin_lock_bh(&call->lock);
			if (after(hard_ack, call->ackr_consumed))
				call->ackr_consumed = hard_ack;
			if (after(top, call->ackr_seen))
				call->ackr_seen = top;
			spin_unlock_bh(&call->lock);
		}
	}

out:
	rxrpc_put_connection(conn);
	kfree(pkt);
	return ret;
}
212 
213 /*
214  * Send an ABORT call packet.
215  */
216 int rxrpc_send_abort_packet(struct rxrpc_call *call)
217 {
218 	struct rxrpc_connection *conn = NULL;
219 	struct rxrpc_abort_buffer pkt;
220 	struct msghdr msg;
221 	struct kvec iov[1];
222 	rxrpc_serial_t serial;
223 	int ret;
224 
225 	/* Don't bother sending aborts for a client call once the server has
226 	 * hard-ACK'd all of its request data.  After that point, we're not
227 	 * going to stop the operation proceeding, and whilst we might limit
228 	 * the reply, it's not worth it if we can send a new call on the same
229 	 * channel instead, thereby closing off this call.
230 	 */
231 	if (rxrpc_is_client_call(call) &&
232 	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
233 		return 0;
234 
235 	spin_lock_bh(&call->lock);
236 	if (call->conn)
237 		conn = rxrpc_get_connection_maybe(call->conn);
238 	spin_unlock_bh(&call->lock);
239 	if (!conn)
240 		return -ECONNRESET;
241 
242 	msg.msg_name	= &call->peer->srx.transport;
243 	msg.msg_namelen	= call->peer->srx.transport_len;
244 	msg.msg_control	= NULL;
245 	msg.msg_controllen = 0;
246 	msg.msg_flags	= 0;
247 
248 	pkt.whdr.epoch		= htonl(conn->proto.epoch);
249 	pkt.whdr.cid		= htonl(call->cid);
250 	pkt.whdr.callNumber	= htonl(call->call_id);
251 	pkt.whdr.seq		= 0;
252 	pkt.whdr.type		= RXRPC_PACKET_TYPE_ABORT;
253 	pkt.whdr.flags		= conn->out_clientflag;
254 	pkt.whdr.userStatus	= 0;
255 	pkt.whdr.securityIndex	= call->security_ix;
256 	pkt.whdr._rsvd		= 0;
257 	pkt.whdr.serviceId	= htons(call->service_id);
258 	pkt.abort_code		= htonl(call->abort_code);
259 
260 	iov[0].iov_base	= &pkt;
261 	iov[0].iov_len	= sizeof(pkt);
262 
263 	serial = atomic_inc_return(&conn->serial);
264 	pkt.whdr.serial = htonl(serial);
265 
266 	ret = kernel_sendmsg(conn->params.local->socket,
267 			     &msg, iov, 1, sizeof(pkt));
268 
269 	rxrpc_put_connection(conn);
270 	return ret;
271 }
272 
273 /*
274  * send a packet through the transport endpoint
275  */
276 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
277 			   bool retrans)
278 {
279 	struct rxrpc_connection *conn = call->conn;
280 	struct rxrpc_wire_header whdr;
281 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
282 	struct msghdr msg;
283 	struct kvec iov[2];
284 	rxrpc_serial_t serial;
285 	size_t len;
286 	bool lost = false;
287 	int ret, opt;
288 
289 	_enter(",{%d}", skb->len);
290 
291 	/* Each transmission of a Tx packet needs a new serial number */
292 	serial = atomic_inc_return(&conn->serial);
293 
294 	whdr.epoch	= htonl(conn->proto.epoch);
295 	whdr.cid	= htonl(call->cid);
296 	whdr.callNumber	= htonl(call->call_id);
297 	whdr.seq	= htonl(sp->hdr.seq);
298 	whdr.serial	= htonl(serial);
299 	whdr.type	= RXRPC_PACKET_TYPE_DATA;
300 	whdr.flags	= sp->hdr.flags;
301 	whdr.userStatus	= 0;
302 	whdr.securityIndex = call->security_ix;
303 	whdr._rsvd	= htons(sp->hdr._rsvd);
304 	whdr.serviceId	= htons(call->service_id);
305 
306 	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
307 	    sp->hdr.seq == 1)
308 		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;
309 
310 	iov[0].iov_base = &whdr;
311 	iov[0].iov_len = sizeof(whdr);
312 	iov[1].iov_base = skb->head;
313 	iov[1].iov_len = skb->len;
314 	len = iov[0].iov_len + iov[1].iov_len;
315 
316 	msg.msg_name = &call->peer->srx.transport;
317 	msg.msg_namelen = call->peer->srx.transport_len;
318 	msg.msg_control = NULL;
319 	msg.msg_controllen = 0;
320 	msg.msg_flags = 0;
321 
322 	/* If our RTT cache needs working on, request an ACK.  Also request
323 	 * ACKs if a DATA packet appears to have been lost.
324 	 */
325 	if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
326 	    (retrans ||
327 	     call->cong_mode == RXRPC_CALL_SLOW_START ||
328 	     (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
329 	     ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
330 			  ktime_get_real())))
331 		whdr.flags |= RXRPC_REQUEST_ACK;
332 
333 	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
334 		static int lose;
335 		if ((lose++ & 7) == 7) {
336 			ret = 0;
337 			lost = true;
338 			goto done;
339 		}
340 	}
341 
342 	_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
343 
344 	/* send the packet with the don't fragment bit set if we currently
345 	 * think it's small enough */
346 	if (iov[1].iov_len >= call->peer->maxdata)
347 		goto send_fragmentable;
348 
349 	down_read(&conn->params.local->defrag_sem);
350 	/* send the packet by UDP
351 	 * - returns -EMSGSIZE if UDP would have to fragment the packet
352 	 *   to go out of the interface
353 	 *   - in which case, we'll have processed the ICMP error
354 	 *     message and update the peer record
355 	 */
356 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
357 
358 	up_read(&conn->params.local->defrag_sem);
359 	if (ret == -EMSGSIZE)
360 		goto send_fragmentable;
361 
362 done:
363 	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
364 			    retrans, lost);
365 	if (ret >= 0) {
366 		ktime_t now = ktime_get_real();
367 		skb->tstamp = now;
368 		smp_wmb();
369 		sp->hdr.serial = serial;
370 		if (whdr.flags & RXRPC_REQUEST_ACK) {
371 			call->peer->rtt_last_req = now;
372 			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
373 		}
374 	}
375 	_leave(" = %d [%u]", ret, call->peer->maxdata);
376 	return ret;
377 
378 send_fragmentable:
379 	/* attempt to send this message with fragmentation enabled */
380 	_debug("send fragment");
381 
382 	down_write(&conn->params.local->defrag_sem);
383 
384 	switch (conn->params.local->srx.transport.family) {
385 	case AF_INET:
386 		opt = IP_PMTUDISC_DONT;
387 		ret = kernel_setsockopt(conn->params.local->socket,
388 					SOL_IP, IP_MTU_DISCOVER,
389 					(char *)&opt, sizeof(opt));
390 		if (ret == 0) {
391 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
392 					     iov, 2, len);
393 
394 			opt = IP_PMTUDISC_DO;
395 			kernel_setsockopt(conn->params.local->socket, SOL_IP,
396 					  IP_MTU_DISCOVER,
397 					  (char *)&opt, sizeof(opt));
398 		}
399 		break;
400 
401 #ifdef CONFIG_AF_RXRPC_IPV6
402 	case AF_INET6:
403 		opt = IPV6_PMTUDISC_DONT;
404 		ret = kernel_setsockopt(conn->params.local->socket,
405 					SOL_IPV6, IPV6_MTU_DISCOVER,
406 					(char *)&opt, sizeof(opt));
407 		if (ret == 0) {
408 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
409 					     iov, 1, iov[0].iov_len);
410 
411 			opt = IPV6_PMTUDISC_DO;
412 			kernel_setsockopt(conn->params.local->socket,
413 					  SOL_IPV6, IPV6_MTU_DISCOVER,
414 					  (char *)&opt, sizeof(opt));
415 		}
416 		break;
417 #endif
418 	}
419 
420 	up_write(&conn->params.local->defrag_sem);
421 	goto done;
422 }
423 
424 /*
425  * reject packets through the local endpoint
426  */
427 void rxrpc_reject_packets(struct rxrpc_local *local)
428 {
429 	struct sockaddr_rxrpc srx;
430 	struct rxrpc_skb_priv *sp;
431 	struct rxrpc_wire_header whdr;
432 	struct sk_buff *skb;
433 	struct msghdr msg;
434 	struct kvec iov[2];
435 	size_t size;
436 	__be32 code;
437 
438 	_enter("%d", local->debug_id);
439 
440 	iov[0].iov_base = &whdr;
441 	iov[0].iov_len = sizeof(whdr);
442 	iov[1].iov_base = &code;
443 	iov[1].iov_len = sizeof(code);
444 	size = sizeof(whdr) + sizeof(code);
445 
446 	msg.msg_name = &srx.transport;
447 	msg.msg_control = NULL;
448 	msg.msg_controllen = 0;
449 	msg.msg_flags = 0;
450 
451 	memset(&whdr, 0, sizeof(whdr));
452 	whdr.type = RXRPC_PACKET_TYPE_ABORT;
453 
454 	while ((skb = skb_dequeue(&local->reject_queue))) {
455 		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
456 		sp = rxrpc_skb(skb);
457 
458 		if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
459 			msg.msg_namelen = srx.transport_len;
460 
461 			code = htonl(skb->priority);
462 
463 			whdr.epoch	= htonl(sp->hdr.epoch);
464 			whdr.cid	= htonl(sp->hdr.cid);
465 			whdr.callNumber	= htonl(sp->hdr.callNumber);
466 			whdr.serviceId	= htons(sp->hdr.serviceId);
467 			whdr.flags	= sp->hdr.flags;
468 			whdr.flags	^= RXRPC_CLIENT_INITIATED;
469 			whdr.flags	&= RXRPC_CLIENT_INITIATED;
470 
471 			kernel_sendmsg(local->socket, &msg, iov, 2, size);
472 		}
473 
474 		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
475 	}
476 
477 	_leave("");
478 }
479