xref: /openbmc/linux/drivers/net/ethernet/sfc/tx_tso.c (revision e7bae9bb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2005-2006 Fen Systems Ltd.
5  * Copyright 2005-2015 Solarflare Communications Inc.
6  */
7 
8 #include <linux/pci.h>
9 #include <linux/tcp.h>
10 #include <linux/ip.h>
11 #include <linux/in.h>
12 #include <linux/ipv6.h>
13 #include <linux/slab.h>
14 #include <net/ipv6.h>
15 #include <linux/if_ether.h>
16 #include <linux/highmem.h>
17 #include <linux/moduleparam.h>
18 #include <linux/cache.h>
19 #include "net_driver.h"
20 #include "efx.h"
21 #include "io.h"
22 #include "nic.h"
23 #include "tx.h"
24 #include "workarounds.h"
25 #include "ef10_regs.h"
26 
27 /* Efx legacy TCP segmentation acceleration.
28  *
29  * Utilises firmware support to go faster than GSO (but not as fast as TSOv2).
30  *
31  * Requires TX checksum offload support.
32  */
33 
/* Distance in bytes from @p2 up to @p1, whatever the pointer types are. */
#define PTR_DIFF(p1, p2)	((const u8 *)(p1) - (const u8 *)(p2))
35 
36 /**
37  * struct tso_state - TSO state for an SKB
38  * @out_len: Remaining length in current segment
39  * @seqnum: Current sequence number
40  * @ipv4_id: Current IPv4 ID, host endian
41  * @packet_space: Remaining space in current packet
42  * @dma_addr: DMA address of current position
43  * @in_len: Remaining length in current SKB fragment
44  * @unmap_len: Length of SKB fragment
45  * @unmap_addr: DMA address of SKB fragment
46  * @protocol: Network protocol (after any VLAN header)
47  * @ip_off: Offset of IP header
48  * @tcp_off: Offset of TCP header
49  * @header_len: Number of bytes of header
50  * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
51  * @header_dma_addr: Header DMA address
52  * @header_unmap_len: Header DMA mapped length
53  *
54  * The state used during segmentation.  It is put into this data structure
55  * just to make it easy to pass into inline functions.
56  */
57 struct tso_state {
58 	/* Output position */
59 	unsigned int out_len;
60 	unsigned int seqnum;
61 	u16 ipv4_id;
62 	unsigned int packet_space;
63 
64 	/* Input position */
65 	dma_addr_t dma_addr;
66 	unsigned int in_len;
67 	unsigned int unmap_len;
68 	dma_addr_t unmap_addr;
69 
70 	__be16 protocol;
71 	unsigned int ip_off;
72 	unsigned int tcp_off;
73 	unsigned int header_len;
74 	unsigned int ip_base_len;
75 	dma_addr_t header_dma_addr;
76 	unsigned int header_unmap_len;
77 };
78 
79 static inline void prefetch_ptr(struct efx_tx_queue *tx_queue)
80 {
81 	unsigned int insert_ptr = efx_tx_queue_get_insert_index(tx_queue);
82 	char *ptr;
83 
84 	ptr = (char *) (tx_queue->buffer + insert_ptr);
85 	prefetch(ptr);
86 	prefetch(ptr + 0x80);
87 
88 	ptr = (char *) (((efx_qword_t *)tx_queue->txd.buf.addr) + insert_ptr);
89 	prefetch(ptr);
90 	prefetch(ptr + 0x80);
91 }
92 
93 /**
94  * efx_tx_queue_insert - push descriptors onto the TX queue
95  * @tx_queue:		Efx TX queue
96  * @dma_addr:		DMA address of fragment
97  * @len:		Length of fragment
98  * @final_buffer:	The final buffer inserted into the queue
99  *
100  * Push descriptors onto the TX queue.
101  */
102 static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
103 				dma_addr_t dma_addr, unsigned int len,
104 				struct efx_tx_buffer **final_buffer)
105 {
106 	struct efx_tx_buffer *buffer;
107 	unsigned int dma_len;
108 
109 	EFX_WARN_ON_ONCE_PARANOID(len <= 0);
110 
111 	while (1) {
112 		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
113 		++tx_queue->insert_count;
114 
115 		EFX_WARN_ON_ONCE_PARANOID(tx_queue->insert_count -
116 					  tx_queue->read_count >=
117 					  tx_queue->efx->txq_entries);
118 
119 		buffer->dma_addr = dma_addr;
120 
121 		dma_len = tx_queue->efx->type->tx_limit_len(tx_queue,
122 				dma_addr, len);
123 
124 		/* If there's space for everything this is our last buffer. */
125 		if (dma_len >= len)
126 			break;
127 
128 		buffer->len = dma_len;
129 		buffer->flags = EFX_TX_BUF_CONT;
130 		dma_addr += dma_len;
131 		len -= dma_len;
132 	}
133 
134 	EFX_WARN_ON_ONCE_PARANOID(!len);
135 	buffer->len = len;
136 	*final_buffer = buffer;
137 }
138 
139 /*
140  * Verify that our various assumptions about sk_buffs and the conditions
141  * under which TSO will be attempted hold true.  Return the protocol number.
142  */
143 static __be16 efx_tso_check_protocol(struct sk_buff *skb)
144 {
145 	__be16 protocol = skb->protocol;
146 
147 	EFX_WARN_ON_ONCE_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
148 				  protocol);
149 	if (protocol == htons(ETH_P_8021Q)) {
150 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
151 
152 		protocol = veh->h_vlan_encapsulated_proto;
153 	}
154 
155 	if (protocol == htons(ETH_P_IP)) {
156 		EFX_WARN_ON_ONCE_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
157 	} else {
158 		EFX_WARN_ON_ONCE_PARANOID(protocol != htons(ETH_P_IPV6));
159 		EFX_WARN_ON_ONCE_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
160 	}
161 	EFX_WARN_ON_ONCE_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data) +
162 				   (tcp_hdr(skb)->doff << 2u)) >
163 				  skb_headlen(skb));
164 
165 	return protocol;
166 }
167 
168 /* Parse the SKB header and initialise state. */
169 static int tso_start(struct tso_state *st, struct efx_nic *efx,
170 		     struct efx_tx_queue *tx_queue,
171 		     const struct sk_buff *skb)
172 {
173 	struct device *dma_dev = &efx->pci_dev->dev;
174 	unsigned int header_len, in_len;
175 	dma_addr_t dma_addr;
176 
177 	st->ip_off = skb_network_header(skb) - skb->data;
178 	st->tcp_off = skb_transport_header(skb) - skb->data;
179 	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
180 	in_len = skb_headlen(skb) - header_len;
181 	st->header_len = header_len;
182 	st->in_len = in_len;
183 	if (st->protocol == htons(ETH_P_IP)) {
184 		st->ip_base_len = st->header_len - st->ip_off;
185 		st->ipv4_id = ntohs(ip_hdr(skb)->id);
186 	} else {
187 		st->ip_base_len = st->header_len - st->tcp_off;
188 		st->ipv4_id = 0;
189 	}
190 	st->seqnum = ntohl(tcp_hdr(skb)->seq);
191 
192 	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->urg);
193 	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->syn);
194 	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->rst);
195 
196 	st->out_len = skb->len - header_len;
197 
198 	dma_addr = dma_map_single(dma_dev, skb->data,
199 				  skb_headlen(skb), DMA_TO_DEVICE);
200 	st->header_dma_addr = dma_addr;
201 	st->header_unmap_len = skb_headlen(skb);
202 	st->dma_addr = dma_addr + header_len;
203 	st->unmap_len = 0;
204 
205 	return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0;
206 }
207 
208 static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
209 			    skb_frag_t *frag)
210 {
211 	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
212 					  skb_frag_size(frag), DMA_TO_DEVICE);
213 	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
214 		st->unmap_len = skb_frag_size(frag);
215 		st->in_len = skb_frag_size(frag);
216 		st->dma_addr = st->unmap_addr;
217 		return 0;
218 	}
219 	return -ENOMEM;
220 }
221 
222 
223 /**
224  * tso_fill_packet_with_fragment - form descriptors for the current fragment
225  * @tx_queue:		Efx TX queue
226  * @skb:		Socket buffer
227  * @st:			TSO state
228  *
229  * Form descriptors for the current fragment, until we reach the end
230  * of fragment or end-of-packet.
231  */
232 static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
233 					  const struct sk_buff *skb,
234 					  struct tso_state *st)
235 {
236 	struct efx_tx_buffer *buffer;
237 	int n;
238 
239 	if (st->in_len == 0)
240 		return;
241 	if (st->packet_space == 0)
242 		return;
243 
244 	EFX_WARN_ON_ONCE_PARANOID(st->in_len <= 0);
245 	EFX_WARN_ON_ONCE_PARANOID(st->packet_space <= 0);
246 
247 	n = min(st->in_len, st->packet_space);
248 
249 	st->packet_space -= n;
250 	st->out_len -= n;
251 	st->in_len -= n;
252 
253 	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
254 
255 	if (st->out_len == 0) {
256 		/* Transfer ownership of the skb */
257 		buffer->skb = skb;
258 		buffer->flags = EFX_TX_BUF_SKB;
259 	} else if (st->packet_space != 0) {
260 		buffer->flags = EFX_TX_BUF_CONT;
261 	}
262 
263 	if (st->in_len == 0) {
264 		/* Transfer ownership of the DMA mapping */
265 		buffer->unmap_len = st->unmap_len;
266 		buffer->dma_offset = buffer->unmap_len - buffer->len;
267 		st->unmap_len = 0;
268 	}
269 
270 	st->dma_addr += n;
271 }
272 
273 
274 #define TCP_FLAGS_OFFSET 13
275 
276 /**
277  * tso_start_new_packet - generate a new header and prepare for the new packet
278  * @tx_queue:		Efx TX queue
279  * @skb:		Socket buffer
280  * @st:			TSO state
281  *
282  * Generate a new header and prepare for the new packet.  Return 0 on
283  * success, or -%ENOMEM if failed to alloc header, or other negative error.
284  */
285 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
286 				const struct sk_buff *skb,
287 				struct tso_state *st)
288 {
289 	struct efx_tx_buffer *buffer =
290 		efx_tx_queue_get_insert_buffer(tx_queue);
291 	bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
292 	u8 tcp_flags_mask, tcp_flags;
293 
294 	if (!is_last) {
295 		st->packet_space = skb_shinfo(skb)->gso_size;
296 		tcp_flags_mask = 0x09; /* mask out FIN and PSH */
297 	} else {
298 		st->packet_space = st->out_len;
299 		tcp_flags_mask = 0x00;
300 	}
301 
302 	if (WARN_ON(!st->header_unmap_len))
303 		return -EINVAL;
304 	/* Send the original headers with a TSO option descriptor
305 	 * in front
306 	 */
307 	tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & ~tcp_flags_mask;
308 
309 	buffer->flags = EFX_TX_BUF_OPTION;
310 	buffer->len = 0;
311 	buffer->unmap_len = 0;
312 	EFX_POPULATE_QWORD_5(buffer->option,
313 			     ESF_DZ_TX_DESC_IS_OPT, 1,
314 			     ESF_DZ_TX_OPTION_TYPE,
315 			     ESE_DZ_TX_OPTION_DESC_TSO,
316 			     ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
317 			     ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
318 			     ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
319 	++tx_queue->insert_count;
320 
321 	/* We mapped the headers in tso_start().  Unmap them
322 	 * when the last segment is completed.
323 	 */
324 	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
325 	buffer->dma_addr = st->header_dma_addr;
326 	buffer->len = st->header_len;
327 	if (is_last) {
328 		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
329 		buffer->unmap_len = st->header_unmap_len;
330 		buffer->dma_offset = 0;
331 		/* Ensure we only unmap them once in case of a
332 		 * later DMA mapping error and rollback
333 		 */
334 		st->header_unmap_len = 0;
335 	} else {
336 		buffer->flags = EFX_TX_BUF_CONT;
337 		buffer->unmap_len = 0;
338 	}
339 	++tx_queue->insert_count;
340 
341 	st->seqnum += skb_shinfo(skb)->gso_size;
342 
343 	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
344 	++st->ipv4_id;
345 
346 	return 0;
347 }
348 
349 /**
350  * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
351  * @tx_queue:		Efx TX queue
352  * @skb:		Socket buffer
353  * @data_mapped:        Did we map the data? Always set to true
354  *                      by this on success.
355  *
356  * Context: You must hold netif_tx_lock() to call this function.
357  *
358  * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
359  * @skb was not enqueued.  @skb is consumed unless return value is
360  * %EINVAL.
361  */
362 int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
363 			struct sk_buff *skb,
364 			bool *data_mapped)
365 {
366 	struct efx_nic *efx = tx_queue->efx;
367 	int frag_i, rc;
368 	struct tso_state state;
369 
370 	if (tx_queue->tso_version != 1)
371 		return -EINVAL;
372 
373 	prefetch(skb->data);
374 
375 	/* Find the packet protocol and sanity-check it */
376 	state.protocol = efx_tso_check_protocol(skb);
377 
378 	EFX_WARN_ON_ONCE_PARANOID(tx_queue->write_count != tx_queue->insert_count);
379 
380 	rc = tso_start(&state, efx, tx_queue, skb);
381 	if (rc)
382 		goto fail;
383 
384 	if (likely(state.in_len == 0)) {
385 		/* Grab the first payload fragment. */
386 		EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->nr_frags < 1);
387 		frag_i = 0;
388 		rc = tso_get_fragment(&state, efx,
389 				      skb_shinfo(skb)->frags + frag_i);
390 		if (rc)
391 			goto fail;
392 	} else {
393 		/* Payload starts in the header area. */
394 		frag_i = -1;
395 	}
396 
397 	rc = tso_start_new_packet(tx_queue, skb, &state);
398 	if (rc)
399 		goto fail;
400 
401 	prefetch_ptr(tx_queue);
402 
403 	while (1) {
404 		tso_fill_packet_with_fragment(tx_queue, skb, &state);
405 
406 		/* Move onto the next fragment? */
407 		if (state.in_len == 0) {
408 			if (++frag_i >= skb_shinfo(skb)->nr_frags)
409 				/* End of payload reached. */
410 				break;
411 			rc = tso_get_fragment(&state, efx,
412 					      skb_shinfo(skb)->frags + frag_i);
413 			if (rc)
414 				goto fail;
415 		}
416 
417 		/* Start at new packet? */
418 		if (state.packet_space == 0) {
419 			rc = tso_start_new_packet(tx_queue, skb, &state);
420 			if (rc)
421 				goto fail;
422 		}
423 	}
424 
425 	*data_mapped = true;
426 
427 	return 0;
428 
429 fail:
430 	if (rc == -ENOMEM)
431 		netif_err(efx, tx_err, efx->net_dev,
432 			  "Out of memory for TSO headers, or DMA mapping error\n");
433 	else
434 		netif_err(efx, tx_err, efx->net_dev, "TSO failed, rc = %d\n", rc);
435 
436 	/* Free the DMA mapping we were in the process of writing out */
437 	if (state.unmap_len) {
438 		dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
439 			       state.unmap_len, DMA_TO_DEVICE);
440 	}
441 
442 	/* Free the header DMA mapping */
443 	if (state.header_unmap_len)
444 		dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
445 				 state.header_unmap_len, DMA_TO_DEVICE);
446 
447 	return rc;
448 }
449