1 /******************************************************************************
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2017 Intel Deutschland GmbH
9  * Copyright(c) 2018 - 2020 Intel Corporation
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of version 2 of the GNU General Public License as
13  * published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * BSD LICENSE
21  *
22  * Copyright(c) 2017 Intel Deutschland GmbH
23  * Copyright(c) 2018 - 2020 Intel Corporation
24  * All rights reserved.
25  *
26  * Redistribution and use in source and binary forms, with or without
27  * modification, are permitted provided that the following conditions
28  * are met:
29  *
30  *  * Redistributions of source code must retain the above copyright
31  *    notice, this list of conditions and the following disclaimer.
32  *  * Redistributions in binary form must reproduce the above copyright
33  *    notice, this list of conditions and the following disclaimer in
34  *    the documentation and/or other materials provided with the
35  *    distribution.
36  *  * Neither the name Intel Corporation nor the names of its
37  *    contributors may be used to endorse or promote products derived
38  *    from this software without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51  *
52  *****************************************************************************/
53 #include <net/tso.h>
54 #include <linux/tcp.h>
55 
56 #include "iwl-debug.h"
57 #include "iwl-csr.h"
58 #include "iwl-io.h"
59 #include "internal.h"
60 #include "fw/api/tx.h"
61 
62  /*
63  * iwl_pcie_gen2_tx_stop - Stop all Tx DMA channels
64  */
65 void iwl_pcie_gen2_tx_stop(struct iwl_trans *trans)
66 {
67 	int txq_id;
68 
69 	/*
70 	 * This function can be called before the op_mode disabled the
71 	 * queues. This happens when we have an rfkill interrupt.
72 	 * Since we stop Tx altogether - mark the queues as stopped.
73 	 */
74 	memset(trans->txqs.queue_stopped, 0,
75 	       sizeof(trans->txqs.queue_stopped));
76 	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
77 
78 	/* Unmap DMA from host system and free skb's */
79 	for (txq_id = 0; txq_id < ARRAY_SIZE(trans->txqs.txq); txq_id++) {
80 		if (!trans->txqs.txq[txq_id])
81 			continue;
82 		iwl_pcie_gen2_txq_unmap(trans, txq_id);
83 	}
84 }
85 
86 /*
87  * iwl_pcie_txq_update_byte_tbl - Set up entry in Tx byte-count array
88  */
89 static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie,
90 					  struct iwl_txq *txq, u16 byte_cnt,
91 					  int num_tbs)
92 {
93 	struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
94 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
95 	u8 filled_tfd_size, num_fetch_chunks;
96 	u16 len = byte_cnt;
97 	__le16 bc_ent;
98 
99 	if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
100 		return;
101 
102 	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
103 			  num_tbs * sizeof(struct iwl_tfh_tb);
104 	/*
105 	 * filled_tfd_size contains the number of filled bytes in the TFD.
106 	 * Dividing it by 64 will give the number of chunks to fetch
107 	 * to SRAM- 0 for one chunk, 1 for 2 and so on.
108 	 * If, for example, TFD contains only 3 TBs then 32 bytes
109 	 * of the TFD are used, and only one chunk of 64 bytes should
110 	 * be fetched
111 	 */
112 	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
113 
114 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
115 		struct iwl_gen3_bc_tbl *scd_bc_tbl_gen3 = txq->bc_tbl.addr;
116 
117 		/* Starting from AX210, the HW expects bytes */
118 		WARN_ON(trans_pcie->bc_table_dword);
119 		WARN_ON(len > 0x3FFF);
120 		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
121 		scd_bc_tbl_gen3->tfd_offset[idx] = bc_ent;
122 	} else {
123 		struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;
124 
125 		/* Before AX210, the HW expects DW */
126 		WARN_ON(!trans_pcie->bc_table_dword);
127 		len = DIV_ROUND_UP(len, 4);
128 		WARN_ON(len > 0xFFF);
129 		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
130 		scd_bc_tbl->tfd_offset[idx] = bc_ent;
131 	}
132 }
133 
134 /*
135  * iwl_pcie_gen2_txq_inc_wr_ptr - Send new write index to hardware
136  */
137 void iwl_pcie_gen2_txq_inc_wr_ptr(struct iwl_trans *trans,
138 				  struct iwl_txq *txq)
139 {
140 	lockdep_assert_held(&txq->lock);
141 
142 	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);
143 
144 	/*
145 	 * if not in power-save mode, uCode will never sleep when we're
146 	 * trying to tx (during RFKILL, we're not trying to tx).
147 	 */
148 	iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
149 }
150 
151 static u8 iwl_pcie_gen2_get_num_tbs(struct iwl_trans *trans,
152 				    struct iwl_tfh_tfd *tfd)
153 {
154 	return le16_to_cpu(tfd->num_tbs) & 0x1f;
155 }
156 
157 static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
158 				    struct iwl_cmd_meta *meta,
159 				    struct iwl_tfh_tfd *tfd)
160 {
161 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
162 	int i, num_tbs;
163 
164 	/* Sanity check on number of chunks */
165 	num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd);
166 
167 	if (num_tbs > trans_pcie->max_tbs) {
168 		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
169 		return;
170 	}
171 
172 	/* first TB is never freed - it's the bidirectional DMA data */
173 	for (i = 1; i < num_tbs; i++) {
174 		if (meta->tbs & BIT(i))
175 			dma_unmap_page(trans->dev,
176 				       le64_to_cpu(tfd->tbs[i].addr),
177 				       le16_to_cpu(tfd->tbs[i].tb_len),
178 				       DMA_TO_DEVICE);
179 		else
180 			dma_unmap_single(trans->dev,
181 					 le64_to_cpu(tfd->tbs[i].addr),
182 					 le16_to_cpu(tfd->tbs[i].tb_len),
183 					 DMA_TO_DEVICE);
184 	}
185 
186 	tfd->num_tbs = 0;
187 }
188 
189 static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
190 {
191 	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
192 	 * idx is bounded by n_window
193 	 */
194 	int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
195 
196 	lockdep_assert_held(&txq->lock);
197 
198 	iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
199 				iwl_pcie_get_tfd(trans, txq, idx));
200 
201 	/* free SKB */
202 	if (txq->entries) {
203 		struct sk_buff *skb;
204 
205 		skb = txq->entries[idx].skb;
206 
207 		/* Can be called from irqs-disabled context
208 		 * If skb is not NULL, it means that the whole queue is being
209 		 * freed and that the queue is not empty - free the skb
210 		 */
211 		if (skb) {
212 			iwl_op_mode_free_skb(trans->op_mode, skb);
213 			txq->entries[idx].skb = NULL;
214 		}
215 	}
216 }
217 
218 static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans,
219 				struct iwl_tfh_tfd *tfd, dma_addr_t addr,
220 				u16 len)
221 {
222 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
223 	int idx = iwl_pcie_gen2_get_num_tbs(trans, tfd);
224 	struct iwl_tfh_tb *tb;
225 
226 	/*
227 	 * Only WARN here so we know about the issue, but we mess up our
228 	 * unmap path because not every place currently checks for errors
229 	 * returned from this function - it can only return an error if
230 	 * there's no more space, and so when we know there is enough we
231 	 * don't always check ...
232 	 */
233 	WARN(iwl_pcie_crosses_4g_boundary(addr, len),
234 	     "possible DMA problem with iova:0x%llx, len:%d\n",
235 	     (unsigned long long)addr, len);
236 
237 	if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
238 		return -EINVAL;
239 	tb = &tfd->tbs[idx];
240 
241 	/* Each TFD can point to a maximum max_tbs Tx buffers */
242 	if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->max_tbs) {
243 		IWL_ERR(trans, "Error can not send more than %d chunks\n",
244 			trans_pcie->max_tbs);
245 		return -EINVAL;
246 	}
247 
248 	put_unaligned_le64(addr, &tb->addr);
249 	tb->tb_len = cpu_to_le16(len);
250 
251 	tfd->num_tbs = cpu_to_le16(idx + 1);
252 
253 	return idx;
254 }
255 
256 static struct page *get_workaround_page(struct iwl_trans *trans,
257 					struct sk_buff *skb)
258 {
259 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
260 	struct page **page_ptr;
261 	struct page *ret;
262 
263 	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
264 
265 	ret = alloc_page(GFP_ATOMIC);
266 	if (!ret)
267 		return NULL;
268 
269 	/* set the chaining pointer to the previous page if there */
270 	*(void **)(page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr;
271 	*page_ptr = ret;
272 
273 	return ret;
274 }
275 
276 /*
277  * Add a TB and if needed apply the FH HW bug workaround;
278  * meta != NULL indicates that it's a page mapping and we
279  * need to dma_unmap_page() and set the meta->tbs bit in
280  * this case.
281  */
282 static int iwl_pcie_gen2_set_tb_with_wa(struct iwl_trans *trans,
283 					struct sk_buff *skb,
284 					struct iwl_tfh_tfd *tfd,
285 					dma_addr_t phys, void *virt,
286 					u16 len, struct iwl_cmd_meta *meta)
287 {
288 	dma_addr_t oldphys = phys;
289 	struct page *page;
290 	int ret;
291 
292 	if (unlikely(dma_mapping_error(trans->dev, phys)))
293 		return -ENOMEM;
294 
295 	if (likely(!iwl_pcie_crosses_4g_boundary(phys, len))) {
296 		ret = iwl_pcie_gen2_set_tb(trans, tfd, phys, len);
297 
298 		if (ret < 0)
299 			goto unmap;
300 
301 		if (meta)
302 			meta->tbs |= BIT(ret);
303 
304 		ret = 0;
305 		goto trace;
306 	}
307 
308 	/*
309 	 * Work around a hardware bug. If (as expressed in the
310 	 * condition above) the TB ends on a 32-bit boundary,
311 	 * then the next TB may be accessed with the wrong
312 	 * address.
313 	 * To work around it, copy the data elsewhere and make
314 	 * a new mapping for it so the device will not fail.
315 	 */
316 
317 	if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) {
318 		ret = -ENOBUFS;
319 		goto unmap;
320 	}
321 
322 	page = get_workaround_page(trans, skb);
323 	if (!page) {
324 		ret = -ENOMEM;
325 		goto unmap;
326 	}
327 
328 	memcpy(page_address(page), virt, len);
329 
330 	phys = dma_map_single(trans->dev, page_address(page), len,
331 			      DMA_TO_DEVICE);
332 	if (unlikely(dma_mapping_error(trans->dev, phys)))
333 		return -ENOMEM;
334 	ret = iwl_pcie_gen2_set_tb(trans, tfd, phys, len);
335 	if (ret < 0) {
336 		/* unmap the new allocation as single */
337 		oldphys = phys;
338 		meta = NULL;
339 		goto unmap;
340 	}
341 	IWL_WARN(trans,
342 		 "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
343 		 len, (unsigned long long)oldphys, (unsigned long long)phys);
344 
345 	ret = 0;
346 unmap:
347 	if (meta)
348 		dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
349 	else
350 		dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
351 trace:
352 	trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);
353 
354 	return ret;
355 }
356 
/*
 * Add the TBs for the A-MSDU subframes of a GSO skb to a TFD.
 *
 * The payload that follows the 802.11 and SNAP/IP/TCP headers is cut
 * into subframes of at most gso_size bytes. For every subframe a header
 * block (padding for the previous subframe, DA, SA, big-endian length
 * and the SNAP/IP/TCP headers produced by tso_build_hdr()) is written
 * into the page obtained from get_page_hdr() and added as one TB; the
 * subframe's payload is added as one or more further TBs. tx_cmd->len is
 * adjusted along the way.
 *
 * @start_len is only passed on to the tracepoint.
 *
 * Returns 0 on success, -ENOMEM if no header page could be obtained and
 * -EINVAL if mapping or adding a TB failed. Without CONFIG_INET the
 * function does nothing and always returns -EINVAL.
 *
 * NOTE(review): on the out_err path the 802.11 header pulled from the
 * skb is not pushed back; skb_push() is only reached on success.
 */
static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
				     struct sk_buff *skb,
				     struct iwl_tfh_tfd *tfd, int start_len,
				     u8 hdr_len,
				     struct iwl_device_tx_cmd *dev_cmd)
{
#ifdef CONFIG_INET
	struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload;
	struct ieee80211_hdr *hdr = (void *)skb->data;
	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	u16 length, amsdu_pad;
	u8 *start_hdr;
	struct iwl_tso_hdr_page *hdr_page;
	struct tso_t tso;

	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
			     &dev_cmd->hdr, start_len, 0);

	ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
	/* 8 extra bytes in front of the IP header - presumably the SNAP */
	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
	/* payload bytes that remain to be spread over the subframes */
	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
	amsdu_pad = 0;

	/*
	 * total amount of header we may need for this A-MSDU
	 * (the 3 is presumably the worst-case padding per subframe)
	 */
	hdr_room = DIV_ROUND_UP(total_len, mss) *
		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));

	/* Our device supports 9 segments at most, it will fit in 1 page */
	hdr_page = get_page_hdr(trans, hdr_room, skb);
	if (!hdr_page)
		return -ENOMEM;

	start_hdr = hdr_page->pos;

	/*
	 * Pull the ieee80211 header to be able to use TSO core,
	 * we will restore it for the tx_status flow.
	 */
	skb_pull(skb, hdr_len);

	/*
	 * Remove the length of all the headers that we don't actually
	 * have in the MPDU by themselves, but that we duplicate into
	 * all the different MSDUs inside the A-MSDU.
	 */
	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);

	tso_start(skb, &tso);

	/* one iteration per subframe */
	while (total_len) {
		/* this is the data left for this subframe */
		unsigned int data_left = min_t(unsigned int, mss, total_len);
		/* never assigned below, so the dev_kfree_skb() calls get NULL */
		struct sk_buff *csum_skb = NULL;
		unsigned int tb_len;
		dma_addr_t tb_phys;
		u8 *subf_hdrs_start = hdr_page->pos;

		total_len -= data_left;

		/* pad the previous subframe, then compute this one's padding */
		memset(hdr_page->pos, 0, amsdu_pad);
		hdr_page->pos += amsdu_pad;
		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
				  data_left)) & 0x3;
		/* subframe header: DA, SA, then the big-endian length */
		ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
		hdr_page->pos += ETH_ALEN;
		ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
		hdr_page->pos += ETH_ALEN;

		length = snap_ip_tcp_hdrlen + data_left;
		*((__be16 *)hdr_page->pos) = cpu_to_be16(length);
		hdr_page->pos += sizeof(length);

		/*
		 * This will copy the SNAP as well which will be considered
		 * as MAC header.
		 */
		tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);

		hdr_page->pos += snap_ip_tcp_hdrlen;

		/* map everything written to the header page since start_hdr */
		tb_len = hdr_page->pos - start_hdr;
		tb_phys = dma_map_single(trans->dev, start_hdr,
					 tb_len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
			dev_kfree_skb(csum_skb);
			goto out_err;
		}
		/*
		 * No need for _with_wa, this is from the TSO page and
		 * we leave some space at the end of it so can't hit
		 * the buggy scenario.
		 * (the return value of the call is not checked)
		 */
		iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb_len);
		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
					tb_phys, tb_len);
		/* add this subframe's headers' length to the tx_cmd */
		le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);

		/* prepare the start_hdr for the next subframe */
		start_hdr = hdr_page->pos;

		/* put the payload */
		while (data_left) {
			int ret;

			/* at most what the TSO state currently exposes */
			tb_len = min_t(unsigned int, tso.size, data_left);
			tb_phys = dma_map_single(trans->dev, tso.data,
						 tb_len, DMA_TO_DEVICE);
			/* the mapping error check happens inside _with_wa */
			ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd,
							   tb_phys, tso.data,
							   tb_len, NULL);
			if (ret) {
				dev_kfree_skb(csum_skb);
				goto out_err;
			}

			data_left -= tb_len;
			tso_build_data(skb, &tso, tb_len);
		}
	}

	/* re -add the WiFi header */
	skb_push(skb, hdr_len);

	return 0;

out_err:
#endif
	return -EINVAL;
}
488 
489 static struct
490 iwl_tfh_tfd *iwl_pcie_gen2_build_tx_amsdu(struct iwl_trans *trans,
491 					  struct iwl_txq *txq,
492 					  struct iwl_device_tx_cmd *dev_cmd,
493 					  struct sk_buff *skb,
494 					  struct iwl_cmd_meta *out_meta,
495 					  int hdr_len,
496 					  int tx_cmd_len)
497 {
498 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
499 	struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
500 	dma_addr_t tb_phys;
501 	int len;
502 	void *tb1_addr;
503 
504 	tb_phys = iwl_pcie_get_first_tb_dma(txq, idx);
505 
506 	/*
507 	 * No need for _with_wa, the first TB allocation is aligned up
508 	 * to a 64-byte boundary and thus can't be at the end or cross
509 	 * a page boundary (much less a 2^32 boundary).
510 	 */
511 	iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);
512 
513 	/*
514 	 * The second TB (tb1) points to the remainder of the TX command
515 	 * and the 802.11 header - dword aligned size
516 	 * (This calculation modifies the TX command, so do it before the
517 	 * setup of the first TB)
518 	 */
519 	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
520 	      IWL_FIRST_TB_SIZE;
521 
522 	/* do not align A-MSDU to dword as the subframe header aligns it */
523 
524 	/* map the data for TB1 */
525 	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
526 	tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
527 	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
528 		goto out_err;
529 	/*
530 	 * No need for _with_wa(), we ensure (via alignment) that the data
531 	 * here can never cross or end at a page boundary.
532 	 */
533 	iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, len);
534 
535 	if (iwl_pcie_gen2_build_amsdu(trans, skb, tfd,
536 				      len + IWL_FIRST_TB_SIZE,
537 				      hdr_len, dev_cmd))
538 		goto out_err;
539 
540 	/* building the A-MSDU might have changed this data, memcpy it now */
541 	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
542 	return tfd;
543 
544 out_err:
545 	iwl_pcie_gen2_tfd_unmap(trans, out_meta, tfd);
546 	return NULL;
547 }
548 
549 static int iwl_pcie_gen2_tx_add_frags(struct iwl_trans *trans,
550 				      struct sk_buff *skb,
551 				      struct iwl_tfh_tfd *tfd,
552 				      struct iwl_cmd_meta *out_meta)
553 {
554 	int i;
555 
556 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
557 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
558 		dma_addr_t tb_phys;
559 		unsigned int fragsz = skb_frag_size(frag);
560 		int ret;
561 
562 		if (!fragsz)
563 			continue;
564 
565 		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
566 					   fragsz, DMA_TO_DEVICE);
567 		ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
568 						   skb_frag_address(frag),
569 						   fragsz, out_meta);
570 		if (ret)
571 			return ret;
572 	}
573 
574 	return 0;
575 }
576 
577 static struct
578 iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans,
579 				    struct iwl_txq *txq,
580 				    struct iwl_device_tx_cmd *dev_cmd,
581 				    struct sk_buff *skb,
582 				    struct iwl_cmd_meta *out_meta,
583 				    int hdr_len,
584 				    int tx_cmd_len,
585 				    bool pad)
586 {
587 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
588 	struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
589 	dma_addr_t tb_phys;
590 	int len, tb1_len, tb2_len;
591 	void *tb1_addr;
592 	struct sk_buff *frag;
593 
594 	tb_phys = iwl_pcie_get_first_tb_dma(txq, idx);
595 
596 	/* The first TB points to bi-directional DMA data */
597 	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
598 
599 	/*
600 	 * No need for _with_wa, the first TB allocation is aligned up
601 	 * to a 64-byte boundary and thus can't be at the end or cross
602 	 * a page boundary (much less a 2^32 boundary).
603 	 */
604 	iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);
605 
606 	/*
607 	 * The second TB (tb1) points to the remainder of the TX command
608 	 * and the 802.11 header - dword aligned size
609 	 * (This calculation modifies the TX command, so do it before the
610 	 * setup of the first TB)
611 	 */
612 	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
613 	      IWL_FIRST_TB_SIZE;
614 
615 	if (pad)
616 		tb1_len = ALIGN(len, 4);
617 	else
618 		tb1_len = len;
619 
620 	/* map the data for TB1 */
621 	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
622 	tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
623 	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
624 		goto out_err;
625 	/*
626 	 * No need for _with_wa(), we ensure (via alignment) that the data
627 	 * here can never cross or end at a page boundary.
628 	 */
629 	iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
630 	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
631 			     IWL_FIRST_TB_SIZE + tb1_len, hdr_len);
632 
633 	/* set up TFD's third entry to point to remainder of skb's head */
634 	tb2_len = skb_headlen(skb) - hdr_len;
635 
636 	if (tb2_len > 0) {
637 		int ret;
638 
639 		tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
640 					 tb2_len, DMA_TO_DEVICE);
641 		ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
642 						   skb->data + hdr_len, tb2_len,
643 						   NULL);
644 		if (ret)
645 			goto out_err;
646 	}
647 
648 	if (iwl_pcie_gen2_tx_add_frags(trans, skb, tfd, out_meta))
649 		goto out_err;
650 
651 	skb_walk_frags(skb, frag) {
652 		int ret;
653 
654 		tb_phys = dma_map_single(trans->dev, frag->data,
655 					 skb_headlen(frag), DMA_TO_DEVICE);
656 		ret = iwl_pcie_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
657 						   frag->data,
658 						   skb_headlen(frag), NULL);
659 		if (ret)
660 			goto out_err;
661 		if (iwl_pcie_gen2_tx_add_frags(trans, frag, tfd, out_meta))
662 			goto out_err;
663 	}
664 
665 	return tfd;
666 
667 out_err:
668 	iwl_pcie_gen2_tfd_unmap(trans, out_meta, tfd);
669 	return NULL;
670 }
671 
672 static
673 struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
674 					    struct iwl_txq *txq,
675 					    struct iwl_device_tx_cmd *dev_cmd,
676 					    struct sk_buff *skb,
677 					    struct iwl_cmd_meta *out_meta)
678 {
679 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
680 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
681 	struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
682 	int len, hdr_len;
683 	bool amsdu;
684 
685 	/* There must be data left over for TB1 or this code must be changed */
686 	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen2) < IWL_FIRST_TB_SIZE);
687 
688 	memset(tfd, 0, sizeof(*tfd));
689 
690 	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
691 		len = sizeof(struct iwl_tx_cmd_gen2);
692 	else
693 		len = sizeof(struct iwl_tx_cmd_gen3);
694 
695 	amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
696 			(*ieee80211_get_qos_ctl(hdr) &
697 			 IEEE80211_QOS_CTL_A_MSDU_PRESENT);
698 
699 	hdr_len = ieee80211_hdrlen(hdr->frame_control);
700 
701 	/*
702 	 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
703 	 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
704 	 * built in the higher layers already.
705 	 */
706 	if (amsdu && skb_shinfo(skb)->gso_size)
707 		return iwl_pcie_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
708 						    out_meta, hdr_len, len);
709 
710 	return iwl_pcie_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
711 				      hdr_len, len, !amsdu);
712 }
713 
714 int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
715 			   struct iwl_device_tx_cmd *dev_cmd, int txq_id)
716 {
717 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
718 	struct iwl_cmd_meta *out_meta;
719 	struct iwl_txq *txq = trans->txqs.txq[txq_id];
720 	u16 cmd_len;
721 	int idx;
722 	void *tfd;
723 
724 	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
725 		      "queue %d out of range", txq_id))
726 		return -EINVAL;
727 
728 	if (WARN_ONCE(!test_bit(txq_id, trans->txqs.queue_used),
729 		      "TX on unused queue %d\n", txq_id))
730 		return -EINVAL;
731 
732 	if (skb_is_nonlinear(skb) &&
733 	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
734 	    __skb_linearize(skb))
735 		return -ENOMEM;
736 
737 	spin_lock(&txq->lock);
738 
739 	if (iwl_queue_space(trans, txq) < txq->high_mark) {
740 		iwl_stop_queue(trans, txq);
741 
742 		/* don't put the packet on the ring, if there is no room */
743 		if (unlikely(iwl_queue_space(trans, txq) < 3)) {
744 			struct iwl_device_tx_cmd **dev_cmd_ptr;
745 
746 			dev_cmd_ptr = (void *)((u8 *)skb->cb +
747 					       trans_pcie->dev_cmd_offs);
748 
749 			*dev_cmd_ptr = dev_cmd;
750 			__skb_queue_tail(&txq->overflow_q, skb);
751 			spin_unlock(&txq->lock);
752 			return 0;
753 		}
754 	}
755 
756 	idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
757 
758 	/* Set up driver data for this TFD */
759 	txq->entries[idx].skb = skb;
760 	txq->entries[idx].cmd = dev_cmd;
761 
762 	dev_cmd->hdr.sequence =
763 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
764 			    INDEX_TO_SEQ(idx)));
765 
766 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
767 	out_meta = &txq->entries[idx].meta;
768 	out_meta->flags = 0;
769 
770 	tfd = iwl_pcie_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
771 	if (!tfd) {
772 		spin_unlock(&txq->lock);
773 		return -1;
774 	}
775 
776 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
777 		struct iwl_tx_cmd_gen3 *tx_cmd_gen3 =
778 			(void *)dev_cmd->payload;
779 
780 		cmd_len = le16_to_cpu(tx_cmd_gen3->len);
781 	} else {
782 		struct iwl_tx_cmd_gen2 *tx_cmd_gen2 =
783 			(void *)dev_cmd->payload;
784 
785 		cmd_len = le16_to_cpu(tx_cmd_gen2->len);
786 	}
787 
788 	/* Set up entry for this TFD in Tx byte-count array */
789 	iwl_pcie_gen2_update_byte_tbl(trans_pcie, txq, cmd_len,
790 				      iwl_pcie_gen2_get_num_tbs(trans, tfd));
791 
792 	/* start timer if queue currently empty */
793 	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
794 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
795 
796 	/* Tell device the write index *just past* this latest filled TFD */
797 	txq->write_ptr = iwl_queue_inc_wrap(trans, txq->write_ptr);
798 	iwl_pcie_gen2_txq_inc_wr_ptr(trans, txq);
799 	/*
800 	 * At this point the frame is "transmitted" successfully
801 	 * and we will get a TX status notification eventually.
802 	 */
803 	spin_unlock(&txq->lock);
804 	return 0;
805 }
806 
807 /*************** HOST COMMAND QUEUE FUNCTIONS   *****/
808 
809 /*
810  * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
811  * @priv: device private data point
812  * @cmd: a pointer to the ucode command structure
813  *
814  * The function returns < 0 values to indicate the operation
815  * failed. On success, it returns the index (>= 0) of command in the
816  * command queue.
817  */
818 static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
819 				      struct iwl_host_cmd *cmd)
820 {
821 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
822 	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
823 	struct iwl_device_cmd *out_cmd;
824 	struct iwl_cmd_meta *out_meta;
825 	unsigned long flags;
826 	void *dup_buf = NULL;
827 	dma_addr_t phys_addr;
828 	int i, cmd_pos, idx;
829 	u16 copy_size, cmd_size, tb0_size;
830 	bool had_nocopy = false;
831 	u8 group_id = iwl_cmd_groupid(cmd->id);
832 	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
833 	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
834 	struct iwl_tfh_tfd *tfd;
835 
836 	copy_size = sizeof(struct iwl_cmd_header_wide);
837 	cmd_size = sizeof(struct iwl_cmd_header_wide);
838 
839 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
840 		cmddata[i] = cmd->data[i];
841 		cmdlen[i] = cmd->len[i];
842 
843 		if (!cmd->len[i])
844 			continue;
845 
846 		/* need at least IWL_FIRST_TB_SIZE copied */
847 		if (copy_size < IWL_FIRST_TB_SIZE) {
848 			int copy = IWL_FIRST_TB_SIZE - copy_size;
849 
850 			if (copy > cmdlen[i])
851 				copy = cmdlen[i];
852 			cmdlen[i] -= copy;
853 			cmddata[i] += copy;
854 			copy_size += copy;
855 		}
856 
857 		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
858 			had_nocopy = true;
859 			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
860 				idx = -EINVAL;
861 				goto free_dup_buf;
862 			}
863 		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
864 			/*
865 			 * This is also a chunk that isn't copied
866 			 * to the static buffer so set had_nocopy.
867 			 */
868 			had_nocopy = true;
869 
870 			/* only allowed once */
871 			if (WARN_ON(dup_buf)) {
872 				idx = -EINVAL;
873 				goto free_dup_buf;
874 			}
875 
876 			dup_buf = kmemdup(cmddata[i], cmdlen[i],
877 					  GFP_ATOMIC);
878 			if (!dup_buf)
879 				return -ENOMEM;
880 		} else {
881 			/* NOCOPY must not be followed by normal! */
882 			if (WARN_ON(had_nocopy)) {
883 				idx = -EINVAL;
884 				goto free_dup_buf;
885 			}
886 			copy_size += cmdlen[i];
887 		}
888 		cmd_size += cmd->len[i];
889 	}
890 
891 	/*
892 	 * If any of the command structures end up being larger than the
893 	 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
894 	 * separate TFDs, then we will need to increase the size of the buffers
895 	 */
896 	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
897 		 "Command %s (%#x) is too large (%d bytes)\n",
898 		 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
899 		idx = -EINVAL;
900 		goto free_dup_buf;
901 	}
902 
903 	spin_lock_bh(&txq->lock);
904 
905 	idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
906 	tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
907 	memset(tfd, 0, sizeof(*tfd));
908 
909 	if (iwl_queue_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
910 		spin_unlock_bh(&txq->lock);
911 
912 		IWL_ERR(trans, "No space in command queue\n");
913 		iwl_op_mode_cmd_queue_full(trans->op_mode);
914 		idx = -ENOSPC;
915 		goto free_dup_buf;
916 	}
917 
918 	out_cmd = txq->entries[idx].cmd;
919 	out_meta = &txq->entries[idx].meta;
920 
921 	/* re-initialize to NULL */
922 	memset(out_meta, 0, sizeof(*out_meta));
923 	if (cmd->flags & CMD_WANT_SKB)
924 		out_meta->source = cmd;
925 
926 	/* set up the header */
927 	out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
928 	out_cmd->hdr_wide.group_id = group_id;
929 	out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
930 	out_cmd->hdr_wide.length =
931 		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
932 	out_cmd->hdr_wide.reserved = 0;
933 	out_cmd->hdr_wide.sequence =
934 		cpu_to_le16(QUEUE_TO_SEQ(trans->txqs.cmd.q_id) |
935 					 INDEX_TO_SEQ(txq->write_ptr));
936 
937 	cmd_pos = sizeof(struct iwl_cmd_header_wide);
938 	copy_size = sizeof(struct iwl_cmd_header_wide);
939 
940 	/* and copy the data that needs to be copied */
941 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
942 		int copy;
943 
944 		if (!cmd->len[i])
945 			continue;
946 
947 		/* copy everything if not nocopy/dup */
948 		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
949 					   IWL_HCMD_DFL_DUP))) {
950 			copy = cmd->len[i];
951 
952 			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
953 			cmd_pos += copy;
954 			copy_size += copy;
955 			continue;
956 		}
957 
958 		/*
959 		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
960 		 * in total (for bi-directional DMA), but copy up to what
961 		 * we can fit into the payload for debug dump purposes.
962 		 */
963 		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
964 
965 		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
966 		cmd_pos += copy;
967 
968 		/* However, treat copy_size the proper way, we need it below */
969 		if (copy_size < IWL_FIRST_TB_SIZE) {
970 			copy = IWL_FIRST_TB_SIZE - copy_size;
971 
972 			if (copy > cmd->len[i])
973 				copy = cmd->len[i];
974 			copy_size += copy;
975 		}
976 	}
977 
978 	IWL_DEBUG_HC(trans,
979 		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
980 		     iwl_get_cmd_string(trans, cmd->id), group_id,
981 		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
982 		     cmd_size, txq->write_ptr, idx, trans->txqs.cmd.q_id);
983 
984 	/* start the TFD with the minimum copy bytes */
985 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
986 	memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
987 	iwl_pcie_gen2_set_tb(trans, tfd, iwl_pcie_get_first_tb_dma(txq, idx),
988 			     tb0_size);
989 
990 	/* map first command fragment, if any remains */
991 	if (copy_size > tb0_size) {
992 		phys_addr = dma_map_single(trans->dev,
993 					   (u8 *)out_cmd + tb0_size,
994 					   copy_size - tb0_size,
995 					   DMA_TO_DEVICE);
996 		if (dma_mapping_error(trans->dev, phys_addr)) {
997 			idx = -ENOMEM;
998 			iwl_pcie_gen2_tfd_unmap(trans, out_meta, tfd);
999 			goto out;
1000 		}
1001 		iwl_pcie_gen2_set_tb(trans, tfd, phys_addr,
1002 				     copy_size - tb0_size);
1003 	}
1004 
1005 	/* map the remaining (adjusted) nocopy/dup fragments */
1006 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1007 		const void *data = cmddata[i];
1008 
1009 		if (!cmdlen[i])
1010 			continue;
1011 		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1012 					   IWL_HCMD_DFL_DUP)))
1013 			continue;
1014 		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1015 			data = dup_buf;
1016 		phys_addr = dma_map_single(trans->dev, (void *)data,
1017 					   cmdlen[i], DMA_TO_DEVICE);
1018 		if (dma_mapping_error(trans->dev, phys_addr)) {
1019 			idx = -ENOMEM;
1020 			iwl_pcie_gen2_tfd_unmap(trans, out_meta, tfd);
1021 			goto out;
1022 		}
1023 		iwl_pcie_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
1024 	}
1025 
1026 	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1027 	out_meta->flags = cmd->flags;
1028 	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1029 		kfree_sensitive(txq->entries[idx].free_buf);
1030 	txq->entries[idx].free_buf = dup_buf;
1031 
1032 	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1033 
1034 	/* start timer if queue currently empty */
1035 	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1036 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1037 
1038 	spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1039 	/* Increment and update queue's write index */
1040 	txq->write_ptr = iwl_queue_inc_wrap(trans, txq->write_ptr);
1041 	iwl_pcie_gen2_txq_inc_wr_ptr(trans, txq);
1042 	spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1043 
1044 out:
1045 	spin_unlock_bh(&txq->lock);
1046 free_dup_buf:
1047 	if (idx < 0)
1048 		kfree(dup_buf);
1049 	return idx;
1050 }
1051 
1052 #define HOST_COMPLETE_TIMEOUT	(2 * HZ)
1053 
1054 static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans,
1055 					struct iwl_host_cmd *cmd)
1056 {
1057 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1058 	const char *cmd_str = iwl_get_cmd_string(trans, cmd->id);
1059 	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
1060 	int cmd_idx;
1061 	int ret;
1062 
1063 	IWL_DEBUG_INFO(trans, "Attempting to send sync command %s\n", cmd_str);
1064 
1065 	if (WARN(test_and_set_bit(STATUS_SYNC_HCMD_ACTIVE,
1066 				  &trans->status),
1067 		 "Command %s: a command is already active!\n", cmd_str))
1068 		return -EIO;
1069 
1070 	IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n", cmd_str);
1071 
1072 	cmd_idx = iwl_pcie_gen2_enqueue_hcmd(trans, cmd);
1073 	if (cmd_idx < 0) {
1074 		ret = cmd_idx;
1075 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1076 		IWL_ERR(trans, "Error sending %s: enqueue_hcmd failed: %d\n",
1077 			cmd_str, ret);
1078 		return ret;
1079 	}
1080 
1081 	ret = wait_event_timeout(trans_pcie->wait_command_queue,
1082 				 !test_bit(STATUS_SYNC_HCMD_ACTIVE,
1083 					   &trans->status),
1084 				 HOST_COMPLETE_TIMEOUT);
1085 	if (!ret) {
1086 		IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
1087 			cmd_str, jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
1088 
1089 		IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
1090 			txq->read_ptr, txq->write_ptr);
1091 
1092 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1093 		IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1094 			       cmd_str);
1095 		ret = -ETIMEDOUT;
1096 
1097 		iwl_trans_pcie_sync_nmi(trans);
1098 		goto cancel;
1099 	}
1100 
1101 	if (test_bit(STATUS_FW_ERROR, &trans->status)) {
1102 		IWL_ERR(trans, "FW error in SYNC CMD %s\n", cmd_str);
1103 		dump_stack();
1104 		ret = -EIO;
1105 		goto cancel;
1106 	}
1107 
1108 	if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1109 	    test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1110 		IWL_DEBUG_RF_KILL(trans, "RFKILL in SYNC CMD... no rsp\n");
1111 		ret = -ERFKILL;
1112 		goto cancel;
1113 	}
1114 
1115 	if ((cmd->flags & CMD_WANT_SKB) && !cmd->resp_pkt) {
1116 		IWL_ERR(trans, "Error: Response NULL in '%s'\n", cmd_str);
1117 		ret = -EIO;
1118 		goto cancel;
1119 	}
1120 
1121 	return 0;
1122 
1123 cancel:
1124 	if (cmd->flags & CMD_WANT_SKB) {
1125 		/*
1126 		 * Cancel the CMD_WANT_SKB flag for the cmd in the
1127 		 * TX cmd queue. Otherwise in case the cmd comes
1128 		 * in later, it will possibly set an invalid
1129 		 * address (cmd->meta.source).
1130 		 */
1131 		txq->entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
1132 	}
1133 
1134 	if (cmd->resp_pkt) {
1135 		iwl_free_resp(cmd);
1136 		cmd->resp_pkt = NULL;
1137 	}
1138 
1139 	return ret;
1140 }
1141 
1142 int iwl_trans_pcie_gen2_send_hcmd(struct iwl_trans *trans,
1143 				  struct iwl_host_cmd *cmd)
1144 {
1145 	if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1146 	    test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1147 		IWL_DEBUG_RF_KILL(trans, "Dropping CMD 0x%x: RF KILL\n",
1148 				  cmd->id);
1149 		return -ERFKILL;
1150 	}
1151 
1152 	if (cmd->flags & CMD_ASYNC) {
1153 		int ret;
1154 
1155 		/* An asynchronous command can not expect an SKB to be set. */
1156 		if (WARN_ON(cmd->flags & CMD_WANT_SKB))
1157 			return -EINVAL;
1158 
1159 		ret = iwl_pcie_gen2_enqueue_hcmd(trans, cmd);
1160 		if (ret < 0) {
1161 			IWL_ERR(trans,
1162 				"Error sending %s: enqueue_hcmd failed: %d\n",
1163 				iwl_get_cmd_string(trans, cmd->id), ret);
1164 			return ret;
1165 		}
1166 		return 0;
1167 	}
1168 
1169 	return iwl_pcie_gen2_send_hcmd_sync(trans, cmd);
1170 }
1171 
1172 /*
1173  * iwl_pcie_gen2_txq_unmap -  Unmap any remaining DMA mappings and free skb's
1174  */
1175 void iwl_pcie_gen2_txq_unmap(struct iwl_trans *trans, int txq_id)
1176 {
1177 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1178 	struct iwl_txq *txq = trans->txqs.txq[txq_id];
1179 
1180 	spin_lock_bh(&txq->lock);
1181 	while (txq->write_ptr != txq->read_ptr) {
1182 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
1183 				   txq_id, txq->read_ptr);
1184 
1185 		if (txq_id != trans->txqs.cmd.q_id) {
1186 			int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
1187 			struct sk_buff *skb = txq->entries[idx].skb;
1188 
1189 			if (WARN_ON_ONCE(!skb))
1190 				continue;
1191 
1192 			iwl_pcie_free_tso_page(trans_pcie, skb);
1193 		}
1194 		iwl_pcie_gen2_free_tfd(trans, txq);
1195 		txq->read_ptr = iwl_queue_inc_wrap(trans, txq->read_ptr);
1196 	}
1197 
1198 	while (!skb_queue_empty(&txq->overflow_q)) {
1199 		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
1200 
1201 		iwl_op_mode_free_skb(trans->op_mode, skb);
1202 	}
1203 
1204 	spin_unlock_bh(&txq->lock);
1205 
1206 	/* just in case - this queue may have been stopped */
1207 	iwl_wake_queue(trans, txq);
1208 }
1209 
1210 void iwl_pcie_gen2_txq_free_memory(struct iwl_trans *trans,
1211 				   struct iwl_txq *txq)
1212 {
1213 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1214 	struct device *dev = trans->dev;
1215 
1216 	/* De-alloc circular buffer of TFDs */
1217 	if (txq->tfds) {
1218 		dma_free_coherent(dev,
1219 				  trans_pcie->tfd_size * txq->n_window,
1220 				  txq->tfds, txq->dma_addr);
1221 		dma_free_coherent(dev,
1222 				  sizeof(*txq->first_tb_bufs) * txq->n_window,
1223 				  txq->first_tb_bufs, txq->first_tb_dma);
1224 	}
1225 
1226 	kfree(txq->entries);
1227 	if (txq->bc_tbl.addr)
1228 		dma_pool_free(trans_pcie->bc_pool, txq->bc_tbl.addr,
1229 			      txq->bc_tbl.dma);
1230 	kfree(txq);
1231 }
1232 
1233 /*
1234  * iwl_pcie_txq_free - Deallocate DMA queue.
1235  * @txq: Transmit queue to deallocate.
1236  *
1237  * Empty queue by removing and destroying all BD's.
1238  * Free all buffers.
1239  * 0-fill, but do not free "txq" descriptor structure.
1240  */
1241 static void iwl_pcie_gen2_txq_free(struct iwl_trans *trans, int txq_id)
1242 {
1243 	struct iwl_txq *txq;
1244 	int i;
1245 
1246 	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
1247 		      "queue %d out of range", txq_id))
1248 		return;
1249 
1250 	txq = trans->txqs.txq[txq_id];
1251 
1252 	if (WARN_ON(!txq))
1253 		return;
1254 
1255 	iwl_pcie_gen2_txq_unmap(trans, txq_id);
1256 
1257 	/* De-alloc array of command/tx buffers */
1258 	if (txq_id == trans->txqs.cmd.q_id)
1259 		for (i = 0; i < txq->n_window; i++) {
1260 			kfree_sensitive(txq->entries[i].cmd);
1261 			kfree_sensitive(txq->entries[i].free_buf);
1262 		}
1263 	del_timer_sync(&txq->stuck_timer);
1264 
1265 	iwl_pcie_gen2_txq_free_memory(trans, txq);
1266 
1267 	trans->txqs.txq[txq_id] = NULL;
1268 
1269 	clear_bit(txq_id, trans->txqs.queue_used);
1270 }
1271 
1272 int iwl_trans_pcie_dyn_txq_alloc_dma(struct iwl_trans *trans,
1273 				     struct iwl_txq **intxq, int size,
1274 				     unsigned int timeout)
1275 {
1276 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1277 	size_t bc_tbl_size, bc_tbl_entries;
1278 	struct iwl_txq *txq;
1279 	int ret;
1280 
1281 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
1282 		bc_tbl_size = sizeof(struct iwl_gen3_bc_tbl);
1283 		bc_tbl_entries = bc_tbl_size / sizeof(u16);
1284 	} else {
1285 		bc_tbl_size = sizeof(struct iwlagn_scd_bc_tbl);
1286 		bc_tbl_entries = bc_tbl_size / sizeof(u16);
1287 	}
1288 
1289 	if (WARN_ON(size > bc_tbl_entries))
1290 		return -EINVAL;
1291 
1292 	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
1293 	if (!txq)
1294 		return -ENOMEM;
1295 
1296 	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->bc_pool, GFP_KERNEL,
1297 					  &txq->bc_tbl.dma);
1298 	if (!txq->bc_tbl.addr) {
1299 		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
1300 		kfree(txq);
1301 		return -ENOMEM;
1302 	}
1303 
1304 	ret = iwl_pcie_txq_alloc(trans, txq, size, false);
1305 	if (ret) {
1306 		IWL_ERR(trans, "Tx queue alloc failed\n");
1307 		goto error;
1308 	}
1309 	ret = iwl_pcie_txq_init(trans, txq, size, false);
1310 	if (ret) {
1311 		IWL_ERR(trans, "Tx queue init failed\n");
1312 		goto error;
1313 	}
1314 
1315 	txq->wd_timeout = msecs_to_jiffies(timeout);
1316 
1317 	*intxq = txq;
1318 	return 0;
1319 
1320 error:
1321 	iwl_pcie_gen2_txq_free_memory(trans, txq);
1322 	return ret;
1323 }
1324 
1325 int iwl_trans_pcie_txq_alloc_response(struct iwl_trans *trans,
1326 				      struct iwl_txq *txq,
1327 				      struct iwl_host_cmd *hcmd)
1328 {
1329 	struct iwl_tx_queue_cfg_rsp *rsp;
1330 	int ret, qid;
1331 	u32 wr_ptr;
1332 
1333 	if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
1334 		    sizeof(*rsp))) {
1335 		ret = -EINVAL;
1336 		goto error_free_resp;
1337 	}
1338 
1339 	rsp = (void *)hcmd->resp_pkt->data;
1340 	qid = le16_to_cpu(rsp->queue_number);
1341 	wr_ptr = le16_to_cpu(rsp->write_pointer);
1342 
1343 	if (qid >= ARRAY_SIZE(trans->txqs.txq)) {
1344 		WARN_ONCE(1, "queue index %d unsupported", qid);
1345 		ret = -EIO;
1346 		goto error_free_resp;
1347 	}
1348 
1349 	if (test_and_set_bit(qid, trans->txqs.queue_used)) {
1350 		WARN_ONCE(1, "queue %d already used", qid);
1351 		ret = -EIO;
1352 		goto error_free_resp;
1353 	}
1354 
1355 	txq->id = qid;
1356 	trans->txqs.txq[qid] = txq;
1357 	wr_ptr &= (trans->trans_cfg->base_params->max_tfd_queue_size - 1);
1358 
1359 	/* Place first TFD at index corresponding to start sequence number */
1360 	txq->read_ptr = wr_ptr;
1361 	txq->write_ptr = wr_ptr;
1362 
1363 	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);
1364 
1365 	iwl_free_resp(hcmd);
1366 	return qid;
1367 
1368 error_free_resp:
1369 	iwl_free_resp(hcmd);
1370 	iwl_pcie_gen2_txq_free_memory(trans, txq);
1371 	return ret;
1372 }
1373 
1374 int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
1375 				 __le16 flags, u8 sta_id, u8 tid,
1376 				 int cmd_id, int size,
1377 				 unsigned int timeout)
1378 {
1379 	struct iwl_txq *txq = NULL;
1380 	struct iwl_tx_queue_cfg_cmd cmd = {
1381 		.flags = flags,
1382 		.sta_id = sta_id,
1383 		.tid = tid,
1384 	};
1385 	struct iwl_host_cmd hcmd = {
1386 		.id = cmd_id,
1387 		.len = { sizeof(cmd) },
1388 		.data = { &cmd, },
1389 		.flags = CMD_WANT_SKB,
1390 	};
1391 	int ret;
1392 
1393 	ret = iwl_trans_pcie_dyn_txq_alloc_dma(trans, &txq, size, timeout);
1394 	if (ret)
1395 		return ret;
1396 
1397 	cmd.tfdq_addr = cpu_to_le64(txq->dma_addr);
1398 	cmd.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
1399 	cmd.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
1400 
1401 	ret = iwl_trans_send_cmd(trans, &hcmd);
1402 	if (ret)
1403 		goto error;
1404 
1405 	return iwl_trans_pcie_txq_alloc_response(trans, txq, &hcmd);
1406 
1407 error:
1408 	iwl_pcie_gen2_txq_free_memory(trans, txq);
1409 	return ret;
1410 }
1411 
1412 void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue)
1413 {
1414 	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
1415 		 "queue %d out of range", queue))
1416 		return;
1417 
1418 	/*
1419 	 * Upon HW Rfkill - we stop the device, and then stop the queues
1420 	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
1421 	 * allow the op_mode to call txq_disable after it already called
1422 	 * stop_device.
1423 	 */
1424 	if (!test_and_clear_bit(queue, trans->txqs.queue_used)) {
1425 		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1426 			  "queue %d not used", queue);
1427 		return;
1428 	}
1429 
1430 	iwl_pcie_gen2_txq_unmap(trans, queue);
1431 
1432 	iwl_pcie_gen2_txq_free_memory(trans, trans->txqs.txq[queue]);
1433 	trans->txqs.txq[queue] = NULL;
1434 
1435 	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
1436 }
1437 
1438 void iwl_pcie_gen2_tx_free(struct iwl_trans *trans)
1439 {
1440 	int i;
1441 
1442 	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
1443 
1444 	/* Free all TX queues */
1445 	for (i = 0; i < ARRAY_SIZE(trans->txqs.txq); i++) {
1446 		if (!trans->txqs.txq[i])
1447 			continue;
1448 
1449 		iwl_pcie_gen2_txq_free(trans, i);
1450 	}
1451 }
1452 
1453 int iwl_pcie_gen2_tx_init(struct iwl_trans *trans, int txq_id, int queue_size)
1454 {
1455 	struct iwl_txq *queue;
1456 	int ret;
1457 
1458 	/* alloc and init the tx queue */
1459 	if (!trans->txqs.txq[txq_id]) {
1460 		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1461 		if (!queue) {
1462 			IWL_ERR(trans, "Not enough memory for tx queue\n");
1463 			return -ENOMEM;
1464 		}
1465 		trans->txqs.txq[txq_id] = queue;
1466 		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
1467 		if (ret) {
1468 			IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
1469 			goto error;
1470 		}
1471 	} else {
1472 		queue = trans->txqs.txq[txq_id];
1473 	}
1474 
1475 	ret = iwl_pcie_txq_init(trans, queue, queue_size,
1476 				(txq_id == trans->txqs.cmd.q_id));
1477 	if (ret) {
1478 		IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
1479 		goto error;
1480 	}
1481 	trans->txqs.txq[txq_id]->id = txq_id;
1482 	set_bit(txq_id, trans->txqs.queue_used);
1483 
1484 	return 0;
1485 
1486 error:
1487 	iwl_pcie_gen2_tx_free(trans);
1488 	return ret;
1489 }
1490 
1491