1 /******************************************************************************
2  *
3  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
4  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
5  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
6  * Copyright(c) 2018 Intel Corporation
7  *
8  * Portions of this file are derived from the ipw3945 project, as well
9  * as portions of the ieee80211 subsystem header files.
10  *
11  * This program is free software; you can redistribute it and/or modify it
12  * under the terms of version 2 of the GNU General Public License as
13  * published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18  * more details.
19  *
20  * You should have received a copy of the GNU General Public License along with
21  * this program; if not, write to the Free Software Foundation, Inc.,
22  * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
23  *
24  * The full GNU General Public License is included in this distribution in the
25  * file called LICENSE.
26  *
27  * Contact Information:
28  *  Intel Linux Wireless <linuxwifi@intel.com>
29  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
30  *
31  *****************************************************************************/
32 #include <linux/etherdevice.h>
33 #include <linux/ieee80211.h>
34 #include <linux/slab.h>
35 #include <linux/sched.h>
36 #include <linux/pm_runtime.h>
37 #include <net/ip6_checksum.h>
38 #include <net/tso.h>
39 
40 #include "iwl-debug.h"
41 #include "iwl-csr.h"
42 #include "iwl-prph.h"
43 #include "iwl-io.h"
44 #include "iwl-scd.h"
45 #include "iwl-op-mode.h"
46 #include "internal.h"
47 #include "fw/api/tx.h"
48 
49 #define IWL_TX_CRC_SIZE 4
50 #define IWL_TX_DELIMITER_SIZE 4
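/*
 * Both are accounted for on top of the frame length when filling the
 * scheduler byte-count table (see iwl_pcie_txq_update_byte_cnt_tbl()):
 * 4 bytes of FCS/CRC and 4 bytes of frame delimiter.
 */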
51 
52 /*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
53  * DMA services
54  *
55  * Theory of operation
56  *
57  * A Tx or Rx queue resides in host DRAM, and is comprised of a circular buffer
58  * of buffer descriptors, each of which points to one or more data buffers for
59  * the device to read from or fill.  Driver and device exchange status of each
 * queue via "read" and "write" pointers.  The driver keeps a minimum of 2
 * empty entries in each circular buffer, to protect against confusing empty
 * and full queue states.
63  *
64  * The device reads or writes the data in the queues via the device's several
65  * DMA/FIFO channels.  Each queue is mapped to a single DMA channel.
66  *
 * For the Tx queue, there are low mark and high mark limits.  If, after
 * queuing a packet for Tx, the free space becomes < low mark, the Tx queue is
 * stopped.  When reclaiming packets (on the 'tx done' IRQ), if the free space
 * becomes > high mark, the Tx queue is resumed.
71  *
72  ***************************************************/
73 
74 int iwl_queue_space(const struct iwl_txq *q)
75 {
76 	unsigned int max;
77 	unsigned int used;
78 
79 	/*
80 	 * To avoid ambiguity between empty and completely full queues, there
81 	 * should always be less than TFD_QUEUE_SIZE_MAX elements in the queue.
82 	 * If q->n_window is smaller than TFD_QUEUE_SIZE_MAX, there is no need
83 	 * to reserve any queue entries for this purpose.
84 	 */
85 	if (q->n_window < TFD_QUEUE_SIZE_MAX)
86 		max = q->n_window;
87 	else
88 		max = TFD_QUEUE_SIZE_MAX - 1;
89 
90 	/*
91 	 * TFD_QUEUE_SIZE_MAX is a power of 2, so the following is equivalent to
92 	 * modulo by TFD_QUEUE_SIZE_MAX and is well defined.
93 	 */
94 	used = (q->write_ptr - q->read_ptr) & (TFD_QUEUE_SIZE_MAX - 1);
95 
96 	if (WARN_ON(used > max))
97 		return 0;
98 
99 	return max - used;
100 }
101 
102 /*
103  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
104  */
105 static int iwl_queue_init(struct iwl_txq *q, int slots_num)
106 {
107 	q->n_window = slots_num;
108 
109 	/* slots_num must be power-of-two size, otherwise
110 	 * iwl_pcie_get_cmd_index is broken. */
111 	if (WARN_ON(!is_power_of_2(slots_num)))
112 		return -EINVAL;
113 
114 	q->low_mark = q->n_window / 4;
115 	if (q->low_mark < 4)
116 		q->low_mark = 4;
117 
118 	q->high_mark = q->n_window / 8;
119 	if (q->high_mark < 2)
120 		q->high_mark = 2;
121 
122 	q->write_ptr = 0;
123 	q->read_ptr = 0;
124 
125 	return 0;
126 }
127 
128 int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
129 			   struct iwl_dma_ptr *ptr, size_t size)
130 {
131 	if (WARN_ON(ptr->addr))
132 		return -EINVAL;
133 
134 	ptr->addr = dma_alloc_coherent(trans->dev, size,
135 				       &ptr->dma, GFP_KERNEL);
136 	if (!ptr->addr)
137 		return -ENOMEM;
138 	ptr->size = size;
139 	return 0;
140 }
141 
142 void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr)
143 {
144 	if (unlikely(!ptr->addr))
145 		return;
146 
147 	dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
148 	memset(ptr, 0, sizeof(*ptr));
149 }
150 
151 static void iwl_pcie_txq_stuck_timer(struct timer_list *t)
152 {
153 	struct iwl_txq *txq = from_timer(txq, t, stuck_timer);
154 	struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
155 	struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
156 
157 	spin_lock(&txq->lock);
158 	/* check if triggered erroneously */
159 	if (txq->read_ptr == txq->write_ptr) {
160 		spin_unlock(&txq->lock);
161 		return;
162 	}
163 	spin_unlock(&txq->lock);
164 
165 	iwl_trans_pcie_log_scd_error(trans, txq);
166 
167 	iwl_force_nmi(trans);
168 }
169 
170 /*
171  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
172  */
173 static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
174 					     struct iwl_txq *txq, u16 byte_cnt,
175 					     int num_tbs)
176 {
177 	struct iwlagn_scd_bc_tbl *scd_bc_tbl;
178 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
179 	int write_ptr = txq->write_ptr;
180 	int txq_id = txq->id;
181 	u8 sec_ctl = 0;
182 	u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
183 	__le16 bc_ent;
184 	struct iwl_tx_cmd *tx_cmd =
185 		(void *)txq->entries[txq->write_ptr].cmd->payload;
186 	u8 sta_id = tx_cmd->sta_id;
187 
188 	scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
189 
190 	sec_ctl = tx_cmd->sec_ctl;
191 
192 	switch (sec_ctl & TX_CMD_SEC_MSK) {
193 	case TX_CMD_SEC_CCM:
194 		len += IEEE80211_CCMP_MIC_LEN;
195 		break;
196 	case TX_CMD_SEC_TKIP:
197 		len += IEEE80211_TKIP_ICV_LEN;
198 		break;
199 	case TX_CMD_SEC_WEP:
200 		len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
201 		break;
202 	}
203 	if (trans_pcie->bc_table_dword)
204 		len = DIV_ROUND_UP(len, 4);
205 
206 	if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
207 		return;
208 
209 	bc_ent = cpu_to_le16(len | (sta_id << 12));
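	/*
	 * A byte-count entry is the 12-bit length (bytes, or dwords when
	 * bc_table_dword is set) with the station id in bits 12..15.  The
	 * first TFD_QUEUE_SIZE_BC_DUP entries are also mirrored after the
	 * TFD_QUEUE_SIZE_MAX regular ones, so the scheduler can read a bit
	 * past the end of the ring without wrapping.
	 */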
210 
211 	scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent;
212 
213 	if (write_ptr < TFD_QUEUE_SIZE_BC_DUP)
214 		scd_bc_tbl[txq_id].
215 			tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent;
216 }
217 
218 static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
219 					    struct iwl_txq *txq)
220 {
221 	struct iwl_trans_pcie *trans_pcie =
222 		IWL_TRANS_GET_PCIE_TRANS(trans);
223 	struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
224 	int txq_id = txq->id;
225 	int read_ptr = txq->read_ptr;
226 	u8 sta_id = 0;
227 	__le16 bc_ent;
228 	struct iwl_tx_cmd *tx_cmd =
229 		(void *)txq->entries[read_ptr].cmd->payload;
230 
231 	WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
232 
233 	if (txq_id != trans_pcie->cmd_queue)
234 		sta_id = tx_cmd->sta_id;
235 
236 	bc_ent = cpu_to_le16(1 | (sta_id << 12));
237 
238 	scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
239 
240 	if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
241 		scd_bc_tbl[txq_id].
242 			tfd_offset[TFD_QUEUE_SIZE_MAX + read_ptr] = bc_ent;
243 }
244 
245 /*
246  * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
247  */
248 static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
249 				    struct iwl_txq *txq)
250 {
251 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
252 	u32 reg = 0;
253 	int txq_id = txq->id;
254 
255 	lockdep_assert_held(&txq->lock);
256 
257 	/*
258 	 * explicitly wake up the NIC if:
259 	 * 1. shadow registers aren't enabled
260 	 * 2. NIC is woken up for CMD regardless of shadow outside this function
261 	 * 3. there is a chance that the NIC is asleep
262 	 */
263 	if (!trans->cfg->base_params->shadow_reg_enable &&
264 	    txq_id != trans_pcie->cmd_queue &&
265 	    test_bit(STATUS_TPOWER_PMI, &trans->status)) {
266 		/*
267 		 * wake up nic if it's powered down ...
268 		 * uCode will wake up, and interrupt us again, so next
269 		 * time we'll skip this part.
270 		 */
271 		reg = iwl_read32(trans, CSR_UCODE_DRV_GP1);
272 
273 		if (reg & CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP) {
274 			IWL_DEBUG_INFO(trans, "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
275 				       txq_id, reg);
276 			iwl_set_bit(trans, CSR_GP_CNTRL,
277 				    BIT(trans->cfg->csr->flag_mac_access_req));
278 			txq->need_update = true;
279 			return;
280 		}
281 	}
282 
283 	/*
284 	 * if not in power-save mode, uCode will never sleep when we're
285 	 * trying to tx (during RFKILL, we're not trying to tx).
286 	 */
287 	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
288 	if (!txq->block)
289 		iwl_write32(trans, HBUS_TARG_WRPTR,
290 			    txq->write_ptr | (txq_id << 8));
291 }
292 
293 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
294 {
295 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
296 	int i;
297 
298 	for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
299 		struct iwl_txq *txq = trans_pcie->txq[i];
300 
301 		if (!test_bit(i, trans_pcie->queue_used))
302 			continue;
303 
304 		spin_lock_bh(&txq->lock);
305 		if (txq->need_update) {
306 			iwl_pcie_txq_inc_wr_ptr(trans, txq);
307 			txq->need_update = false;
308 		}
309 		spin_unlock_bh(&txq->lock);
310 	}
311 }
312 
313 static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans,
314 						  void *_tfd, u8 idx)
315 {
316 
317 	if (trans->cfg->use_tfh) {
318 		struct iwl_tfh_tfd *tfd = _tfd;
319 		struct iwl_tfh_tb *tb = &tfd->tbs[idx];
320 
321 		return (dma_addr_t)(le64_to_cpu(tb->addr));
322 	} else {
323 		struct iwl_tfd *tfd = _tfd;
324 		struct iwl_tfd_tb *tb = &tfd->tbs[idx];
325 		dma_addr_t addr = get_unaligned_le32(&tb->lo);
326 		dma_addr_t hi_len;
327 
328 		if (sizeof(dma_addr_t) <= sizeof(u32))
329 			return addr;
330 
331 		hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
332 
333 		/*
334 		 * shift by 16 twice to avoid warnings on 32-bit
335 		 * (where this code never runs anyway due to the
336 		 * if statement above)
337 		 */
338 		return addr | ((hi_len << 16) << 16);
339 	}
340 }
341 
342 static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd,
343 				       u8 idx, dma_addr_t addr, u16 len)
344 {
345 	struct iwl_tfd *tfd_fh = (void *)tfd;
346 	struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
347 
348 	u16 hi_n_len = len << 4;
349 
350 	put_unaligned_le32(addr, &tb->lo);
351 	hi_n_len |= iwl_get_dma_hi_addr(addr);
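	/* hi_n_len: DMA address bits 32..35 in the low nibble, TB length in
	 * the upper 12 bits */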
352 
353 	tb->hi_n_len = cpu_to_le16(hi_n_len);
354 
355 	tfd_fh->num_tbs = idx + 1;
356 }
357 
358 static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd)
359 {
360 	if (trans->cfg->use_tfh) {
361 		struct iwl_tfh_tfd *tfd = _tfd;
362 
363 		return le16_to_cpu(tfd->num_tbs) & 0x1f;
364 	} else {
365 		struct iwl_tfd *tfd = _tfd;
366 
367 		return tfd->num_tbs & 0x1f;
368 	}
369 }
370 
371 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
372 			       struct iwl_cmd_meta *meta,
373 			       struct iwl_txq *txq, int index)
374 {
375 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
376 	int i, num_tbs;
377 	void *tfd = iwl_pcie_get_tfd(trans, txq, index);
378 
379 	/* Sanity check on number of chunks */
380 	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
381 
382 	if (num_tbs > trans_pcie->max_tbs) {
383 		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
		/* @todo issue fatal error, this is quite a serious situation */
385 		return;
386 	}
387 
388 	/* first TB is never freed - it's the bidirectional DMA data */
389 
390 	for (i = 1; i < num_tbs; i++) {
391 		if (meta->tbs & BIT(i))
392 			dma_unmap_page(trans->dev,
393 				       iwl_pcie_tfd_tb_get_addr(trans, tfd, i),
394 				       iwl_pcie_tfd_tb_get_len(trans, tfd, i),
395 				       DMA_TO_DEVICE);
396 		else
397 			dma_unmap_single(trans->dev,
398 					 iwl_pcie_tfd_tb_get_addr(trans, tfd,
399 								  i),
400 					 iwl_pcie_tfd_tb_get_len(trans, tfd,
401 								 i),
402 					 DMA_TO_DEVICE);
403 	}
404 
405 	if (trans->cfg->use_tfh) {
406 		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
407 
408 		tfd_fh->num_tbs = 0;
409 	} else {
410 		struct iwl_tfd *tfd_fh = (void *)tfd;
411 
412 		tfd_fh->num_tbs = 0;
413 	}
414 
415 }
416 
417 /*
 * iwl_pcie_txq_free_tfd - Free all chunks referenced by TFD [txq->read_ptr]
 * @trans - transport private data
 * @txq - tx queue
422  *
423  * Does NOT advance any TFD circular buffer read/write indexes
424  * Does NOT free the TFD itself (which is within circular buffer)
425  */
426 void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
427 {
428 	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
429 	 * idx is bounded by n_window
430 	 */
431 	int rd_ptr = txq->read_ptr;
432 	int idx = iwl_pcie_get_cmd_index(txq, rd_ptr);
433 
434 	lockdep_assert_held(&txq->lock);
435 
436 	/* We have only q->n_window txq->entries, but we use
437 	 * TFD_QUEUE_SIZE_MAX tfds
438 	 */
439 	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
440 
441 	/* free SKB */
442 	if (txq->entries) {
443 		struct sk_buff *skb;
444 
445 		skb = txq->entries[idx].skb;
446 
447 		/* Can be called from irqs-disabled context
448 		 * If skb is not NULL, it means that the whole queue is being
449 		 * freed and that the queue is not empty - free the skb
450 		 */
451 		if (skb) {
452 			iwl_op_mode_free_skb(trans->op_mode, skb);
453 			txq->entries[idx].skb = NULL;
454 		}
455 	}
456 }
457 
458 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
459 				  dma_addr_t addr, u16 len, bool reset)
460 {
461 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
462 	void *tfd;
463 	u32 num_tbs;
464 
465 	tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
466 
467 	if (reset)
468 		memset(tfd, 0, trans_pcie->tfd_size);
469 
470 	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
471 
	/* Each TFD can point to a maximum of max_tbs Tx buffers */
473 	if (num_tbs >= trans_pcie->max_tbs) {
474 		IWL_ERR(trans, "Error can not send more than %d chunks\n",
475 			trans_pcie->max_tbs);
476 		return -EINVAL;
477 	}
478 
479 	if (WARN(addr & ~IWL_TX_DMA_MASK,
480 		 "Unaligned address = %llx\n", (unsigned long long)addr))
481 		return -EINVAL;
482 
483 	iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len);
484 
485 	return num_tbs;
486 }
487 
488 int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq,
489 		       int slots_num, bool cmd_queue)
490 {
491 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
492 	size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX;
493 	size_t tb0_buf_sz;
494 	int i;
495 
496 	if (WARN_ON(txq->entries || txq->tfds))
497 		return -EINVAL;
498 
499 	if (trans->cfg->use_tfh)
500 		tfd_sz = trans_pcie->tfd_size * slots_num;
501 
502 	timer_setup(&txq->stuck_timer, iwl_pcie_txq_stuck_timer, 0);
503 	txq->trans_pcie = trans_pcie;
504 
505 	txq->n_window = slots_num;
506 
507 	txq->entries = kcalloc(slots_num,
508 			       sizeof(struct iwl_pcie_txq_entry),
509 			       GFP_KERNEL);
510 
511 	if (!txq->entries)
512 		goto error;
513 
514 	if (cmd_queue)
515 		for (i = 0; i < slots_num; i++) {
516 			txq->entries[i].cmd =
517 				kmalloc(sizeof(struct iwl_device_cmd),
518 					GFP_KERNEL);
519 			if (!txq->entries[i].cmd)
520 				goto error;
521 		}
522 
523 	/* Circular buffer of transmit frame descriptors (TFDs),
524 	 * shared with device */
525 	txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
526 				       &txq->dma_addr, GFP_KERNEL);
527 	if (!txq->tfds)
528 		goto error;
529 
530 	BUILD_BUG_ON(IWL_FIRST_TB_SIZE_ALIGN != sizeof(*txq->first_tb_bufs));
531 
532 	tb0_buf_sz = sizeof(*txq->first_tb_bufs) * slots_num;
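	/* one small scratch buffer per window entry; it backs TB0, the first
	 * (bidirectional) TB of every TFD, so keep it in coherent memory */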
533 
534 	txq->first_tb_bufs = dma_alloc_coherent(trans->dev, tb0_buf_sz,
535 					      &txq->first_tb_dma,
536 					      GFP_KERNEL);
537 	if (!txq->first_tb_bufs)
538 		goto err_free_tfds;
539 
540 	return 0;
541 err_free_tfds:
542 	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
543 error:
544 	if (txq->entries && cmd_queue)
545 		for (i = 0; i < slots_num; i++)
546 			kfree(txq->entries[i].cmd);
547 	kfree(txq->entries);
548 	txq->entries = NULL;
549 
	return -ENOMEM;
}
553 
554 int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
555 		      int slots_num, bool cmd_queue)
556 {
557 	int ret;
558 
559 	txq->need_update = false;
560 
561 	/* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
562 	 * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
563 	BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
564 
565 	/* Initialize queue's high/low-water marks, and head/tail indexes */
566 	ret = iwl_queue_init(txq, slots_num);
567 	if (ret)
568 		return ret;
569 
570 	spin_lock_init(&txq->lock);
571 
572 	if (cmd_queue) {
573 		static struct lock_class_key iwl_pcie_cmd_queue_lock_class;
574 
575 		lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class);
576 	}
577 
578 	__skb_queue_head_init(&txq->overflow_q);
579 
580 	return 0;
581 }
582 
583 void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
584 			    struct sk_buff *skb)
585 {
586 	struct page **page_ptr;
587 
588 	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
589 
590 	if (*page_ptr) {
591 		__free_page(*page_ptr);
592 		*page_ptr = NULL;
593 	}
594 }
595 
596 static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
597 {
598 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
599 
600 	lockdep_assert_held(&trans_pcie->reg_lock);
601 
602 	if (trans_pcie->ref_cmd_in_flight) {
603 		trans_pcie->ref_cmd_in_flight = false;
604 		IWL_DEBUG_RPM(trans, "clear ref_cmd_in_flight - unref\n");
605 		iwl_trans_unref(trans);
606 	}
607 
608 	if (!trans->cfg->base_params->apmg_wake_up_wa)
609 		return;
610 	if (WARN_ON(!trans_pcie->cmd_hold_nic_awake))
611 		return;
612 
613 	trans_pcie->cmd_hold_nic_awake = false;
614 	__iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
615 				   BIT(trans->cfg->csr->flag_mac_access_req));
616 }
617 
618 /*
619  * iwl_pcie_txq_unmap -  Unmap any remaining DMA mappings and free skb's
620  */
621 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
622 {
623 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
624 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
625 
626 	spin_lock_bh(&txq->lock);
627 	while (txq->write_ptr != txq->read_ptr) {
628 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
629 				   txq_id, txq->read_ptr);
630 
631 		if (txq_id != trans_pcie->cmd_queue) {
632 			struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
633 
634 			if (WARN_ON_ONCE(!skb))
635 				continue;
636 
637 			iwl_pcie_free_tso_page(trans_pcie, skb);
638 		}
639 		iwl_pcie_txq_free_tfd(trans, txq);
640 		txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr);
641 
642 		if (txq->read_ptr == txq->write_ptr) {
643 			unsigned long flags;
644 
645 			spin_lock_irqsave(&trans_pcie->reg_lock, flags);
646 			if (txq_id != trans_pcie->cmd_queue) {
647 				IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n",
648 					      txq->id);
649 				iwl_trans_unref(trans);
650 			} else {
651 				iwl_pcie_clear_cmd_in_flight(trans);
652 			}
653 			spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
654 		}
655 	}
656 
657 	while (!skb_queue_empty(&txq->overflow_q)) {
658 		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
659 
660 		iwl_op_mode_free_skb(trans->op_mode, skb);
661 	}
662 
663 	spin_unlock_bh(&txq->lock);
664 
665 	/* just in case - this queue may have been stopped */
666 	iwl_wake_queue(trans, txq);
667 }
668 
669 /*
670  * iwl_pcie_txq_free - Deallocate DMA queue.
671  * @txq: Transmit queue to deallocate.
672  *
673  * Empty queue by removing and destroying all BD's.
674  * Free all buffers.
675  * 0-fill, but do not free "txq" descriptor structure.
676  */
677 static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
678 {
679 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
680 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
681 	struct device *dev = trans->dev;
682 	int i;
683 
684 	if (WARN_ON(!txq))
685 		return;
686 
687 	iwl_pcie_txq_unmap(trans, txq_id);
688 
689 	/* De-alloc array of command/tx buffers */
690 	if (txq_id == trans_pcie->cmd_queue)
691 		for (i = 0; i < txq->n_window; i++) {
692 			kzfree(txq->entries[i].cmd);
693 			kzfree(txq->entries[i].free_buf);
694 		}
695 
696 	/* De-alloc circular buffer of TFDs */
697 	if (txq->tfds) {
698 		dma_free_coherent(dev,
699 				  trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX,
700 				  txq->tfds, txq->dma_addr);
701 		txq->dma_addr = 0;
702 		txq->tfds = NULL;
703 
704 		dma_free_coherent(dev,
705 				  sizeof(*txq->first_tb_bufs) * txq->n_window,
706 				  txq->first_tb_bufs, txq->first_tb_dma);
707 	}
708 
709 	kfree(txq->entries);
710 	txq->entries = NULL;
711 
712 	del_timer_sync(&txq->stuck_timer);
713 
714 	/* 0-fill queue descriptor structure */
715 	memset(txq, 0, sizeof(*txq));
716 }
717 
718 void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
719 {
720 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
721 	int nq = trans->cfg->base_params->num_of_queues;
722 	int chan;
723 	u32 reg_val;
724 	int clear_dwords = (SCD_TRANS_TBL_OFFSET_QUEUE(nq) -
725 				SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(u32);
726 
	/* make sure all queues are not stopped/used */
728 	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
729 	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
730 
731 	trans_pcie->scd_base_addr =
732 		iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
733 
734 	WARN_ON(scd_base_addr != 0 &&
735 		scd_base_addr != trans_pcie->scd_base_addr);
736 
737 	/* reset context data, TX status and translation data */
738 	iwl_trans_write_mem(trans, trans_pcie->scd_base_addr +
739 				   SCD_CONTEXT_MEM_LOWER_BOUND,
740 			    NULL, clear_dwords);
741 
742 	iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
743 		       trans_pcie->scd_bc_tbls.dma >> 10);
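	/* the byte-count table base address is given in 1 KB units (>> 10) */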
744 
745 	/* The chain extension of the SCD doesn't work well. This feature is
746 	 * enabled by default by the HW, so we need to disable it manually.
747 	 */
748 	if (trans->cfg->base_params->scd_chain_ext_wa)
749 		iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
750 
751 	iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
752 				trans_pcie->cmd_fifo,
753 				trans_pcie->cmd_q_wdg_timeout);
754 
755 	/* Activate all Tx DMA/FIFO channels */
756 	iwl_scd_activate_fifos(trans);
757 
758 	/* Enable DMA channel */
759 	for (chan = 0; chan < FH_TCSR_CHNL_NUM; chan++)
760 		iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
761 				   FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
762 				   FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
763 
764 	/* Update FH chicken bits */
765 	reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
766 	iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
767 			   reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
768 
769 	/* Enable L1-Active */
770 	if (trans->cfg->device_family < IWL_DEVICE_FAMILY_8000)
771 		iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
772 				    APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
773 }
774 
775 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
776 {
777 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
778 	int txq_id;
779 
780 	/*
781 	 * we should never get here in gen2 trans mode return early to avoid
782 	 * having invalid accesses
783 	 */
784 	if (WARN_ON_ONCE(trans->cfg->gen2))
785 		return;
786 
787 	for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
788 	     txq_id++) {
789 		struct iwl_txq *txq = trans_pcie->txq[txq_id];
790 		if (trans->cfg->use_tfh)
791 			iwl_write_direct64(trans,
792 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
793 					   txq->dma_addr);
794 		else
795 			iwl_write_direct32(trans,
796 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
797 					   txq->dma_addr >> 8);
798 		iwl_pcie_txq_unmap(trans, txq_id);
799 		txq->read_ptr = 0;
800 		txq->write_ptr = 0;
801 	}
802 
803 	/* Tell NIC where to find the "keep warm" buffer */
804 	iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
805 			   trans_pcie->kw.dma >> 4);
806 
807 	/*
	 * Send 0 as the scd_base_addr since the device may have been reset
809 	 * while we were in WoWLAN in which case SCD_SRAM_BASE_ADDR will
810 	 * contain garbage.
811 	 */
812 	iwl_pcie_tx_start(trans, 0);
813 }
814 
815 static void iwl_pcie_tx_stop_fh(struct iwl_trans *trans)
816 {
817 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
818 	unsigned long flags;
819 	int ch, ret;
820 	u32 mask = 0;
821 
822 	spin_lock(&trans_pcie->irq_lock);
823 
824 	if (!iwl_trans_grab_nic_access(trans, &flags))
825 		goto out;
826 
827 	/* Stop each Tx DMA channel */
828 	for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
829 		iwl_write32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
830 		mask |= FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch);
831 	}
832 
833 	/* Wait for DMA channels to be idle */
834 	ret = iwl_poll_bit(trans, FH_TSSR_TX_STATUS_REG, mask, mask, 5000);
835 	if (ret < 0)
836 		IWL_ERR(trans,
837 			"Failing on timeout while stopping DMA channel %d [0x%08x]\n",
838 			ch, iwl_read32(trans, FH_TSSR_TX_STATUS_REG));
839 
840 	iwl_trans_release_nic_access(trans, &flags);
841 
842 out:
843 	spin_unlock(&trans_pcie->irq_lock);
844 }
845 
846 /*
847  * iwl_pcie_tx_stop - Stop all Tx DMA channels
848  */
849 int iwl_pcie_tx_stop(struct iwl_trans *trans)
850 {
851 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
852 	int txq_id;
853 
854 	/* Turn off all Tx DMA fifos */
855 	iwl_scd_deactivate_fifos(trans);
856 
857 	/* Turn off all Tx DMA channels */
858 	iwl_pcie_tx_stop_fh(trans);
859 
860 	/*
861 	 * This function can be called before the op_mode disabled the
862 	 * queues. This happens when we have an rfkill interrupt.
863 	 * Since we stop Tx altogether - mark the queues as stopped.
864 	 */
865 	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
866 	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
867 
868 	/* This can happen: start_hw, stop_device */
869 	if (!trans_pcie->txq_memory)
870 		return 0;
871 
872 	/* Unmap DMA from host system and free skb's */
873 	for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
874 	     txq_id++)
875 		iwl_pcie_txq_unmap(trans, txq_id);
876 
877 	return 0;
878 }
879 
880 /*
881  * iwl_trans_tx_free - Free TXQ Context
882  *
883  * Destroy all TX DMA queues and structures
884  */
885 void iwl_pcie_tx_free(struct iwl_trans *trans)
886 {
887 	int txq_id;
888 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
889 
890 	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
891 
892 	/* Tx queues */
893 	if (trans_pcie->txq_memory) {
894 		for (txq_id = 0;
895 		     txq_id < trans->cfg->base_params->num_of_queues;
896 		     txq_id++) {
897 			iwl_pcie_txq_free(trans, txq_id);
898 			trans_pcie->txq[txq_id] = NULL;
899 		}
900 	}
901 
902 	kfree(trans_pcie->txq_memory);
903 	trans_pcie->txq_memory = NULL;
904 
905 	iwl_pcie_free_dma_ptr(trans, &trans_pcie->kw);
906 
907 	iwl_pcie_free_dma_ptr(trans, &trans_pcie->scd_bc_tbls);
908 }
909 
910 /*
911  * iwl_pcie_tx_alloc - allocate TX context
912  * Allocate all Tx DMA structures and initialize them
913  */
914 static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
915 {
916 	int ret;
917 	int txq_id, slots_num;
918 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
919 
920 	u16 scd_bc_tbls_size = trans->cfg->base_params->num_of_queues *
921 			sizeof(struct iwlagn_scd_bc_tbl);
922 
	/* It is not allowed to alloc twice, so warn when this happens.
	 * We cannot rely on the previous allocation, so free and fail. */
925 	if (WARN_ON(trans_pcie->txq_memory)) {
926 		ret = -EINVAL;
927 		goto error;
928 	}
929 
930 	ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->scd_bc_tbls,
931 				   scd_bc_tbls_size);
932 	if (ret) {
933 		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
934 		goto error;
935 	}
936 
937 	/* Alloc keep-warm buffer */
938 	ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
939 	if (ret) {
940 		IWL_ERR(trans, "Keep Warm allocation failed\n");
941 		goto error;
942 	}
943 
944 	trans_pcie->txq_memory = kcalloc(trans->cfg->base_params->num_of_queues,
945 					 sizeof(struct iwl_txq), GFP_KERNEL);
946 	if (!trans_pcie->txq_memory) {
947 		IWL_ERR(trans, "Not enough memory for txq\n");
948 		ret = -ENOMEM;
949 		goto error;
950 	}
951 
952 	/* Alloc and init all Tx queues, including the command queue (#4/#9) */
953 	for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
954 	     txq_id++) {
955 		bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
956 
957 		slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
958 		trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id];
959 		ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id],
960 					 slots_num, cmd_queue);
961 		if (ret) {
962 			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
963 			goto error;
964 		}
965 		trans_pcie->txq[txq_id]->id = txq_id;
966 	}
967 
968 	return 0;
969 
970 error:
971 	iwl_pcie_tx_free(trans);
972 
973 	return ret;
974 }
975 
976 int iwl_pcie_tx_init(struct iwl_trans *trans)
977 {
978 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
979 	int ret;
980 	int txq_id, slots_num;
981 	bool alloc = false;
982 
983 	if (!trans_pcie->txq_memory) {
984 		ret = iwl_pcie_tx_alloc(trans);
985 		if (ret)
986 			goto error;
987 		alloc = true;
988 	}
989 
990 	spin_lock(&trans_pcie->irq_lock);
991 
992 	/* Turn off all Tx DMA fifos */
993 	iwl_scd_deactivate_fifos(trans);
994 
995 	/* Tell NIC where to find the "keep warm" buffer */
996 	iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
997 			   trans_pcie->kw.dma >> 4);
998 
999 	spin_unlock(&trans_pcie->irq_lock);
1000 
1001 	/* Alloc and init all Tx queues, including the command queue (#4/#9) */
1002 	for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
1003 	     txq_id++) {
1004 		bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
1005 
1006 		slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
1007 		ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id],
1008 					slots_num, cmd_queue);
1009 		if (ret) {
1010 			IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
1011 			goto error;
1012 		}
1013 
1014 		/*
1015 		 * Tell nic where to find circular buffer of TFDs for a
1016 		 * given Tx queue, and enable the DMA channel used for that
1017 		 * queue.
1018 		 * Circular buffer (TFD queue in DRAM) physical base address
1019 		 */
1020 		iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
1021 				   trans_pcie->txq[txq_id]->dma_addr >> 8);
1022 	}
1023 
1024 	iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
1025 	if (trans->cfg->base_params->num_of_queues > 20)
1026 		iwl_set_bits_prph(trans, SCD_GP_CTRL,
1027 				  SCD_GP_CTRL_ENABLE_31_QUEUES);
1028 
1029 	return 0;
1030 error:
	/* Upon error, free only if we allocated something */
1032 	if (alloc)
1033 		iwl_pcie_tx_free(trans);
1034 	return ret;
1035 }
1036 
1037 static inline void iwl_pcie_txq_progress(struct iwl_txq *txq)
1038 {
1039 	lockdep_assert_held(&txq->lock);
1040 
1041 	if (!txq->wd_timeout)
1042 		return;
1043 
1044 	/*
1045 	 * station is asleep and we send data - that must
1046 	 * be uAPSD or PS-Poll. Don't rearm the timer.
1047 	 */
1048 	if (txq->frozen)
1049 		return;
1050 
1051 	/*
1052 	 * if empty delete timer, otherwise move timer forward
1053 	 * since we're making progress on this queue
1054 	 */
1055 	if (txq->read_ptr == txq->write_ptr)
1056 		del_timer(&txq->stuck_timer);
1057 	else
1058 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1059 }
1060 
1061 /* Frees buffers until index _not_ inclusive */
1062 void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
1063 			    struct sk_buff_head *skbs)
1064 {
1065 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1066 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
1067 	int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1);
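	/* tfd_num: the first ring entry that must NOT be freed */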
1068 	int last_to_free;
1069 
	/* This function is not meant to release the cmd queue */
1071 	if (WARN_ON(txq_id == trans_pcie->cmd_queue))
1072 		return;
1073 
1074 	spin_lock_bh(&txq->lock);
1075 
1076 	if (!test_bit(txq_id, trans_pcie->queue_used)) {
1077 		IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
1078 				    txq_id, ssn);
1079 		goto out;
1080 	}
1081 
1082 	if (txq->read_ptr == tfd_num)
1083 		goto out;
1084 
1085 	IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
1086 			   txq_id, txq->read_ptr, tfd_num, ssn);
1087 
	/* Since we free until the index, _not_ inclusive, the entry just
	 * before it is the last we will free.  That entry must be in use. */
1090 	last_to_free = iwl_queue_dec_wrap(tfd_num);
1091 
1092 	if (!iwl_queue_used(txq, last_to_free)) {
1093 		IWL_ERR(trans,
1094 			"%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
1095 			__func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX,
1096 			txq->write_ptr, txq->read_ptr);
1097 		goto out;
1098 	}
1099 
1100 	if (WARN_ON(!skb_queue_empty(skbs)))
1101 		goto out;
1102 
1103 	for (;
1104 	     txq->read_ptr != tfd_num;
1105 	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
1106 		int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
1107 		struct sk_buff *skb = txq->entries[idx].skb;
1108 
1109 		if (WARN_ON_ONCE(!skb))
1110 			continue;
1111 
1112 		iwl_pcie_free_tso_page(trans_pcie, skb);
1113 
1114 		__skb_queue_tail(skbs, skb);
1115 
1116 		txq->entries[idx].skb = NULL;
1117 
1118 		if (!trans->cfg->use_tfh)
1119 			iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
1120 
1121 		iwl_pcie_txq_free_tfd(trans, txq);
1122 	}
1123 
1124 	iwl_pcie_txq_progress(txq);
1125 
1126 	if (iwl_queue_space(txq) > txq->low_mark &&
1127 	    test_bit(txq_id, trans_pcie->queue_stopped)) {
1128 		struct sk_buff_head overflow_skbs;
1129 
1130 		__skb_queue_head_init(&overflow_skbs);
1131 		skb_queue_splice_init(&txq->overflow_q, &overflow_skbs);
1132 
1133 		/*
		 * This is tricky: we are in the reclaim path, which is not
		 * re-entrant, so no one else will try to access the txq data
		 * from that path.  We stopped tx, so we can't have tx here
		 * either.  Bottom line: we can unlock and re-lock later.
1139 		 */
1140 		spin_unlock_bh(&txq->lock);
1141 
1142 		while (!skb_queue_empty(&overflow_skbs)) {
1143 			struct sk_buff *skb = __skb_dequeue(&overflow_skbs);
1144 			struct iwl_device_cmd *dev_cmd_ptr;
1145 
1146 			dev_cmd_ptr = *(void **)((u8 *)skb->cb +
1147 						 trans_pcie->dev_cmd_offs);
1148 
1149 			/*
1150 			 * Note that we can very well be overflowing again.
1151 			 * In that case, iwl_queue_space will be small again
1152 			 * and we won't wake mac80211's queue.
1153 			 */
1154 			iwl_trans_tx(trans, skb, dev_cmd_ptr, txq_id);
1155 		}
1156 		spin_lock_bh(&txq->lock);
1157 
1158 		if (iwl_queue_space(txq) > txq->low_mark)
1159 			iwl_wake_queue(trans, txq);
1160 	}
1161 
1162 	if (txq->read_ptr == txq->write_ptr) {
1163 		IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id);
1164 		iwl_trans_unref(trans);
1165 	}
1166 
1167 out:
1168 	spin_unlock_bh(&txq->lock);
1169 }
1170 
1171 static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
1172 				      const struct iwl_host_cmd *cmd)
1173 {
1174 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1175 	const struct iwl_cfg *cfg = trans->cfg;
1176 	int ret;
1177 
1178 	lockdep_assert_held(&trans_pcie->reg_lock);
1179 
1180 	if (!(cmd->flags & CMD_SEND_IN_IDLE) &&
1181 	    !trans_pcie->ref_cmd_in_flight) {
1182 		trans_pcie->ref_cmd_in_flight = true;
1183 		IWL_DEBUG_RPM(trans, "set ref_cmd_in_flight - ref\n");
1184 		iwl_trans_ref(trans);
1185 	}
1186 
1187 	/*
1188 	 * wake up the NIC to make sure that the firmware will see the host
1189 	 * command - we will let the NIC sleep once all the host commands
1190 	 * returned. This needs to be done only on NICs that have
1191 	 * apmg_wake_up_wa set.
1192 	 */
1193 	if (cfg->base_params->apmg_wake_up_wa &&
1194 	    !trans_pcie->cmd_hold_nic_awake) {
1195 		__iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
1196 					 BIT(cfg->csr->flag_mac_access_req));
1197 
1198 		ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
1199 				   BIT(cfg->csr->flag_val_mac_access_en),
1200 				   (BIT(cfg->csr->flag_mac_clock_ready) |
1201 				    CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP),
1202 				   15000);
1203 		if (ret < 0) {
1204 			__iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
1205 					BIT(cfg->csr->flag_mac_access_req));
1206 			IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
1207 			return -EIO;
1208 		}
1209 		trans_pcie->cmd_hold_nic_awake = true;
1210 	}
1211 
1212 	return 0;
1213 }
1214 
1215 /*
1216  * iwl_pcie_cmdq_reclaim - Reclaim TX command queue entries already Tx'd
1217  *
1218  * When FW advances 'R' index, all entries between old and new 'R' index
 * need to be reclaimed. As a result, some free space forms.  If there is
1220  * enough free space (> low mark), wake the stack that feeds us.
1221  */
1222 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
1223 {
1224 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1225 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
1226 	unsigned long flags;
1227 	int nfreed = 0;
1228 
1229 	lockdep_assert_held(&txq->lock);
1230 
1231 	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) {
1232 		IWL_ERR(trans,
1233 			"%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
1234 			__func__, txq_id, idx, TFD_QUEUE_SIZE_MAX,
1235 			txq->write_ptr, txq->read_ptr);
1236 		return;
1237 	}
1238 
1239 	for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx;
1240 	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
1241 
1242 		if (nfreed++ > 0) {
1243 			IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
1244 				idx, txq->write_ptr, txq->read_ptr);
1245 			iwl_force_nmi(trans);
1246 		}
1247 	}
1248 
1249 	if (txq->read_ptr == txq->write_ptr) {
1250 		spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1251 		iwl_pcie_clear_cmd_in_flight(trans);
1252 		spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1253 	}
1254 
1255 	iwl_pcie_txq_progress(txq);
1256 }
1257 
1258 static int iwl_pcie_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
1259 				 u16 txq_id)
1260 {
1261 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1262 	u32 tbl_dw_addr;
1263 	u32 tbl_dw;
1264 	u16 scd_q2ratid;
1265 
1266 	scd_q2ratid = ra_tid & SCD_QUEUE_RA_TID_MAP_RATID_MSK;
1267 
1268 	tbl_dw_addr = trans_pcie->scd_base_addr +
1269 			SCD_TRANS_TBL_OFFSET_QUEUE(txq_id);
1270 
1271 	tbl_dw = iwl_trans_read_mem32(trans, tbl_dw_addr);
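	/* each translation table dword maps two queues: the even queue in the
	 * low 16 bits, the odd queue in the high 16 bits */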
1272 
1273 	if (txq_id & 0x1)
1274 		tbl_dw = (scd_q2ratid << 16) | (tbl_dw & 0x0000FFFF);
1275 	else
1276 		tbl_dw = scd_q2ratid | (tbl_dw & 0xFFFF0000);
1277 
1278 	iwl_trans_write_mem32(trans, tbl_dw_addr, tbl_dw);
1279 
1280 	return 0;
1281 }
1282 
1283 /* Receiver address (actually, Rx station's index into station table),
1284  * combined with Traffic ID (QOS priority), in format used by Tx Scheduler */
1285 #define BUILD_RAxTID(sta_id, tid)	(((sta_id) << 4) + (tid))
1286 
1287 bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
1288 			       const struct iwl_trans_txq_scd_cfg *cfg,
1289 			       unsigned int wdg_timeout)
1290 {
1291 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1292 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
1293 	int fifo = -1;
1294 	bool scd_bug = false;
1295 
1296 	if (test_and_set_bit(txq_id, trans_pcie->queue_used))
1297 		WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
1298 
1299 	txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
1300 
1301 	if (cfg) {
1302 		fifo = cfg->fifo;
1303 
		/* Disable the scheduler prior to configuring the cmd queue */
1305 		if (txq_id == trans_pcie->cmd_queue &&
1306 		    trans_pcie->scd_set_active)
1307 			iwl_scd_enable_set_active(trans, 0);
1308 
1309 		/* Stop this Tx queue before configuring it */
1310 		iwl_scd_txq_set_inactive(trans, txq_id);
1311 
1312 		/* Set this queue as a chain-building queue unless it is CMD */
1313 		if (txq_id != trans_pcie->cmd_queue)
1314 			iwl_scd_txq_set_chain(trans, txq_id);
1315 
1316 		if (cfg->aggregate) {
1317 			u16 ra_tid = BUILD_RAxTID(cfg->sta_id, cfg->tid);
1318 
1319 			/* Map receiver-address / traffic-ID to this queue */
1320 			iwl_pcie_txq_set_ratid_map(trans, ra_tid, txq_id);
1321 
1322 			/* enable aggregations for the queue */
1323 			iwl_scd_txq_enable_agg(trans, txq_id);
1324 			txq->ampdu = true;
1325 		} else {
1326 			/*
1327 			 * disable aggregations for the queue, this will also
1328 			 * make the ra_tid mapping configuration irrelevant
1329 			 * since it is now a non-AGG queue.
1330 			 */
1331 			iwl_scd_txq_disable_agg(trans, txq_id);
1332 
1333 			ssn = txq->read_ptr;
1334 		}
1335 	} else {
1336 		/*
1337 		 * If we need to move the SCD write pointer by steps of
		 * 0x40, 0x80 or 0xc0, it gets stuck.  Avoid this and let
		 * the op_mode know by returning true later.
1340 		 * Do this only in case cfg is NULL since this trick can
1341 		 * be done only if we have DQA enabled which is true for mvm
1342 		 * only. And mvm never sets a cfg pointer.
1343 		 * This is really ugly, but this is the easiest way out for
1344 		 * this sad hardware issue.
1345 		 * This bug has been fixed on devices 9000 and up.
1346 		 */
1347 		scd_bug = !trans->cfg->mq_rx_supported &&
1348 			!((ssn - txq->write_ptr) & 0x3f) &&
1349 			(ssn != txq->write_ptr);
1350 		if (scd_bug)
1351 			ssn++;
1352 	}
1353 
	/* Place first TFD at index corresponding to start sequence number.
	 * Assumes that ssn is valid (!= 0xFFF) */
1356 	txq->read_ptr = (ssn & 0xff);
1357 	txq->write_ptr = (ssn & 0xff);
1358 	iwl_write_direct32(trans, HBUS_TARG_WRPTR,
1359 			   (ssn & 0xff) | (txq_id << 8));
1360 
1361 	if (cfg) {
1362 		u8 frame_limit = cfg->frame_limit;
1363 
1364 		iwl_write_prph(trans, SCD_QUEUE_RDPTR(txq_id), ssn);
1365 
1366 		/* Set up Tx window size and frame limit for this queue */
1367 		iwl_trans_write_mem32(trans, trans_pcie->scd_base_addr +
1368 				SCD_CONTEXT_QUEUE_OFFSET(txq_id), 0);
1369 		iwl_trans_write_mem32(trans,
1370 			trans_pcie->scd_base_addr +
1371 			SCD_CONTEXT_QUEUE_OFFSET(txq_id) + sizeof(u32),
1372 			SCD_QUEUE_CTX_REG2_VAL(WIN_SIZE, frame_limit) |
1373 			SCD_QUEUE_CTX_REG2_VAL(FRAME_LIMIT, frame_limit));
1374 
1375 		/* Set up status area in SRAM, map to Tx DMA/FIFO, activate */
1376 		iwl_write_prph(trans, SCD_QUEUE_STATUS_BITS(txq_id),
1377 			       (1 << SCD_QUEUE_STTS_REG_POS_ACTIVE) |
1378 			       (cfg->fifo << SCD_QUEUE_STTS_REG_POS_TXF) |
1379 			       (1 << SCD_QUEUE_STTS_REG_POS_WSL) |
1380 			       SCD_QUEUE_STTS_REG_MSK);
1381 
1382 		/* enable the scheduler for this queue (only) */
1383 		if (txq_id == trans_pcie->cmd_queue &&
1384 		    trans_pcie->scd_set_active)
1385 			iwl_scd_enable_set_active(trans, BIT(txq_id));
1386 
1387 		IWL_DEBUG_TX_QUEUES(trans,
1388 				    "Activate queue %d on FIFO %d WrPtr: %d\n",
1389 				    txq_id, fifo, ssn & 0xff);
1390 	} else {
1391 		IWL_DEBUG_TX_QUEUES(trans,
1392 				    "Activate queue %d WrPtr: %d\n",
1393 				    txq_id, ssn & 0xff);
1394 	}
1395 
1396 	return scd_bug;
1397 }
1398 
1399 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
1400 					bool shared_mode)
1401 {
1402 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1403 	struct iwl_txq *txq = trans_pcie->txq[txq_id];
1404 
1405 	txq->ampdu = !shared_mode;
1406 }
1407 
1408 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
1409 				bool configure_scd)
1410 {
1411 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1412 	u32 stts_addr = trans_pcie->scd_base_addr +
1413 			SCD_TX_STTS_QUEUE_OFFSET(txq_id);
1414 	static const u32 zero_val[4] = {};
1415 
1416 	trans_pcie->txq[txq_id]->frozen_expiry_remainder = 0;
1417 	trans_pcie->txq[txq_id]->frozen = false;
1418 
1419 	/*
1420 	 * Upon HW Rfkill - we stop the device, and then stop the queues
1421 	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
1422 	 * allow the op_mode to call txq_disable after it already called
1423 	 * stop_device.
1424 	 */
1425 	if (!test_and_clear_bit(txq_id, trans_pcie->queue_used)) {
1426 		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1427 			  "queue %d not used", txq_id);
1428 		return;
1429 	}
1430 
1431 	if (configure_scd) {
1432 		iwl_scd_txq_set_inactive(trans, txq_id);
1433 
1434 		iwl_trans_write_mem(trans, stts_addr, (void *)zero_val,
1435 				    ARRAY_SIZE(zero_val));
1436 	}
1437 
1438 	iwl_pcie_txq_unmap(trans, txq_id);
1439 	trans_pcie->txq[txq_id]->ampdu = false;
1440 
1441 	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", txq_id);
1442 }
1443 
1444 /*************** HOST COMMAND QUEUE FUNCTIONS   *****/
1445 
1446 /*
1447  * iwl_pcie_enqueue_hcmd - enqueue a uCode command
 * @trans: transport private data
1449  * @cmd: a pointer to the ucode command structure
1450  *
1451  * The function returns < 0 values to indicate the operation
1452  * failed. On success, it returns the index (>= 0) of command in the
1453  * command queue.
1454  */
1455 static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
1456 				 struct iwl_host_cmd *cmd)
1457 {
1458 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1459 	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1460 	struct iwl_device_cmd *out_cmd;
1461 	struct iwl_cmd_meta *out_meta;
1462 	unsigned long flags;
1463 	void *dup_buf = NULL;
1464 	dma_addr_t phys_addr;
1465 	int idx;
1466 	u16 copy_size, cmd_size, tb0_size;
1467 	bool had_nocopy = false;
1468 	u8 group_id = iwl_cmd_groupid(cmd->id);
1469 	int i, ret;
1470 	u32 cmd_pos;
1471 	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
1472 	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
1473 
1474 	if (WARN(!trans->wide_cmd_header &&
1475 		 group_id > IWL_ALWAYS_LONG_GROUP,
1476 		 "unsupported wide command %#x\n", cmd->id))
1477 		return -EINVAL;
1478 
1479 	if (group_id != 0) {
1480 		copy_size = sizeof(struct iwl_cmd_header_wide);
1481 		cmd_size = sizeof(struct iwl_cmd_header_wide);
1482 	} else {
1483 		copy_size = sizeof(struct iwl_cmd_header);
1484 		cmd_size = sizeof(struct iwl_cmd_header);
1485 	}
1486 
1487 	/* need one for the header if the first is NOCOPY */
1488 	BUILD_BUG_ON(IWL_MAX_CMD_TBS_PER_TFD > IWL_NUM_OF_TBS - 1);
1489 
1490 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1491 		cmddata[i] = cmd->data[i];
1492 		cmdlen[i] = cmd->len[i];
1493 
1494 		if (!cmd->len[i])
1495 			continue;
1496 
1497 		/* need at least IWL_FIRST_TB_SIZE copied */
1498 		if (copy_size < IWL_FIRST_TB_SIZE) {
1499 			int copy = IWL_FIRST_TB_SIZE - copy_size;
1500 
1501 			if (copy > cmdlen[i])
1502 				copy = cmdlen[i];
1503 			cmdlen[i] -= copy;
1504 			cmddata[i] += copy;
1505 			copy_size += copy;
1506 		}
1507 
1508 		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
1509 			had_nocopy = true;
1510 			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
1511 				idx = -EINVAL;
1512 				goto free_dup_buf;
1513 			}
1514 		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
1515 			/*
1516 			 * This is also a chunk that isn't copied
1517 			 * to the static buffer so set had_nocopy.
1518 			 */
1519 			had_nocopy = true;
1520 
1521 			/* only allowed once */
1522 			if (WARN_ON(dup_buf)) {
1523 				idx = -EINVAL;
1524 				goto free_dup_buf;
1525 			}
1526 
1527 			dup_buf = kmemdup(cmddata[i], cmdlen[i],
1528 					  GFP_ATOMIC);
1529 			if (!dup_buf)
1530 				return -ENOMEM;
1531 		} else {
1532 			/* NOCOPY must not be followed by normal! */
1533 			if (WARN_ON(had_nocopy)) {
1534 				idx = -EINVAL;
1535 				goto free_dup_buf;
1536 			}
1537 			copy_size += cmdlen[i];
1538 		}
1539 		cmd_size += cmd->len[i];
1540 	}
1541 
1542 	/*
1543 	 * If any of the command structures end up being larger than
1544 	 * the TFD_MAX_PAYLOAD_SIZE and they aren't dynamically
1545 	 * allocated into separate TFDs, then we will need to
1546 	 * increase the size of the buffers.
1547 	 */
1548 	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
1549 		 "Command %s (%#x) is too large (%d bytes)\n",
1550 		 iwl_get_cmd_string(trans, cmd->id),
1551 		 cmd->id, copy_size)) {
1552 		idx = -EINVAL;
1553 		goto free_dup_buf;
1554 	}
1555 
1556 	spin_lock_bh(&txq->lock);
1557 
1558 	if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
1559 		spin_unlock_bh(&txq->lock);
1560 
1561 		IWL_ERR(trans, "No space in command queue\n");
1562 		iwl_op_mode_cmd_queue_full(trans->op_mode);
1563 		idx = -ENOSPC;
1564 		goto free_dup_buf;
1565 	}
1566 
1567 	idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
1568 	out_cmd = txq->entries[idx].cmd;
1569 	out_meta = &txq->entries[idx].meta;
1570 
1571 	memset(out_meta, 0, sizeof(*out_meta));	/* re-initialize to NULL */
1572 	if (cmd->flags & CMD_WANT_SKB)
1573 		out_meta->source = cmd;
1574 
1575 	/* set up the header */
1576 	if (group_id != 0) {
1577 		out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
1578 		out_cmd->hdr_wide.group_id = group_id;
1579 		out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
1580 		out_cmd->hdr_wide.length =
1581 			cpu_to_le16(cmd_size -
1582 				    sizeof(struct iwl_cmd_header_wide));
1583 		out_cmd->hdr_wide.reserved = 0;
1584 		out_cmd->hdr_wide.sequence =
1585 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1586 						 INDEX_TO_SEQ(txq->write_ptr));
1587 
1588 		cmd_pos = sizeof(struct iwl_cmd_header_wide);
1589 		copy_size = sizeof(struct iwl_cmd_header_wide);
1590 	} else {
1591 		out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
1592 		out_cmd->hdr.sequence =
1593 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1594 						 INDEX_TO_SEQ(txq->write_ptr));
1595 		out_cmd->hdr.group_id = 0;
1596 
1597 		cmd_pos = sizeof(struct iwl_cmd_header);
1598 		copy_size = sizeof(struct iwl_cmd_header);
1599 	}
1600 
1601 	/* and copy the data that needs to be copied */
1602 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1603 		int copy;
1604 
1605 		if (!cmd->len[i])
1606 			continue;
1607 
1608 		/* copy everything if not nocopy/dup */
1609 		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1610 					   IWL_HCMD_DFL_DUP))) {
1611 			copy = cmd->len[i];
1612 
1613 			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1614 			cmd_pos += copy;
1615 			copy_size += copy;
1616 			continue;
1617 		}
1618 
1619 		/*
1620 		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
1621 		 * in total (for bi-directional DMA), but copy up to what
1622 		 * we can fit into the payload for debug dump purposes.
1623 		 */
1624 		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
1625 
1626 		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1627 		cmd_pos += copy;
1628 
		/* However, keep copy_size accurate; we need it below */
1630 		if (copy_size < IWL_FIRST_TB_SIZE) {
1631 			copy = IWL_FIRST_TB_SIZE - copy_size;
1632 
1633 			if (copy > cmd->len[i])
1634 				copy = cmd->len[i];
1635 			copy_size += copy;
1636 		}
1637 	}
1638 
1639 	IWL_DEBUG_HC(trans,
1640 		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
1641 		     iwl_get_cmd_string(trans, cmd->id),
1642 		     group_id, out_cmd->hdr.cmd,
1643 		     le16_to_cpu(out_cmd->hdr.sequence),
1644 		     cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
1645 
1646 	/* start the TFD with the minimum copy bytes */
1647 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
1648 	memcpy(&txq->first_tb_bufs[idx], &out_cmd->hdr, tb0_size);
1649 	iwl_pcie_txq_build_tfd(trans, txq,
1650 			       iwl_pcie_get_first_tb_dma(txq, idx),
1651 			       tb0_size, true);
1652 
1653 	/* map first command fragment, if any remains */
1654 	if (copy_size > tb0_size) {
1655 		phys_addr = dma_map_single(trans->dev,
1656 					   ((u8 *)&out_cmd->hdr) + tb0_size,
1657 					   copy_size - tb0_size,
1658 					   DMA_TO_DEVICE);
1659 		if (dma_mapping_error(trans->dev, phys_addr)) {
1660 			iwl_pcie_tfd_unmap(trans, out_meta, txq,
1661 					   txq->write_ptr);
1662 			idx = -ENOMEM;
1663 			goto out;
1664 		}
1665 
1666 		iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
1667 				       copy_size - tb0_size, false);
1668 	}
1669 
1670 	/* map the remaining (adjusted) nocopy/dup fragments */
1671 	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1672 		const void *data = cmddata[i];
1673 
1674 		if (!cmdlen[i])
1675 			continue;
1676 		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1677 					   IWL_HCMD_DFL_DUP)))
1678 			continue;
1679 		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1680 			data = dup_buf;
1681 		phys_addr = dma_map_single(trans->dev, (void *)data,
1682 					   cmdlen[i], DMA_TO_DEVICE);
1683 		if (dma_mapping_error(trans->dev, phys_addr)) {
1684 			iwl_pcie_tfd_unmap(trans, out_meta, txq,
1685 					   txq->write_ptr);
1686 			idx = -ENOMEM;
1687 			goto out;
1688 		}
1689 
1690 		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
1691 	}
1692 
1693 	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1694 	out_meta->flags = cmd->flags;
1695 	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1696 		kzfree(txq->entries[idx].free_buf);
1697 	txq->entries[idx].free_buf = dup_buf;
1698 
1699 	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1700 
1701 	/* start timer if queue currently empty */
1702 	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1703 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1704 
1705 	spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1706 	ret = iwl_pcie_set_cmd_in_flight(trans, cmd);
1707 	if (ret < 0) {
1708 		idx = ret;
1709 		spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1710 		goto out;
1711 	}
1712 
1713 	/* Increment and update queue's write index */
1714 	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
1715 	iwl_pcie_txq_inc_wr_ptr(trans, txq);
1716 
1717 	spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1718 
1719  out:
1720 	spin_unlock_bh(&txq->lock);
1721  free_dup_buf:
1722 	if (idx < 0)
1723 		kfree(dup_buf);
1724 	return idx;
1725 }
1726 
1727 /*
1728  * iwl_pcie_hcmd_complete - Pull unused buffers off the queue and reclaim them
1729  * @rxb: Rx buffer to reclaim
1730  */
1731 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
1732 			    struct iwl_rx_cmd_buffer *rxb)
1733 {
1734 	struct iwl_rx_packet *pkt = rxb_addr(rxb);
1735 	u16 sequence = le16_to_cpu(pkt->hdr.sequence);
1736 	u8 group_id;
1737 	u32 cmd_id;
1738 	int txq_id = SEQ_TO_QUEUE(sequence);
1739 	int index = SEQ_TO_INDEX(sequence);
1740 	int cmd_index;
1741 	struct iwl_device_cmd *cmd;
1742 	struct iwl_cmd_meta *meta;
1743 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1744 	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1745 
	/* If a Tx command is being handled and it isn't in the actual
	 * command queue, then a command routing bug has been introduced
	 * in the queue management code. */
1749 	if (WARN(txq_id != trans_pcie->cmd_queue,
1750 		 "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
1751 		 txq_id, trans_pcie->cmd_queue, sequence, txq->read_ptr,
1752 		 txq->write_ptr)) {
1753 		iwl_print_hex_error(trans, pkt, 32);
1754 		return;
1755 	}
1756 
1757 	spin_lock_bh(&txq->lock);
1758 
1759 	cmd_index = iwl_pcie_get_cmd_index(txq, index);
1760 	cmd = txq->entries[cmd_index].cmd;
1761 	meta = &txq->entries[cmd_index].meta;
1762 	group_id = cmd->hdr.group_id;
1763 	cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0);
1764 
1765 	iwl_pcie_tfd_unmap(trans, meta, txq, index);
1766 
	/* Input error checking is done when commands are added to the queue. */
1768 	if (meta->flags & CMD_WANT_SKB) {
1769 		struct page *p = rxb_steal_page(rxb);
1770 
1771 		meta->source->resp_pkt = pkt;
1772 		meta->source->_rx_page_addr = (unsigned long)page_address(p);
1773 		meta->source->_rx_page_order = trans_pcie->rx_page_order;
1774 	}
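	/*
	 * Stealing the RX page above keeps the response packet valid for the
	 * caller that set CMD_WANT_SKB; that caller is expected to release
	 * it with iwl_free_resp().
	 */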
1775 
1776 	if (meta->flags & CMD_WANT_ASYNC_CALLBACK)
1777 		iwl_op_mode_async_cb(trans->op_mode, cmd);
1778 
1779 	iwl_pcie_cmdq_reclaim(trans, txq_id, index);
1780 
1781 	if (!(meta->flags & CMD_ASYNC)) {
1782 		if (!test_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status)) {
1783 			IWL_WARN(trans,
1784 				 "HCMD_ACTIVE already clear for command %s\n",
1785 				 iwl_get_cmd_string(trans, cmd_id));
1786 		}
1787 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1788 		IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1789 			       iwl_get_cmd_string(trans, cmd_id));
1790 		wake_up(&trans_pcie->wait_command_queue);
1791 	}
1792 
1793 	if (meta->flags & CMD_MAKE_TRANS_IDLE) {
1794 		IWL_DEBUG_INFO(trans, "complete %s - mark trans as idle\n",
1795 			       iwl_get_cmd_string(trans, cmd->hdr.cmd));
1796 		set_bit(STATUS_TRANS_IDLE, &trans->status);
1797 		wake_up(&trans_pcie->d0i3_waitq);
1798 	}
1799 
1800 	if (meta->flags & CMD_WAKE_UP_TRANS) {
1801 		IWL_DEBUG_INFO(trans, "complete %s - clear trans idle flag\n",
1802 			       iwl_get_cmd_string(trans, cmd->hdr.cmd));
1803 		clear_bit(STATUS_TRANS_IDLE, &trans->status);
1804 		wake_up(&trans_pcie->d0i3_waitq);
1805 	}
1806 
1807 	meta->flags = 0;
1808 
1809 	spin_unlock_bh(&txq->lock);
1810 }
1811 
1812 #define HOST_COMPLETE_TIMEOUT	(2 * HZ)
1813 
1814 static int iwl_pcie_send_hcmd_async(struct iwl_trans *trans,
1815 				    struct iwl_host_cmd *cmd)
1816 {
1817 	int ret;
1818 
	/* An asynchronous command cannot expect an SKB to be set. */
1820 	if (WARN_ON(cmd->flags & CMD_WANT_SKB))
1821 		return -EINVAL;
1822 
1823 	ret = iwl_pcie_enqueue_hcmd(trans, cmd);
1824 	if (ret < 0) {
1825 		IWL_ERR(trans,
1826 			"Error sending %s: enqueue_hcmd failed: %d\n",
1827 			iwl_get_cmd_string(trans, cmd->id), ret);
1828 		return ret;
1829 	}
1830 	return 0;
1831 }
1832 
1833 static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
1834 				   struct iwl_host_cmd *cmd)
1835 {
1836 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1837 	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1838 	int cmd_idx;
1839 	int ret;
1840 
1841 	IWL_DEBUG_INFO(trans, "Attempting to send sync command %s\n",
1842 		       iwl_get_cmd_string(trans, cmd->id));
1843 
1844 	if (WARN(test_and_set_bit(STATUS_SYNC_HCMD_ACTIVE,
1845 				  &trans->status),
1846 		 "Command %s: a command is already active!\n",
1847 		 iwl_get_cmd_string(trans, cmd->id)))
1848 		return -EIO;
1849 
1850 	IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n",
1851 		       iwl_get_cmd_string(trans, cmd->id));
1852 
1853 	if (pm_runtime_suspended(&trans_pcie->pci_dev->dev)) {
1854 		ret = wait_event_timeout(trans_pcie->d0i3_waitq,
1855 				 pm_runtime_active(&trans_pcie->pci_dev->dev),
1856 				 msecs_to_jiffies(IWL_TRANS_IDLE_TIMEOUT));
1857 		if (!ret) {
1858 			IWL_ERR(trans, "Timeout exiting D0i3 before hcmd\n");
1859 			return -ETIMEDOUT;
1860 		}
1861 	}
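	/*
	 * A host command can only be handled while the device is awake, so
	 * the wait above gives a runtime-suspended (D0i3) device up to
	 * IWL_TRANS_IDLE_TIMEOUT ms to become active before enqueuing.
	 */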
1862 
1863 	cmd_idx = iwl_pcie_enqueue_hcmd(trans, cmd);
1864 	if (cmd_idx < 0) {
1865 		ret = cmd_idx;
1866 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1867 		IWL_ERR(trans,
1868 			"Error sending %s: enqueue_hcmd failed: %d\n",
1869 			iwl_get_cmd_string(trans, cmd->id), ret);
1870 		return ret;
1871 	}
1872 
1873 	ret = wait_event_timeout(trans_pcie->wait_command_queue,
1874 				 !test_bit(STATUS_SYNC_HCMD_ACTIVE,
1875 					   &trans->status),
1876 				 HOST_COMPLETE_TIMEOUT);
1877 	if (!ret) {
1878 		IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
1879 			iwl_get_cmd_string(trans, cmd->id),
1880 			jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
1881 
1882 		IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
1883 			txq->read_ptr, txq->write_ptr);
1884 
1885 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1886 		IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1887 			       iwl_get_cmd_string(trans, cmd->id));
1888 		ret = -ETIMEDOUT;
1889 
1890 		iwl_force_nmi(trans);
1891 		iwl_trans_fw_error(trans);
1892 
1893 		goto cancel;
1894 	}
1895 
1896 	if (test_bit(STATUS_FW_ERROR, &trans->status)) {
1897 		iwl_trans_dump_regs(trans);
1898 		IWL_ERR(trans, "FW error in SYNC CMD %s\n",
1899 			iwl_get_cmd_string(trans, cmd->id));
1900 		dump_stack();
1901 		ret = -EIO;
1902 		goto cancel;
1903 	}
1904 
1905 	if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1906 	    test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1907 		IWL_DEBUG_RF_KILL(trans, "RFKILL in SYNC CMD... no rsp\n");
1908 		ret = -ERFKILL;
1909 		goto cancel;
1910 	}
1911 
1912 	if ((cmd->flags & CMD_WANT_SKB) && !cmd->resp_pkt) {
1913 		IWL_ERR(trans, "Error: Response NULL in '%s'\n",
1914 			iwl_get_cmd_string(trans, cmd->id));
1915 		ret = -EIO;
1916 		goto cancel;
1917 	}
1918 
1919 	return 0;
1920 
1921 cancel:
1922 	if (cmd->flags & CMD_WANT_SKB) {
1923 		/*
		 * Cancel the CMD_WANT_SKB flag for the cmd in the
		 * TX cmd queue. Otherwise, if the response comes
		 * in later, it could set an invalid address
		 * (cmd->meta.source).
1928 		 */
1929 		txq->entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
1930 	}
1931 
1932 	if (cmd->resp_pkt) {
1933 		iwl_free_resp(cmd);
1934 		cmd->resp_pkt = NULL;
1935 	}
1936 
1937 	return ret;
1938 }
1939 
1940 int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
1941 {
1942 	if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1943 	    test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1944 		IWL_DEBUG_RF_KILL(trans, "Dropping CMD 0x%x: RF KILL\n",
1945 				  cmd->id);
1946 		return -ERFKILL;
1947 	}
1948 
1949 	if (cmd->flags & CMD_ASYNC)
1950 		return iwl_pcie_send_hcmd_async(trans, cmd);
1951 
	/* We can still fail if RFKILL is asserted while we wait */
1953 	return iwl_pcie_send_hcmd_sync(trans, cmd);
1954 }
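/*
 * Illustrative use of the host command path (a sketch with placeholder
 * names, not code from this driver): a large fragment can be marked
 * IWL_HCMD_DFL_NOCOPY so that only a DMA mapping of the caller's buffer
 * is built, and CMD_WANT_SKB asks for the response packet, which the
 * caller must release with iwl_free_resp():
 *
 *	struct iwl_host_cmd hcmd = {
 *		.id = some_cmd_id,
 *		.data = { &small_hdr, big_payload },
 *		.len = { sizeof(small_hdr), payload_len },
 *		.dataflags = { 0, IWL_HCMD_DFL_NOCOPY },
 *		.flags = CMD_WANT_SKB,
 *	};
 *	int ret = iwl_trans_pcie_send_hcmd(trans, &hcmd);
 *
 *	if (!ret)
 *		iwl_free_resp(&hcmd);
 */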
1955 
1956 static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
1957 			     struct iwl_txq *txq, u8 hdr_len,
1958 			     struct iwl_cmd_meta *out_meta,
1959 			     struct iwl_device_cmd *dev_cmd, u16 tb1_len)
1960 {
1961 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1962 	u16 tb2_len;
1963 	int i;
1964 
1965 	/*
	 * Set up the TFD's third entry to point directly to the remainder
	 * of the skb's head, if any
1968 	 */
1969 	tb2_len = skb_headlen(skb) - hdr_len;
1970 
1971 	if (tb2_len > 0) {
1972 		dma_addr_t tb2_phys = dma_map_single(trans->dev,
1973 						     skb->data + hdr_len,
1974 						     tb2_len, DMA_TO_DEVICE);
1975 		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
1976 			iwl_pcie_tfd_unmap(trans, out_meta, txq,
1977 					   txq->write_ptr);
1978 			return -EINVAL;
1979 		}
1980 		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false);
1981 	}
1982 
1983 	/* set up the remaining entries to point to the data */
1984 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1985 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1986 		dma_addr_t tb_phys;
1987 		int tb_idx;
1988 
1989 		if (!skb_frag_size(frag))
1990 			continue;
1991 
1992 		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
1993 					   skb_frag_size(frag), DMA_TO_DEVICE);
1994 
1995 		if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
1996 			iwl_pcie_tfd_unmap(trans, out_meta, txq,
1997 					   txq->write_ptr);
1998 			return -EINVAL;
1999 		}
2000 		tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2001 						skb_frag_size(frag), false);
2002 
2003 		out_meta->tbs |= BIT(tb_idx);
2004 	}
2005 
2006 	trace_iwlwifi_dev_tx(trans->dev, skb,
2007 			     iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
2008 			     trans_pcie->tfd_size,
2009 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2010 			     hdr_len);
2011 	trace_iwlwifi_dev_tx_data(trans->dev, skb, hdr_len);
2012 	return 0;
2013 }
2014 
2015 #ifdef CONFIG_INET
2016 struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len)
2017 {
2018 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2019 	struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->tso_hdr_page);
2020 
2021 	if (!p->page)
2022 		goto alloc;
2023 
2024 	/* enough room on this page */
2025 	if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE)
2026 		return p;
2027 
2028 	/* We don't have enough room on this page, get a new one. */
2029 	__free_page(p->page);
2030 
2031 alloc:
2032 	p->page = alloc_page(GFP_ATOMIC);
2033 	if (!p->page)
2034 		return NULL;
2035 	p->pos = page_address(p->page);
2036 	return p;
2037 }
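/*
 * The TSO header pages are per-CPU scratch buffers.  Frames in flight hold
 * their own reference (see the get_page() in iwl_fill_data_tbs_amsdu()), so
 * the __free_page() above only drops the local reference; a full page is
 * actually freed once those frames have been reclaimed.
 */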
2038 
2039 static void iwl_compute_pseudo_hdr_csum(void *iph, struct tcphdr *tcph,
2040 					bool ipv6, unsigned int len)
2041 {
2042 	if (ipv6) {
2043 		struct ipv6hdr *iphv6 = iph;
2044 
2045 		tcph->check = ~csum_ipv6_magic(&iphv6->saddr, &iphv6->daddr,
2046 					       len + tcph->doff * 4,
2047 					       IPPROTO_TCP, 0);
2048 	} else {
2049 		struct iphdr *iphv4 = iph;
2050 
2051 		ip_send_check(iphv4);
2052 		tcph->check = ~csum_tcpudp_magic(iphv4->saddr, iphv4->daddr,
2053 						 len + tcph->doff * 4,
2054 						 IPPROTO_TCP, 0);
2055 	}
2056 }
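/*
 * Seeding tcph->check with the inverted pseudo-header sum means that the
 * software checksum computed later over the TCP header and payload
 * (skb_checksum() followed by csum_fold() in the sw_csum_tx path below)
 * automatically folds the pseudo-header in, yielding the final transport
 * checksum.
 */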
2057 
2058 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2059 				   struct iwl_txq *txq, u8 hdr_len,
2060 				   struct iwl_cmd_meta *out_meta,
2061 				   struct iwl_device_cmd *dev_cmd, u16 tb1_len)
2062 {
2063 	struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
2064 	struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
2065 	struct ieee80211_hdr *hdr = (void *)skb->data;
2066 	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
2067 	unsigned int mss = skb_shinfo(skb)->gso_size;
2068 	u16 length, iv_len, amsdu_pad;
2069 	u8 *start_hdr;
2070 	struct iwl_tso_hdr_page *hdr_page;
2071 	struct page **page_ptr;
2072 	int ret;
2073 	struct tso_t tso;
2074 
2075 	/* if the packet is protected, then it must be CCMP or GCMP */
2076 	BUILD_BUG_ON(IEEE80211_CCMP_HDR_LEN != IEEE80211_GCMP_HDR_LEN);
2077 	iv_len = ieee80211_has_protected(hdr->frame_control) ?
2078 		IEEE80211_CCMP_HDR_LEN : 0;
2079 
2080 	trace_iwlwifi_dev_tx(trans->dev, skb,
2081 			     iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
2082 			     trans_pcie->tfd_size,
2083 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
2084 
2085 	ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
2086 	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
2087 	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
2088 	amsdu_pad = 0;
2089 
2090 	/* total amount of header we may need for this A-MSDU */
2091 	hdr_room = DIV_ROUND_UP(total_len, mss) *
2092 		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
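	/*
	 * Illustrative sizing (assumed values, not taken from a real frame):
	 * with total_len = 4200, mss = 1400 and snap_ip_tcp_hdrlen = 48, up
	 * to DIV_ROUND_UP(4200, 1400) = 3 subframes are built, each needing
	 * at most 3 bytes of padding + 48 + 14 (ethhdr) = 65 header bytes,
	 * so hdr_room = 3 * 65 + iv_len = 195 + iv_len.
	 */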
2093 
	/* Our device supports up to 9 segments, so the headers will fit in one page */
2095 	hdr_page = get_page_hdr(trans, hdr_room);
2096 	if (!hdr_page)
2097 		return -ENOMEM;
2098 
2099 	get_page(hdr_page->page);
2100 	start_hdr = hdr_page->pos;
2101 	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
2102 	*page_ptr = hdr_page->page;
2103 	memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
2104 	hdr_page->pos += iv_len;
2105 
2106 	/*
	 * Pull the ieee80211 header + IV so that the TSO core can be used;
	 * they are restored for the tx_status flow.
2109 	 */
2110 	skb_pull(skb, hdr_len + iv_len);
2111 
2112 	/*
	 * Subtract the length of all the headers that are not carried in
	 * the MPDU itself, but that are instead duplicated into every MSDU
	 * inside the A-MSDU.
2116 	 */
2117 	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);
2118 
2119 	tso_start(skb, &tso);
2120 
2121 	while (total_len) {
2122 		/* this is the data left for this subframe */
2123 		unsigned int data_left =
2124 			min_t(unsigned int, mss, total_len);
2125 		struct sk_buff *csum_skb = NULL;
2126 		unsigned int hdr_tb_len;
2127 		dma_addr_t hdr_tb_phys;
2128 		struct tcphdr *tcph;
2129 		u8 *iph, *subf_hdrs_start = hdr_page->pos;
2130 
2131 		total_len -= data_left;
2132 
2133 		memset(hdr_page->pos, 0, amsdu_pad);
2134 		hdr_page->pos += amsdu_pad;
2135 		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
2136 				  data_left)) & 0x3;
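		/*
		 * Illustrative padding (assumed values): with a 14-byte
		 * ethhdr, snap_ip_tcp_hdrlen = 48 (8 SNAP + 20 IP + 20 TCP)
		 * and data_left = 1401 this subframe spans 1463 bytes, so
		 * the next one gets (4 - 1463) & 0x3 = 1 byte of padding to
		 * keep subframes 4-byte aligned.
		 */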
2137 		ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
2138 		hdr_page->pos += ETH_ALEN;
2139 		ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
2140 		hdr_page->pos += ETH_ALEN;
2141 
2142 		length = snap_ip_tcp_hdrlen + data_left;
2143 		*((__be16 *)hdr_page->pos) = cpu_to_be16(length);
2144 		hdr_page->pos += sizeof(length);
2145 
2146 		/*
		 * This also copies the SNAP header, which is then treated
		 * as part of the MAC header.
2149 		 */
2150 		tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);
2151 		iph = hdr_page->pos + 8;
2152 		tcph = (void *)(iph + ip_hdrlen);
2153 
2154 		/* For testing on current hardware only */
2155 		if (trans_pcie->sw_csum_tx) {
2156 			csum_skb = alloc_skb(data_left + tcp_hdrlen(skb),
2157 					     GFP_ATOMIC);
2158 			if (!csum_skb) {
2159 				ret = -ENOMEM;
2160 				goto out_unmap;
2161 			}
2162 
2163 			iwl_compute_pseudo_hdr_csum(iph, tcph,
2164 						    skb->protocol ==
2165 							htons(ETH_P_IPV6),
2166 						    data_left);
2167 
2168 			skb_put_data(csum_skb, tcph, tcp_hdrlen(skb));
2169 			skb_reset_transport_header(csum_skb);
2170 			csum_skb->csum_start =
2171 				(unsigned char *)tcp_hdr(csum_skb) -
2172 						 csum_skb->head;
2173 		}
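		/*
		 * In the sw_csum_tx case csum_skb now holds this subframe's
		 * (seeded) TCP header; its payload is appended below so the
		 * checksum can be computed in software and patched into the
		 * already DMA-mapped header copy.
		 */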
2174 
2175 		hdr_page->pos += snap_ip_tcp_hdrlen;
2176 
2177 		hdr_tb_len = hdr_page->pos - start_hdr;
2178 		hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
2179 					     hdr_tb_len, DMA_TO_DEVICE);
2180 		if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys))) {
2181 			dev_kfree_skb(csum_skb);
2182 			ret = -EINVAL;
2183 			goto out_unmap;
2184 		}
2185 		iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
2186 				       hdr_tb_len, false);
2187 		trace_iwlwifi_dev_tx_tso_chunk(trans->dev, start_hdr,
2188 					       hdr_tb_len);
2189 		/* add this subframe's headers' length to the tx_cmd */
2190 		le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);
2191 
2192 		/* prepare the start_hdr for the next subframe */
2193 		start_hdr = hdr_page->pos;
2194 
2195 		/* put the payload */
2196 		while (data_left) {
2197 			unsigned int size = min_t(unsigned int, tso.size,
2198 						  data_left);
2199 			dma_addr_t tb_phys;
2200 
2201 			if (trans_pcie->sw_csum_tx)
2202 				skb_put_data(csum_skb, tso.data, size);
2203 
2204 			tb_phys = dma_map_single(trans->dev, tso.data,
2205 						 size, DMA_TO_DEVICE);
2206 			if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
2207 				dev_kfree_skb(csum_skb);
2208 				ret = -EINVAL;
2209 				goto out_unmap;
2210 			}
2211 
2212 			iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2213 					       size, false);
2214 			trace_iwlwifi_dev_tx_tso_chunk(trans->dev, tso.data,
2215 						       size);
2216 
2217 			data_left -= size;
2218 			tso_build_data(skb, &tso, size);
2219 		}
2220 
2221 		/* For testing on early hardware only */
2222 		if (trans_pcie->sw_csum_tx) {
2223 			__wsum csum;
2224 
2225 			csum = skb_checksum(csum_skb,
2226 					    skb_checksum_start_offset(csum_skb),
2227 					    csum_skb->len -
2228 					    skb_checksum_start_offset(csum_skb),
2229 					    0);
2230 			dev_kfree_skb(csum_skb);
2231 			dma_sync_single_for_cpu(trans->dev, hdr_tb_phys,
2232 						hdr_tb_len, DMA_TO_DEVICE);
2233 			tcph->check = csum_fold(csum);
2234 			dma_sync_single_for_device(trans->dev, hdr_tb_phys,
2235 						   hdr_tb_len, DMA_TO_DEVICE);
2236 		}
2237 	}
2238 
	/* re-add the WiFi header and IV */
2240 	skb_push(skb, hdr_len + iv_len);
2241 
2242 	return 0;
2243 
2244 out_unmap:
2245 	iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
2246 	return ret;
2247 }
2248 #else /* CONFIG_INET */
2249 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2250 				   struct iwl_txq *txq, u8 hdr_len,
2251 				   struct iwl_cmd_meta *out_meta,
2252 				   struct iwl_device_cmd *dev_cmd, u16 tb1_len)
2253 {
2254 	/* No A-MSDU without CONFIG_INET */
2255 	WARN_ON(1);
2256 
2257 	return -1;
2258 }
2259 #endif /* CONFIG_INET */
2260 
2261 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
2262 		      struct iwl_device_cmd *dev_cmd, int txq_id)
2263 {
2264 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2265 	struct ieee80211_hdr *hdr;
2266 	struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
2267 	struct iwl_cmd_meta *out_meta;
2268 	struct iwl_txq *txq;
2269 	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
2270 	void *tb1_addr;
2271 	void *tfd;
2272 	u16 len, tb1_len;
2273 	bool wait_write_ptr;
2274 	__le16 fc;
2275 	u8 hdr_len;
2276 	u16 wifi_seq;
2277 	bool amsdu;
2278 
2279 	txq = trans_pcie->txq[txq_id];
2280 
2281 	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
2282 		      "TX on unused queue %d\n", txq_id))
2283 		return -EINVAL;
2284 
2285 	if (unlikely(trans_pcie->sw_csum_tx &&
2286 		     skb->ip_summed == CHECKSUM_PARTIAL)) {
2287 		int offs = skb_checksum_start_offset(skb);
2288 		int csum_offs = offs + skb->csum_offset;
2289 		__wsum csum;
2290 
2291 		if (skb_ensure_writable(skb, csum_offs + sizeof(__sum16)))
2292 			return -1;
2293 
2294 		csum = skb_checksum(skb, offs, skb->len - offs, 0);
2295 		*(__sum16 *)(skb->data + csum_offs) = csum_fold(csum);
2296 
2297 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2298 	}
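	/*
	 * With sw_csum_tx the transport checksum was just filled in by hand,
	 * so the skb is marked CHECKSUM_UNNECESSARY before it is handed to
	 * the DMA path.
	 */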
2299 
2300 	if (skb_is_nonlinear(skb) &&
2301 	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
2302 	    __skb_linearize(skb))
2303 		return -ENOMEM;
2304 
2305 	/* mac80211 always puts the full header into the SKB's head,
2306 	 * so there's no need to check if it's readable there
2307 	 */
2308 	hdr = (struct ieee80211_hdr *)skb->data;
2309 	fc = hdr->frame_control;
2310 	hdr_len = ieee80211_hdrlen(fc);
2311 
2312 	spin_lock(&txq->lock);
2313 
2314 	if (iwl_queue_space(txq) < txq->high_mark) {
2315 		iwl_stop_queue(trans, txq);
2316 
		/* don't put the packet on the ring if there is no room */
2318 		if (unlikely(iwl_queue_space(txq) < 3)) {
2319 			struct iwl_device_cmd **dev_cmd_ptr;
2320 
2321 			dev_cmd_ptr = (void *)((u8 *)skb->cb +
2322 					       trans_pcie->dev_cmd_offs);
2323 
2324 			*dev_cmd_ptr = dev_cmd;
2325 			__skb_queue_tail(&txq->overflow_q, skb);
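			/*
			 * The frame is parked on overflow_q together with
			 * its dev_cmd (stashed in skb->cb above) and will be
			 * re-submitted once ring entries are reclaimed, so
			 * report success and let mac80211 consider it queued.
			 */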
2326 
2327 			spin_unlock(&txq->lock);
2328 			return 0;
2329 		}
2330 	}
2331 
2332 	/* In AGG mode, the index in the ring must correspond to the WiFi
	 * sequence number. This is a HW requirement to help the SCD parse
2334 	 * the BA.
2335 	 * Check here that the packets are in the right place on the ring.
2336 	 */
2337 	wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
2338 	WARN_ONCE(txq->ampdu &&
2339 		  (wifi_seq & 0xff) != txq->write_ptr,
2340 		  "Q: %d WiFi Seq %d tfdNum %d",
2341 		  txq_id, wifi_seq, txq->write_ptr);
2342 
2343 	/* Set up driver data for this TFD */
2344 	txq->entries[txq->write_ptr].skb = skb;
2345 	txq->entries[txq->write_ptr].cmd = dev_cmd;
2346 
2347 	dev_cmd->hdr.sequence =
2348 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
2349 			    INDEX_TO_SEQ(txq->write_ptr)));
2350 
2351 	tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
2352 	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
2353 		       offsetof(struct iwl_tx_cmd, scratch);
2354 
2355 	tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
2356 	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
2357 
2358 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
2359 	out_meta = &txq->entries[txq->write_ptr].meta;
2360 	out_meta->flags = 0;
2361 
2362 	/*
2363 	 * The second TB (tb1) points to the remainder of the TX command
2364 	 * and the 802.11 header - dword aligned size
2365 	 * (This calculation modifies the TX command, so do it before the
2366 	 * setup of the first TB)
2367 	 */
2368 	len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
2369 	      hdr_len - IWL_FIRST_TB_SIZE;
2370 	/* do not align A-MSDU to dword as the subframe header aligns it */
2371 	amsdu = ieee80211_is_data_qos(fc) &&
2372 		(*ieee80211_get_qos_ctl(hdr) &
2373 		 IEEE80211_QOS_CTL_A_MSDU_PRESENT);
2374 	if (trans_pcie->sw_csum_tx || !amsdu) {
2375 		tb1_len = ALIGN(len, 4);
2376 		/* Tell NIC about any 2-byte padding after MAC header */
2377 		if (tb1_len != len)
2378 			tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_MH_PAD);
2379 	} else {
2380 		tb1_len = len;
2381 	}
2382 
2383 	/*
	 * The first TB points to bi-directional DMA data; we'll
	 * memcpy the data into it later.
2386 	 */
2387 	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
2388 			       IWL_FIRST_TB_SIZE, true);
2389 
2390 	/* there must be data left over for TB1 or this code must be changed */
2391 	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
2392 
2393 	/* map the data for TB1 */
2394 	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
2395 	tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
2396 	if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
2397 		goto out_err;
2398 	iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
2399 
2400 	/*
2401 	 * If gso_size wasn't set, don't give the frame "amsdu treatment"
2402 	 * (adding subframes, etc.).
2403 	 * This can happen in some testing flows when the amsdu was already
2404 	 * pre-built, and we just need to send the resulting skb.
2405 	 */
2406 	if (amsdu && skb_shinfo(skb)->gso_size) {
2407 		if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
2408 						     out_meta, dev_cmd,
2409 						     tb1_len)))
2410 			goto out_err;
2411 	} else if (unlikely(iwl_fill_data_tbs(trans, skb, txq, hdr_len,
2412 				       out_meta, dev_cmd, tb1_len))) {
2413 		goto out_err;
2414 	}
2415 
2416 	/* building the A-MSDU might have changed this data, so memcpy it now */
2417 	memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
2418 	       IWL_FIRST_TB_SIZE);
2419 
2420 	tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
2421 	/* Set up entry for this TFD in Tx byte-count array */
2422 	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
2423 					 iwl_pcie_tfd_get_num_tbs(trans, tfd));
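	/*
	 * The byte-count table tells the scheduler (SCD) how long the frame
	 * behind each TFD is; it is updated here, before the write pointer
	 * is bumped below.
	 */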
2424 
2425 	wait_write_ptr = ieee80211_has_morefrags(fc);
2426 
2427 	/* start timer if queue currently empty */
2428 	if (txq->read_ptr == txq->write_ptr) {
2429 		if (txq->wd_timeout) {
2430 			/*
			 * If the TXQ is active, arm the timer; if not, store
			 * the timeout in the remainder so that the timer is
			 * armed with the right value when the station wakes
			 * up.
2435 			 */
2436 			if (!txq->frozen)
2437 				mod_timer(&txq->stuck_timer,
2438 					  jiffies + txq->wd_timeout);
2439 			else
2440 				txq->frozen_expiry_remainder = txq->wd_timeout;
2441 		}
2442 		IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id);
2443 		iwl_trans_ref(trans);
2444 	}
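	/*
	 * The reference taken above keeps the device from runtime-suspending
	 * while this queue has frames in flight; it is dropped again on the
	 * reclaim path once the queue drains.
	 */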
2445 
2446 	/* Tell device the write index *just past* this latest filled TFD */
2447 	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
2448 	if (!wait_write_ptr)
2449 		iwl_pcie_txq_inc_wr_ptr(trans, txq);
2450 
2451 	/*
2452 	 * At this point the frame is "transmitted" successfully
2453 	 * and we will get a TX status notification eventually.
2454 	 */
2455 	spin_unlock(&txq->lock);
2456 	return 0;
2457 out_err:
2458 	spin_unlock(&txq->lock);
2459 	return -1;
2460 }
2461