xref: /openbmc/linux/drivers/net/wireless/ath/ath10k/ce.c (revision c4a7b9b5)
1 // SPDX-License-Identifier: ISC
2 /*
3  * Copyright (c) 2005-2011 Atheros Communications Inc.
4  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
5  * Copyright (c) 2018 The Linux Foundation. All rights reserved.
6  */
7 
8 #include "hif.h"
9 #include "ce.h"
10 #include "debug.h"
11 
12 /*
13  * Support for Copy Engine hardware, which is mainly used for
14  * communication between Host and Target over a PCIe interconnect.
15  */
16 
17 /*
18  * A single CopyEngine (CE) comprises two "rings":
19  *   a source ring
20  *   a destination ring
21  *
22  * Each ring consists of a number of descriptors which specify
23  * an address, length, and meta-data.
24  *
25  * Typically, one side of the PCIe/AHB/SNOC interconnect (Host or Target)
26  * controls one ring and the other side controls the other ring.
27  * The source side chooses when to initiate a transfer and it
28  * chooses what to send (buffer address, length). The destination
29  * side keeps a supply of "anonymous receive buffers" available and
30  * it handles incoming data as it arrives (when the destination
31  * receives an interrupt).
32  *
33  * The sender may send a simple buffer (address/length) or it may
34  * send a small list of buffers.  When a small list is sent, hardware
35  * "gathers" these and they end up in a single destination buffer
36  * with a single interrupt.
37  *
38  * There are several "contexts" managed by this layer -- more, it
39  * may seem -- than should be needed. These are provided mainly for
40  * maximum flexibility and especially to facilitate a simpler HIF
41  * implementation. There are per-CopyEngine recv, send, and watermark
42  * contexts. These are supplied by the caller when a recv, send,
43  * or watermark handler is established and they are echoed back to
44  * the caller when the respective callbacks are invoked. There is
45  * also a per-transfer context supplied by the caller when a buffer
46  * (or sendlist) is sent and when a buffer is enqueued for recv.
47  * These per-transfer contexts are echoed back to the caller when
48  * the buffer is sent/received.
49  */
50 
51 static inline u32 shadow_sr_wr_ind_addr(struct ath10k *ar,
52 					struct ath10k_ce_pipe *ce_state)
53 {
54 	u32 ce_id = ce_state->id;
55 	u32 addr = 0;
56 
57 	switch (ce_id) {
58 	case 0:
59 		addr = 0x00032000;
60 		break;
61 	case 3:
62 		addr = 0x0003200C;
63 		break;
64 	case 4:
65 		addr = 0x00032010;
66 		break;
67 	case 5:
68 		addr = 0x00032014;
69 		break;
70 	case 7:
71 		addr = 0x0003201C;
72 		break;
73 	default:
74 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
75 		break;
76 	}
77 	return addr;
78 }
79 
80 static inline u32 shadow_dst_wr_ind_addr(struct ath10k *ar,
81 					 struct ath10k_ce_pipe *ce_state)
82 {
83 	u32 ce_id = ce_state->id;
84 	u32 addr = 0;
85 
86 	switch (ce_id) {
87 	case 1:
88 		addr = 0x00032034;
89 		break;
90 	case 2:
91 		addr = 0x00032038;
92 		break;
93 	case 5:
94 		addr = 0x00032044;
95 		break;
96 	case 7:
97 		addr = 0x0003204C;
98 		break;
99 	case 8:
100 		addr = 0x00032050;
101 		break;
102 	case 9:
103 		addr = 0x00032054;
104 		break;
105 	case 10:
106 		addr = 0x00032058;
107 		break;
108 	case 11:
109 		addr = 0x0003205C;
110 		break;
111 	default:
112 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
113 		break;
114 	}
115 
116 	return addr;
117 }
118 
119 static inline unsigned int
120 ath10k_set_ring_byte(unsigned int offset,
121 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
122 {
123 	return ((offset << addr_map->lsb) & addr_map->mask);
124 }
125 
126 static inline unsigned int
127 ath10k_get_ring_byte(unsigned int offset,
128 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
129 {
130 	return ((offset & addr_map->mask) >> (addr_map->lsb));
131 }
132 
133 static inline u32 ath10k_ce_read32(struct ath10k *ar, u32 offset)
134 {
135 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
136 
137 	return ce->bus_ops->read32(ar, offset);
138 }
139 
140 static inline void ath10k_ce_write32(struct ath10k *ar, u32 offset, u32 value)
141 {
142 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
143 
144 	ce->bus_ops->write32(ar, offset, value);
145 }
146 
147 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
148 						       u32 ce_ctrl_addr,
149 						       unsigned int n)
150 {
151 	ath10k_ce_write32(ar, ce_ctrl_addr +
152 			  ar->hw_ce_regs->dst_wr_index_addr, n);
153 }
154 
155 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
156 						      u32 ce_ctrl_addr)
157 {
158 	return ath10k_ce_read32(ar, ce_ctrl_addr +
159 				ar->hw_ce_regs->dst_wr_index_addr);
160 }
161 
162 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
163 						      u32 ce_ctrl_addr,
164 						      unsigned int n)
165 {
166 	ath10k_ce_write32(ar, ce_ctrl_addr +
167 			  ar->hw_ce_regs->sr_wr_index_addr, n);
168 }
169 
170 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
171 						     u32 ce_ctrl_addr)
172 {
173 	return ath10k_ce_read32(ar, ce_ctrl_addr +
174 				ar->hw_ce_regs->sr_wr_index_addr);
175 }
176 
177 static inline u32 ath10k_ce_src_ring_read_index_from_ddr(struct ath10k *ar,
178 							 u32 ce_id)
179 {
180 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
181 
182 	return ce->vaddr_rri[ce_id] & CE_DDR_RRI_MASK;
183 }
184 
185 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
186 						    u32 ce_ctrl_addr)
187 {
188 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
189 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
190 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
191 	u32 index;
192 
193 	if (ar->hw_params.rri_on_ddr &&
194 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
195 		index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_id);
196 	else
197 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
198 					 ar->hw_ce_regs->current_srri_addr);
199 
200 	return index;
201 }
202 
203 static inline void
204 ath10k_ce_shadow_src_ring_write_index_set(struct ath10k *ar,
205 					  struct ath10k_ce_pipe *ce_state,
206 					  unsigned int value)
207 {
208 	ath10k_ce_write32(ar, shadow_sr_wr_ind_addr(ar, ce_state), value);
209 }
210 
211 static inline void
212 ath10k_ce_shadow_dest_ring_write_index_set(struct ath10k *ar,
213 					   struct ath10k_ce_pipe *ce_state,
214 					   unsigned int value)
215 {
216 	ath10k_ce_write32(ar, shadow_dst_wr_ind_addr(ar, ce_state), value);
217 }
218 
219 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
220 						    u32 ce_id,
221 						    u64 addr)
222 {
223 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
224 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
225 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
226 	u32 addr_lo = lower_32_bits(addr);
227 
228 	ath10k_ce_write32(ar, ce_ctrl_addr +
229 			  ar->hw_ce_regs->sr_base_addr_lo, addr_lo);
230 
231 	if (ce_state->ops->ce_set_src_ring_base_addr_hi) {
232 		ce_state->ops->ce_set_src_ring_base_addr_hi(ar, ce_ctrl_addr,
233 							    addr);
234 	}
235 }
236 
237 static void ath10k_ce_set_src_ring_base_addr_hi(struct ath10k *ar,
238 						u32 ce_ctrl_addr,
239 						u64 addr)
240 {
241 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
242 
243 	ath10k_ce_write32(ar, ce_ctrl_addr +
244 			  ar->hw_ce_regs->sr_base_addr_hi, addr_hi);
245 }
246 
247 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
248 					       u32 ce_ctrl_addr,
249 					       unsigned int n)
250 {
251 	ath10k_ce_write32(ar, ce_ctrl_addr +
252 			  ar->hw_ce_regs->sr_size_addr, n);
253 }
254 
255 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
256 					       u32 ce_ctrl_addr,
257 					       unsigned int n)
258 {
259 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
260 
261 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
262 					  ctrl_regs->addr);
263 
264 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
265 			  (ctrl1_addr &  ~(ctrl_regs->dmax->mask)) |
266 			  ath10k_set_ring_byte(n, ctrl_regs->dmax));
267 }
268 
269 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
270 						    u32 ce_ctrl_addr,
271 						    unsigned int n)
272 {
273 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
274 
275 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
276 					  ctrl_regs->addr);
277 
278 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
279 			  (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
280 			  ath10k_set_ring_byte(n, ctrl_regs->src_ring));
281 }
282 
283 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
284 						     u32 ce_ctrl_addr,
285 						     unsigned int n)
286 {
287 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
288 
289 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
290 					  ctrl_regs->addr);
291 
292 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
293 			  (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
294 			  ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
295 }
296 
297 static inline
298 	u32 ath10k_ce_dest_ring_read_index_from_ddr(struct ath10k *ar, u32 ce_id)
299 {
300 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
301 
302 	return (ce->vaddr_rri[ce_id] >> CE_DDR_DRRI_SHIFT) &
303 		CE_DDR_RRI_MASK;
304 }
305 
306 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
307 						     u32 ce_ctrl_addr)
308 {
309 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
310 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
311 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
312 	u32 index;
313 
314 	if (ar->hw_params.rri_on_ddr &&
315 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
316 		index = ath10k_ce_dest_ring_read_index_from_ddr(ar, ce_id);
317 	else
318 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
319 					 ar->hw_ce_regs->current_drri_addr);
320 
321 	return index;
322 }
323 
324 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
325 						     u32 ce_id,
326 						     u64 addr)
327 {
328 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
329 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
330 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
331 	u32 addr_lo = lower_32_bits(addr);
332 
333 	ath10k_ce_write32(ar, ce_ctrl_addr +
334 			  ar->hw_ce_regs->dr_base_addr_lo, addr_lo);
335 
336 	if (ce_state->ops->ce_set_dest_ring_base_addr_hi) {
337 		ce_state->ops->ce_set_dest_ring_base_addr_hi(ar, ce_ctrl_addr,
338 							     addr);
339 	}
340 }
341 
342 static void ath10k_ce_set_dest_ring_base_addr_hi(struct ath10k *ar,
343 						 u32 ce_ctrl_addr,
344 						 u64 addr)
345 {
346 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
347 	u32 reg_value;
348 
349 	reg_value = ath10k_ce_read32(ar, ce_ctrl_addr +
350 				     ar->hw_ce_regs->dr_base_addr_hi);
351 	reg_value &= ~CE_DESC_ADDR_HI_MASK;
352 	reg_value |= addr_hi;
353 	ath10k_ce_write32(ar, ce_ctrl_addr +
354 			  ar->hw_ce_regs->dr_base_addr_hi, reg_value);
355 }
356 
357 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
358 						u32 ce_ctrl_addr,
359 						unsigned int n)
360 {
361 	ath10k_ce_write32(ar, ce_ctrl_addr +
362 			  ar->hw_ce_regs->dr_size_addr, n);
363 }
364 
365 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
366 						   u32 ce_ctrl_addr,
367 						   unsigned int n)
368 {
369 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
370 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
371 
372 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
373 			  (addr & ~(srcr_wm->wm_high->mask)) |
374 			  (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
375 }
376 
377 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
378 						  u32 ce_ctrl_addr,
379 						  unsigned int n)
380 {
381 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
382 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
383 
384 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
385 			  (addr & ~(srcr_wm->wm_low->mask)) |
386 			  (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
387 }
388 
389 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
390 						    u32 ce_ctrl_addr,
391 						    unsigned int n)
392 {
393 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
394 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
395 
396 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
397 			  (addr & ~(dstr_wm->wm_high->mask)) |
398 			  (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
399 }
400 
401 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
402 						   u32 ce_ctrl_addr,
403 						   unsigned int n)
404 {
405 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
406 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
407 
408 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
409 			  (addr & ~(dstr_wm->wm_low->mask)) |
410 			  (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
411 }
412 
413 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
414 							u32 ce_ctrl_addr)
415 {
416 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
417 
418 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
419 					    ar->hw_ce_regs->host_ie_addr);
420 
421 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
422 			  host_ie_addr | host_ie->copy_complete->mask);
423 }
424 
425 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
426 							u32 ce_ctrl_addr)
427 {
428 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
429 
430 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
431 					    ar->hw_ce_regs->host_ie_addr);
432 
433 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
434 			  host_ie_addr & ~(host_ie->copy_complete->mask));
435 }
436 
437 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
438 						    u32 ce_ctrl_addr)
439 {
440 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
441 
442 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
443 					    ar->hw_ce_regs->host_ie_addr);
444 
445 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
446 			  host_ie_addr & ~(wm_regs->wm_mask));
447 }
448 
449 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
450 					       u32 ce_ctrl_addr)
451 {
452 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
453 
454 	u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
455 					    ar->hw_ce_regs->misc_ie_addr);
456 
457 	ath10k_ce_write32(ar,
458 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
459 			  misc_ie_addr | misc_regs->err_mask);
460 }
461 
462 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
463 						u32 ce_ctrl_addr)
464 {
465 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
466 
467 	u32 misc_ie_addr = ath10k_ce_read32(ar,
468 			ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr);
469 
470 	ath10k_ce_write32(ar,
471 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
472 			  misc_ie_addr & ~(misc_regs->err_mask));
473 }
474 
475 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
476 						     u32 ce_ctrl_addr,
477 						     unsigned int mask)
478 {
479 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
480 
481 	ath10k_ce_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
482 }
483 
484 /*
485  * Guts of ath10k_ce_send.
486  * The caller takes responsibility for any needed locking.
487  */
488 static int _ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
489 				  void *per_transfer_context,
490 				  dma_addr_t buffer,
491 				  unsigned int nbytes,
492 				  unsigned int transfer_id,
493 				  unsigned int flags)
494 {
495 	struct ath10k *ar = ce_state->ar;
496 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
497 	struct ce_desc *desc, sdesc;
498 	unsigned int nentries_mask = src_ring->nentries_mask;
499 	unsigned int sw_index = src_ring->sw_index;
500 	unsigned int write_index = src_ring->write_index;
501 	u32 ctrl_addr = ce_state->ctrl_addr;
502 	u32 desc_flags = 0;
503 	int ret = 0;
504 
505 	if (nbytes > ce_state->src_sz_max)
506 		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
507 			    __func__, nbytes, ce_state->src_sz_max);
508 
509 	if (unlikely(CE_RING_DELTA(nentries_mask,
510 				   write_index, sw_index - 1) <= 0)) {
511 		ret = -ENOSR;
512 		goto exit;
513 	}
514 
515 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
516 				   write_index);
517 
518 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
519 
520 	if (flags & CE_SEND_FLAG_GATHER)
521 		desc_flags |= CE_DESC_FLAGS_GATHER;
522 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
523 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
524 
525 	sdesc.addr   = __cpu_to_le32(buffer);
526 	sdesc.nbytes = __cpu_to_le16(nbytes);
527 	sdesc.flags  = __cpu_to_le16(desc_flags);
528 
529 	*desc = sdesc;
530 
531 	src_ring->per_transfer_context[write_index] = per_transfer_context;
532 
533 	/* Update Source Ring Write Index */
534 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
535 
536 	/* WORKAROUND */
537 	if (!(flags & CE_SEND_FLAG_GATHER))
538 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
539 
540 	src_ring->write_index = write_index;
541 exit:
542 	return ret;
543 }
544 
545 static int _ath10k_ce_send_nolock_64(struct ath10k_ce_pipe *ce_state,
546 				     void *per_transfer_context,
547 				     dma_addr_t buffer,
548 				     unsigned int nbytes,
549 				     unsigned int transfer_id,
550 				     unsigned int flags)
551 {
552 	struct ath10k *ar = ce_state->ar;
553 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
554 	struct ce_desc_64 *desc, sdesc;
555 	unsigned int nentries_mask = src_ring->nentries_mask;
556 	unsigned int sw_index;
557 	unsigned int write_index = src_ring->write_index;
558 	u32 ctrl_addr = ce_state->ctrl_addr;
559 	__le32 *addr;
560 	u32 desc_flags = 0;
561 	int ret = 0;
562 
563 	if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
564 		return -ESHUTDOWN;
565 
566 	if (nbytes > ce_state->src_sz_max)
567 		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
568 			    __func__, nbytes, ce_state->src_sz_max);
569 
570 	if (ar->hw_params.rri_on_ddr)
571 		sw_index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_state->id);
572 	else
573 		sw_index = src_ring->sw_index;
574 
575 	if (unlikely(CE_RING_DELTA(nentries_mask,
576 				   write_index, sw_index - 1) <= 0)) {
577 		ret = -ENOSR;
578 		goto exit;
579 	}
580 
581 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
582 				      write_index);
583 
584 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
585 
586 	if (flags & CE_SEND_FLAG_GATHER)
587 		desc_flags |= CE_DESC_FLAGS_GATHER;
588 
589 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
590 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
591 
592 	addr = (__le32 *)&sdesc.addr;
593 
594 	flags |= upper_32_bits(buffer) & CE_DESC_ADDR_HI_MASK;
595 	addr[0] = __cpu_to_le32(buffer);
596 	addr[1] = __cpu_to_le32(flags);
597 	if (flags & CE_SEND_FLAG_GATHER)
598 		addr[1] |= __cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER);
599 	else
600 		addr[1] &= ~(__cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER));
601 
602 	sdesc.nbytes = __cpu_to_le16(nbytes);
603 	sdesc.flags  = __cpu_to_le16(desc_flags);
604 
605 	*desc = sdesc;
606 
607 	src_ring->per_transfer_context[write_index] = per_transfer_context;
608 
609 	/* Update Source Ring Write Index */
610 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
611 
612 	if (!(flags & CE_SEND_FLAG_GATHER)) {
613 		if (ar->hw_params.shadow_reg_support)
614 			ath10k_ce_shadow_src_ring_write_index_set(ar, ce_state,
615 								  write_index);
616 		else
617 			ath10k_ce_src_ring_write_index_set(ar, ctrl_addr,
618 							   write_index);
619 	}
620 
621 	src_ring->write_index = write_index;
622 exit:
623 	return ret;
624 }
625 
626 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
627 			  void *per_transfer_context,
628 			  dma_addr_t buffer,
629 			  unsigned int nbytes,
630 			  unsigned int transfer_id,
631 			  unsigned int flags)
632 {
633 	return ce_state->ops->ce_send_nolock(ce_state, per_transfer_context,
634 				    buffer, nbytes, transfer_id, flags);
635 }
636 EXPORT_SYMBOL(ath10k_ce_send_nolock);
637 
638 void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
639 {
640 	struct ath10k *ar = pipe->ar;
641 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
642 	struct ath10k_ce_ring *src_ring = pipe->src_ring;
643 	u32 ctrl_addr = pipe->ctrl_addr;
644 
645 	lockdep_assert_held(&ce->ce_lock);
646 
647 	/*
648 	 * This function must be called only if there is an incomplete
649 	 * scatter-gather transfer (before index register is updated)
650 	 * that needs to be cleaned up.
651 	 */
652 	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
653 		return;
654 
655 	if (WARN_ON_ONCE(src_ring->write_index ==
656 			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
657 		return;
658 
659 	src_ring->write_index--;
660 	src_ring->write_index &= src_ring->nentries_mask;
661 
662 	src_ring->per_transfer_context[src_ring->write_index] = NULL;
663 }
664 EXPORT_SYMBOL(__ath10k_ce_send_revert);
665 
666 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
667 		   void *per_transfer_context,
668 		   dma_addr_t buffer,
669 		   unsigned int nbytes,
670 		   unsigned int transfer_id,
671 		   unsigned int flags)
672 {
673 	struct ath10k *ar = ce_state->ar;
674 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
675 	int ret;
676 
677 	spin_lock_bh(&ce->ce_lock);
678 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
679 				    buffer, nbytes, transfer_id, flags);
680 	spin_unlock_bh(&ce->ce_lock);
681 
682 	return ret;
683 }
684 EXPORT_SYMBOL(ath10k_ce_send);
685 
686 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
687 {
688 	struct ath10k *ar = pipe->ar;
689 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
690 	int delta;
691 
692 	spin_lock_bh(&ce->ce_lock);
693 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
694 			      pipe->src_ring->write_index,
695 			      pipe->src_ring->sw_index - 1);
696 	spin_unlock_bh(&ce->ce_lock);
697 
698 	return delta;
699 }
700 EXPORT_SYMBOL(ath10k_ce_num_free_src_entries);
701 
702 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
703 {
704 	struct ath10k *ar = pipe->ar;
705 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
706 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
707 	unsigned int nentries_mask = dest_ring->nentries_mask;
708 	unsigned int write_index = dest_ring->write_index;
709 	unsigned int sw_index = dest_ring->sw_index;
710 
711 	lockdep_assert_held(&ce->ce_lock);
712 
713 	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
714 }
715 EXPORT_SYMBOL(__ath10k_ce_rx_num_free_bufs);
716 
717 static int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
718 				   dma_addr_t paddr)
719 {
720 	struct ath10k *ar = pipe->ar;
721 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
722 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
723 	unsigned int nentries_mask = dest_ring->nentries_mask;
724 	unsigned int write_index = dest_ring->write_index;
725 	unsigned int sw_index = dest_ring->sw_index;
726 	struct ce_desc *base = dest_ring->base_addr_owner_space;
727 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
728 	u32 ctrl_addr = pipe->ctrl_addr;
729 
730 	lockdep_assert_held(&ce->ce_lock);
731 
732 	if ((pipe->id != 5) &&
733 	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
734 		return -ENOSPC;
735 
736 	desc->addr = __cpu_to_le32(paddr);
737 	desc->nbytes = 0;
738 
739 	dest_ring->per_transfer_context[write_index] = ctx;
740 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
741 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
742 	dest_ring->write_index = write_index;
743 
744 	return 0;
745 }
746 
747 static int __ath10k_ce_rx_post_buf_64(struct ath10k_ce_pipe *pipe,
748 				      void *ctx,
749 				      dma_addr_t paddr)
750 {
751 	struct ath10k *ar = pipe->ar;
752 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
753 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
754 	unsigned int nentries_mask = dest_ring->nentries_mask;
755 	unsigned int write_index = dest_ring->write_index;
756 	unsigned int sw_index = dest_ring->sw_index;
757 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
758 	struct ce_desc_64 *desc =
759 			CE_DEST_RING_TO_DESC_64(base, write_index);
760 	u32 ctrl_addr = pipe->ctrl_addr;
761 
762 	lockdep_assert_held(&ce->ce_lock);
763 
764 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
765 		return -ENOSPC;
766 
767 	desc->addr = __cpu_to_le64(paddr);
768 	desc->addr &= __cpu_to_le64(CE_DESC_ADDR_MASK);
769 
770 	desc->nbytes = 0;
771 
772 	dest_ring->per_transfer_context[write_index] = ctx;
773 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
774 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
775 	dest_ring->write_index = write_index;
776 
777 	return 0;
778 }
779 
780 void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
781 {
782 	struct ath10k *ar = pipe->ar;
783 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
784 	unsigned int nentries_mask = dest_ring->nentries_mask;
785 	unsigned int write_index = dest_ring->write_index;
786 	u32 ctrl_addr = pipe->ctrl_addr;
787 	u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
788 
789 	/* Prevent CE ring stuck issue that will occur when ring is full.
790 	 * Make sure that write index is 1 less than read index.
791 	 */
792 	if (((cur_write_idx + nentries) & nentries_mask) == dest_ring->sw_index)
793 		nentries -= 1;
794 
795 	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
796 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
797 	dest_ring->write_index = write_index;
798 }
799 EXPORT_SYMBOL(ath10k_ce_rx_update_write_idx);
800 
801 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
802 			  dma_addr_t paddr)
803 {
804 	struct ath10k *ar = pipe->ar;
805 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
806 	int ret;
807 
808 	spin_lock_bh(&ce->ce_lock);
809 	ret = pipe->ops->ce_rx_post_buf(pipe, ctx, paddr);
810 	spin_unlock_bh(&ce->ce_lock);
811 
812 	return ret;
813 }
814 EXPORT_SYMBOL(ath10k_ce_rx_post_buf);
815 
816 /*
817  * Guts of ath10k_ce_completed_recv_next.
818  * The caller takes responsibility for any necessary locking.
819  */
820 static int
821 	 _ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
822 					       void **per_transfer_contextp,
823 					       unsigned int *nbytesp)
824 {
825 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
826 	unsigned int nentries_mask = dest_ring->nentries_mask;
827 	unsigned int sw_index = dest_ring->sw_index;
828 
829 	struct ce_desc *base = dest_ring->base_addr_owner_space;
830 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
831 	struct ce_desc sdesc;
832 	u16 nbytes;
833 
834 	/* Copy in one go for performance reasons */
835 	sdesc = *desc;
836 
837 	nbytes = __le16_to_cpu(sdesc.nbytes);
838 	if (nbytes == 0) {
839 		/*
840 		 * This closes a relatively unusual race where the Host
841 		 * sees the updated DRRI before the update to the
842 		 * corresponding descriptor has completed. We treat this
843 		 * as a descriptor that is not yet done.
844 		 */
845 		return -EIO;
846 	}
847 
848 	desc->nbytes = 0;
849 
850 	/* Return data from completed destination descriptor */
851 	*nbytesp = nbytes;
852 
853 	if (per_transfer_contextp)
854 		*per_transfer_contextp =
855 			dest_ring->per_transfer_context[sw_index];
856 
857 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
858 	 * So update transfer context all CEs except CE5.
859 	 */
860 	if (ce_state->id != 5)
861 		dest_ring->per_transfer_context[sw_index] = NULL;
862 
863 	/* Update sw_index */
864 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
865 	dest_ring->sw_index = sw_index;
866 
867 	return 0;
868 }
869 
870 static int
871 _ath10k_ce_completed_recv_next_nolock_64(struct ath10k_ce_pipe *ce_state,
872 					 void **per_transfer_contextp,
873 					 unsigned int *nbytesp)
874 {
875 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
876 	unsigned int nentries_mask = dest_ring->nentries_mask;
877 	unsigned int sw_index = dest_ring->sw_index;
878 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
879 	struct ce_desc_64 *desc =
880 		CE_DEST_RING_TO_DESC_64(base, sw_index);
881 	struct ce_desc_64 sdesc;
882 	u16 nbytes;
883 
884 	/* Copy in one go for performance reasons */
885 	sdesc = *desc;
886 
887 	nbytes = __le16_to_cpu(sdesc.nbytes);
888 	if (nbytes == 0) {
889 		/* This closes a relatively unusual race where the Host
890 		 * sees the updated DRRI before the update to the
891 		 * corresponding descriptor has completed. We treat this
892 		 * as a descriptor that is not yet done.
893 		 */
894 		return -EIO;
895 	}
896 
897 	desc->nbytes = 0;
898 
899 	/* Return data from completed destination descriptor */
900 	*nbytesp = nbytes;
901 
902 	if (per_transfer_contextp)
903 		*per_transfer_contextp =
904 			dest_ring->per_transfer_context[sw_index];
905 
906 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
907 	 * So update transfer context all CEs except CE5.
908 	 */
909 	if (ce_state->id != 5)
910 		dest_ring->per_transfer_context[sw_index] = NULL;
911 
912 	/* Update sw_index */
913 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
914 	dest_ring->sw_index = sw_index;
915 
916 	return 0;
917 }
918 
919 int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
920 					 void **per_transfer_ctx,
921 					 unsigned int *nbytesp)
922 {
923 	return ce_state->ops->ce_completed_recv_next_nolock(ce_state,
924 							    per_transfer_ctx,
925 							    nbytesp);
926 }
927 EXPORT_SYMBOL(ath10k_ce_completed_recv_next_nolock);
928 
929 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
930 				  void **per_transfer_contextp,
931 				  unsigned int *nbytesp)
932 {
933 	struct ath10k *ar = ce_state->ar;
934 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
935 	int ret;
936 
937 	spin_lock_bh(&ce->ce_lock);
938 	ret = ce_state->ops->ce_completed_recv_next_nolock(ce_state,
939 						   per_transfer_contextp,
940 						   nbytesp);
941 
942 	spin_unlock_bh(&ce->ce_lock);
943 
944 	return ret;
945 }
946 EXPORT_SYMBOL(ath10k_ce_completed_recv_next);
947 
948 static int _ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
949 				       void **per_transfer_contextp,
950 				       dma_addr_t *bufferp)
951 {
952 	struct ath10k_ce_ring *dest_ring;
953 	unsigned int nentries_mask;
954 	unsigned int sw_index;
955 	unsigned int write_index;
956 	int ret;
957 	struct ath10k *ar;
958 	struct ath10k_ce *ce;
959 
960 	dest_ring = ce_state->dest_ring;
961 
962 	if (!dest_ring)
963 		return -EIO;
964 
965 	ar = ce_state->ar;
966 	ce = ath10k_ce_priv(ar);
967 
968 	spin_lock_bh(&ce->ce_lock);
969 
970 	nentries_mask = dest_ring->nentries_mask;
971 	sw_index = dest_ring->sw_index;
972 	write_index = dest_ring->write_index;
973 	if (write_index != sw_index) {
974 		struct ce_desc *base = dest_ring->base_addr_owner_space;
975 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
976 
977 		/* Return data from completed destination descriptor */
978 		*bufferp = __le32_to_cpu(desc->addr);
979 
980 		if (per_transfer_contextp)
981 			*per_transfer_contextp =
982 				dest_ring->per_transfer_context[sw_index];
983 
984 		/* sanity */
985 		dest_ring->per_transfer_context[sw_index] = NULL;
986 		desc->nbytes = 0;
987 
988 		/* Update sw_index */
989 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
990 		dest_ring->sw_index = sw_index;
991 		ret = 0;
992 	} else {
993 		ret = -EIO;
994 	}
995 
996 	spin_unlock_bh(&ce->ce_lock);
997 
998 	return ret;
999 }
1000 
1001 static int _ath10k_ce_revoke_recv_next_64(struct ath10k_ce_pipe *ce_state,
1002 					  void **per_transfer_contextp,
1003 					  dma_addr_t *bufferp)
1004 {
1005 	struct ath10k_ce_ring *dest_ring;
1006 	unsigned int nentries_mask;
1007 	unsigned int sw_index;
1008 	unsigned int write_index;
1009 	int ret;
1010 	struct ath10k *ar;
1011 	struct ath10k_ce *ce;
1012 
1013 	dest_ring = ce_state->dest_ring;
1014 
1015 	if (!dest_ring)
1016 		return -EIO;
1017 
1018 	ar = ce_state->ar;
1019 	ce = ath10k_ce_priv(ar);
1020 
1021 	spin_lock_bh(&ce->ce_lock);
1022 
1023 	nentries_mask = dest_ring->nentries_mask;
1024 	sw_index = dest_ring->sw_index;
1025 	write_index = dest_ring->write_index;
1026 	if (write_index != sw_index) {
1027 		struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
1028 		struct ce_desc_64 *desc =
1029 			CE_DEST_RING_TO_DESC_64(base, sw_index);
1030 
1031 		/* Return data from completed destination descriptor */
1032 		*bufferp = __le64_to_cpu(desc->addr);
1033 
1034 		if (per_transfer_contextp)
1035 			*per_transfer_contextp =
1036 				dest_ring->per_transfer_context[sw_index];
1037 
1038 		/* sanity */
1039 		dest_ring->per_transfer_context[sw_index] = NULL;
1040 		desc->nbytes = 0;
1041 
1042 		/* Update sw_index */
1043 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1044 		dest_ring->sw_index = sw_index;
1045 		ret = 0;
1046 	} else {
1047 		ret = -EIO;
1048 	}
1049 
1050 	spin_unlock_bh(&ce->ce_lock);
1051 
1052 	return ret;
1053 }
1054 
1055 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
1056 			       void **per_transfer_contextp,
1057 			       dma_addr_t *bufferp)
1058 {
1059 	return ce_state->ops->ce_revoke_recv_next(ce_state,
1060 						  per_transfer_contextp,
1061 						  bufferp);
1062 }
1063 EXPORT_SYMBOL(ath10k_ce_revoke_recv_next);
1064 
1065 /*
1066  * Guts of ath10k_ce_completed_send_next.
1067  * The caller takes responsibility for any necessary locking.
1068  */
1069 static int _ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1070 						 void **per_transfer_contextp)
1071 {
1072 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1073 	u32 ctrl_addr = ce_state->ctrl_addr;
1074 	struct ath10k *ar = ce_state->ar;
1075 	unsigned int nentries_mask = src_ring->nentries_mask;
1076 	unsigned int sw_index = src_ring->sw_index;
1077 	unsigned int read_index;
1078 	struct ce_desc *desc;
1079 
1080 	if (src_ring->hw_index == sw_index) {
1081 		/*
1082 		 * The SW completion index has caught up with the cached
1083 		 * version of the HW completion index.
1084 		 * Update the cached HW completion index to see whether
1085 		 * the SW has really caught up to the HW, or if the cached
1086 		 * value of the HW index has become stale.
1087 		 */
1088 
1089 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1090 		if (read_index == 0xffffffff)
1091 			return -ENODEV;
1092 
1093 		read_index &= nentries_mask;
1094 		src_ring->hw_index = read_index;
1095 	}
1096 
1097 	if (ar->hw_params.rri_on_ddr)
1098 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1099 	else
1100 		read_index = src_ring->hw_index;
1101 
1102 	if (read_index == sw_index)
1103 		return -EIO;
1104 
1105 	if (per_transfer_contextp)
1106 		*per_transfer_contextp =
1107 			src_ring->per_transfer_context[sw_index];
1108 
1109 	/* sanity */
1110 	src_ring->per_transfer_context[sw_index] = NULL;
1111 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
1112 				   sw_index);
1113 	desc->nbytes = 0;
1114 
1115 	/* Update sw_index */
1116 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1117 	src_ring->sw_index = sw_index;
1118 
1119 	return 0;
1120 }
1121 
1122 static int _ath10k_ce_completed_send_next_nolock_64(struct ath10k_ce_pipe *ce_state,
1123 						    void **per_transfer_contextp)
1124 {
1125 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1126 	u32 ctrl_addr = ce_state->ctrl_addr;
1127 	struct ath10k *ar = ce_state->ar;
1128 	unsigned int nentries_mask = src_ring->nentries_mask;
1129 	unsigned int sw_index = src_ring->sw_index;
1130 	unsigned int read_index;
1131 	struct ce_desc_64 *desc;
1132 
1133 	if (src_ring->hw_index == sw_index) {
1134 		/*
1135 		 * The SW completion index has caught up with the cached
1136 		 * version of the HW completion index.
1137 		 * Update the cached HW completion index to see whether
1138 		 * the SW has really caught up to the HW, or if the cached
1139 		 * value of the HW index has become stale.
1140 		 */
1141 
1142 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1143 		if (read_index == 0xffffffff)
1144 			return -ENODEV;
1145 
1146 		read_index &= nentries_mask;
1147 		src_ring->hw_index = read_index;
1148 	}
1149 
1150 	if (ar->hw_params.rri_on_ddr)
1151 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1152 	else
1153 		read_index = src_ring->hw_index;
1154 
1155 	if (read_index == sw_index)
1156 		return -EIO;
1157 
1158 	if (per_transfer_contextp)
1159 		*per_transfer_contextp =
1160 			src_ring->per_transfer_context[sw_index];
1161 
1162 	/* sanity */
1163 	src_ring->per_transfer_context[sw_index] = NULL;
1164 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
1165 				      sw_index);
1166 	desc->nbytes = 0;
1167 
1168 	/* Update sw_index */
1169 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1170 	src_ring->sw_index = sw_index;
1171 
1172 	return 0;
1173 }
1174 
1175 int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1176 					 void **per_transfer_contextp)
1177 {
1178 	return ce_state->ops->ce_completed_send_next_nolock(ce_state,
1179 							    per_transfer_contextp);
1180 }
1181 EXPORT_SYMBOL(ath10k_ce_completed_send_next_nolock);
1182 
1183 static void ath10k_ce_extract_desc_data(struct ath10k *ar,
1184 					struct ath10k_ce_ring *src_ring,
1185 					u32 sw_index,
1186 					dma_addr_t *bufferp,
1187 					u32 *nbytesp,
1188 					u32 *transfer_idp)
1189 {
1190 		struct ce_desc *base = src_ring->base_addr_owner_space;
1191 		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
1192 
1193 		/* Return data from completed source descriptor */
1194 		*bufferp = __le32_to_cpu(desc->addr);
1195 		*nbytesp = __le16_to_cpu(desc->nbytes);
1196 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1197 				   CE_DESC_FLAGS_META_DATA);
1198 }
1199 
1200 static void ath10k_ce_extract_desc_data_64(struct ath10k *ar,
1201 					   struct ath10k_ce_ring *src_ring,
1202 					   u32 sw_index,
1203 					   dma_addr_t *bufferp,
1204 					   u32 *nbytesp,
1205 					   u32 *transfer_idp)
1206 {
1207 		struct ce_desc_64 *base = src_ring->base_addr_owner_space;
1208 		struct ce_desc_64 *desc =
1209 			CE_SRC_RING_TO_DESC_64(base, sw_index);
1210 
1211 		/* Return data from completed source descriptor */
1212 		*bufferp = __le64_to_cpu(desc->addr);
1213 		*nbytesp = __le16_to_cpu(desc->nbytes);
1214 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1215 				   CE_DESC_FLAGS_META_DATA);
1216 }
1217 
1218 /* NB: Modeled after ath10k_ce_completed_send_next */
1219 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
1220 			       void **per_transfer_contextp,
1221 			       dma_addr_t *bufferp,
1222 			       unsigned int *nbytesp,
1223 			       unsigned int *transfer_idp)
1224 {
1225 	struct ath10k_ce_ring *src_ring;
1226 	unsigned int nentries_mask;
1227 	unsigned int sw_index;
1228 	unsigned int write_index;
1229 	int ret;
1230 	struct ath10k *ar;
1231 	struct ath10k_ce *ce;
1232 
1233 	src_ring = ce_state->src_ring;
1234 
1235 	if (!src_ring)
1236 		return -EIO;
1237 
1238 	ar = ce_state->ar;
1239 	ce = ath10k_ce_priv(ar);
1240 
1241 	spin_lock_bh(&ce->ce_lock);
1242 
1243 	nentries_mask = src_ring->nentries_mask;
1244 	sw_index = src_ring->sw_index;
1245 	write_index = src_ring->write_index;
1246 
1247 	if (write_index != sw_index) {
1248 		ce_state->ops->ce_extract_desc_data(ar, src_ring, sw_index,
1249 						    bufferp, nbytesp,
1250 						    transfer_idp);
1251 
1252 		if (per_transfer_contextp)
1253 			*per_transfer_contextp =
1254 				src_ring->per_transfer_context[sw_index];
1255 
1256 		/* sanity */
1257 		src_ring->per_transfer_context[sw_index] = NULL;
1258 
1259 		/* Update sw_index */
1260 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1261 		src_ring->sw_index = sw_index;
1262 		ret = 0;
1263 	} else {
1264 		ret = -EIO;
1265 	}
1266 
1267 	spin_unlock_bh(&ce->ce_lock);
1268 
1269 	return ret;
1270 }
1271 EXPORT_SYMBOL(ath10k_ce_cancel_send_next);
1272 
1273 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
1274 				  void **per_transfer_contextp)
1275 {
1276 	struct ath10k *ar = ce_state->ar;
1277 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1278 	int ret;
1279 
1280 	spin_lock_bh(&ce->ce_lock);
1281 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
1282 						   per_transfer_contextp);
1283 	spin_unlock_bh(&ce->ce_lock);
1284 
1285 	return ret;
1286 }
1287 EXPORT_SYMBOL(ath10k_ce_completed_send_next);
1288 
1289 /*
1290  * Guts of interrupt handler for per-engine interrupts on a particular CE.
1291  *
1292  * Invokes registered callbacks for recv_complete,
1293  * send_complete, and watermarks.
1294  */
1295 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
1296 {
1297 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1298 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1299 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
1300 	u32 ctrl_addr = ce_state->ctrl_addr;
1301 
1302 	/*
1303 	 * Clear before handling
1304 	 *
1305 	 * Misc CE interrupts are not being handled, but still need
1306 	 * to be cleared.
1307 	 *
1308 	 * NOTE: When the last copy engine interrupt is cleared the
1309 	 * hardware will go to sleep.  Once this happens any access to
1310 	 * the CE registers can cause a hardware fault.
1311 	 */
1312 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
1313 					  wm_regs->cc_mask | wm_regs->wm_mask);
1314 
1315 	if (ce_state->recv_cb)
1316 		ce_state->recv_cb(ce_state);
1317 
1318 	if (ce_state->send_cb)
1319 		ce_state->send_cb(ce_state);
1320 }
1321 EXPORT_SYMBOL(ath10k_ce_per_engine_service);
1322 
1323 /*
1324  * Handler for per-engine interrupts on ALL active CEs.
1325  * This is used in cases where the system is sharing a
1326  * single interrupt for all CEs
1327  */
1328 
1329 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
1330 {
1331 	int ce_id;
1332 	u32 intr_summary;
1333 
1334 	intr_summary = ath10k_ce_interrupt_summary(ar);
1335 
1336 	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
1337 		if (intr_summary & (1 << ce_id))
1338 			intr_summary &= ~(1 << ce_id);
1339 		else
1340 			/* no intr pending on this CE */
1341 			continue;
1342 
1343 		ath10k_ce_per_engine_service(ar, ce_id);
1344 	}
1345 }
1346 EXPORT_SYMBOL(ath10k_ce_per_engine_service_any);
1347 
1348 /*
1349  * Adjust interrupts for the copy complete handler.
1350  * If it's needed for either send or recv, then unmask
1351  * this interrupt; otherwise, mask it.
1352  *
1353  * Called with ce_lock held.
1354  */
1355 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
1356 {
1357 	u32 ctrl_addr = ce_state->ctrl_addr;
1358 	struct ath10k *ar = ce_state->ar;
1359 	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;
1360 
1361 	if ((!disable_copy_compl_intr) &&
1362 	    (ce_state->send_cb || ce_state->recv_cb))
1363 		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
1364 	else
1365 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1366 
1367 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1368 }
1369 
1370 void ath10k_ce_disable_interrupt(struct ath10k *ar, int ce_id)
1371 {
1372 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1373 	struct ath10k_ce_pipe *ce_state;
1374 	u32 ctrl_addr;
1375 
1376 	ce_state  = &ce->ce_states[ce_id];
1377 	if (ce_state->attr_flags & CE_ATTR_POLL)
1378 		return;
1379 
1380 	ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1381 
1382 	ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1383 	ath10k_ce_error_intr_disable(ar, ctrl_addr);
1384 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1385 }
1386 EXPORT_SYMBOL(ath10k_ce_disable_interrupt);
1387 
1388 void ath10k_ce_disable_interrupts(struct ath10k *ar)
1389 {
1390 	int ce_id;
1391 
1392 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1393 		ath10k_ce_disable_interrupt(ar, ce_id);
1394 }
1395 EXPORT_SYMBOL(ath10k_ce_disable_interrupts);
1396 
1397 void ath10k_ce_enable_interrupt(struct ath10k *ar, int ce_id)
1398 {
1399 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1400 	struct ath10k_ce_pipe *ce_state;
1401 
1402 	ce_state  = &ce->ce_states[ce_id];
1403 	if (ce_state->attr_flags & CE_ATTR_POLL)
1404 		return;
1405 
1406 	ath10k_ce_per_engine_handler_adjust(ce_state);
1407 }
1408 EXPORT_SYMBOL(ath10k_ce_enable_interrupt);
1409 
1410 void ath10k_ce_enable_interrupts(struct ath10k *ar)
1411 {
1412 	int ce_id;
1413 
1414 	/* Enable interrupts for copy engine that
1415 	 * are not using polling mode.
1416 	 */
1417 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1418 		ath10k_ce_enable_interrupt(ar, ce_id);
1419 }
1420 EXPORT_SYMBOL(ath10k_ce_enable_interrupts);
1421 
1422 static int ath10k_ce_init_src_ring(struct ath10k *ar,
1423 				   unsigned int ce_id,
1424 				   const struct ce_attr *attr)
1425 {
1426 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1427 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1428 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1429 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1430 
1431 	nentries = roundup_pow_of_two(attr->src_nentries);
1432 
1433 	if (ar->hw_params.target_64bit)
1434 		memset(src_ring->base_addr_owner_space, 0,
1435 		       nentries * sizeof(struct ce_desc_64));
1436 	else
1437 		memset(src_ring->base_addr_owner_space, 0,
1438 		       nentries * sizeof(struct ce_desc));
1439 
1440 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1441 	src_ring->sw_index &= src_ring->nentries_mask;
1442 	src_ring->hw_index = src_ring->sw_index;
1443 
1444 	src_ring->write_index =
1445 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
1446 	src_ring->write_index &= src_ring->nentries_mask;
1447 
1448 	ath10k_ce_src_ring_base_addr_set(ar, ce_id,
1449 					 src_ring->base_addr_ce_space);
1450 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
1451 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
1452 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
1453 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
1454 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
1455 
1456 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1457 		   "boot init ce src ring id %d entries %d base_addr %pK\n",
1458 		   ce_id, nentries, src_ring->base_addr_owner_space);
1459 
1460 	return 0;
1461 }
1462 
1463 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
1464 				    unsigned int ce_id,
1465 				    const struct ce_attr *attr)
1466 {
1467 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1468 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1469 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
1470 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1471 
1472 	nentries = roundup_pow_of_two(attr->dest_nentries);
1473 
1474 	if (ar->hw_params.target_64bit)
1475 		memset(dest_ring->base_addr_owner_space, 0,
1476 		       nentries * sizeof(struct ce_desc_64));
1477 	else
1478 		memset(dest_ring->base_addr_owner_space, 0,
1479 		       nentries * sizeof(struct ce_desc));
1480 
1481 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
1482 	dest_ring->sw_index &= dest_ring->nentries_mask;
1483 	dest_ring->write_index =
1484 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
1485 	dest_ring->write_index &= dest_ring->nentries_mask;
1486 
1487 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id,
1488 					  dest_ring->base_addr_ce_space);
1489 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1490 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1491 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1492 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1493 
1494 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1495 		   "boot ce dest ring id %d entries %d base_addr %pK\n",
1496 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1497 
1498 	return 0;
1499 }
1500 
1501 static int ath10k_ce_alloc_shadow_base(struct ath10k *ar,
1502 				       struct ath10k_ce_ring *src_ring,
1503 				       u32 nentries)
1504 {
1505 	src_ring->shadow_base_unaligned = kcalloc(nentries,
1506 						  sizeof(struct ce_desc_64),
1507 						  GFP_KERNEL);
1508 	if (!src_ring->shadow_base_unaligned)
1509 		return -ENOMEM;
1510 
1511 	src_ring->shadow_base = (struct ce_desc_64 *)
1512 			PTR_ALIGN(src_ring->shadow_base_unaligned,
1513 				  CE_DESC_RING_ALIGN);
1514 	return 0;
1515 }
1516 
1517 static struct ath10k_ce_ring *
1518 ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
1519 			 const struct ce_attr *attr)
1520 {
1521 	struct ath10k_ce_ring *src_ring;
1522 	u32 nentries = attr->src_nentries;
1523 	dma_addr_t base_addr;
1524 	int ret;
1525 
1526 	nentries = roundup_pow_of_two(nentries);
1527 
1528 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1529 				       nentries), GFP_KERNEL);
1530 	if (src_ring == NULL)
1531 		return ERR_PTR(-ENOMEM);
1532 
1533 	src_ring->nentries = nentries;
1534 	src_ring->nentries_mask = nentries - 1;
1535 
1536 	/*
1537 	 * Legacy platforms that do not support cache
1538 	 * coherent DMA are unsupported
1539 	 */
1540 	src_ring->base_addr_owner_space_unaligned =
1541 		dma_alloc_coherent(ar->dev,
1542 				   (nentries * sizeof(struct ce_desc) +
1543 				    CE_DESC_RING_ALIGN),
1544 				   &base_addr, GFP_KERNEL);
1545 	if (!src_ring->base_addr_owner_space_unaligned) {
1546 		kfree(src_ring);
1547 		return ERR_PTR(-ENOMEM);
1548 	}
1549 
1550 	src_ring->base_addr_ce_space_unaligned = base_addr;
1551 
1552 	src_ring->base_addr_owner_space =
1553 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1554 				  CE_DESC_RING_ALIGN);
1555 	src_ring->base_addr_ce_space =
1556 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1557 			      CE_DESC_RING_ALIGN);
1558 
1559 	if (ar->hw_params.shadow_reg_support) {
1560 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1561 		if (ret) {
1562 			dma_free_coherent(ar->dev,
1563 					  (nentries * sizeof(struct ce_desc) +
1564 					   CE_DESC_RING_ALIGN),
1565 					  src_ring->base_addr_owner_space_unaligned,
1566 					  base_addr);
1567 			kfree(src_ring);
1568 			return ERR_PTR(ret);
1569 		}
1570 	}
1571 
1572 	return src_ring;
1573 }
1574 
1575 static struct ath10k_ce_ring *
1576 ath10k_ce_alloc_src_ring_64(struct ath10k *ar, unsigned int ce_id,
1577 			    const struct ce_attr *attr)
1578 {
1579 	struct ath10k_ce_ring *src_ring;
1580 	u32 nentries = attr->src_nentries;
1581 	dma_addr_t base_addr;
1582 	int ret;
1583 
1584 	nentries = roundup_pow_of_two(nentries);
1585 
1586 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1587 				       nentries), GFP_KERNEL);
1588 	if (!src_ring)
1589 		return ERR_PTR(-ENOMEM);
1590 
1591 	src_ring->nentries = nentries;
1592 	src_ring->nentries_mask = nentries - 1;
1593 
1594 	/* Legacy platforms that do not support cache
1595 	 * coherent DMA are unsupported
1596 	 */
1597 	src_ring->base_addr_owner_space_unaligned =
1598 		dma_alloc_coherent(ar->dev,
1599 				   (nentries * sizeof(struct ce_desc_64) +
1600 				    CE_DESC_RING_ALIGN),
1601 				   &base_addr, GFP_KERNEL);
1602 	if (!src_ring->base_addr_owner_space_unaligned) {
1603 		kfree(src_ring);
1604 		return ERR_PTR(-ENOMEM);
1605 	}
1606 
1607 	src_ring->base_addr_ce_space_unaligned = base_addr;
1608 
1609 	src_ring->base_addr_owner_space =
1610 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1611 				  CE_DESC_RING_ALIGN);
1612 	src_ring->base_addr_ce_space =
1613 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1614 			      CE_DESC_RING_ALIGN);
1615 
1616 	if (ar->hw_params.shadow_reg_support) {
1617 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1618 		if (ret) {
1619 			dma_free_coherent(ar->dev,
1620 					  (nentries * sizeof(struct ce_desc_64) +
1621 					   CE_DESC_RING_ALIGN),
1622 					  src_ring->base_addr_owner_space_unaligned,
1623 					  base_addr);
1624 			kfree(src_ring);
1625 			return ERR_PTR(ret);
1626 		}
1627 	}
1628 
1629 	return src_ring;
1630 }
1631 
1632 static struct ath10k_ce_ring *
1633 ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
1634 			  const struct ce_attr *attr)
1635 {
1636 	struct ath10k_ce_ring *dest_ring;
1637 	u32 nentries;
1638 	dma_addr_t base_addr;
1639 
1640 	nentries = roundup_pow_of_two(attr->dest_nentries);
1641 
1642 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1643 					nentries), GFP_KERNEL);
1644 	if (dest_ring == NULL)
1645 		return ERR_PTR(-ENOMEM);
1646 
1647 	dest_ring->nentries = nentries;
1648 	dest_ring->nentries_mask = nentries - 1;
1649 
1650 	/*
1651 	 * Legacy platforms that do not support cache
1652 	 * coherent DMA are unsupported
1653 	 */
1654 	dest_ring->base_addr_owner_space_unaligned =
1655 		dma_alloc_coherent(ar->dev,
1656 				   (nentries * sizeof(struct ce_desc) +
1657 				    CE_DESC_RING_ALIGN),
1658 				   &base_addr, GFP_KERNEL);
1659 	if (!dest_ring->base_addr_owner_space_unaligned) {
1660 		kfree(dest_ring);
1661 		return ERR_PTR(-ENOMEM);
1662 	}
1663 
1664 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1665 
1666 	dest_ring->base_addr_owner_space =
1667 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1668 				  CE_DESC_RING_ALIGN);
1669 	dest_ring->base_addr_ce_space =
1670 				ALIGN(dest_ring->base_addr_ce_space_unaligned,
1671 				      CE_DESC_RING_ALIGN);
1672 
1673 	return dest_ring;
1674 }
1675 
1676 static struct ath10k_ce_ring *
1677 ath10k_ce_alloc_dest_ring_64(struct ath10k *ar, unsigned int ce_id,
1678 			     const struct ce_attr *attr)
1679 {
1680 	struct ath10k_ce_ring *dest_ring;
1681 	u32 nentries;
1682 	dma_addr_t base_addr;
1683 
1684 	nentries = roundup_pow_of_two(attr->dest_nentries);
1685 
1686 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1687 					nentries), GFP_KERNEL);
1688 	if (!dest_ring)
1689 		return ERR_PTR(-ENOMEM);
1690 
1691 	dest_ring->nentries = nentries;
1692 	dest_ring->nentries_mask = nentries - 1;
1693 
1694 	/* Legacy platforms that do not support cache
1695 	 * coherent DMA are unsupported
1696 	 */
1697 	dest_ring->base_addr_owner_space_unaligned =
1698 		dma_alloc_coherent(ar->dev,
1699 				   (nentries * sizeof(struct ce_desc_64) +
1700 				    CE_DESC_RING_ALIGN),
1701 				   &base_addr, GFP_KERNEL);
1702 	if (!dest_ring->base_addr_owner_space_unaligned) {
1703 		kfree(dest_ring);
1704 		return ERR_PTR(-ENOMEM);
1705 	}
1706 
1707 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1708 
1709 	/* Correctly initialize memory to 0 to prevent garbage
1710 	 * data crashing system when download firmware
1711 	 */
1712 	dest_ring->base_addr_owner_space =
1713 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1714 				  CE_DESC_RING_ALIGN);
1715 	dest_ring->base_addr_ce_space =
1716 			ALIGN(dest_ring->base_addr_ce_space_unaligned,
1717 			      CE_DESC_RING_ALIGN);
1718 
1719 	return dest_ring;
1720 }
1721 
1722 /*
1723  * Initialize a Copy Engine based on caller-supplied attributes.
1724  * This may be called once to initialize both source and destination
1725  * rings or it may be called twice for separate source and destination
1726  * initialization. It may be that only one side or the other is
1727  * initialized by software/firmware.
1728  */
1729 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
1730 			const struct ce_attr *attr)
1731 {
1732 	int ret;
1733 
1734 	if (attr->src_nentries) {
1735 		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
1736 		if (ret) {
1737 			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
1738 				   ce_id, ret);
1739 			return ret;
1740 		}
1741 	}
1742 
1743 	if (attr->dest_nentries) {
1744 		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
1745 		if (ret) {
1746 			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
1747 				   ce_id, ret);
1748 			return ret;
1749 		}
1750 	}
1751 
1752 	return 0;
1753 }
1754 EXPORT_SYMBOL(ath10k_ce_init_pipe);
1755 
1756 static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
1757 {
1758 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1759 
1760 	ath10k_ce_src_ring_base_addr_set(ar, ce_id, 0);
1761 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
1762 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
1763 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
1764 }
1765 
1766 static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
1767 {
1768 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1769 
1770 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id, 0);
1771 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
1772 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
1773 }
1774 
/*
 * Reset the ring registers of copy engine @ce_id: source ring first, then
 * destination ring.
 */
void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	ath10k_ce_deinit_src_ring(ar, ce_id);
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}
1780 EXPORT_SYMBOL(ath10k_ce_deinit_pipe);
1781 
1782 static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1783 {
1784 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1785 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1786 
1787 	if (ce_state->src_ring) {
1788 		if (ar->hw_params.shadow_reg_support)
1789 			kfree(ce_state->src_ring->shadow_base_unaligned);
1790 		dma_free_coherent(ar->dev,
1791 				  (ce_state->src_ring->nentries *
1792 				   sizeof(struct ce_desc) +
1793 				   CE_DESC_RING_ALIGN),
1794 				  ce_state->src_ring->base_addr_owner_space,
1795 				  ce_state->src_ring->base_addr_ce_space);
1796 		kfree(ce_state->src_ring);
1797 	}
1798 
1799 	if (ce_state->dest_ring) {
1800 		dma_free_coherent(ar->dev,
1801 				  (ce_state->dest_ring->nentries *
1802 				   sizeof(struct ce_desc) +
1803 				   CE_DESC_RING_ALIGN),
1804 				  ce_state->dest_ring->base_addr_owner_space,
1805 				  ce_state->dest_ring->base_addr_ce_space);
1806 		kfree(ce_state->dest_ring);
1807 	}
1808 
1809 	ce_state->src_ring = NULL;
1810 	ce_state->dest_ring = NULL;
1811 }
1812 
1813 static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id)
1814 {
1815 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1816 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1817 
1818 	if (ce_state->src_ring) {
1819 		if (ar->hw_params.shadow_reg_support)
1820 			kfree(ce_state->src_ring->shadow_base_unaligned);
1821 		dma_free_coherent(ar->dev,
1822 				  (ce_state->src_ring->nentries *
1823 				   sizeof(struct ce_desc_64) +
1824 				   CE_DESC_RING_ALIGN),
1825 				  ce_state->src_ring->base_addr_owner_space,
1826 				  ce_state->src_ring->base_addr_ce_space);
1827 		kfree(ce_state->src_ring);
1828 	}
1829 
1830 	if (ce_state->dest_ring) {
1831 		dma_free_coherent(ar->dev,
1832 				  (ce_state->dest_ring->nentries *
1833 				   sizeof(struct ce_desc_64) +
1834 				   CE_DESC_RING_ALIGN),
1835 				  ce_state->dest_ring->base_addr_owner_space,
1836 				  ce_state->dest_ring->base_addr_ce_space);
1837 		kfree(ce_state->dest_ring);
1838 	}
1839 
1840 	ce_state->src_ring = NULL;
1841 	ce_state->dest_ring = NULL;
1842 }
1843 
1844 void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1845 {
1846 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1847 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1848 
1849 	ce_state->ops->ce_free_pipe(ar, ce_id);
1850 }
1851 EXPORT_SYMBOL(ath10k_ce_free_pipe);
1852 
1853 void ath10k_ce_dump_registers(struct ath10k *ar,
1854 			      struct ath10k_fw_crash_data *crash_data)
1855 {
1856 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1857 	struct ath10k_ce_crash_data ce_data;
1858 	u32 addr, id;
1859 
1860 	lockdep_assert_held(&ar->dump_mutex);
1861 
1862 	ath10k_err(ar, "Copy Engine register dump:\n");
1863 
1864 	spin_lock_bh(&ce->ce_lock);
1865 	for (id = 0; id < CE_COUNT; id++) {
1866 		addr = ath10k_ce_base_address(ar, id);
1867 		ce_data.base_addr = cpu_to_le32(addr);
1868 
1869 		ce_data.src_wr_idx =
1870 			cpu_to_le32(ath10k_ce_src_ring_write_index_get(ar, addr));
1871 		ce_data.src_r_idx =
1872 			cpu_to_le32(ath10k_ce_src_ring_read_index_get(ar, addr));
1873 		ce_data.dst_wr_idx =
1874 			cpu_to_le32(ath10k_ce_dest_ring_write_index_get(ar, addr));
1875 		ce_data.dst_r_idx =
1876 			cpu_to_le32(ath10k_ce_dest_ring_read_index_get(ar, addr));
1877 
1878 		if (crash_data)
1879 			crash_data->ce_crash_data[id] = ce_data;
1880 
1881 		ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u", id,
1882 			   le32_to_cpu(ce_data.base_addr),
1883 			   le32_to_cpu(ce_data.src_wr_idx),
1884 			   le32_to_cpu(ce_data.src_r_idx),
1885 			   le32_to_cpu(ce_data.dst_wr_idx),
1886 			   le32_to_cpu(ce_data.dst_r_idx));
1887 	}
1888 
1889 	spin_unlock_bh(&ce->ce_lock);
1890 }
1891 EXPORT_SYMBOL(ath10k_ce_dump_registers);
1892 
1893 static const struct ath10k_ce_ops ce_ops = {
1894 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring,
1895 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring,
1896 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf,
1897 	.ce_completed_recv_next_nolock = _ath10k_ce_completed_recv_next_nolock,
1898 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next,
1899 	.ce_extract_desc_data = ath10k_ce_extract_desc_data,
1900 	.ce_free_pipe = _ath10k_ce_free_pipe,
1901 	.ce_send_nolock = _ath10k_ce_send_nolock,
1902 	.ce_set_src_ring_base_addr_hi = NULL,
1903 	.ce_set_dest_ring_base_addr_hi = NULL,
1904 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock,
1905 };
1906 
1907 static const struct ath10k_ce_ops ce_64_ops = {
1908 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring_64,
1909 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring_64,
1910 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf_64,
1911 	.ce_completed_recv_next_nolock =
1912 				_ath10k_ce_completed_recv_next_nolock_64,
1913 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next_64,
1914 	.ce_extract_desc_data = ath10k_ce_extract_desc_data_64,
1915 	.ce_free_pipe = _ath10k_ce_free_pipe_64,
1916 	.ce_send_nolock = _ath10k_ce_send_nolock_64,
1917 	.ce_set_src_ring_base_addr_hi = ath10k_ce_set_src_ring_base_addr_hi,
1918 	.ce_set_dest_ring_base_addr_hi = ath10k_ce_set_dest_ring_base_addr_hi,
1919 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock_64,
1920 };
1921 
1922 static void ath10k_ce_set_ops(struct ath10k *ar,
1923 			      struct ath10k_ce_pipe *ce_state)
1924 {
1925 	switch (ar->hw_rev) {
1926 	case ATH10K_HW_WCN3990:
1927 		ce_state->ops = &ce_64_ops;
1928 		break;
1929 	default:
1930 		ce_state->ops = &ce_ops;
1931 		break;
1932 	}
1933 }
1934 
1935 int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
1936 			 const struct ce_attr *attr)
1937 {
1938 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1939 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1940 	int ret;
1941 
1942 	ath10k_ce_set_ops(ar, ce_state);
1943 	/* Make sure there's enough CE ringbuffer entries for HTT TX to avoid
1944 	 * additional TX locking checks.
1945 	 *
1946 	 * For the lack of a better place do the check here.
1947 	 */
1948 	BUILD_BUG_ON(2 * TARGET_NUM_MSDU_DESC >
1949 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1950 	BUILD_BUG_ON(2 * TARGET_10_4_NUM_MSDU_DESC_PFC >
1951 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1952 	BUILD_BUG_ON(2 * TARGET_TLV_NUM_MSDU_DESC >
1953 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1954 
1955 	ce_state->ar = ar;
1956 	ce_state->id = ce_id;
1957 	ce_state->ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1958 	ce_state->attr_flags = attr->flags;
1959 	ce_state->src_sz_max = attr->src_sz_max;
1960 
1961 	if (attr->src_nentries)
1962 		ce_state->send_cb = attr->send_cb;
1963 
1964 	if (attr->dest_nentries)
1965 		ce_state->recv_cb = attr->recv_cb;
1966 
1967 	if (attr->src_nentries) {
1968 		ce_state->src_ring =
1969 			ce_state->ops->ce_alloc_src_ring(ar, ce_id, attr);
1970 		if (IS_ERR(ce_state->src_ring)) {
1971 			ret = PTR_ERR(ce_state->src_ring);
1972 			ath10k_err(ar, "failed to alloc CE src ring %d: %d\n",
1973 				   ce_id, ret);
1974 			ce_state->src_ring = NULL;
1975 			return ret;
1976 		}
1977 	}
1978 
1979 	if (attr->dest_nentries) {
1980 		ce_state->dest_ring = ce_state->ops->ce_alloc_dst_ring(ar,
1981 									ce_id,
1982 									attr);
1983 		if (IS_ERR(ce_state->dest_ring)) {
1984 			ret = PTR_ERR(ce_state->dest_ring);
1985 			ath10k_err(ar, "failed to alloc CE dest ring %d: %d\n",
1986 				   ce_id, ret);
1987 			ce_state->dest_ring = NULL;
1988 			return ret;
1989 		}
1990 	}
1991 
1992 	return 0;
1993 }
1994 EXPORT_SYMBOL(ath10k_ce_alloc_pipe);
1995 
1996 void ath10k_ce_alloc_rri(struct ath10k *ar)
1997 {
1998 	int i;
1999 	u32 value;
2000 	u32 ctrl1_regs;
2001 	u32 ce_base_addr;
2002 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
2003 
2004 	ce->vaddr_rri = dma_alloc_coherent(ar->dev,
2005 					   (CE_COUNT * sizeof(u32)),
2006 					   &ce->paddr_rri, GFP_KERNEL);
2007 
2008 	if (!ce->vaddr_rri)
2009 		return;
2010 
2011 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_low,
2012 			  lower_32_bits(ce->paddr_rri));
2013 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_high,
2014 			  (upper_32_bits(ce->paddr_rri) &
2015 			  CE_DESC_ADDR_HI_MASK));
2016 
2017 	for (i = 0; i < CE_COUNT; i++) {
2018 		ctrl1_regs = ar->hw_ce_regs->ctrl1_regs->addr;
2019 		ce_base_addr = ath10k_ce_base_address(ar, i);
2020 		value = ath10k_ce_read32(ar, ce_base_addr + ctrl1_regs);
2021 		value |= ar->hw_ce_regs->upd->mask;
2022 		ath10k_ce_write32(ar, ce_base_addr + ctrl1_regs, value);
2023 	}
2024 }
2025 EXPORT_SYMBOL(ath10k_ce_alloc_rri);
2026 
2027 void ath10k_ce_free_rri(struct ath10k *ar)
2028 {
2029 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
2030 
2031 	dma_free_coherent(ar->dev, (CE_COUNT * sizeof(u32)),
2032 			  ce->vaddr_rri,
2033 			  ce->paddr_rri);
2034 }
2035 EXPORT_SYMBOL(ath10k_ce_free_rri);
2036