1 // SPDX-License-Identifier: ISC
2 /*
3  * Copyright (c) 2005-2011 Atheros Communications Inc.
4  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
5  * Copyright (c) 2018 The Linux Foundation. All rights reserved.
6  */
7 
8 #include "hif.h"
9 #include "ce.h"
10 #include "debug.h"
11 
12 /*
13  * Support for Copy Engine hardware, which is mainly used for
14  * communication between Host and Target over a PCIe interconnect.
15  */
16 
17 /*
18  * A single CopyEngine (CE) comprises two "rings":
19  *   a source ring
20  *   a destination ring
21  *
22  * Each ring consists of a number of descriptors which specify
23  * an address, length, and meta-data.
24  *
25  * Typically, one side of the PCIe/AHB/SNOC interconnect (Host or Target)
26  * controls one ring and the other side controls the other ring.
27  * The source side chooses when to initiate a transfer and it
28  * chooses what to send (buffer address, length). The destination
29  * side keeps a supply of "anonymous receive buffers" available and
30  * it handles incoming data as it arrives (when the destination
31  * receives an interrupt).
32  *
33  * The sender may send a simple buffer (address/length) or it may
34  * send a small list of buffers.  When a small list is sent, hardware
35  * "gathers" these and they end up in a single destination buffer
36  * with a single interrupt.
37  *
38  * There are several "contexts" managed by this layer -- more, it
39  * may seem, than should be needed. These are provided mainly for
40  * maximum flexibility and especially to facilitate a simpler HIF
41  * implementation. There are per-CopyEngine recv, send, and watermark
42  * contexts. These are supplied by the caller when a recv, send,
43  * or watermark handler is established and they are echoed back to
44  * the caller when the respective callbacks are invoked. There is
45  * also a per-transfer context supplied by the caller when a buffer
46  * (or sendlist) is sent and when a buffer is enqueued for recv.
47  * These per-transfer contexts are echoed back to the caller when
48  * the buffer is sent/received.
49  */
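/* A minimal illustrative sketch of how a caller typically drives the CE API
 * exported below; the real flow lives in the bus-specific code (e.g. PCI or
 * SNOC glue). "tx_pipe", "rx_pipe", "skb", "paddr", "consume_tx_completion"
 * and "consume_rx_buffer" are hypothetical names, not part of this file.
 *
 *	// keep the destination ring stocked with anonymous receive buffers
 *	ret = ath10k_ce_rx_post_buf(rx_pipe, skb, paddr);
 *	if (ret)
 *		goto err_unmap;
 *
 *	// queue one transmit descriptor; the per-transfer context (skb)
 *	// is echoed back when the send completes
 *	ret = ath10k_ce_send(tx_pipe, skb, paddr, nbytes, transfer_id, 0);
 *
 *	// completion path, usually run from the CE interrupt callbacks
 *	while (ath10k_ce_completed_send_next(tx_pipe, &ctx) == 0)
 *		consume_tx_completion(ctx);
 *	while (ath10k_ce_completed_recv_next(rx_pipe, &ctx, &nbytes) == 0)
 *		consume_rx_buffer(ctx, nbytes);
 */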
50 
51 static inline u32 shadow_sr_wr_ind_addr(struct ath10k *ar,
52 					struct ath10k_ce_pipe *ce_state)
53 {
54 	u32 ce_id = ce_state->id;
55 	u32 addr = 0;
56 
57 	switch (ce_id) {
58 	case 0:
59 		addr = 0x00032000;
60 		break;
61 	case 3:
62 		addr = 0x0003200C;
63 		break;
64 	case 4:
65 		addr = 0x00032010;
66 		break;
67 	case 5:
68 		addr = 0x00032014;
69 		break;
70 	case 7:
71 		addr = 0x0003201C;
72 		break;
73 	default:
74 		ath10k_warn(ar, "invalid CE id: %d\n", ce_id);
75 		break;
76 	}
77 	return addr;
78 }
79 
80 static inline u32 shadow_dst_wr_ind_addr(struct ath10k *ar,
81 					 struct ath10k_ce_pipe *ce_state)
82 {
83 	u32 ce_id = ce_state->id;
84 	u32 addr = 0;
85 
86 	switch (ce_id) {
87 	case 1:
88 		addr = 0x00032034;
89 		break;
90 	case 2:
91 		addr = 0x00032038;
92 		break;
93 	case 5:
94 		addr = 0x00032044;
95 		break;
96 	case 7:
97 		addr = 0x0003204C;
98 		break;
99 	case 8:
100 		addr = 0x00032050;
101 		break;
102 	case 9:
103 		addr = 0x00032054;
104 		break;
105 	case 10:
106 		addr = 0x00032058;
107 		break;
108 	case 11:
109 		addr = 0x0003205C;
110 		break;
111 	default:
112 		ath10k_warn(ar, "invalid CE id: %d\n", ce_id);
113 		break;
114 	}
115 
116 	return addr;
117 }
118 
119 static inline unsigned int
120 ath10k_set_ring_byte(unsigned int offset,
121 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
122 {
123 	return ((offset << addr_map->lsb) & addr_map->mask);
124 }
125 
126 static inline unsigned int
127 ath10k_get_ring_byte(unsigned int offset,
128 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
129 {
130 	return ((offset & addr_map->mask) >> (addr_map->lsb));
131 }
132 
133 static inline u32 ath10k_ce_read32(struct ath10k *ar, u32 offset)
134 {
135 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
136 
137 	return ce->bus_ops->read32(ar, offset);
138 }
139 
140 static inline void ath10k_ce_write32(struct ath10k *ar, u32 offset, u32 value)
141 {
142 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
143 
144 	ce->bus_ops->write32(ar, offset, value);
145 }
146 
147 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
148 						       u32 ce_ctrl_addr,
149 						       unsigned int n)
150 {
151 	ath10k_ce_write32(ar, ce_ctrl_addr +
152 			  ar->hw_ce_regs->dst_wr_index_addr, n);
153 }
154 
155 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
156 						      u32 ce_ctrl_addr)
157 {
158 	return ath10k_ce_read32(ar, ce_ctrl_addr +
159 				ar->hw_ce_regs->dst_wr_index_addr);
160 }
161 
162 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
163 						      u32 ce_ctrl_addr,
164 						      unsigned int n)
165 {
166 	ath10k_ce_write32(ar, ce_ctrl_addr +
167 			  ar->hw_ce_regs->sr_wr_index_addr, n);
168 }
169 
170 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
171 						     u32 ce_ctrl_addr)
172 {
173 	return ath10k_ce_read32(ar, ce_ctrl_addr +
174 				ar->hw_ce_regs->sr_wr_index_addr);
175 }
176 
177 static inline u32 ath10k_ce_src_ring_read_index_from_ddr(struct ath10k *ar,
178 							 u32 ce_id)
179 {
180 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
181 
182 	return ce->vaddr_rri[ce_id] & CE_DDR_RRI_MASK;
183 }
184 
185 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
186 						    u32 ce_ctrl_addr)
187 {
188 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
189 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
190 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
191 	u32 index;
192 
193 	if (ar->hw_params.rri_on_ddr &&
194 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
195 		index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_id);
196 	else
197 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
198 					 ar->hw_ce_regs->current_srri_addr);
199 
200 	return index;
201 }
202 
203 static inline void
204 ath10k_ce_shadow_src_ring_write_index_set(struct ath10k *ar,
205 					  struct ath10k_ce_pipe *ce_state,
206 					  unsigned int value)
207 {
208 	ath10k_ce_write32(ar, shadow_sr_wr_ind_addr(ar, ce_state), value);
209 }
210 
211 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
212 						    u32 ce_id,
213 						    u64 addr)
214 {
215 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
216 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
217 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
218 	u32 addr_lo = lower_32_bits(addr);
219 
220 	ath10k_ce_write32(ar, ce_ctrl_addr +
221 			  ar->hw_ce_regs->sr_base_addr_lo, addr_lo);
222 
223 	if (ce_state->ops->ce_set_src_ring_base_addr_hi) {
224 		ce_state->ops->ce_set_src_ring_base_addr_hi(ar, ce_ctrl_addr,
225 							    addr);
226 	}
227 }
228 
229 static void ath10k_ce_set_src_ring_base_addr_hi(struct ath10k *ar,
230 						u32 ce_ctrl_addr,
231 						u64 addr)
232 {
233 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
234 
235 	ath10k_ce_write32(ar, ce_ctrl_addr +
236 			  ar->hw_ce_regs->sr_base_addr_hi, addr_hi);
237 }
238 
239 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
240 					       u32 ce_ctrl_addr,
241 					       unsigned int n)
242 {
243 	ath10k_ce_write32(ar, ce_ctrl_addr +
244 			  ar->hw_ce_regs->sr_size_addr, n);
245 }
246 
247 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
248 					       u32 ce_ctrl_addr,
249 					       unsigned int n)
250 {
251 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
252 
253 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
254 					  ctrl_regs->addr);
255 
256 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
257 			  (ctrl1_addr & ~(ctrl_regs->dmax->mask)) |
258 			  ath10k_set_ring_byte(n, ctrl_regs->dmax));
259 }
260 
261 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
262 						    u32 ce_ctrl_addr,
263 						    unsigned int n)
264 {
265 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
266 
267 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
268 					  ctrl_regs->addr);
269 
270 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
271 			  (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
272 			  ath10k_set_ring_byte(n, ctrl_regs->src_ring));
273 }
274 
275 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
276 						     u32 ce_ctrl_addr,
277 						     unsigned int n)
278 {
279 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
280 
281 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
282 					  ctrl_regs->addr);
283 
284 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
285 			  (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
286 			  ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
287 }
288 
289 static inline
290 u32 ath10k_ce_dest_ring_read_index_from_ddr(struct ath10k *ar, u32 ce_id)
291 {
292 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
293 
294 	return (ce->vaddr_rri[ce_id] >> CE_DDR_DRRI_SHIFT) &
295 		CE_DDR_RRI_MASK;
296 }
297 
298 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
299 						     u32 ce_ctrl_addr)
300 {
301 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
302 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
303 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
304 	u32 index;
305 
306 	if (ar->hw_params.rri_on_ddr &&
307 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
308 		index = ath10k_ce_dest_ring_read_index_from_ddr(ar, ce_id);
309 	else
310 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
311 					 ar->hw_ce_regs->current_drri_addr);
312 
313 	return index;
314 }
315 
316 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
317 						     u32 ce_id,
318 						     u64 addr)
319 {
320 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
321 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
322 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
323 	u32 addr_lo = lower_32_bits(addr);
324 
325 	ath10k_ce_write32(ar, ce_ctrl_addr +
326 			  ar->hw_ce_regs->dr_base_addr_lo, addr_lo);
327 
328 	if (ce_state->ops->ce_set_dest_ring_base_addr_hi) {
329 		ce_state->ops->ce_set_dest_ring_base_addr_hi(ar, ce_ctrl_addr,
330 							     addr);
331 	}
332 }
333 
334 static void ath10k_ce_set_dest_ring_base_addr_hi(struct ath10k *ar,
335 						 u32 ce_ctrl_addr,
336 						 u64 addr)
337 {
338 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
339 	u32 reg_value;
340 
341 	reg_value = ath10k_ce_read32(ar, ce_ctrl_addr +
342 				     ar->hw_ce_regs->dr_base_addr_hi);
343 	reg_value &= ~CE_DESC_ADDR_HI_MASK;
344 	reg_value |= addr_hi;
345 	ath10k_ce_write32(ar, ce_ctrl_addr +
346 			  ar->hw_ce_regs->dr_base_addr_hi, reg_value);
347 }
348 
349 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
350 						u32 ce_ctrl_addr,
351 						unsigned int n)
352 {
353 	ath10k_ce_write32(ar, ce_ctrl_addr +
354 			  ar->hw_ce_regs->dr_size_addr, n);
355 }
356 
357 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
358 						   u32 ce_ctrl_addr,
359 						   unsigned int n)
360 {
361 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
362 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
363 
364 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
365 			  (addr & ~(srcr_wm->wm_high->mask)) |
366 			  (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
367 }
368 
369 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
370 						  u32 ce_ctrl_addr,
371 						  unsigned int n)
372 {
373 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
374 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
375 
376 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
377 			  (addr & ~(srcr_wm->wm_low->mask)) |
378 			  (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
379 }
380 
381 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
382 						    u32 ce_ctrl_addr,
383 						    unsigned int n)
384 {
385 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
386 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
387 
388 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
389 			  (addr & ~(dstr_wm->wm_high->mask)) |
390 			  (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
391 }
392 
393 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
394 						   u32 ce_ctrl_addr,
395 						   unsigned int n)
396 {
397 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
398 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
399 
400 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
401 			  (addr & ~(dstr_wm->wm_low->mask)) |
402 			  (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
403 }
404 
405 static inline void ath10k_ce_copy_complete_intr_enable(struct ath10k *ar,
406 							u32 ce_ctrl_addr)
407 {
408 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
409 
410 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
411 					    ar->hw_ce_regs->host_ie_addr);
412 
413 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
414 			  host_ie_addr | host_ie->copy_complete->mask);
415 }
416 
417 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
418 							u32 ce_ctrl_addr)
419 {
420 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
421 
422 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
423 					    ar->hw_ce_regs->host_ie_addr);
424 
425 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
426 			  host_ie_addr & ~(host_ie->copy_complete->mask));
427 }
428 
429 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
430 						    u32 ce_ctrl_addr)
431 {
432 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
433 
434 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
435 					    ar->hw_ce_regs->host_ie_addr);
436 
437 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
438 			  host_ie_addr & ~(wm_regs->wm_mask));
439 }
440 
441 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
442 					       u32 ce_ctrl_addr)
443 {
444 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
445 
446 	u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
447 					    ar->hw_ce_regs->misc_ie_addr);
448 
449 	ath10k_ce_write32(ar,
450 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
451 			  misc_ie_addr | misc_regs->err_mask);
452 }
453 
454 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
455 						u32 ce_ctrl_addr)
456 {
457 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
458 
459 	u32 misc_ie_addr = ath10k_ce_read32(ar,
460 			ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr);
461 
462 	ath10k_ce_write32(ar,
463 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
464 			  misc_ie_addr & ~(misc_regs->err_mask));
465 }
466 
467 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
468 						     u32 ce_ctrl_addr,
469 						     unsigned int mask)
470 {
471 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
472 
473 	ath10k_ce_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
474 }
475 
476 /*
477  * Guts of ath10k_ce_send.
478  * The caller takes responsibility for any needed locking.
479  */
480 static int _ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
481 				  void *per_transfer_context,
482 				  dma_addr_t buffer,
483 				  unsigned int nbytes,
484 				  unsigned int transfer_id,
485 				  unsigned int flags)
486 {
487 	struct ath10k *ar = ce_state->ar;
488 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
489 	struct ce_desc *desc, sdesc;
490 	unsigned int nentries_mask = src_ring->nentries_mask;
491 	unsigned int sw_index = src_ring->sw_index;
492 	unsigned int write_index = src_ring->write_index;
493 	u32 ctrl_addr = ce_state->ctrl_addr;
494 	u32 desc_flags = 0;
495 	int ret = 0;
496 
497 	if (nbytes > ce_state->src_sz_max)
498 		ath10k_warn(ar, "%s: sending more than we can (nbytes: %d, max: %d)\n",
499 			    __func__, nbytes, ce_state->src_sz_max);
500 
501 	if (unlikely(CE_RING_DELTA(nentries_mask,
502 				   write_index, sw_index - 1) <= 0)) {
503 		ret = -ENOSR;
504 		goto exit;
505 	}
506 
507 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
508 				   write_index);
509 
510 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
511 
512 	if (flags & CE_SEND_FLAG_GATHER)
513 		desc_flags |= CE_DESC_FLAGS_GATHER;
514 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
515 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
516 
517 	sdesc.addr   = __cpu_to_le32(buffer);
518 	sdesc.nbytes = __cpu_to_le16(nbytes);
519 	sdesc.flags  = __cpu_to_le16(desc_flags);
520 
521 	*desc = sdesc;
522 
523 	src_ring->per_transfer_context[write_index] = per_transfer_context;
524 
525 	/* Update Source Ring Write Index */
526 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
527 
528 	/* WORKAROUND: skip the HW write index update for gather fragments */
529 	if (!(flags & CE_SEND_FLAG_GATHER))
530 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
531 
532 	src_ring->write_index = write_index;
533 exit:
534 	return ret;
535 }
536 
537 static int _ath10k_ce_send_nolock_64(struct ath10k_ce_pipe *ce_state,
538 				     void *per_transfer_context,
539 				     dma_addr_t buffer,
540 				     unsigned int nbytes,
541 				     unsigned int transfer_id,
542 				     unsigned int flags)
543 {
544 	struct ath10k *ar = ce_state->ar;
545 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
546 	struct ce_desc_64 *desc, sdesc;
547 	unsigned int nentries_mask = src_ring->nentries_mask;
548 	unsigned int sw_index;
549 	unsigned int write_index = src_ring->write_index;
550 	u32 ctrl_addr = ce_state->ctrl_addr;
551 	__le32 *addr;
552 	u32 desc_flags = 0;
553 	int ret = 0;
554 
555 	if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
556 		return -ESHUTDOWN;
557 
558 	if (nbytes > ce_state->src_sz_max)
559 		ath10k_warn(ar, "%s: sending more than we can (nbytes: %d, max: %d)\n",
560 			    __func__, nbytes, ce_state->src_sz_max);
561 
562 	if (ar->hw_params.rri_on_ddr)
563 		sw_index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_state->id);
564 	else
565 		sw_index = src_ring->sw_index;
566 
567 	if (unlikely(CE_RING_DELTA(nentries_mask,
568 				   write_index, sw_index - 1) <= 0)) {
569 		ret = -ENOSR;
570 		goto exit;
571 	}
572 
573 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
574 				      write_index);
575 
576 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
577 
578 	if (flags & CE_SEND_FLAG_GATHER)
579 		desc_flags |= CE_DESC_FLAGS_GATHER;
580 
581 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
582 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
583 
584 	addr = (__le32 *)&sdesc.addr;
585 
586 	flags |= upper_32_bits(buffer) & CE_DESC_ADDR_HI_MASK;
587 	addr[0] = __cpu_to_le32(buffer);
588 	addr[1] = __cpu_to_le32(flags);
589 	if (flags & CE_SEND_FLAG_GATHER)
590 		addr[1] |= __cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER);
591 	else
592 		addr[1] &= ~(__cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER));
593 
594 	sdesc.nbytes = __cpu_to_le16(nbytes);
595 	sdesc.flags  = __cpu_to_le16(desc_flags);
596 
597 	*desc = sdesc;
598 
599 	src_ring->per_transfer_context[write_index] = per_transfer_context;
600 
601 	/* Update Source Ring Write Index */
602 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
603 
604 	if (!(flags & CE_SEND_FLAG_GATHER)) {
605 		if (ar->hw_params.shadow_reg_support)
606 			ath10k_ce_shadow_src_ring_write_index_set(ar, ce_state,
607 								  write_index);
608 		else
609 			ath10k_ce_src_ring_write_index_set(ar, ctrl_addr,
610 							   write_index);
611 	}
612 
613 	src_ring->write_index = write_index;
614 exit:
615 	return ret;
616 }
617 
618 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
619 			  void *per_transfer_context,
620 			  dma_addr_t buffer,
621 			  unsigned int nbytes,
622 			  unsigned int transfer_id,
623 			  unsigned int flags)
624 {
625 	return ce_state->ops->ce_send_nolock(ce_state, per_transfer_context,
626 				    buffer, nbytes, transfer_id, flags);
627 }
628 EXPORT_SYMBOL(ath10k_ce_send_nolock);
629 
630 void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
631 {
632 	struct ath10k *ar = pipe->ar;
633 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
634 	struct ath10k_ce_ring *src_ring = pipe->src_ring;
635 	u32 ctrl_addr = pipe->ctrl_addr;
636 
637 	lockdep_assert_held(&ce->ce_lock);
638 
639 	/*
640 	 * This function must be called only if there is an incomplete
641 	 * scatter-gather transfer (before index register is updated)
642 	 * that needs to be cleaned up.
643 	 */
644 	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
645 		return;
646 
647 	if (WARN_ON_ONCE(src_ring->write_index ==
648 			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
649 		return;
650 
651 	src_ring->write_index--;
652 	src_ring->write_index &= src_ring->nentries_mask;
653 
654 	src_ring->per_transfer_context[src_ring->write_index] = NULL;
655 }
656 EXPORT_SYMBOL(__ath10k_ce_send_revert);
657 
658 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
659 		   void *per_transfer_context,
660 		   dma_addr_t buffer,
661 		   unsigned int nbytes,
662 		   unsigned int transfer_id,
663 		   unsigned int flags)
664 {
665 	struct ath10k *ar = ce_state->ar;
666 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
667 	int ret;
668 
669 	spin_lock_bh(&ce->ce_lock);
670 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
671 				    buffer, nbytes, transfer_id, flags);
672 	spin_unlock_bh(&ce->ce_lock);
673 
674 	return ret;
675 }
676 EXPORT_SYMBOL(ath10k_ce_send);
677 
678 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
679 {
680 	struct ath10k *ar = pipe->ar;
681 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
682 	int delta;
683 
684 	spin_lock_bh(&ce->ce_lock);
685 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
686 			      pipe->src_ring->write_index,
687 			      pipe->src_ring->sw_index - 1);
688 	spin_unlock_bh(&ce->ce_lock);
689 
690 	return delta;
691 }
692 EXPORT_SYMBOL(ath10k_ce_num_free_src_entries);
693 
694 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
695 {
696 	struct ath10k *ar = pipe->ar;
697 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
698 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
699 	unsigned int nentries_mask = dest_ring->nentries_mask;
700 	unsigned int write_index = dest_ring->write_index;
701 	unsigned int sw_index = dest_ring->sw_index;
702 
703 	lockdep_assert_held(&ce->ce_lock);
704 
705 	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
706 }
707 EXPORT_SYMBOL(__ath10k_ce_rx_num_free_bufs);
708 
709 static int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
710 				   dma_addr_t paddr)
711 {
712 	struct ath10k *ar = pipe->ar;
713 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
714 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
715 	unsigned int nentries_mask = dest_ring->nentries_mask;
716 	unsigned int write_index = dest_ring->write_index;
717 	unsigned int sw_index = dest_ring->sw_index;
718 	struct ce_desc *base = dest_ring->base_addr_owner_space;
719 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
720 	u32 ctrl_addr = pipe->ctrl_addr;
721 
722 	lockdep_assert_held(&ce->ce_lock);
723 
724 	if ((pipe->id != 5) &&
725 	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
726 		return -ENOSPC;
727 
728 	desc->addr = __cpu_to_le32(paddr);
729 	desc->nbytes = 0;
730 
731 	dest_ring->per_transfer_context[write_index] = ctx;
732 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
733 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
734 	dest_ring->write_index = write_index;
735 
736 	return 0;
737 }
738 
739 static int __ath10k_ce_rx_post_buf_64(struct ath10k_ce_pipe *pipe,
740 				      void *ctx,
741 				      dma_addr_t paddr)
742 {
743 	struct ath10k *ar = pipe->ar;
744 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
745 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
746 	unsigned int nentries_mask = dest_ring->nentries_mask;
747 	unsigned int write_index = dest_ring->write_index;
748 	unsigned int sw_index = dest_ring->sw_index;
749 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
750 	struct ce_desc_64 *desc =
751 			CE_DEST_RING_TO_DESC_64(base, write_index);
752 	u32 ctrl_addr = pipe->ctrl_addr;
753 
754 	lockdep_assert_held(&ce->ce_lock);
755 
756 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
757 		return -ENOSPC;
758 
759 	desc->addr = __cpu_to_le64(paddr);
760 	desc->addr &= __cpu_to_le64(CE_DESC_ADDR_MASK);
761 
762 	desc->nbytes = 0;
763 
764 	dest_ring->per_transfer_context[write_index] = ctx;
765 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
766 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
767 	dest_ring->write_index = write_index;
768 
769 	return 0;
770 }
771 
772 void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
773 {
774 	struct ath10k *ar = pipe->ar;
775 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
776 	unsigned int nentries_mask = dest_ring->nentries_mask;
777 	unsigned int write_index = dest_ring->write_index;
778 	u32 ctrl_addr = pipe->ctrl_addr;
779 	u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
780 
781 	/* Prevent the CE ring from getting stuck when the ring is full.
782 	 * Make sure the write index stays at least 1 entry behind the read index.
783 	 */
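	/* Worked example (illustrative numbers only): with nentries_mask
	 * 0x1ff (512 entries), sw_index 5 and cur_write_idx 2, posting
	 * nentries == 3 would land the write index exactly on the read
	 * index and the full ring would look empty, so one entry is held
	 * back and only 2 are exposed to the hardware here.
	 */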
784 	if (((cur_write_idx + nentries) & nentries_mask) == dest_ring->sw_index)
785 		nentries -= 1;
786 
787 	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
788 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
789 	dest_ring->write_index = write_index;
790 }
791 EXPORT_SYMBOL(ath10k_ce_rx_update_write_idx);
792 
793 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
794 			  dma_addr_t paddr)
795 {
796 	struct ath10k *ar = pipe->ar;
797 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
798 	int ret;
799 
800 	spin_lock_bh(&ce->ce_lock);
801 	ret = pipe->ops->ce_rx_post_buf(pipe, ctx, paddr);
802 	spin_unlock_bh(&ce->ce_lock);
803 
804 	return ret;
805 }
806 EXPORT_SYMBOL(ath10k_ce_rx_post_buf);
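/* A minimal illustrative sketch of how bus code normally keeps the
 * destination ring topped up by pairing __ath10k_ce_rx_num_free_bufs() with
 * ath10k_ce_rx_post_buf(). Buffer allocation and DMA mapping are elided;
 * "pipe" and "fill_one_rx_buf()" are hypothetical names.
 *
 *	struct ath10k_ce *ce = ath10k_ce_priv(ar);
 *
 *	spin_lock_bh(&ce->ce_lock);
 *	num = __ath10k_ce_rx_num_free_bufs(pipe);
 *	spin_unlock_bh(&ce->ce_lock);
 *
 *	while (num--) {
 *		// fill_one_rx_buf() allocates and DMA-maps a buffer, then
 *		// calls ath10k_ce_rx_post_buf(pipe, ctx, paddr)
 *		if (fill_one_rx_buf(pipe))
 *			break;
 *	}
 */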
807 
808 /*
809  * Guts of ath10k_ce_completed_recv_next.
810  * The caller takes responsibility for any necessary locking.
811  */
812 static int
813 _ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
814 				      void **per_transfer_contextp,
815 				      unsigned int *nbytesp)
816 {
817 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
818 	unsigned int nentries_mask = dest_ring->nentries_mask;
819 	unsigned int sw_index = dest_ring->sw_index;
820 
821 	struct ce_desc *base = dest_ring->base_addr_owner_space;
822 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
823 	struct ce_desc sdesc;
824 	u16 nbytes;
825 
826 	/* Copy in one go for performance reasons */
827 	sdesc = *desc;
828 
829 	nbytes = __le16_to_cpu(sdesc.nbytes);
830 	if (nbytes == 0) {
831 		/*
832 		 * This closes a relatively unusual race where the Host
833 		 * sees the updated DRRI before the update to the
834 		 * corresponding descriptor has completed. We treat this
835 		 * as a descriptor that is not yet done.
836 		 */
837 		return -EIO;
838 	}
839 
840 	desc->nbytes = 0;
841 
842 	/* Return data from completed destination descriptor */
843 	*nbytesp = nbytes;
844 
845 	if (per_transfer_contextp)
846 		*per_transfer_contextp =
847 			dest_ring->per_transfer_context[sw_index];
848 
849 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
850 	 * So update the transfer context for all CEs except CE5.
851 	 */
852 	if (ce_state->id != 5)
853 		dest_ring->per_transfer_context[sw_index] = NULL;
854 
855 	/* Update sw_index */
856 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
857 	dest_ring->sw_index = sw_index;
858 
859 	return 0;
860 }
861 
862 static int
863 _ath10k_ce_completed_recv_next_nolock_64(struct ath10k_ce_pipe *ce_state,
864 					 void **per_transfer_contextp,
865 					 unsigned int *nbytesp)
866 {
867 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
868 	unsigned int nentries_mask = dest_ring->nentries_mask;
869 	unsigned int sw_index = dest_ring->sw_index;
870 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
871 	struct ce_desc_64 *desc =
872 		CE_DEST_RING_TO_DESC_64(base, sw_index);
873 	struct ce_desc_64 sdesc;
874 	u16 nbytes;
875 
876 	/* Copy in one go for performance reasons */
877 	sdesc = *desc;
878 
879 	nbytes = __le16_to_cpu(sdesc.nbytes);
880 	if (nbytes == 0) {
881 		/* This closes a relatively unusual race where the Host
882 		 * sees the updated DRRI before the update to the
883 		 * corresponding descriptor has completed. We treat this
884 		 * as a descriptor that is not yet done.
885 		 */
886 		return -EIO;
887 	}
888 
889 	desc->nbytes = 0;
890 
891 	/* Return data from completed destination descriptor */
892 	*nbytesp = nbytes;
893 
894 	if (per_transfer_contextp)
895 		*per_transfer_contextp =
896 			dest_ring->per_transfer_context[sw_index];
897 
898 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
899 	 * So update the transfer context for all CEs except CE5.
900 	 */
901 	if (ce_state->id != 5)
902 		dest_ring->per_transfer_context[sw_index] = NULL;
903 
904 	/* Update sw_index */
905 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
906 	dest_ring->sw_index = sw_index;
907 
908 	return 0;
909 }
910 
911 int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
912 					 void **per_transfer_ctx,
913 					 unsigned int *nbytesp)
914 {
915 	return ce_state->ops->ce_completed_recv_next_nolock(ce_state,
916 							    per_transfer_ctx,
917 							    nbytesp);
918 }
919 EXPORT_SYMBOL(ath10k_ce_completed_recv_next_nolock);
920 
921 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
922 				  void **per_transfer_contextp,
923 				  unsigned int *nbytesp)
924 {
925 	struct ath10k *ar = ce_state->ar;
926 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
927 	int ret;
928 
929 	spin_lock_bh(&ce->ce_lock);
930 	ret = ce_state->ops->ce_completed_recv_next_nolock(ce_state,
931 						   per_transfer_contextp,
932 						   nbytesp);
933 
934 	spin_unlock_bh(&ce->ce_lock);
935 
936 	return ret;
937 }
938 EXPORT_SYMBOL(ath10k_ce_completed_recv_next);
939 
940 static int _ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
941 				       void **per_transfer_contextp,
942 				       dma_addr_t *bufferp)
943 {
944 	struct ath10k_ce_ring *dest_ring;
945 	unsigned int nentries_mask;
946 	unsigned int sw_index;
947 	unsigned int write_index;
948 	int ret;
949 	struct ath10k *ar;
950 	struct ath10k_ce *ce;
951 
952 	dest_ring = ce_state->dest_ring;
953 
954 	if (!dest_ring)
955 		return -EIO;
956 
957 	ar = ce_state->ar;
958 	ce = ath10k_ce_priv(ar);
959 
960 	spin_lock_bh(&ce->ce_lock);
961 
962 	nentries_mask = dest_ring->nentries_mask;
963 	sw_index = dest_ring->sw_index;
964 	write_index = dest_ring->write_index;
965 	if (write_index != sw_index) {
966 		struct ce_desc *base = dest_ring->base_addr_owner_space;
967 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
968 
969 		/* Return data from completed destination descriptor */
970 		*bufferp = __le32_to_cpu(desc->addr);
971 
972 		if (per_transfer_contextp)
973 			*per_transfer_contextp =
974 				dest_ring->per_transfer_context[sw_index];
975 
976 		/* sanity */
977 		dest_ring->per_transfer_context[sw_index] = NULL;
978 		desc->nbytes = 0;
979 
980 		/* Update sw_index */
981 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
982 		dest_ring->sw_index = sw_index;
983 		ret = 0;
984 	} else {
985 		ret = -EIO;
986 	}
987 
988 	spin_unlock_bh(&ce->ce_lock);
989 
990 	return ret;
991 }
992 
993 static int _ath10k_ce_revoke_recv_next_64(struct ath10k_ce_pipe *ce_state,
994 					  void **per_transfer_contextp,
995 					  dma_addr_t *bufferp)
996 {
997 	struct ath10k_ce_ring *dest_ring;
998 	unsigned int nentries_mask;
999 	unsigned int sw_index;
1000 	unsigned int write_index;
1001 	int ret;
1002 	struct ath10k *ar;
1003 	struct ath10k_ce *ce;
1004 
1005 	dest_ring = ce_state->dest_ring;
1006 
1007 	if (!dest_ring)
1008 		return -EIO;
1009 
1010 	ar = ce_state->ar;
1011 	ce = ath10k_ce_priv(ar);
1012 
1013 	spin_lock_bh(&ce->ce_lock);
1014 
1015 	nentries_mask = dest_ring->nentries_mask;
1016 	sw_index = dest_ring->sw_index;
1017 	write_index = dest_ring->write_index;
1018 	if (write_index != sw_index) {
1019 		struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
1020 		struct ce_desc_64 *desc =
1021 			CE_DEST_RING_TO_DESC_64(base, sw_index);
1022 
1023 		/* Return data from completed destination descriptor */
1024 		*bufferp = __le64_to_cpu(desc->addr);
1025 
1026 		if (per_transfer_contextp)
1027 			*per_transfer_contextp =
1028 				dest_ring->per_transfer_context[sw_index];
1029 
1030 		/* sanity */
1031 		dest_ring->per_transfer_context[sw_index] = NULL;
1032 		desc->nbytes = 0;
1033 
1034 		/* Update sw_index */
1035 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1036 		dest_ring->sw_index = sw_index;
1037 		ret = 0;
1038 	} else {
1039 		ret = -EIO;
1040 	}
1041 
1042 	spin_unlock_bh(&ce->ce_lock);
1043 
1044 	return ret;
1045 }
1046 
1047 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
1048 			       void **per_transfer_contextp,
1049 			       dma_addr_t *bufferp)
1050 {
1051 	return ce_state->ops->ce_revoke_recv_next(ce_state,
1052 						  per_transfer_contextp,
1053 						  bufferp);
1054 }
1055 EXPORT_SYMBOL(ath10k_ce_revoke_recv_next);
1056 
1057 /*
1058  * Guts of ath10k_ce_completed_send_next.
1059  * The caller takes responsibility for any necessary locking.
1060  */
1061 static int _ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1062 						 void **per_transfer_contextp)
1063 {
1064 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1065 	u32 ctrl_addr = ce_state->ctrl_addr;
1066 	struct ath10k *ar = ce_state->ar;
1067 	unsigned int nentries_mask = src_ring->nentries_mask;
1068 	unsigned int sw_index = src_ring->sw_index;
1069 	unsigned int read_index;
1070 	struct ce_desc *desc;
1071 
1072 	if (src_ring->hw_index == sw_index) {
1073 		/*
1074 		 * The SW completion index has caught up with the cached
1075 		 * version of the HW completion index.
1076 		 * Update the cached HW completion index to see whether
1077 		 * the SW has really caught up to the HW, or if the cached
1078 		 * value of the HW index has become stale.
1079 		 */
1080 
1081 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1082 		if (read_index == 0xffffffff)
1083 			return -ENODEV;
1084 
1085 		read_index &= nentries_mask;
1086 		src_ring->hw_index = read_index;
1087 	}
1088 
1089 	if (ar->hw_params.rri_on_ddr)
1090 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1091 	else
1092 		read_index = src_ring->hw_index;
1093 
1094 	if (read_index == sw_index)
1095 		return -EIO;
1096 
1097 	if (per_transfer_contextp)
1098 		*per_transfer_contextp =
1099 			src_ring->per_transfer_context[sw_index];
1100 
1101 	/* sanity */
1102 	src_ring->per_transfer_context[sw_index] = NULL;
1103 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
1104 				   sw_index);
1105 	desc->nbytes = 0;
1106 
1107 	/* Update sw_index */
1108 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1109 	src_ring->sw_index = sw_index;
1110 
1111 	return 0;
1112 }
1113 
1114 static int _ath10k_ce_completed_send_next_nolock_64(struct ath10k_ce_pipe *ce_state,
1115 						    void **per_transfer_contextp)
1116 {
1117 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1118 	u32 ctrl_addr = ce_state->ctrl_addr;
1119 	struct ath10k *ar = ce_state->ar;
1120 	unsigned int nentries_mask = src_ring->nentries_mask;
1121 	unsigned int sw_index = src_ring->sw_index;
1122 	unsigned int read_index;
1123 	struct ce_desc_64 *desc;
1124 
1125 	if (src_ring->hw_index == sw_index) {
1126 		/*
1127 		 * The SW completion index has caught up with the cached
1128 		 * version of the HW completion index.
1129 		 * Update the cached HW completion index to see whether
1130 		 * the SW has really caught up to the HW, or if the cached
1131 		 * value of the HW index has become stale.
1132 		 */
1133 
1134 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1135 		if (read_index == 0xffffffff)
1136 			return -ENODEV;
1137 
1138 		read_index &= nentries_mask;
1139 		src_ring->hw_index = read_index;
1140 	}
1141 
1142 	if (ar->hw_params.rri_on_ddr)
1143 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1144 	else
1145 		read_index = src_ring->hw_index;
1146 
1147 	if (read_index == sw_index)
1148 		return -EIO;
1149 
1150 	if (per_transfer_contextp)
1151 		*per_transfer_contextp =
1152 			src_ring->per_transfer_context[sw_index];
1153 
1154 	/* sanity */
1155 	src_ring->per_transfer_context[sw_index] = NULL;
1156 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
1157 				      sw_index);
1158 	desc->nbytes = 0;
1159 
1160 	/* Update sw_index */
1161 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1162 	src_ring->sw_index = sw_index;
1163 
1164 	return 0;
1165 }
1166 
1167 int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1168 					 void **per_transfer_contextp)
1169 {
1170 	return ce_state->ops->ce_completed_send_next_nolock(ce_state,
1171 							    per_transfer_contextp);
1172 }
1173 EXPORT_SYMBOL(ath10k_ce_completed_send_next_nolock);
1174 
1175 static void ath10k_ce_extract_desc_data(struct ath10k *ar,
1176 					struct ath10k_ce_ring *src_ring,
1177 					u32 sw_index,
1178 					dma_addr_t *bufferp,
1179 					u32 *nbytesp,
1180 					u32 *transfer_idp)
1181 {
1182 	struct ce_desc *base = src_ring->base_addr_owner_space;
1183 	struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
1184 
1185 	/* Return data from completed source descriptor */
1186 	*bufferp = __le32_to_cpu(desc->addr);
1187 	*nbytesp = __le16_to_cpu(desc->nbytes);
1188 	*transfer_idp = MS(__le16_to_cpu(desc->flags),
1189 			   CE_DESC_FLAGS_META_DATA);
1190 }
1191 
1192 static void ath10k_ce_extract_desc_data_64(struct ath10k *ar,
1193 					   struct ath10k_ce_ring *src_ring,
1194 					   u32 sw_index,
1195 					   dma_addr_t *bufferp,
1196 					   u32 *nbytesp,
1197 					   u32 *transfer_idp)
1198 {
1199 	struct ce_desc_64 *base = src_ring->base_addr_owner_space;
1200 	struct ce_desc_64 *desc =
1201 		CE_SRC_RING_TO_DESC_64(base, sw_index);
1202 
1203 	/* Return data from completed source descriptor */
1204 	*bufferp = __le64_to_cpu(desc->addr);
1205 	*nbytesp = __le16_to_cpu(desc->nbytes);
1206 	*transfer_idp = MS(__le16_to_cpu(desc->flags),
1207 			   CE_DESC_FLAGS_META_DATA);
1208 }
1209 
1210 /* NB: Modeled after ath10k_ce_completed_send_next */
1211 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
1212 			       void **per_transfer_contextp,
1213 			       dma_addr_t *bufferp,
1214 			       unsigned int *nbytesp,
1215 			       unsigned int *transfer_idp)
1216 {
1217 	struct ath10k_ce_ring *src_ring;
1218 	unsigned int nentries_mask;
1219 	unsigned int sw_index;
1220 	unsigned int write_index;
1221 	int ret;
1222 	struct ath10k *ar;
1223 	struct ath10k_ce *ce;
1224 
1225 	src_ring = ce_state->src_ring;
1226 
1227 	if (!src_ring)
1228 		return -EIO;
1229 
1230 	ar = ce_state->ar;
1231 	ce = ath10k_ce_priv(ar);
1232 
1233 	spin_lock_bh(&ce->ce_lock);
1234 
1235 	nentries_mask = src_ring->nentries_mask;
1236 	sw_index = src_ring->sw_index;
1237 	write_index = src_ring->write_index;
1238 
1239 	if (write_index != sw_index) {
1240 		ce_state->ops->ce_extract_desc_data(ar, src_ring, sw_index,
1241 						    bufferp, nbytesp,
1242 						    transfer_idp);
1243 
1244 		if (per_transfer_contextp)
1245 			*per_transfer_contextp =
1246 				src_ring->per_transfer_context[sw_index];
1247 
1248 		/* sanity */
1249 		src_ring->per_transfer_context[sw_index] = NULL;
1250 
1251 		/* Update sw_index */
1252 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1253 		src_ring->sw_index = sw_index;
1254 		ret = 0;
1255 	} else {
1256 		ret = -EIO;
1257 	}
1258 
1259 	spin_unlock_bh(&ce->ce_lock);
1260 
1261 	return ret;
1262 }
1263 EXPORT_SYMBOL(ath10k_ce_cancel_send_next);
1264 
1265 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
1266 				  void **per_transfer_contextp)
1267 {
1268 	struct ath10k *ar = ce_state->ar;
1269 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1270 	int ret;
1271 
1272 	spin_lock_bh(&ce->ce_lock);
1273 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
1274 						   per_transfer_contextp);
1275 	spin_unlock_bh(&ce->ce_lock);
1276 
1277 	return ret;
1278 }
1279 EXPORT_SYMBOL(ath10k_ce_completed_send_next);
1280 
1281 /*
1282  * Guts of interrupt handler for per-engine interrupts on a particular CE.
1283  *
1284  * Invokes registered callbacks for recv_complete,
1285  * send_complete, and watermarks.
1286  */
1287 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
1288 {
1289 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1290 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1291 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
1292 	u32 ctrl_addr = ce_state->ctrl_addr;
1293 
1294 	/*
1295 	 * Clear before handling
1296 	 *
1297 	 * Misc CE interrupts are not being handled, but still need
1298 	 * to be cleared.
1299 	 *
1300 	 * NOTE: When the last copy engine interrupt is cleared the
1301 	 * hardware will go to sleep.  Once this happens any access to
1302 	 * the CE registers can cause a hardware fault.
1303 	 */
1304 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
1305 					  wm_regs->cc_mask | wm_regs->wm_mask);
1306 
1307 	if (ce_state->recv_cb)
1308 		ce_state->recv_cb(ce_state);
1309 
1310 	if (ce_state->send_cb)
1311 		ce_state->send_cb(ce_state);
1312 }
1313 EXPORT_SYMBOL(ath10k_ce_per_engine_service);
1314 
1315 /*
1316  * Handler for per-engine interrupts on ALL active CEs.
1317  * This is used in cases where the system is sharing a
1318  * single interrupt for all CEs.
1319  */
1320 
1321 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
1322 {
1323 	int ce_id;
1324 	u32 intr_summary;
1325 
1326 	intr_summary = ath10k_ce_interrupt_summary(ar);
1327 
1328 	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
1329 		if (intr_summary & (1 << ce_id))
1330 			intr_summary &= ~(1 << ce_id);
1331 		else
1332 			/* no intr pending on this CE */
1333 			continue;
1334 
1335 		ath10k_ce_per_engine_service(ar, ce_id);
1336 	}
1337 }
1338 EXPORT_SYMBOL(ath10k_ce_per_engine_service_any);
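/* A minimal illustrative sketch, assuming a shared interrupt line: the bus
 * code masks the line in the hard IRQ handler and services all CEs from
 * softirq/NAPI context, roughly as below. "ath10k_bus_napi_poll" is a
 * hypothetical name; the real poll handlers live in the bus code and also
 * honour the NAPI budget.
 *
 *	static int ath10k_bus_napi_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct ath10k *ar = container_of(napi, struct ath10k, napi);
 *
 *		ath10k_ce_per_engine_service_any(ar);
 *		// ... process completed tx/rx, then re-arm the interrupt
 *		napi_complete(napi);
 *		ath10k_ce_enable_interrupts(ar);
 *		return 0;
 *	}
 */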
1339 
1340 /*
1341  * Adjust interrupts for the copy complete handler.
1342  * If it's needed for either send or recv, then unmask
1343  * this interrupt; otherwise, mask it.
1344  *
1345  * Called with ce_lock held.
1346  */
1347 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
1348 {
1349 	u32 ctrl_addr = ce_state->ctrl_addr;
1350 	struct ath10k *ar = ce_state->ar;
1351 	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;
1352 
1353 	if ((!disable_copy_compl_intr) &&
1354 	    (ce_state->send_cb || ce_state->recv_cb))
1355 		ath10k_ce_copy_complete_intr_enable(ar, ctrl_addr);
1356 	else
1357 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1358 
1359 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1360 }
1361 
1362 void ath10k_ce_disable_interrupt(struct ath10k *ar, int ce_id)
1363 {
1364 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1365 	struct ath10k_ce_pipe *ce_state;
1366 	u32 ctrl_addr;
1367 
1368 	ce_state  = &ce->ce_states[ce_id];
1369 	if (ce_state->attr_flags & CE_ATTR_POLL)
1370 		return;
1371 
1372 	ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1373 
1374 	ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1375 	ath10k_ce_error_intr_disable(ar, ctrl_addr);
1376 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1377 }
1378 EXPORT_SYMBOL(ath10k_ce_disable_interrupt);
1379 
1380 void ath10k_ce_disable_interrupts(struct ath10k *ar)
1381 {
1382 	int ce_id;
1383 
1384 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1385 		ath10k_ce_disable_interrupt(ar, ce_id);
1386 }
1387 EXPORT_SYMBOL(ath10k_ce_disable_interrupts);
1388 
1389 void ath10k_ce_enable_interrupt(struct ath10k *ar, int ce_id)
1390 {
1391 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1392 	struct ath10k_ce_pipe *ce_state;
1393 
1394 	ce_state  = &ce->ce_states[ce_id];
1395 	if (ce_state->attr_flags & CE_ATTR_POLL)
1396 		return;
1397 
1398 	ath10k_ce_per_engine_handler_adjust(ce_state);
1399 }
1400 EXPORT_SYMBOL(ath10k_ce_enable_interrupt);
1401 
1402 void ath10k_ce_enable_interrupts(struct ath10k *ar)
1403 {
1404 	int ce_id;
1405 
1406 	/* Enable interrupts for copy engines that
1407 	 * are not using polling mode.
1408 	 */
1409 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1410 		ath10k_ce_enable_interrupt(ar, ce_id);
1411 }
1412 EXPORT_SYMBOL(ath10k_ce_enable_interrupts);
1413 
1414 static int ath10k_ce_init_src_ring(struct ath10k *ar,
1415 				   unsigned int ce_id,
1416 				   const struct ce_attr *attr)
1417 {
1418 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1419 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1420 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1421 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1422 
1423 	nentries = roundup_pow_of_two(attr->src_nentries);
1424 
1425 	if (ar->hw_params.target_64bit)
1426 		memset(src_ring->base_addr_owner_space, 0,
1427 		       nentries * sizeof(struct ce_desc_64));
1428 	else
1429 		memset(src_ring->base_addr_owner_space, 0,
1430 		       nentries * sizeof(struct ce_desc));
1431 
1432 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1433 	src_ring->sw_index &= src_ring->nentries_mask;
1434 	src_ring->hw_index = src_ring->sw_index;
1435 
1436 	src_ring->write_index =
1437 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
1438 	src_ring->write_index &= src_ring->nentries_mask;
1439 
1440 	ath10k_ce_src_ring_base_addr_set(ar, ce_id,
1441 					 src_ring->base_addr_ce_space);
1442 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
1443 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
1444 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
1445 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
1446 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
1447 
1448 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1449 		   "boot init ce src ring id %d entries %d base_addr %pK\n",
1450 		   ce_id, nentries, src_ring->base_addr_owner_space);
1451 
1452 	return 0;
1453 }
1454 
1455 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
1456 				    unsigned int ce_id,
1457 				    const struct ce_attr *attr)
1458 {
1459 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1460 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1461 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
1462 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1463 
1464 	nentries = roundup_pow_of_two(attr->dest_nentries);
1465 
1466 	if (ar->hw_params.target_64bit)
1467 		memset(dest_ring->base_addr_owner_space, 0,
1468 		       nentries * sizeof(struct ce_desc_64));
1469 	else
1470 		memset(dest_ring->base_addr_owner_space, 0,
1471 		       nentries * sizeof(struct ce_desc));
1472 
1473 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
1474 	dest_ring->sw_index &= dest_ring->nentries_mask;
1475 	dest_ring->write_index =
1476 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
1477 	dest_ring->write_index &= dest_ring->nentries_mask;
1478 
1479 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id,
1480 					  dest_ring->base_addr_ce_space);
1481 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1482 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1483 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1484 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1485 
1486 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1487 		   "boot ce dest ring id %d entries %d base_addr %pK\n",
1488 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1489 
1490 	return 0;
1491 }
1492 
1493 static int ath10k_ce_alloc_shadow_base(struct ath10k *ar,
1494 				       struct ath10k_ce_ring *src_ring,
1495 				       u32 nentries)
1496 {
1497 	src_ring->shadow_base_unaligned = kcalloc(nentries,
1498 						  sizeof(struct ce_desc_64),
1499 						  GFP_KERNEL);
1500 	if (!src_ring->shadow_base_unaligned)
1501 		return -ENOMEM;
1502 
1503 	src_ring->shadow_base = (struct ce_desc_64 *)
1504 			PTR_ALIGN(src_ring->shadow_base_unaligned,
1505 				  CE_DESC_RING_ALIGN);
1506 	return 0;
1507 }
1508 
1509 static struct ath10k_ce_ring *
1510 ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
1511 			 const struct ce_attr *attr)
1512 {
1513 	struct ath10k_ce_ring *src_ring;
1514 	u32 nentries = attr->src_nentries;
1515 	dma_addr_t base_addr;
1516 	int ret;
1517 
1518 	nentries = roundup_pow_of_two(nentries);
1519 
1520 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1521 				       nentries), GFP_KERNEL);
1522 	if (src_ring == NULL)
1523 		return ERR_PTR(-ENOMEM);
1524 
1525 	src_ring->nentries = nentries;
1526 	src_ring->nentries_mask = nentries - 1;
1527 
1528 	/*
1529 	 * Legacy platforms that do not support cache
1530 	 * coherent DMA are unsupported
1531 	 */
1532 	src_ring->base_addr_owner_space_unaligned =
1533 		dma_alloc_coherent(ar->dev,
1534 				   (nentries * sizeof(struct ce_desc) +
1535 				    CE_DESC_RING_ALIGN),
1536 				   &base_addr, GFP_KERNEL);
1537 	if (!src_ring->base_addr_owner_space_unaligned) {
1538 		kfree(src_ring);
1539 		return ERR_PTR(-ENOMEM);
1540 	}
1541 
1542 	src_ring->base_addr_ce_space_unaligned = base_addr;
1543 
1544 	src_ring->base_addr_owner_space =
1545 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1546 				  CE_DESC_RING_ALIGN);
1547 	src_ring->base_addr_ce_space =
1548 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1549 			      CE_DESC_RING_ALIGN);
1550 
1551 	if (ar->hw_params.shadow_reg_support) {
1552 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1553 		if (ret) {
1554 			dma_free_coherent(ar->dev,
1555 					  (nentries * sizeof(struct ce_desc) +
1556 					   CE_DESC_RING_ALIGN),
1557 					  src_ring->base_addr_owner_space_unaligned,
1558 					  base_addr);
1559 			kfree(src_ring);
1560 			return ERR_PTR(ret);
1561 		}
1562 	}
1563 
1564 	return src_ring;
1565 }
1566 
1567 static struct ath10k_ce_ring *
1568 ath10k_ce_alloc_src_ring_64(struct ath10k *ar, unsigned int ce_id,
1569 			    const struct ce_attr *attr)
1570 {
1571 	struct ath10k_ce_ring *src_ring;
1572 	u32 nentries = attr->src_nentries;
1573 	dma_addr_t base_addr;
1574 	int ret;
1575 
1576 	nentries = roundup_pow_of_two(nentries);
1577 
1578 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1579 				       nentries), GFP_KERNEL);
1580 	if (!src_ring)
1581 		return ERR_PTR(-ENOMEM);
1582 
1583 	src_ring->nentries = nentries;
1584 	src_ring->nentries_mask = nentries - 1;
1585 
1586 	/* Legacy platforms that do not support cache
1587 	 * coherent DMA are unsupported
1588 	 */
1589 	src_ring->base_addr_owner_space_unaligned =
1590 		dma_alloc_coherent(ar->dev,
1591 				   (nentries * sizeof(struct ce_desc_64) +
1592 				    CE_DESC_RING_ALIGN),
1593 				   &base_addr, GFP_KERNEL);
1594 	if (!src_ring->base_addr_owner_space_unaligned) {
1595 		kfree(src_ring);
1596 		return ERR_PTR(-ENOMEM);
1597 	}
1598 
1599 	src_ring->base_addr_ce_space_unaligned = base_addr;
1600 
1601 	src_ring->base_addr_owner_space =
1602 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1603 				  CE_DESC_RING_ALIGN);
1604 	src_ring->base_addr_ce_space =
1605 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1606 			      CE_DESC_RING_ALIGN);
1607 
1608 	if (ar->hw_params.shadow_reg_support) {
1609 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1610 		if (ret) {
1611 			dma_free_coherent(ar->dev,
1612 					  (nentries * sizeof(struct ce_desc_64) +
1613 					   CE_DESC_RING_ALIGN),
1614 					  src_ring->base_addr_owner_space_unaligned,
1615 					  base_addr);
1616 			kfree(src_ring);
1617 			return ERR_PTR(ret);
1618 		}
1619 	}
1620 
1621 	return src_ring;
1622 }
1623 
1624 static struct ath10k_ce_ring *
1625 ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
1626 			  const struct ce_attr *attr)
1627 {
1628 	struct ath10k_ce_ring *dest_ring;
1629 	u32 nentries;
1630 	dma_addr_t base_addr;
1631 
1632 	nentries = roundup_pow_of_two(attr->dest_nentries);
1633 
1634 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1635 					nentries), GFP_KERNEL);
1636 	if (dest_ring == NULL)
1637 		return ERR_PTR(-ENOMEM);
1638 
1639 	dest_ring->nentries = nentries;
1640 	dest_ring->nentries_mask = nentries - 1;
1641 
1642 	/*
1643 	 * Legacy platforms that do not support cache
1644 	 * coherent DMA are unsupported
1645 	 */
1646 	dest_ring->base_addr_owner_space_unaligned =
1647 		dma_alloc_coherent(ar->dev,
1648 				   (nentries * sizeof(struct ce_desc) +
1649 				    CE_DESC_RING_ALIGN),
1650 				   &base_addr, GFP_KERNEL);
1651 	if (!dest_ring->base_addr_owner_space_unaligned) {
1652 		kfree(dest_ring);
1653 		return ERR_PTR(-ENOMEM);
1654 	}
1655 
1656 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1657 
1658 	dest_ring->base_addr_owner_space =
1659 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1660 				  CE_DESC_RING_ALIGN);
1661 	dest_ring->base_addr_ce_space =
1662 				ALIGN(dest_ring->base_addr_ce_space_unaligned,
1663 				      CE_DESC_RING_ALIGN);
1664 
1665 	return dest_ring;
1666 }
1667 
1668 static struct ath10k_ce_ring *
1669 ath10k_ce_alloc_dest_ring_64(struct ath10k *ar, unsigned int ce_id,
1670 			     const struct ce_attr *attr)
1671 {
1672 	struct ath10k_ce_ring *dest_ring;
1673 	u32 nentries;
1674 	dma_addr_t base_addr;
1675 
1676 	nentries = roundup_pow_of_two(attr->dest_nentries);
1677 
1678 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1679 					nentries), GFP_KERNEL);
1680 	if (!dest_ring)
1681 		return ERR_PTR(-ENOMEM);
1682 
1683 	dest_ring->nentries = nentries;
1684 	dest_ring->nentries_mask = nentries - 1;
1685 
1686 	/* Legacy platforms that do not support cache
1687 	 * coherent DMA are unsupported
1688 	 */
1689 	dest_ring->base_addr_owner_space_unaligned =
1690 		dma_alloc_coherent(ar->dev,
1691 				   (nentries * sizeof(struct ce_desc_64) +
1692 				    CE_DESC_RING_ALIGN),
1693 				   &base_addr, GFP_KERNEL);
1694 	if (!dest_ring->base_addr_owner_space_unaligned) {
1695 		kfree(dest_ring);
1696 		return ERR_PTR(-ENOMEM);
1697 	}
1698 
1699 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1700 
1701 	/* Correctly initialize memory to 0 to prevent garbage
1702 	 * data from crashing the system during firmware download.
1703 	 */
1704 	dest_ring->base_addr_owner_space =
1705 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1706 				  CE_DESC_RING_ALIGN);
1707 	dest_ring->base_addr_ce_space =
1708 			ALIGN(dest_ring->base_addr_ce_space_unaligned,
1709 			      CE_DESC_RING_ALIGN);
1710 
1711 	return dest_ring;
1712 }
1713 
1714 /*
1715  * Initialize a Copy Engine based on caller-supplied attributes.
1716  * This may be called once to initialize both source and destination
1717  * rings or it may be called twice for separate source and destination
1718  * initialization. It may be that only one side or the other is
1719  * initialized by software/firmware.
1720  */
1721 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
1722 			const struct ce_attr *attr)
1723 {
1724 	int ret;
1725 
1726 	if (attr->src_nentries) {
1727 		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
1728 		if (ret) {
1729 			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
1730 				   ce_id, ret);
1731 			return ret;
1732 		}
1733 	}
1734 
1735 	if (attr->dest_nentries) {
1736 		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
1737 		if (ret) {
1738 			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
1739 				   ce_id, ret);
1740 			return ret;
1741 		}
1742 	}
1743 
1744 	return 0;
1745 }
1746 EXPORT_SYMBOL(ath10k_ce_init_pipe);
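/* A minimal illustrative sketch of the expected setup order: the bus code
 * allocates the rings once and (re)initializes them on every power-up.
 * "host_ce_config" is a hypothetical attribute table; ath10k_ce_alloc_pipe()
 * is declared in ce.h and defined later in this file.
 *
 *	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
 *		ret = ath10k_ce_alloc_pipe(ar, ce_id, &host_ce_config[ce_id]);
 *		if (ret)
 *			goto err_free;
 *	}
 *
 *	// on each firmware boot
 *	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
 *		ret = ath10k_ce_init_pipe(ar, ce_id, &host_ce_config[ce_id]);
 */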
1747 
1748 static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
1749 {
1750 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1751 
1752 	ath10k_ce_src_ring_base_addr_set(ar, ce_id, 0);
1753 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
1754 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
1755 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
1756 }
1757 
1758 static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
1759 {
1760 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1761 
1762 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id, 0);
1763 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
1764 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
1765 }
1766 
1767 void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
1768 {
1769 	ath10k_ce_deinit_src_ring(ar, ce_id);
1770 	ath10k_ce_deinit_dest_ring(ar, ce_id);
1771 }
1772 EXPORT_SYMBOL(ath10k_ce_deinit_pipe);
1773 
1774 static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1775 {
1776 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1777 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1778 
1779 	if (ce_state->src_ring) {
1780 		if (ar->hw_params.shadow_reg_support)
1781 			kfree(ce_state->src_ring->shadow_base_unaligned);
1782 		dma_free_coherent(ar->dev,
1783 				  (ce_state->src_ring->nentries *
1784 				   sizeof(struct ce_desc) +
1785 				   CE_DESC_RING_ALIGN),
1786 				  ce_state->src_ring->base_addr_owner_space,
1787 				  ce_state->src_ring->base_addr_ce_space);
1788 		kfree(ce_state->src_ring);
1789 	}
1790 
1791 	if (ce_state->dest_ring) {
1792 		dma_free_coherent(ar->dev,
1793 				  (ce_state->dest_ring->nentries *
1794 				   sizeof(struct ce_desc) +
1795 				   CE_DESC_RING_ALIGN),
1796 				  ce_state->dest_ring->base_addr_owner_space,
1797 				  ce_state->dest_ring->base_addr_ce_space);
1798 		kfree(ce_state->dest_ring);
1799 	}
1800 
1801 	ce_state->src_ring = NULL;
1802 	ce_state->dest_ring = NULL;
1803 }
1804 
1805 static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id)
1806 {
1807 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1808 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1809 
1810 	if (ce_state->src_ring) {
1811 		if (ar->hw_params.shadow_reg_support)
1812 			kfree(ce_state->src_ring->shadow_base_unaligned);
1813 		dma_free_coherent(ar->dev,
1814 				  (ce_state->src_ring->nentries *
1815 				   sizeof(struct ce_desc_64) +
1816 				   CE_DESC_RING_ALIGN),
1817 				  ce_state->src_ring->base_addr_owner_space,
1818 				  ce_state->src_ring->base_addr_ce_space);
1819 		kfree(ce_state->src_ring);
1820 	}
1821 
1822 	if (ce_state->dest_ring) {
1823 		dma_free_coherent(ar->dev,
1824 				  (ce_state->dest_ring->nentries *
1825 				   sizeof(struct ce_desc_64) +
1826 				   CE_DESC_RING_ALIGN),
1827 				  ce_state->dest_ring->base_addr_owner_space,
1828 				  ce_state->dest_ring->base_addr_ce_space);
1829 		kfree(ce_state->dest_ring);
1830 	}
1831 
1832 	ce_state->src_ring = NULL;
1833 	ce_state->dest_ring = NULL;
1834 }
1835 
1836 void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1837 {
1838 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1839 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1840 
1841 	ce_state->ops->ce_free_pipe(ar, ce_id);
1842 }
1843 EXPORT_SYMBOL(ath10k_ce_free_pipe);
1844 
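/*
 * Dump the base address and the source/destination read and write
 * indices of every copy engine to the kernel log and, when crash_data
 * is supplied, record the same per-CE snapshot for the firmware crash
 * dump. Callers are expected to hold ar->dump_mutex (enforced by the
 * lockdep assertion below).
 */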
1845 void ath10k_ce_dump_registers(struct ath10k *ar,
1846 			      struct ath10k_fw_crash_data *crash_data)
1847 {
1848 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1849 	struct ath10k_ce_crash_data ce_data;
1850 	u32 addr, id;
1851 
1852 	lockdep_assert_held(&ar->dump_mutex);
1853 
1854 	ath10k_err(ar, "Copy Engine register dump:\n");
1855 
1856 	spin_lock_bh(&ce->ce_lock);
1857 	for (id = 0; id < CE_COUNT; id++) {
1858 		addr = ath10k_ce_base_address(ar, id);
1859 		ce_data.base_addr = cpu_to_le32(addr);
1860 
1861 		ce_data.src_wr_idx =
1862 			cpu_to_le32(ath10k_ce_src_ring_write_index_get(ar, addr));
1863 		ce_data.src_r_idx =
1864 			cpu_to_le32(ath10k_ce_src_ring_read_index_get(ar, addr));
1865 		ce_data.dst_wr_idx =
1866 			cpu_to_le32(ath10k_ce_dest_ring_write_index_get(ar, addr));
1867 		ce_data.dst_r_idx =
1868 			cpu_to_le32(ath10k_ce_dest_ring_read_index_get(ar, addr));
1869 
1870 		if (crash_data)
1871 			crash_data->ce_crash_data[id] = ce_data;
1872 
1873 		ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u", id,
1874 			   le32_to_cpu(ce_data.base_addr),
1875 			   le32_to_cpu(ce_data.src_wr_idx),
1876 			   le32_to_cpu(ce_data.src_r_idx),
1877 			   le32_to_cpu(ce_data.dst_wr_idx),
1878 			   le32_to_cpu(ce_data.dst_r_idx));
1879 	}
1880 
1881 	spin_unlock_bh(&ce->ce_lock);
1882 }
1883 EXPORT_SYMBOL(ath10k_ce_dump_registers);
1884 
1885 static const struct ath10k_ce_ops ce_ops = {
1886 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring,
1887 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring,
1888 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf,
1889 	.ce_completed_recv_next_nolock = _ath10k_ce_completed_recv_next_nolock,
1890 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next,
1891 	.ce_extract_desc_data = ath10k_ce_extract_desc_data,
1892 	.ce_free_pipe = _ath10k_ce_free_pipe,
1893 	.ce_send_nolock = _ath10k_ce_send_nolock,
1894 	.ce_set_src_ring_base_addr_hi = NULL,
1895 	.ce_set_dest_ring_base_addr_hi = NULL,
1896 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock,
1897 };
1898 
1899 static const struct ath10k_ce_ops ce_64_ops = {
1900 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring_64,
1901 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring_64,
1902 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf_64,
1903 	.ce_completed_recv_next_nolock =
1904 				_ath10k_ce_completed_recv_next_nolock_64,
1905 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next_64,
1906 	.ce_extract_desc_data = ath10k_ce_extract_desc_data_64,
1907 	.ce_free_pipe = _ath10k_ce_free_pipe_64,
1908 	.ce_send_nolock = _ath10k_ce_send_nolock_64,
1909 	.ce_set_src_ring_base_addr_hi = ath10k_ce_set_src_ring_base_addr_hi,
1910 	.ce_set_dest_ring_base_addr_hi = ath10k_ce_set_dest_ring_base_addr_hi,
1911 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock_64,
1912 };
1913 
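/*
 * Two descriptor-format backends: ce_ops operates on the original 32-bit
 * struct ce_desc layout, while ce_64_ops operates on struct ce_desc_64,
 * which carries wider addresses (hence the extra base_addr_hi hooks) and
 * is used by WCN3990-class targets. ath10k_ce_set_ops() below selects
 * the table from ar->hw_rev.
 */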
1914 static void ath10k_ce_set_ops(struct ath10k *ar,
1915 			      struct ath10k_ce_pipe *ce_state)
1916 {
1917 	switch (ar->hw_rev) {
1918 	case ATH10K_HW_WCN3990:
1919 		ce_state->ops = &ce_64_ops;
1920 		break;
1921 	default:
1922 		ce_state->ops = &ce_ops;
1923 		break;
1924 	}
1925 }
1926 
1927 int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
1928 			 const struct ce_attr *attr)
1929 {
1930 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1931 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1932 	int ret;
1933 
1934 	ath10k_ce_set_ops(ar, ce_state);
1935 	/* Make sure there are enough CE ring buffer entries for HTT TX to
1936 	 * avoid additional TX locking checks.
1937 	 *
1938 	 * For lack of a better place, the check is done here.
1939 	 */
1940 	BUILD_BUG_ON(2 * TARGET_NUM_MSDU_DESC >
1941 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1942 	BUILD_BUG_ON(2 * TARGET_10_4_NUM_MSDU_DESC_PFC >
1943 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1944 	BUILD_BUG_ON(2 * TARGET_TLV_NUM_MSDU_DESC >
1945 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1946 
1947 	ce_state->ar = ar;
1948 	ce_state->id = ce_id;
1949 	ce_state->ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1950 	ce_state->attr_flags = attr->flags;
1951 	ce_state->src_sz_max = attr->src_sz_max;
1952 
1953 	if (attr->src_nentries)
1954 		ce_state->send_cb = attr->send_cb;
1955 
1956 	if (attr->dest_nentries)
1957 		ce_state->recv_cb = attr->recv_cb;
1958 
1959 	if (attr->src_nentries) {
1960 		ce_state->src_ring =
1961 			ce_state->ops->ce_alloc_src_ring(ar, ce_id, attr);
1962 		if (IS_ERR(ce_state->src_ring)) {
1963 			ret = PTR_ERR(ce_state->src_ring);
1964 			ath10k_err(ar, "failed to alloc CE src ring %d: %d\n",
1965 				   ce_id, ret);
1966 			ce_state->src_ring = NULL;
1967 			return ret;
1968 		}
1969 	}
1970 
1971 	if (attr->dest_nentries) {
1972 		ce_state->dest_ring = ce_state->ops->ce_alloc_dst_ring(ar,
1973 									ce_id,
1974 									attr);
1975 		if (IS_ERR(ce_state->dest_ring)) {
1976 			ret = PTR_ERR(ce_state->dest_ring);
1977 			ath10k_err(ar, "failed to alloc CE dest ring %d: %d\n",
1978 				   ce_id, ret);
1979 			ce_state->dest_ring = NULL;
1980 			return ret;
1981 		}
1982 	}
1983 
1984 	return 0;
1985 }
1986 EXPORT_SYMBOL(ath10k_ce_alloc_pipe);
1987 
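/*
 * Allocate the "ring read index" (RRI) shadow area: one 32-bit slot per
 * copy engine in host memory. Its DMA address is programmed into the
 * ce_rri_low/ce_rri_high registers and the 'upd' bit is set in every
 * CE's CTRL1 register so that current read indices are mirrored into
 * this buffer, letting the host poll ring state from DDR instead of
 * issuing a register read for each index.
 */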
1988 void ath10k_ce_alloc_rri(struct ath10k *ar)
1989 {
1990 	int i;
1991 	u32 value;
1992 	u32 ctrl1_regs;
1993 	u32 ce_base_addr;
1994 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1995 
1996 	ce->vaddr_rri = dma_alloc_coherent(ar->dev,
1997 					   (CE_COUNT * sizeof(u32)),
1998 					   &ce->paddr_rri, GFP_KERNEL);
1999 
2000 	if (!ce->vaddr_rri)
2001 		return;
2002 
2003 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_low,
2004 			  lower_32_bits(ce->paddr_rri));
2005 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_high,
2006 			  (upper_32_bits(ce->paddr_rri) &
2007 			  CE_DESC_ADDR_HI_MASK));
2008 
2009 	for (i = 0; i < CE_COUNT; i++) {
2010 		ctrl1_regs = ar->hw_ce_regs->ctrl1_regs->addr;
2011 		ce_base_addr = ath10k_ce_base_address(ar, i);
2012 		value = ath10k_ce_read32(ar, ce_base_addr + ctrl1_regs);
2013 		value |= ar->hw_ce_regs->upd->mask;
2014 		ath10k_ce_write32(ar, ce_base_addr + ctrl1_regs, value);
2015 	}
2016 }
2017 EXPORT_SYMBOL(ath10k_ce_alloc_rri);
2018 
2019 void ath10k_ce_free_rri(struct ath10k *ar)
2020 {
2021 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
2022 
2023 	dma_free_coherent(ar->dev, (CE_COUNT * sizeof(u32)),
2024 			  ce->vaddr_rri,
2025 			  ce->paddr_rri);
2026 }
2027 EXPORT_SYMBOL(ath10k_ce_free_rri);
2028