xref: /openbmc/linux/drivers/net/wireless/ath/ath10k/ce.c (revision 930beb5a)
1 /*
2  * Copyright (c) 2005-2011 Atheros Communications Inc.
3  * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "hif.h"
19 #include "pci.h"
20 #include "ce.h"
21 #include "debug.h"
22 
23 /*
24  * Support for Copy Engine hardware, which is mainly used for
25  * communication between Host and Target over a PCIe interconnect.
26  */
27 
28 /*
29  * A single CopyEngine (CE) comprises two "rings":
30  *   a source ring
31  *   a destination ring
32  *
33  * Each ring consists of a number of descriptors which specify
34  * an address, length, and meta-data.
35  *
36  * Typically, one side of the PCIe interconnect (Host or Target)
37  * controls one ring and the other side controls the other ring.
38  * The source side chooses when to initiate a transfer and it
39  * chooses what to send (buffer address, length). The destination
40  * side keeps a supply of "anonymous receive buffers" available and
41  * it handles incoming data as it arrives (when the destination
42  * receives an interrupt).
43  *
44  * The sender may send a simple buffer (address/length) or it may
45  * send a small list of buffers.  When a small list is sent, hardware
46  * "gathers" these and they end up in a single destination buffer
47  * with a single interrupt.
48  *
49  * There are several "contexts" managed by this layer -- more, it
50  * may seem -- than should be needed. These are provided mainly for
51  * maximum flexibility and especially to facilitate a simpler HIF
52  * implementation. There are per-CopyEngine recv, send, and watermark
53  * contexts. These are supplied by the caller when a recv, send,
54  * or watermark handler is established and they are echoed back to
55  * the caller when the respective callbacks are invoked. There is
56  * also a per-transfer context supplied by the caller when a buffer
57  * (or sendlist) is sent and when a buffer is enqueued for recv.
58  * These per-transfer contexts are echoed back to the caller when
59  * the buffer is sent/received.
60  */
61 
62 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
63 						       u32 ce_ctrl_addr,
64 						       unsigned int n)
65 {
66 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
67 }
68 
69 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
70 						      u32 ce_ctrl_addr)
71 {
72 	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
73 }
74 
75 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
76 						      u32 ce_ctrl_addr,
77 						      unsigned int n)
78 {
79 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
80 }
81 
82 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
83 						     u32 ce_ctrl_addr)
84 {
85 	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
86 }
87 
88 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
89 						    u32 ce_ctrl_addr)
90 {
91 	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
92 }
93 
94 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
95 						    u32 ce_ctrl_addr,
96 						    unsigned int addr)
97 {
98 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
99 }
100 
101 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
102 					       u32 ce_ctrl_addr,
103 					       unsigned int n)
104 {
105 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
106 }
107 
108 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
109 					       u32 ce_ctrl_addr,
110 					       unsigned int n)
111 {
112 	u32 ctrl1_addr = ath10k_pci_read32((ar),
113 					   (ce_ctrl_addr) + CE_CTRL1_ADDRESS);
114 
115 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
116 			   (ctrl1_addr &  ~CE_CTRL1_DMAX_LENGTH_MASK) |
117 			   CE_CTRL1_DMAX_LENGTH_SET(n));
118 }
119 
120 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
121 						    u32 ce_ctrl_addr,
122 						    unsigned int n)
123 {
124 	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
125 
126 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
127 			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
128 			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
129 }
130 
131 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
132 						     u32 ce_ctrl_addr,
133 						     unsigned int n)
134 {
135 	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
136 
137 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
138 			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
139 			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
140 }
141 
142 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
143 						     u32 ce_ctrl_addr)
144 {
145 	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
146 }
147 
148 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
149 						     u32 ce_ctrl_addr,
150 						     u32 addr)
151 {
152 	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
153 }
154 
155 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
156 						u32 ce_ctrl_addr,
157 						unsigned int n)
158 {
159 	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
160 }
161 
162 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
163 						   u32 ce_ctrl_addr,
164 						   unsigned int n)
165 {
166 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
167 
168 	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
169 			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
170 			   SRC_WATERMARK_HIGH_SET(n));
171 }
172 
173 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
174 						  u32 ce_ctrl_addr,
175 						  unsigned int n)
176 {
177 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
178 
179 	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
180 			   (addr & ~SRC_WATERMARK_LOW_MASK) |
181 			   SRC_WATERMARK_LOW_SET(n));
182 }
183 
184 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
185 						    u32 ce_ctrl_addr,
186 						    unsigned int n)
187 {
188 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
189 
190 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
191 			   (addr & ~DST_WATERMARK_HIGH_MASK) |
192 			   DST_WATERMARK_HIGH_SET(n));
193 }
194 
195 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
196 						   u32 ce_ctrl_addr,
197 						   unsigned int n)
198 {
199 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
200 
201 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
202 			   (addr & ~DST_WATERMARK_LOW_MASK) |
203 			   DST_WATERMARK_LOW_SET(n));
204 }
205 
206 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
207 							u32 ce_ctrl_addr)
208 {
209 	u32 host_ie_addr = ath10k_pci_read32(ar,
210 					     ce_ctrl_addr + HOST_IE_ADDRESS);
211 
212 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
213 			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
214 }
215 
216 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
217 							u32 ce_ctrl_addr)
218 {
219 	u32 host_ie_addr = ath10k_pci_read32(ar,
220 					     ce_ctrl_addr + HOST_IE_ADDRESS);
221 
222 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
223 			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
224 }
225 
226 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
227 						    u32 ce_ctrl_addr)
228 {
229 	u32 host_ie_addr = ath10k_pci_read32(ar,
230 					     ce_ctrl_addr + HOST_IE_ADDRESS);
231 
232 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
233 			   host_ie_addr & ~CE_WATERMARK_MASK);
234 }
235 
236 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
237 					       u32 ce_ctrl_addr)
238 {
239 	u32 misc_ie_addr = ath10k_pci_read32(ar,
240 					     ce_ctrl_addr + MISC_IE_ADDRESS);
241 
242 	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
243 			   misc_ie_addr | CE_ERROR_MASK);
244 }
245 
246 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
247 						     u32 ce_ctrl_addr,
248 						     unsigned int mask)
249 {
250 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
251 }
252 
253 
254 /*
255  * Guts of ath10k_ce_send, used by both ath10k_ce_send and
256  * ath10k_ce_sendlist_send.
257  * The caller takes responsibility for any needed locking.
258  */
259 static int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
260 				 void *per_transfer_context,
261 				 u32 buffer,
262 				 unsigned int nbytes,
263 				 unsigned int transfer_id,
264 				 unsigned int flags)
265 {
266 	struct ath10k *ar = ce_state->ar;
267 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
268 	struct ce_desc *desc, *sdesc;
269 	unsigned int nentries_mask = src_ring->nentries_mask;
270 	unsigned int sw_index = src_ring->sw_index;
271 	unsigned int write_index = src_ring->write_index;
272 	u32 ctrl_addr = ce_state->ctrl_addr;
273 	u32 desc_flags = 0;
274 	int ret = 0;
275 
276 	if (nbytes > ce_state->src_sz_max)
277 		ath10k_warn("%s: send more we can (nbytes: %d, max: %d)\n",
278 			    __func__, nbytes, ce_state->src_sz_max);
279 
280 	ret = ath10k_pci_wake(ar);
281 	if (ret)
282 		return ret;
283 
284 	if (unlikely(CE_RING_DELTA(nentries_mask,
285 				   write_index, sw_index - 1) <= 0)) {
286 		ret = -ENOSR;
287 		goto exit;
288 	}
289 
290 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
291 				   write_index);
292 	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);
293 
294 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
295 
296 	if (flags & CE_SEND_FLAG_GATHER)
297 		desc_flags |= CE_DESC_FLAGS_GATHER;
298 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
299 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
300 
301 	sdesc->addr   = __cpu_to_le32(buffer);
302 	sdesc->nbytes = __cpu_to_le16(nbytes);
303 	sdesc->flags  = __cpu_to_le16(desc_flags);
304 
305 	*desc = *sdesc;
306 
307 	src_ring->per_transfer_context[write_index] = per_transfer_context;
308 
309 	/* Update Source Ring Write Index */
310 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
311 
312 	/* WORKAROUND */
313 	if (!(flags & CE_SEND_FLAG_GATHER))
314 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
315 
316 	src_ring->write_index = write_index;
317 exit:
318 	ath10k_pci_sleep(ar);
319 	return ret;
320 }
321 
322 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
323 		   void *per_transfer_context,
324 		   u32 buffer,
325 		   unsigned int nbytes,
326 		   unsigned int transfer_id,
327 		   unsigned int flags)
328 {
329 	struct ath10k *ar = ce_state->ar;
330 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
331 	int ret;
332 
333 	spin_lock_bh(&ar_pci->ce_lock);
334 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
335 				    buffer, nbytes, transfer_id, flags);
336 	spin_unlock_bh(&ar_pci->ce_lock);
337 
338 	return ret;
339 }
340 
341 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
342 {
343 	struct ath10k *ar = pipe->ar;
344 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
345 	int delta;
346 
347 	spin_lock_bh(&ar_pci->ce_lock);
348 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
349 			      pipe->src_ring->write_index,
350 			      pipe->src_ring->sw_index - 1);
351 	spin_unlock_bh(&ar_pci->ce_lock);
352 
353 	return delta;
354 }
355 
356 int ath10k_ce_recv_buf_enqueue(struct ath10k_ce_pipe *ce_state,
357 			       void *per_recv_context,
358 			       u32 buffer)
359 {
360 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
361 	u32 ctrl_addr = ce_state->ctrl_addr;
362 	struct ath10k *ar = ce_state->ar;
363 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
364 	unsigned int nentries_mask = dest_ring->nentries_mask;
365 	unsigned int write_index;
366 	unsigned int sw_index;
367 	int ret;
368 
369 	spin_lock_bh(&ar_pci->ce_lock);
370 	write_index = dest_ring->write_index;
371 	sw_index = dest_ring->sw_index;
372 
373 	ret = ath10k_pci_wake(ar);
374 	if (ret)
375 		goto out;
376 
377 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) {
378 		struct ce_desc *base = dest_ring->base_addr_owner_space;
379 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
380 
381 		/* Update destination descriptor */
382 		desc->addr    = __cpu_to_le32(buffer);
383 		desc->nbytes = 0;
384 
385 		dest_ring->per_transfer_context[write_index] =
386 							per_recv_context;
387 
388 		/* Update Destination Ring Write Index */
389 		write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
390 		ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
391 		dest_ring->write_index = write_index;
392 		ret = 0;
393 	} else {
394 		ret = -EIO;
395 	}
396 	ath10k_pci_sleep(ar);
397 
398 out:
399 	spin_unlock_bh(&ar_pci->ce_lock);
400 
401 	return ret;
402 }
403 
404 /*
405  * Guts of ath10k_ce_completed_recv_next.
406  * The caller takes responsibility for any necessary locking.
407  */
408 static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
409 						void **per_transfer_contextp,
410 						u32 *bufferp,
411 						unsigned int *nbytesp,
412 						unsigned int *transfer_idp,
413 						unsigned int *flagsp)
414 {
415 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
416 	unsigned int nentries_mask = dest_ring->nentries_mask;
417 	unsigned int sw_index = dest_ring->sw_index;
418 
419 	struct ce_desc *base = dest_ring->base_addr_owner_space;
420 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
421 	struct ce_desc sdesc;
422 	u16 nbytes;
423 
424 	/* Copy in one go for performance reasons */
425 	sdesc = *desc;
426 
427 	nbytes = __le16_to_cpu(sdesc.nbytes);
428 	if (nbytes == 0) {
429 		/*
430 		 * This closes a relatively unusual race where the Host
431 		 * sees the updated DRRI before the update to the
432 		 * corresponding descriptor has completed. We treat this
433 		 * as a descriptor that is not yet done.
434 		 */
435 		return -EIO;
436 	}
437 
438 	desc->nbytes = 0;
439 
440 	/* Return data from completed destination descriptor */
441 	*bufferp = __le32_to_cpu(sdesc.addr);
442 	*nbytesp = nbytes;
443 	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);
444 
445 	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
446 		*flagsp = CE_RECV_FLAG_SWAPPED;
447 	else
448 		*flagsp = 0;
449 
450 	if (per_transfer_contextp)
451 		*per_transfer_contextp =
452 			dest_ring->per_transfer_context[sw_index];
453 
454 	/* sanity */
455 	dest_ring->per_transfer_context[sw_index] = NULL;
456 
457 	/* Update sw_index */
458 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
459 	dest_ring->sw_index = sw_index;
460 
461 	return 0;
462 }
463 
464 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
465 				  void **per_transfer_contextp,
466 				  u32 *bufferp,
467 				  unsigned int *nbytesp,
468 				  unsigned int *transfer_idp,
469 				  unsigned int *flagsp)
470 {
471 	struct ath10k *ar = ce_state->ar;
472 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
473 	int ret;
474 
475 	spin_lock_bh(&ar_pci->ce_lock);
476 	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
477 						   per_transfer_contextp,
478 						   bufferp, nbytesp,
479 						   transfer_idp, flagsp);
480 	spin_unlock_bh(&ar_pci->ce_lock);
481 
482 	return ret;
483 }
484 
485 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
486 			       void **per_transfer_contextp,
487 			       u32 *bufferp)
488 {
489 	struct ath10k_ce_ring *dest_ring;
490 	unsigned int nentries_mask;
491 	unsigned int sw_index;
492 	unsigned int write_index;
493 	int ret;
494 	struct ath10k *ar;
495 	struct ath10k_pci *ar_pci;
496 
497 	dest_ring = ce_state->dest_ring;
498 
499 	if (!dest_ring)
500 		return -EIO;
501 
502 	ar = ce_state->ar;
503 	ar_pci = ath10k_pci_priv(ar);
504 
505 	spin_lock_bh(&ar_pci->ce_lock);
506 
507 	nentries_mask = dest_ring->nentries_mask;
508 	sw_index = dest_ring->sw_index;
509 	write_index = dest_ring->write_index;
510 	if (write_index != sw_index) {
511 		struct ce_desc *base = dest_ring->base_addr_owner_space;
512 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
513 
514 		/* Return data from completed destination descriptor */
515 		*bufferp = __le32_to_cpu(desc->addr);
516 
517 		if (per_transfer_contextp)
518 			*per_transfer_contextp =
519 				dest_ring->per_transfer_context[sw_index];
520 
521 		/* sanity */
522 		dest_ring->per_transfer_context[sw_index] = NULL;
523 
524 		/* Update sw_index */
525 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
526 		dest_ring->sw_index = sw_index;
527 		ret = 0;
528 	} else {
529 		ret = -EIO;
530 	}
531 
532 	spin_unlock_bh(&ar_pci->ce_lock);
533 
534 	return ret;
535 }
536 
537 /*
538  * Guts of ath10k_ce_completed_send_next.
539  * The caller takes responsibility for any necessary locking.
540  */
541 static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
542 						void **per_transfer_contextp,
543 						u32 *bufferp,
544 						unsigned int *nbytesp,
545 						unsigned int *transfer_idp)
546 {
547 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
548 	u32 ctrl_addr = ce_state->ctrl_addr;
549 	struct ath10k *ar = ce_state->ar;
550 	unsigned int nentries_mask = src_ring->nentries_mask;
551 	unsigned int sw_index = src_ring->sw_index;
552 	struct ce_desc *sdesc, *sbase;
553 	unsigned int read_index;
554 	int ret;
555 
556 	if (src_ring->hw_index == sw_index) {
557 		/*
558 		 * The SW completion index has caught up with the cached
559 		 * version of the HW completion index.
560 		 * Update the cached HW completion index to see whether
561 		 * the SW has really caught up to the HW, or if the cached
562 		 * value of the HW index has become stale.
563 		 */
564 
565 		ret = ath10k_pci_wake(ar);
566 		if (ret)
567 			return ret;
568 
569 		src_ring->hw_index =
570 			ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
571 		src_ring->hw_index &= nentries_mask;
572 
573 		ath10k_pci_sleep(ar);
574 	}
575 
576 	read_index = src_ring->hw_index;
577 
578 	if ((read_index == sw_index) || (read_index == 0xffffffff))
579 		return -EIO;
580 
581 	sbase = src_ring->shadow_base;
582 	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);
583 
584 	/* Return data from completed source descriptor */
585 	*bufferp = __le32_to_cpu(sdesc->addr);
586 	*nbytesp = __le16_to_cpu(sdesc->nbytes);
587 	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
588 			   CE_DESC_FLAGS_META_DATA);
589 
590 	if (per_transfer_contextp)
591 		*per_transfer_contextp =
592 			src_ring->per_transfer_context[sw_index];
593 
594 	/* sanity */
595 	src_ring->per_transfer_context[sw_index] = NULL;
596 
597 	/* Update sw_index */
598 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
599 	src_ring->sw_index = sw_index;
600 
601 	return 0;
602 }
603 
604 /* NB: Modeled after ath10k_ce_completed_send_next */
605 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
606 			       void **per_transfer_contextp,
607 			       u32 *bufferp,
608 			       unsigned int *nbytesp,
609 			       unsigned int *transfer_idp)
610 {
611 	struct ath10k_ce_ring *src_ring;
612 	unsigned int nentries_mask;
613 	unsigned int sw_index;
614 	unsigned int write_index;
615 	int ret;
616 	struct ath10k *ar;
617 	struct ath10k_pci *ar_pci;
618 
619 	src_ring = ce_state->src_ring;
620 
621 	if (!src_ring)
622 		return -EIO;
623 
624 	ar = ce_state->ar;
625 	ar_pci = ath10k_pci_priv(ar);
626 
627 	spin_lock_bh(&ar_pci->ce_lock);
628 
629 	nentries_mask = src_ring->nentries_mask;
630 	sw_index = src_ring->sw_index;
631 	write_index = src_ring->write_index;
632 
633 	if (write_index != sw_index) {
634 		struct ce_desc *base = src_ring->base_addr_owner_space;
635 		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
636 
637 		/* Return data from completed source descriptor */
638 		*bufferp = __le32_to_cpu(desc->addr);
639 		*nbytesp = __le16_to_cpu(desc->nbytes);
640 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
641 						CE_DESC_FLAGS_META_DATA);
642 
643 		if (per_transfer_contextp)
644 			*per_transfer_contextp =
645 				src_ring->per_transfer_context[sw_index];
646 
647 		/* sanity */
648 		src_ring->per_transfer_context[sw_index] = NULL;
649 
650 		/* Update sw_index */
651 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
652 		src_ring->sw_index = sw_index;
653 		ret = 0;
654 	} else {
655 		ret = -EIO;
656 	}
657 
658 	spin_unlock_bh(&ar_pci->ce_lock);
659 
660 	return ret;
661 }
662 
663 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
664 				  void **per_transfer_contextp,
665 				  u32 *bufferp,
666 				  unsigned int *nbytesp,
667 				  unsigned int *transfer_idp)
668 {
669 	struct ath10k *ar = ce_state->ar;
670 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
671 	int ret;
672 
673 	spin_lock_bh(&ar_pci->ce_lock);
674 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
675 						   per_transfer_contextp,
676 						   bufferp, nbytesp,
677 						   transfer_idp);
678 	spin_unlock_bh(&ar_pci->ce_lock);
679 
680 	return ret;
681 }
682 
683 /*
684  * Guts of interrupt handler for per-engine interrupts on a particular CE.
685  *
686  * Invokes registered callbacks for recv_complete,
687  * send_complete, and watermarks.
688  */
689 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
690 {
691 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
692 	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
693 	u32 ctrl_addr = ce_state->ctrl_addr;
694 	int ret;
695 
696 	ret = ath10k_pci_wake(ar);
697 	if (ret)
698 		return;
699 
700 	spin_lock_bh(&ar_pci->ce_lock);
701 
702 	/* Clear the copy-complete interrupts that will be handled here. */
703 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
704 					  HOST_IS_COPY_COMPLETE_MASK);
705 
706 	spin_unlock_bh(&ar_pci->ce_lock);
707 
708 	if (ce_state->recv_cb)
709 		ce_state->recv_cb(ce_state);
710 
711 	if (ce_state->send_cb)
712 		ce_state->send_cb(ce_state);
713 
714 	spin_lock_bh(&ar_pci->ce_lock);
715 
716 	/*
717 	 * Misc CE interrupts are not being handled, but still need
718 	 * to be cleared.
719 	 */
720 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);
721 
722 	spin_unlock_bh(&ar_pci->ce_lock);
723 	ath10k_pci_sleep(ar);
724 }
725 
726 /*
727  * Handler for per-engine interrupts on ALL active CEs.
728  * This is used in cases where the system is sharing a
729  * single interrput for all CEs
730  */
731 
732 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
733 {
734 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
735 	int ce_id, ret;
736 	u32 intr_summary;
737 
738 	ret = ath10k_pci_wake(ar);
739 	if (ret)
740 		return;
741 
742 	intr_summary = CE_INTERRUPT_SUMMARY(ar);
743 
744 	for (ce_id = 0; intr_summary && (ce_id < ar_pci->ce_count); ce_id++) {
745 		if (intr_summary & (1 << ce_id))
746 			intr_summary &= ~(1 << ce_id);
747 		else
748 			/* no intr pending on this CE */
749 			continue;
750 
751 		ath10k_ce_per_engine_service(ar, ce_id);
752 	}
753 
754 	ath10k_pci_sleep(ar);
755 }
756 
757 /*
758  * Adjust interrupts for the copy complete handler.
759  * If it's needed for either send or recv, then unmask
760  * this interrupt; otherwise, mask it.
761  *
762  * Called with ce_lock held.
763  */
764 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state,
765 						int disable_copy_compl_intr)
766 {
767 	u32 ctrl_addr = ce_state->ctrl_addr;
768 	struct ath10k *ar = ce_state->ar;
769 	int ret;
770 
771 	ret = ath10k_pci_wake(ar);
772 	if (ret)
773 		return;
774 
775 	if ((!disable_copy_compl_intr) &&
776 	    (ce_state->send_cb || ce_state->recv_cb))
777 		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
778 	else
779 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
780 
781 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
782 
783 	ath10k_pci_sleep(ar);
784 }
785 
786 void ath10k_ce_disable_interrupts(struct ath10k *ar)
787 {
788 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
789 	int ce_id, ret;
790 
791 	ret = ath10k_pci_wake(ar);
792 	if (ret)
793 		return;
794 
795 	for (ce_id = 0; ce_id < ar_pci->ce_count; ce_id++) {
796 		struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
797 		u32 ctrl_addr = ce_state->ctrl_addr;
798 
799 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
800 	}
801 	ath10k_pci_sleep(ar);
802 }
803 
804 void ath10k_ce_send_cb_register(struct ath10k_ce_pipe *ce_state,
805 				void (*send_cb)(struct ath10k_ce_pipe *),
806 				int disable_interrupts)
807 {
808 	struct ath10k *ar = ce_state->ar;
809 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
810 
811 	spin_lock_bh(&ar_pci->ce_lock);
812 	ce_state->send_cb = send_cb;
813 	ath10k_ce_per_engine_handler_adjust(ce_state, disable_interrupts);
814 	spin_unlock_bh(&ar_pci->ce_lock);
815 }
816 
817 void ath10k_ce_recv_cb_register(struct ath10k_ce_pipe *ce_state,
818 				void (*recv_cb)(struct ath10k_ce_pipe *))
819 {
820 	struct ath10k *ar = ce_state->ar;
821 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
822 
823 	spin_lock_bh(&ar_pci->ce_lock);
824 	ce_state->recv_cb = recv_cb;
825 	ath10k_ce_per_engine_handler_adjust(ce_state, 0);
826 	spin_unlock_bh(&ar_pci->ce_lock);
827 }
828 
829 static int ath10k_ce_init_src_ring(struct ath10k *ar,
830 				   unsigned int ce_id,
831 				   struct ath10k_ce_pipe *ce_state,
832 				   const struct ce_attr *attr)
833 {
834 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
835 	struct ath10k_ce_ring *src_ring;
836 	unsigned int nentries = attr->src_nentries;
837 	unsigned int ce_nbytes;
838 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
839 	dma_addr_t base_addr;
840 	char *ptr;
841 
842 	nentries = roundup_pow_of_two(nentries);
843 
844 	if (ce_state->src_ring) {
845 		WARN_ON(ce_state->src_ring->nentries != nentries);
846 		return 0;
847 	}
848 
849 	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
850 	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
851 	if (ptr == NULL)
852 		return -ENOMEM;
853 
854 	ce_state->src_ring = (struct ath10k_ce_ring *)ptr;
855 	src_ring = ce_state->src_ring;
856 
857 	ptr += sizeof(struct ath10k_ce_ring);
858 	src_ring->nentries = nentries;
859 	src_ring->nentries_mask = nentries - 1;
860 
861 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
862 	src_ring->sw_index &= src_ring->nentries_mask;
863 	src_ring->hw_index = src_ring->sw_index;
864 
865 	src_ring->write_index =
866 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
867 	src_ring->write_index &= src_ring->nentries_mask;
868 
869 	src_ring->per_transfer_context = (void **)ptr;
870 
871 	/*
872 	 * Legacy platforms that do not support cache
873 	 * coherent DMA are unsupported
874 	 */
875 	src_ring->base_addr_owner_space_unaligned =
876 		pci_alloc_consistent(ar_pci->pdev,
877 				     (nentries * sizeof(struct ce_desc) +
878 				      CE_DESC_RING_ALIGN),
879 				     &base_addr);
880 	if (!src_ring->base_addr_owner_space_unaligned) {
881 		kfree(ce_state->src_ring);
882 		ce_state->src_ring = NULL;
883 		return -ENOMEM;
884 	}
885 
886 	src_ring->base_addr_ce_space_unaligned = base_addr;
887 
888 	src_ring->base_addr_owner_space = PTR_ALIGN(
889 			src_ring->base_addr_owner_space_unaligned,
890 			CE_DESC_RING_ALIGN);
891 	src_ring->base_addr_ce_space = ALIGN(
892 			src_ring->base_addr_ce_space_unaligned,
893 			CE_DESC_RING_ALIGN);
894 
895 	/*
896 	 * Also allocate a shadow src ring in regular
897 	 * mem to use for faster access.
898 	 */
899 	src_ring->shadow_base_unaligned =
900 		kmalloc((nentries * sizeof(struct ce_desc) +
901 			 CE_DESC_RING_ALIGN), GFP_KERNEL);
902 	if (!src_ring->shadow_base_unaligned) {
903 		pci_free_consistent(ar_pci->pdev,
904 				    (nentries * sizeof(struct ce_desc) +
905 				     CE_DESC_RING_ALIGN),
906 				    src_ring->base_addr_owner_space,
907 				    src_ring->base_addr_ce_space);
908 		kfree(ce_state->src_ring);
909 		ce_state->src_ring = NULL;
910 		return -ENOMEM;
911 	}
912 
913 	src_ring->shadow_base = PTR_ALIGN(
914 			src_ring->shadow_base_unaligned,
915 			CE_DESC_RING_ALIGN);
916 
917 	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
918 					 src_ring->base_addr_ce_space);
919 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
920 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
921 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
922 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
923 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
924 
925 	ath10k_dbg(ATH10K_DBG_BOOT,
926 		   "boot ce src ring id %d entries %d base_addr %p\n",
927 		   ce_id, nentries, src_ring->base_addr_owner_space);
928 
929 	return 0;
930 }
931 
932 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
933 				    unsigned int ce_id,
934 				    struct ath10k_ce_pipe *ce_state,
935 				    const struct ce_attr *attr)
936 {
937 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
938 	struct ath10k_ce_ring *dest_ring;
939 	unsigned int nentries = attr->dest_nentries;
940 	unsigned int ce_nbytes;
941 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
942 	dma_addr_t base_addr;
943 	char *ptr;
944 
945 	nentries = roundup_pow_of_two(nentries);
946 
947 	if (ce_state->dest_ring) {
948 		WARN_ON(ce_state->dest_ring->nentries != nentries);
949 		return 0;
950 	}
951 
952 	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
953 	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
954 	if (ptr == NULL)
955 		return -ENOMEM;
956 
957 	ce_state->dest_ring = (struct ath10k_ce_ring *)ptr;
958 	dest_ring = ce_state->dest_ring;
959 
960 	ptr += sizeof(struct ath10k_ce_ring);
961 	dest_ring->nentries = nentries;
962 	dest_ring->nentries_mask = nentries - 1;
963 
964 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
965 	dest_ring->sw_index &= dest_ring->nentries_mask;
966 	dest_ring->write_index =
967 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
968 	dest_ring->write_index &= dest_ring->nentries_mask;
969 
970 	dest_ring->per_transfer_context = (void **)ptr;
971 
972 	/*
973 	 * Legacy platforms that do not support cache
974 	 * coherent DMA are unsupported
975 	 */
976 	dest_ring->base_addr_owner_space_unaligned =
977 		pci_alloc_consistent(ar_pci->pdev,
978 				     (nentries * sizeof(struct ce_desc) +
979 				      CE_DESC_RING_ALIGN),
980 				     &base_addr);
981 	if (!dest_ring->base_addr_owner_space_unaligned) {
982 		kfree(ce_state->dest_ring);
983 		ce_state->dest_ring = NULL;
984 		return -ENOMEM;
985 	}
986 
987 	dest_ring->base_addr_ce_space_unaligned = base_addr;
988 
989 	/*
990 	 * Correctly initialize memory to 0 to prevent garbage
991 	 * data crashing system when download firmware
992 	 */
993 	memset(dest_ring->base_addr_owner_space_unaligned, 0,
994 	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);
995 
996 	dest_ring->base_addr_owner_space = PTR_ALIGN(
997 			dest_ring->base_addr_owner_space_unaligned,
998 			CE_DESC_RING_ALIGN);
999 	dest_ring->base_addr_ce_space = ALIGN(
1000 			dest_ring->base_addr_ce_space_unaligned,
1001 			CE_DESC_RING_ALIGN);
1002 
1003 	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
1004 					  dest_ring->base_addr_ce_space);
1005 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1006 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1007 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1008 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1009 
1010 	ath10k_dbg(ATH10K_DBG_BOOT,
1011 		   "boot ce dest ring id %d entries %d base_addr %p\n",
1012 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1013 
1014 	return 0;
1015 }
1016 
1017 static struct ath10k_ce_pipe *ath10k_ce_init_state(struct ath10k *ar,
1018 					     unsigned int ce_id,
1019 					     const struct ce_attr *attr)
1020 {
1021 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1022 	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
1023 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
1024 
1025 	spin_lock_bh(&ar_pci->ce_lock);
1026 
1027 	ce_state->ar = ar;
1028 	ce_state->id = ce_id;
1029 	ce_state->ctrl_addr = ctrl_addr;
1030 	ce_state->attr_flags = attr->flags;
1031 	ce_state->src_sz_max = attr->src_sz_max;
1032 
1033 	spin_unlock_bh(&ar_pci->ce_lock);
1034 
1035 	return ce_state;
1036 }
1037 
1038 /*
1039  * Initialize a Copy Engine based on caller-supplied attributes.
1040  * This may be called once to initialize both source and destination
1041  * rings or it may be called twice for separate source and destination
1042  * initialization. It may be that only one side or the other is
1043  * initialized by software/firmware.
1044  */
1045 struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
1046 				unsigned int ce_id,
1047 				const struct ce_attr *attr)
1048 {
1049 	struct ath10k_ce_pipe *ce_state;
1050 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
1051 	int ret;
1052 
1053 	ret = ath10k_pci_wake(ar);
1054 	if (ret)
1055 		return NULL;
1056 
1057 	ce_state = ath10k_ce_init_state(ar, ce_id, attr);
1058 	if (!ce_state) {
1059 		ath10k_err("Failed to initialize CE state for ID: %d\n", ce_id);
1060 		return NULL;
1061 	}
1062 
1063 	if (attr->src_nentries) {
1064 		ret = ath10k_ce_init_src_ring(ar, ce_id, ce_state, attr);
1065 		if (ret) {
1066 			ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n",
1067 				   ce_id, ret);
1068 			ath10k_ce_deinit(ce_state);
1069 			return NULL;
1070 		}
1071 	}
1072 
1073 	if (attr->dest_nentries) {
1074 		ret = ath10k_ce_init_dest_ring(ar, ce_id, ce_state, attr);
1075 		if (ret) {
1076 			ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n",
1077 				   ce_id, ret);
1078 			ath10k_ce_deinit(ce_state);
1079 			return NULL;
1080 		}
1081 	}
1082 
1083 	/* Enable CE error interrupts */
1084 	ath10k_ce_error_intr_enable(ar, ctrl_addr);
1085 
1086 	ath10k_pci_sleep(ar);
1087 
1088 	return ce_state;
1089 }
1090 
1091 void ath10k_ce_deinit(struct ath10k_ce_pipe *ce_state)
1092 {
1093 	struct ath10k *ar = ce_state->ar;
1094 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1095 
1096 	if (ce_state->src_ring) {
1097 		kfree(ce_state->src_ring->shadow_base_unaligned);
1098 		pci_free_consistent(ar_pci->pdev,
1099 				    (ce_state->src_ring->nentries *
1100 				     sizeof(struct ce_desc) +
1101 				     CE_DESC_RING_ALIGN),
1102 				    ce_state->src_ring->base_addr_owner_space,
1103 				    ce_state->src_ring->base_addr_ce_space);
1104 		kfree(ce_state->src_ring);
1105 	}
1106 
1107 	if (ce_state->dest_ring) {
1108 		pci_free_consistent(ar_pci->pdev,
1109 				    (ce_state->dest_ring->nentries *
1110 				     sizeof(struct ce_desc) +
1111 				     CE_DESC_RING_ALIGN),
1112 				    ce_state->dest_ring->base_addr_owner_space,
1113 				    ce_state->dest_ring->base_addr_ce_space);
1114 		kfree(ce_state->dest_ring);
1115 	}
1116 
1117 	ce_state->src_ring = NULL;
1118 	ce_state->dest_ring = NULL;
1119 }
1120