1 // SPDX-License-Identifier: ISC
2 /*
3  * Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
4  */
5 
6 #include <linux/dma-mapping.h>
7 #include "mt76.h"
8 #include "dma.h"
9 
10 static struct mt76_txwi_cache *
11 mt76_alloc_txwi(struct mt76_dev *dev)
12 {
13 	struct mt76_txwi_cache *t;
14 	dma_addr_t addr;
15 	u8 *txwi;
16 	int size;
17 
18 	size = L1_CACHE_ALIGN(dev->drv->txwi_size + sizeof(*t));
19 	txwi = devm_kzalloc(dev->dev, size, GFP_ATOMIC);
20 	if (!txwi)
21 		return NULL;
22 
23 	addr = dma_map_single(dev->dev, txwi, dev->drv->txwi_size,
24 			      DMA_TO_DEVICE);
25 	t = (struct mt76_txwi_cache *)(txwi + dev->drv->txwi_size);
26 	t->dma_addr = addr;
27 
28 	return t;
29 }
30 
31 static struct mt76_txwi_cache *
32 __mt76_get_txwi(struct mt76_dev *dev)
33 {
34 	struct mt76_txwi_cache *t = NULL;
35 
36 	spin_lock(&dev->lock);
37 	if (!list_empty(&dev->txwi_cache)) {
38 		t = list_first_entry(&dev->txwi_cache, struct mt76_txwi_cache,
39 				     list);
40 		list_del(&t->list);
41 	}
42 	spin_unlock(&dev->lock);
43 
44 	return t;
45 }
46 
/*
 * Get a TXWI cache entry: reuse one from dev->txwi_cache when available,
 * otherwise allocate a fresh one. May return NULL if allocation fails.
 */
static struct mt76_txwi_cache *
mt76_get_txwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *cached = __mt76_get_txwi(dev);

	return cached ? cached : mt76_alloc_txwi(dev);
}
57 
58 void
59 mt76_put_txwi(struct mt76_dev *dev, struct mt76_txwi_cache *t)
60 {
61 	if (!t)
62 		return;
63 
64 	spin_lock(&dev->lock);
65 	list_add(&t->list, &dev->txwi_cache);
66 	spin_unlock(&dev->lock);
67 }
68 EXPORT_SYMBOL_GPL(mt76_put_txwi);
69 
70 static void
71 mt76_free_pending_txwi(struct mt76_dev *dev)
72 {
73 	struct mt76_txwi_cache *t;
74 
75 	local_bh_disable();
76 	while ((t = __mt76_get_txwi(dev)) != NULL)
77 		dma_unmap_single(dev->dev, t->dma_addr, dev->drv->txwi_size,
78 				 DMA_TO_DEVICE);
79 	local_bh_enable();
80 }
81 
82 static int
83 mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
84 		     int idx, int n_desc, int bufsize,
85 		     u32 ring_base)
86 {
87 	int size;
88 	int i;
89 
90 	spin_lock_init(&q->lock);
91 	spin_lock_init(&q->cleanup_lock);
92 
93 	q->regs = dev->mmio.regs + ring_base + idx * MT_RING_SIZE;
94 	q->ndesc = n_desc;
95 	q->buf_size = bufsize;
96 	q->hw_idx = idx;
97 
98 	size = q->ndesc * sizeof(struct mt76_desc);
99 	q->desc = dmam_alloc_coherent(dev->dev, size, &q->desc_dma, GFP_KERNEL);
100 	if (!q->desc)
101 		return -ENOMEM;
102 
103 	size = q->ndesc * sizeof(*q->entry);
104 	q->entry = devm_kzalloc(dev->dev, size, GFP_KERNEL);
105 	if (!q->entry)
106 		return -ENOMEM;
107 
108 	/* clear descriptors */
109 	for (i = 0; i < q->ndesc; i++)
110 		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
111 
112 	writel(q->desc_dma, &q->regs->desc_base);
113 	writel(0, &q->regs->cpu_idx);
114 	writel(0, &q->regs->dma_idx);
115 	writel(q->ndesc, &q->regs->ring_size);
116 
117 	return 0;
118 }
119 
120 static int
121 mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
122 		 struct mt76_queue_buf *buf, int nbufs, u32 info,
123 		 struct sk_buff *skb, void *txwi)
124 {
125 	struct mt76_queue_entry *entry;
126 	struct mt76_desc *desc;
127 	u32 ctrl;
128 	int i, idx = -1;
129 
130 	if (txwi) {
131 		q->entry[q->head].txwi = DMA_DUMMY_DATA;
132 		q->entry[q->head].skip_buf0 = true;
133 	}
134 
135 	for (i = 0; i < nbufs; i += 2, buf += 2) {
136 		u32 buf0 = buf[0].addr, buf1 = 0;
137 
138 		idx = q->head;
139 		q->head = (q->head + 1) % q->ndesc;
140 
141 		desc = &q->desc[idx];
142 		entry = &q->entry[idx];
143 
144 		if (buf[0].skip_unmap)
145 			entry->skip_buf0 = true;
146 		entry->skip_buf1 = i == nbufs - 1;
147 
148 		entry->dma_addr[0] = buf[0].addr;
149 		entry->dma_len[0] = buf[0].len;
150 
151 		ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
152 		if (i < nbufs - 1) {
153 			entry->dma_addr[1] = buf[1].addr;
154 			entry->dma_len[1] = buf[1].len;
155 			buf1 = buf[1].addr;
156 			ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
157 			if (buf[1].skip_unmap)
158 				entry->skip_buf1 = true;
159 		}
160 
161 		if (i == nbufs - 1)
162 			ctrl |= MT_DMA_CTL_LAST_SEC0;
163 		else if (i == nbufs - 2)
164 			ctrl |= MT_DMA_CTL_LAST_SEC1;
165 
166 		WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
167 		WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
168 		WRITE_ONCE(desc->info, cpu_to_le32(info));
169 		WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
170 
171 		q->queued++;
172 	}
173 
174 	q->entry[idx].txwi = txwi;
175 	q->entry[idx].skb = skb;
176 
177 	return idx;
178 }
179 
180 static void
181 mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
182 			struct mt76_queue_entry *prev_e)
183 {
184 	struct mt76_queue_entry *e = &q->entry[idx];
185 
186 	if (!e->skip_buf0)
187 		dma_unmap_single(dev->dev, e->dma_addr[0], e->dma_len[0],
188 				 DMA_TO_DEVICE);
189 
190 	if (!e->skip_buf1)
191 		dma_unmap_single(dev->dev, e->dma_addr[1], e->dma_len[1],
192 				 DMA_TO_DEVICE);
193 
194 	if (e->txwi == DMA_DUMMY_DATA)
195 		e->txwi = NULL;
196 
197 	if (e->skb == DMA_DUMMY_DATA)
198 		e->skb = NULL;
199 
200 	*prev_e = *e;
201 	memset(e, 0, sizeof(*e));
202 }
203 
204 static void
205 mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
206 {
207 	writel(q->desc_dma, &q->regs->desc_base);
208 	writel(q->ndesc, &q->regs->ring_size);
209 	q->head = readl(&q->regs->dma_idx);
210 	q->tail = q->head;
211 }
212 
213 static void
214 mt76_dma_kick_queue(struct mt76_dev *dev, struct mt76_queue *q)
215 {
216 	wmb();
217 	writel(q->head, &q->regs->cpu_idx);
218 }
219 
220 static void
221 mt76_dma_tx_cleanup(struct mt76_dev *dev, struct mt76_queue *q, bool flush)
222 {
223 	struct mt76_queue_entry entry;
224 	int last;
225 
226 	if (!q)
227 		return;
228 
229 	spin_lock_bh(&q->cleanup_lock);
230 	if (flush)
231 		last = -1;
232 	else
233 		last = readl(&q->regs->dma_idx);
234 
235 	while (q->queued > 0 && q->tail != last) {
236 		mt76_dma_tx_cleanup_idx(dev, q, q->tail, &entry);
237 		mt76_queue_tx_complete(dev, q, &entry);
238 
239 		if (entry.txwi) {
240 			if (!(dev->drv->drv_flags & MT_DRV_TXWI_NO_FREE))
241 				mt76_put_txwi(dev, entry.txwi);
242 		}
243 
244 		if (!flush && q->tail == last)
245 			last = readl(&q->regs->dma_idx);
246 
247 	}
248 	spin_unlock_bh(&q->cleanup_lock);
249 
250 	if (flush) {
251 		spin_lock_bh(&q->lock);
252 		mt76_dma_sync_idx(dev, q);
253 		mt76_dma_kick_queue(dev, q);
254 		spin_unlock_bh(&q->lock);
255 	}
256 
257 	if (!q->queued)
258 		wake_up(&dev->tx_wait);
259 }
260 
261 static void *
262 mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
263 		 int *len, u32 *info, bool *more)
264 {
265 	struct mt76_queue_entry *e = &q->entry[idx];
266 	struct mt76_desc *desc = &q->desc[idx];
267 	dma_addr_t buf_addr;
268 	void *buf = e->buf;
269 	int buf_len = SKB_WITH_OVERHEAD(q->buf_size);
270 
271 	buf_addr = e->dma_addr[0];
272 	if (len) {
273 		u32 ctl = le32_to_cpu(READ_ONCE(desc->ctrl));
274 		*len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctl);
275 		*more = !(ctl & MT_DMA_CTL_LAST_SEC0);
276 	}
277 
278 	if (info)
279 		*info = le32_to_cpu(desc->info);
280 
281 	dma_unmap_single(dev->dev, buf_addr, buf_len, DMA_FROM_DEVICE);
282 	e->buf = NULL;
283 
284 	return buf;
285 }
286 
287 static void *
288 mt76_dma_dequeue(struct mt76_dev *dev, struct mt76_queue *q, bool flush,
289 		 int *len, u32 *info, bool *more)
290 {
291 	int idx = q->tail;
292 
293 	*more = false;
294 	if (!q->queued)
295 		return NULL;
296 
297 	if (flush)
298 		q->desc[idx].ctrl |= cpu_to_le32(MT_DMA_CTL_DMA_DONE);
299 	else if (!(q->desc[idx].ctrl & cpu_to_le32(MT_DMA_CTL_DMA_DONE)))
300 		return NULL;
301 
302 	q->tail = (q->tail + 1) % q->ndesc;
303 	q->queued--;
304 
305 	return mt76_dma_get_buf(dev, q, idx, len, info, more);
306 }
307 
308 static int
309 mt76_dma_tx_queue_skb_raw(struct mt76_dev *dev, struct mt76_queue *q,
310 			  struct sk_buff *skb, u32 tx_info)
311 {
312 	struct mt76_queue_buf buf;
313 	dma_addr_t addr;
314 
315 	if (q->queued + 1 >= q->ndesc - 1)
316 		goto error;
317 
318 	addr = dma_map_single(dev->dev, skb->data, skb->len,
319 			      DMA_TO_DEVICE);
320 	if (unlikely(dma_mapping_error(dev->dev, addr)))
321 		goto error;
322 
323 	buf.addr = addr;
324 	buf.len = skb->len;
325 
326 	spin_lock_bh(&q->lock);
327 	mt76_dma_add_buf(dev, q, &buf, 1, tx_info, skb, NULL);
328 	mt76_dma_kick_queue(dev, q);
329 	spin_unlock_bh(&q->lock);
330 
331 	return 0;
332 
333 error:
334 	dev_kfree_skb(skb);
335 	return -ENOMEM;
336 }
337 
338 static int
339 mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
340 		      struct sk_buff *skb, struct mt76_wcid *wcid,
341 		      struct ieee80211_sta *sta)
342 {
343 	struct mt76_tx_info tx_info = {
344 		.skb = skb,
345 	};
346 	struct ieee80211_hw *hw;
347 	int len, n = 0, ret = -ENOMEM;
348 	struct mt76_queue_entry e;
349 	struct mt76_txwi_cache *t;
350 	struct sk_buff *iter;
351 	dma_addr_t addr;
352 	u8 *txwi;
353 
354 	t = mt76_get_txwi(dev);
355 	if (!t) {
356 		hw = mt76_tx_status_get_hw(dev, skb);
357 		ieee80211_free_txskb(hw, skb);
358 		return -ENOMEM;
359 	}
360 	txwi = mt76_get_txwi_ptr(dev, t);
361 
362 	skb->prev = skb->next = NULL;
363 	if (dev->drv->drv_flags & MT_DRV_TX_ALIGNED4_SKBS)
364 		mt76_insert_hdr_pad(skb);
365 
366 	len = skb_headlen(skb);
367 	addr = dma_map_single(dev->dev, skb->data, len, DMA_TO_DEVICE);
368 	if (unlikely(dma_mapping_error(dev->dev, addr)))
369 		goto free;
370 
371 	tx_info.buf[n].addr = t->dma_addr;
372 	tx_info.buf[n++].len = dev->drv->txwi_size;
373 	tx_info.buf[n].addr = addr;
374 	tx_info.buf[n++].len = len;
375 
376 	skb_walk_frags(skb, iter) {
377 		if (n == ARRAY_SIZE(tx_info.buf))
378 			goto unmap;
379 
380 		addr = dma_map_single(dev->dev, iter->data, iter->len,
381 				      DMA_TO_DEVICE);
382 		if (unlikely(dma_mapping_error(dev->dev, addr)))
383 			goto unmap;
384 
385 		tx_info.buf[n].addr = addr;
386 		tx_info.buf[n++].len = iter->len;
387 	}
388 	tx_info.nbuf = n;
389 
390 	dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size,
391 				DMA_TO_DEVICE);
392 	ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info);
393 	dma_sync_single_for_device(dev->dev, t->dma_addr, dev->drv->txwi_size,
394 				   DMA_TO_DEVICE);
395 	if (ret < 0)
396 		goto unmap;
397 
398 	if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
399 		ret = -ENOMEM;
400 		goto unmap;
401 	}
402 
403 	return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
404 				tx_info.info, tx_info.skb, t);
405 
406 unmap:
407 	for (n--; n > 0; n--)
408 		dma_unmap_single(dev->dev, tx_info.buf[n].addr,
409 				 tx_info.buf[n].len, DMA_TO_DEVICE);
410 
411 free:
412 #ifdef CONFIG_NL80211_TESTMODE
413 	/* fix tx_done accounting on queue overflow */
414 	if (mt76_is_testmode_skb(dev, skb, &hw)) {
415 		struct mt76_phy *phy = hw->priv;
416 
417 		if (tx_info.skb == phy->test.tx_skb)
418 			phy->test.tx_done--;
419 	}
420 #endif
421 
422 	e.skb = tx_info.skb;
423 	e.txwi = t;
424 	dev->drv->tx_complete_skb(dev, &e);
425 	mt76_put_txwi(dev, t);
426 	return ret;
427 }
428 
429 static int
430 mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
431 {
432 	dma_addr_t addr;
433 	void *buf;
434 	int frames = 0;
435 	int len = SKB_WITH_OVERHEAD(q->buf_size);
436 	int offset = q->buf_offset;
437 
438 	spin_lock_bh(&q->lock);
439 
440 	while (q->queued < q->ndesc - 1) {
441 		struct mt76_queue_buf qbuf;
442 
443 		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
444 		if (!buf)
445 			break;
446 
447 		addr = dma_map_single(dev->dev, buf, len, DMA_FROM_DEVICE);
448 		if (unlikely(dma_mapping_error(dev->dev, addr))) {
449 			skb_free_frag(buf);
450 			break;
451 		}
452 
453 		qbuf.addr = addr + offset;
454 		qbuf.len = len - offset;
455 		mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL);
456 		frames++;
457 	}
458 
459 	if (frames)
460 		mt76_dma_kick_queue(dev, q);
461 
462 	spin_unlock_bh(&q->lock);
463 
464 	return frames;
465 }
466 
467 static void
468 mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
469 {
470 	struct page *page;
471 	void *buf;
472 	bool more;
473 
474 	spin_lock_bh(&q->lock);
475 	do {
476 		buf = mt76_dma_dequeue(dev, q, true, NULL, NULL, &more);
477 		if (!buf)
478 			break;
479 
480 		skb_free_frag(buf);
481 	} while (1);
482 	spin_unlock_bh(&q->lock);
483 
484 	if (!q->rx_page.va)
485 		return;
486 
487 	page = virt_to_page(q->rx_page.va);
488 	__page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
489 	memset(&q->rx_page, 0, sizeof(q->rx_page));
490 }
491 
492 static void
493 mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
494 {
495 	struct mt76_queue *q = &dev->q_rx[qid];
496 	int i;
497 
498 	for (i = 0; i < q->ndesc; i++)
499 		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
500 
501 	mt76_dma_rx_cleanup(dev, q);
502 	mt76_dma_sync_idx(dev, q);
503 	mt76_dma_rx_fill(dev, q);
504 
505 	if (!q->rx_head)
506 		return;
507 
508 	dev_kfree_skb(q->rx_head);
509 	q->rx_head = NULL;
510 }
511 
512 static void
513 mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
514 		  int len, bool more)
515 {
516 	struct sk_buff *skb = q->rx_head;
517 	struct skb_shared_info *shinfo = skb_shinfo(skb);
518 
519 	if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
520 		struct page *page = virt_to_head_page(data);
521 		int offset = data - page_address(page) + q->buf_offset;
522 
523 		skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
524 				q->buf_size);
525 	} else {
526 		skb_free_frag(data);
527 	}
528 
529 	if (more)
530 		return;
531 
532 	q->rx_head = NULL;
533 	dev->drv->rx_skb(dev, q - dev->q_rx, skb);
534 }
535 
536 static int
537 mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
538 {
539 	int len, data_len, done = 0;
540 	struct sk_buff *skb;
541 	unsigned char *data;
542 	bool more;
543 
544 	while (done < budget) {
545 		u32 info;
546 
547 		data = mt76_dma_dequeue(dev, q, false, &len, &info, &more);
548 		if (!data)
549 			break;
550 
551 		if (q->rx_head)
552 			data_len = q->buf_size;
553 		else
554 			data_len = SKB_WITH_OVERHEAD(q->buf_size);
555 
556 		if (data_len < len + q->buf_offset) {
557 			dev_kfree_skb(q->rx_head);
558 			q->rx_head = NULL;
559 
560 			skb_free_frag(data);
561 			continue;
562 		}
563 
564 		if (q->rx_head) {
565 			mt76_add_fragment(dev, q, data, len, more);
566 			continue;
567 		}
568 
569 		skb = build_skb(data, q->buf_size);
570 		if (!skb) {
571 			skb_free_frag(data);
572 			continue;
573 		}
574 		skb_reserve(skb, q->buf_offset);
575 
576 		if (q == &dev->q_rx[MT_RXQ_MCU]) {
577 			u32 *rxfce = (u32 *)skb->cb;
578 			*rxfce = info;
579 		}
580 
581 		__skb_put(skb, len);
582 		done++;
583 
584 		if (more) {
585 			q->rx_head = skb;
586 			continue;
587 		}
588 
589 		dev->drv->rx_skb(dev, q - dev->q_rx, skb);
590 	}
591 
592 	mt76_dma_rx_fill(dev, q);
593 	return done;
594 }
595 
596 static int
597 mt76_dma_rx_poll(struct napi_struct *napi, int budget)
598 {
599 	struct mt76_dev *dev;
600 	int qid, done = 0, cur;
601 
602 	dev = container_of(napi->dev, struct mt76_dev, napi_dev);
603 	qid = napi - dev->napi;
604 
605 	local_bh_disable();
606 	rcu_read_lock();
607 
608 	do {
609 		cur = mt76_dma_rx_process(dev, &dev->q_rx[qid], budget - done);
610 		mt76_rx_poll_complete(dev, qid, napi);
611 		done += cur;
612 	} while (cur && done < budget);
613 
614 	rcu_read_unlock();
615 	local_bh_enable();
616 
617 	if (done < budget && napi_complete(napi))
618 		dev->drv->rx_poll_complete(dev, qid);
619 
620 	return done;
621 }
622 
623 static int
624 mt76_dma_init(struct mt76_dev *dev)
625 {
626 	int i;
627 
628 	init_dummy_netdev(&dev->napi_dev);
629 
630 	mt76_for_each_q_rx(dev, i) {
631 		netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
632 			       64);
633 		mt76_dma_rx_fill(dev, &dev->q_rx[i]);
634 		napi_enable(&dev->napi[i]);
635 	}
636 
637 	return 0;
638 }
639 
640 static const struct mt76_queue_ops mt76_dma_ops = {
641 	.init = mt76_dma_init,
642 	.alloc = mt76_dma_alloc_queue,
643 	.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
644 	.tx_queue_skb = mt76_dma_tx_queue_skb,
645 	.tx_cleanup = mt76_dma_tx_cleanup,
646 	.rx_reset = mt76_dma_rx_reset,
647 	.kick = mt76_dma_kick_queue,
648 };
649 
650 void mt76_dma_attach(struct mt76_dev *dev)
651 {
652 	dev->queue_ops = &mt76_dma_ops;
653 }
654 EXPORT_SYMBOL_GPL(mt76_dma_attach);
655 
656 void mt76_dma_cleanup(struct mt76_dev *dev)
657 {
658 	int i;
659 
660 	mt76_worker_disable(&dev->tx_worker);
661 	netif_napi_del(&dev->tx_napi);
662 
663 	for (i = 0; i < ARRAY_SIZE(dev->phy.q_tx); i++) {
664 		mt76_dma_tx_cleanup(dev, dev->phy.q_tx[i], true);
665 		if (dev->phy2)
666 			mt76_dma_tx_cleanup(dev, dev->phy2->q_tx[i], true);
667 	}
668 
669 	for (i = 0; i < ARRAY_SIZE(dev->q_mcu); i++)
670 		mt76_dma_tx_cleanup(dev, dev->q_mcu[i], true);
671 
672 	mt76_for_each_q_rx(dev, i) {
673 		netif_napi_del(&dev->napi[i]);
674 		mt76_dma_rx_cleanup(dev, &dev->q_rx[i]);
675 	}
676 
677 	mt76_free_pending_txwi(dev);
678 }
679 EXPORT_SYMBOL_GPL(mt76_dma_cleanup);
680