1 // SPDX-License-Identifier: ISC
2 /*
3  * Copyright (C) 2016 Felix Fietkau <nbd@nbd.name>
4  */
5 
6 #include <linux/dma-mapping.h>
7 #include "mt76.h"
8 #include "dma.h"
9 
10 static struct mt76_txwi_cache *
11 mt76_alloc_txwi(struct mt76_dev *dev)
12 {
13 	struct mt76_txwi_cache *t;
14 	dma_addr_t addr;
15 	u8 *txwi;
16 	int size;
17 
18 	size = L1_CACHE_ALIGN(dev->drv->txwi_size + sizeof(*t));
19 	txwi = devm_kzalloc(dev->dev, size, GFP_ATOMIC);
20 	if (!txwi)
21 		return NULL;
22 
23 	addr = dma_map_single(dev->dev, txwi, dev->drv->txwi_size,
24 			      DMA_TO_DEVICE);
25 	t = (struct mt76_txwi_cache *)(txwi + dev->drv->txwi_size);
26 	t->dma_addr = addr;
27 
28 	return t;
29 }
30 
31 static struct mt76_txwi_cache *
32 __mt76_get_txwi(struct mt76_dev *dev)
33 {
34 	struct mt76_txwi_cache *t = NULL;
35 
36 	spin_lock(&dev->lock);
37 	if (!list_empty(&dev->txwi_cache)) {
38 		t = list_first_entry(&dev->txwi_cache, struct mt76_txwi_cache,
39 				     list);
40 		list_del(&t->list);
41 	}
42 	spin_unlock(&dev->lock);
43 
44 	return t;
45 }
46 
/*
 * Get a TXWI cache entry: reuse one from the free list when available,
 * otherwise allocate a fresh one. Returns NULL if both attempts fail.
 */
static struct mt76_txwi_cache *
mt76_get_txwi(struct mt76_dev *dev)
{
	struct mt76_txwi_cache *entry;

	entry = __mt76_get_txwi(dev);
	if (!entry)
		entry = mt76_alloc_txwi(dev);

	return entry;
}
57 
58 void
59 mt76_put_txwi(struct mt76_dev *dev, struct mt76_txwi_cache *t)
60 {
61 	if (!t)
62 		return;
63 
64 	spin_lock(&dev->lock);
65 	list_add(&t->list, &dev->txwi_cache);
66 	spin_unlock(&dev->lock);
67 }
68 EXPORT_SYMBOL_GPL(mt76_put_txwi);
69 
70 static void
71 mt76_free_pending_txwi(struct mt76_dev *dev)
72 {
73 	struct mt76_txwi_cache *t;
74 
75 	local_bh_disable();
76 	while ((t = __mt76_get_txwi(dev)) != NULL)
77 		dma_unmap_single(dev->dev, t->dma_addr, dev->drv->txwi_size,
78 				 DMA_TO_DEVICE);
79 	local_bh_enable();
80 }
81 
82 static int
83 mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
84 		     int idx, int n_desc, int bufsize,
85 		     u32 ring_base)
86 {
87 	int size;
88 	int i;
89 
90 	spin_lock_init(&q->lock);
91 	spin_lock_init(&q->cleanup_lock);
92 
93 	q->regs = dev->mmio.regs + ring_base + idx * MT_RING_SIZE;
94 	q->ndesc = n_desc;
95 	q->buf_size = bufsize;
96 	q->hw_idx = idx;
97 
98 	size = q->ndesc * sizeof(struct mt76_desc);
99 	q->desc = dmam_alloc_coherent(dev->dev, size, &q->desc_dma, GFP_KERNEL);
100 	if (!q->desc)
101 		return -ENOMEM;
102 
103 	size = q->ndesc * sizeof(*q->entry);
104 	q->entry = devm_kzalloc(dev->dev, size, GFP_KERNEL);
105 	if (!q->entry)
106 		return -ENOMEM;
107 
108 	/* clear descriptors */
109 	for (i = 0; i < q->ndesc; i++)
110 		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
111 
112 	writel(q->desc_dma, &q->regs->desc_base);
113 	writel(0, &q->regs->cpu_idx);
114 	writel(0, &q->regs->dma_idx);
115 	writel(q->ndesc, &q->regs->ring_size);
116 
117 	return 0;
118 }
119 
120 static int
121 mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
122 		 struct mt76_queue_buf *buf, int nbufs, u32 info,
123 		 struct sk_buff *skb, void *txwi)
124 {
125 	struct mt76_queue_entry *entry;
126 	struct mt76_desc *desc;
127 	u32 ctrl;
128 	int i, idx = -1;
129 
130 	if (txwi) {
131 		q->entry[q->head].txwi = DMA_DUMMY_DATA;
132 		q->entry[q->head].skip_buf0 = true;
133 	}
134 
135 	for (i = 0; i < nbufs; i += 2, buf += 2) {
136 		u32 buf0 = buf[0].addr, buf1 = 0;
137 
138 		idx = q->head;
139 		q->head = (q->head + 1) % q->ndesc;
140 
141 		desc = &q->desc[idx];
142 		entry = &q->entry[idx];
143 
144 		if (buf[0].skip_unmap)
145 			entry->skip_buf0 = true;
146 		entry->skip_buf1 = i == nbufs - 1;
147 
148 		entry->dma_addr[0] = buf[0].addr;
149 		entry->dma_len[0] = buf[0].len;
150 
151 		ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
152 		if (i < nbufs - 1) {
153 			entry->dma_addr[1] = buf[1].addr;
154 			entry->dma_len[1] = buf[1].len;
155 			buf1 = buf[1].addr;
156 			ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
157 			if (buf[1].skip_unmap)
158 				entry->skip_buf1 = true;
159 		}
160 
161 		if (i == nbufs - 1)
162 			ctrl |= MT_DMA_CTL_LAST_SEC0;
163 		else if (i == nbufs - 2)
164 			ctrl |= MT_DMA_CTL_LAST_SEC1;
165 
166 		WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
167 		WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
168 		WRITE_ONCE(desc->info, cpu_to_le32(info));
169 		WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
170 
171 		q->queued++;
172 	}
173 
174 	q->entry[idx].txwi = txwi;
175 	q->entry[idx].skb = skb;
176 
177 	return idx;
178 }
179 
180 static void
181 mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
182 			struct mt76_queue_entry *prev_e)
183 {
184 	struct mt76_queue_entry *e = &q->entry[idx];
185 
186 	if (!e->skip_buf0)
187 		dma_unmap_single(dev->dev, e->dma_addr[0], e->dma_len[0],
188 				 DMA_TO_DEVICE);
189 
190 	if (!e->skip_buf1)
191 		dma_unmap_single(dev->dev, e->dma_addr[1], e->dma_len[1],
192 				 DMA_TO_DEVICE);
193 
194 	if (e->txwi == DMA_DUMMY_DATA)
195 		e->txwi = NULL;
196 
197 	if (e->skb == DMA_DUMMY_DATA)
198 		e->skb = NULL;
199 
200 	*prev_e = *e;
201 	memset(e, 0, sizeof(*e));
202 }
203 
204 static void
205 mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
206 {
207 	writel(q->desc_dma, &q->regs->desc_base);
208 	writel(q->ndesc, &q->regs->ring_size);
209 	q->head = readl(&q->regs->dma_idx);
210 	q->tail = q->head;
211 }
212 
213 static void
214 mt76_dma_kick_queue(struct mt76_dev *dev, struct mt76_queue *q)
215 {
216 	wmb();
217 	writel(q->head, &q->regs->cpu_idx);
218 }
219 
220 static void
221 mt76_dma_tx_cleanup(struct mt76_dev *dev, struct mt76_queue *q, bool flush)
222 {
223 	struct mt76_queue_entry entry;
224 	int last;
225 
226 	if (!q)
227 		return;
228 
229 	spin_lock_bh(&q->cleanup_lock);
230 	if (flush)
231 		last = -1;
232 	else
233 		last = readl(&q->regs->dma_idx);
234 
235 	while (q->queued > 0 && q->tail != last) {
236 		mt76_dma_tx_cleanup_idx(dev, q, q->tail, &entry);
237 		mt76_queue_tx_complete(dev, q, &entry);
238 
239 		if (entry.txwi) {
240 			if (!(dev->drv->drv_flags & MT_DRV_TXWI_NO_FREE))
241 				mt76_put_txwi(dev, entry.txwi);
242 		}
243 
244 		if (!flush && q->tail == last)
245 			last = readl(&q->regs->dma_idx);
246 
247 	}
248 	spin_unlock_bh(&q->cleanup_lock);
249 
250 	if (flush) {
251 		spin_lock_bh(&q->lock);
252 		mt76_dma_sync_idx(dev, q);
253 		mt76_dma_kick_queue(dev, q);
254 		spin_unlock_bh(&q->lock);
255 	}
256 
257 	if (!q->queued)
258 		wake_up(&dev->tx_wait);
259 }
260 
261 static void *
262 mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
263 		 int *len, u32 *info, bool *more)
264 {
265 	struct mt76_queue_entry *e = &q->entry[idx];
266 	struct mt76_desc *desc = &q->desc[idx];
267 	dma_addr_t buf_addr;
268 	void *buf = e->buf;
269 	int buf_len = SKB_WITH_OVERHEAD(q->buf_size);
270 
271 	buf_addr = e->dma_addr[0];
272 	if (len) {
273 		u32 ctl = le32_to_cpu(READ_ONCE(desc->ctrl));
274 		*len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctl);
275 		*more = !(ctl & MT_DMA_CTL_LAST_SEC0);
276 	}
277 
278 	if (info)
279 		*info = le32_to_cpu(desc->info);
280 
281 	dma_unmap_single(dev->dev, buf_addr, buf_len, DMA_FROM_DEVICE);
282 	e->buf = NULL;
283 
284 	return buf;
285 }
286 
287 static void *
288 mt76_dma_dequeue(struct mt76_dev *dev, struct mt76_queue *q, bool flush,
289 		 int *len, u32 *info, bool *more)
290 {
291 	int idx = q->tail;
292 
293 	*more = false;
294 	if (!q->queued)
295 		return NULL;
296 
297 	if (flush)
298 		q->desc[idx].ctrl |= cpu_to_le32(MT_DMA_CTL_DMA_DONE);
299 	else if (!(q->desc[idx].ctrl & cpu_to_le32(MT_DMA_CTL_DMA_DONE)))
300 		return NULL;
301 
302 	q->tail = (q->tail + 1) % q->ndesc;
303 	q->queued--;
304 
305 	return mt76_dma_get_buf(dev, q, idx, len, info, more);
306 }
307 
308 static int
309 mt76_dma_tx_queue_skb_raw(struct mt76_dev *dev, struct mt76_queue *q,
310 			  struct sk_buff *skb, u32 tx_info)
311 {
312 	struct mt76_queue_buf buf;
313 	dma_addr_t addr;
314 
315 	if (q->queued + 1 >= q->ndesc - 1)
316 		goto error;
317 
318 	addr = dma_map_single(dev->dev, skb->data, skb->len,
319 			      DMA_TO_DEVICE);
320 	if (unlikely(dma_mapping_error(dev->dev, addr)))
321 		goto error;
322 
323 	buf.addr = addr;
324 	buf.len = skb->len;
325 
326 	spin_lock_bh(&q->lock);
327 	mt76_dma_add_buf(dev, q, &buf, 1, tx_info, skb, NULL);
328 	mt76_dma_kick_queue(dev, q);
329 	spin_unlock_bh(&q->lock);
330 
331 	return 0;
332 
333 error:
334 	dev_kfree_skb(skb);
335 	return -ENOMEM;
336 }
337 
338 static int
339 mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
340 		      struct sk_buff *skb, struct mt76_wcid *wcid,
341 		      struct ieee80211_sta *sta)
342 {
343 	struct mt76_tx_info tx_info = {
344 		.skb = skb,
345 	};
346 	struct ieee80211_hw *hw;
347 	int len, n = 0, ret = -ENOMEM;
348 	struct mt76_queue_entry e;
349 	struct mt76_txwi_cache *t;
350 	struct sk_buff *iter;
351 	dma_addr_t addr;
352 	u8 *txwi;
353 
354 	t = mt76_get_txwi(dev);
355 	if (!t) {
356 		hw = mt76_tx_status_get_hw(dev, skb);
357 		ieee80211_free_txskb(hw, skb);
358 		return -ENOMEM;
359 	}
360 	txwi = mt76_get_txwi_ptr(dev, t);
361 
362 	skb->prev = skb->next = NULL;
363 	if (dev->drv->drv_flags & MT_DRV_TX_ALIGNED4_SKBS)
364 		mt76_insert_hdr_pad(skb);
365 
366 	len = skb_headlen(skb);
367 	addr = dma_map_single(dev->dev, skb->data, len, DMA_TO_DEVICE);
368 	if (unlikely(dma_mapping_error(dev->dev, addr)))
369 		goto free;
370 
371 	tx_info.buf[n].addr = t->dma_addr;
372 	tx_info.buf[n++].len = dev->drv->txwi_size;
373 	tx_info.buf[n].addr = addr;
374 	tx_info.buf[n++].len = len;
375 
376 	skb_walk_frags(skb, iter) {
377 		if (n == ARRAY_SIZE(tx_info.buf))
378 			goto unmap;
379 
380 		addr = dma_map_single(dev->dev, iter->data, iter->len,
381 				      DMA_TO_DEVICE);
382 		if (unlikely(dma_mapping_error(dev->dev, addr)))
383 			goto unmap;
384 
385 		tx_info.buf[n].addr = addr;
386 		tx_info.buf[n++].len = iter->len;
387 	}
388 	tx_info.nbuf = n;
389 
390 	dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size,
391 				DMA_TO_DEVICE);
392 	ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info);
393 	dma_sync_single_for_device(dev->dev, t->dma_addr, dev->drv->txwi_size,
394 				   DMA_TO_DEVICE);
395 	if (ret < 0)
396 		goto unmap;
397 
398 	if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
399 		ret = -ENOMEM;
400 		goto unmap;
401 	}
402 
403 	return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
404 				tx_info.info, tx_info.skb, t);
405 
406 unmap:
407 	for (n--; n > 0; n--)
408 		dma_unmap_single(dev->dev, tx_info.buf[n].addr,
409 				 tx_info.buf[n].len, DMA_TO_DEVICE);
410 
411 free:
412 #ifdef CONFIG_NL80211_TESTMODE
413 	/* fix tx_done accounting on queue overflow */
414 	if (tx_info.skb == dev->test.tx_skb)
415 		dev->test.tx_done--;
416 #endif
417 
418 	e.skb = tx_info.skb;
419 	e.txwi = t;
420 	dev->drv->tx_complete_skb(dev, &e);
421 	mt76_put_txwi(dev, t);
422 	return ret;
423 }
424 
425 static int
426 mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
427 {
428 	dma_addr_t addr;
429 	void *buf;
430 	int frames = 0;
431 	int len = SKB_WITH_OVERHEAD(q->buf_size);
432 	int offset = q->buf_offset;
433 
434 	spin_lock_bh(&q->lock);
435 
436 	while (q->queued < q->ndesc - 1) {
437 		struct mt76_queue_buf qbuf;
438 
439 		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
440 		if (!buf)
441 			break;
442 
443 		addr = dma_map_single(dev->dev, buf, len, DMA_FROM_DEVICE);
444 		if (unlikely(dma_mapping_error(dev->dev, addr))) {
445 			skb_free_frag(buf);
446 			break;
447 		}
448 
449 		qbuf.addr = addr + offset;
450 		qbuf.len = len - offset;
451 		mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, NULL);
452 		frames++;
453 	}
454 
455 	if (frames)
456 		mt76_dma_kick_queue(dev, q);
457 
458 	spin_unlock_bh(&q->lock);
459 
460 	return frames;
461 }
462 
463 static void
464 mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
465 {
466 	struct page *page;
467 	void *buf;
468 	bool more;
469 
470 	spin_lock_bh(&q->lock);
471 	do {
472 		buf = mt76_dma_dequeue(dev, q, true, NULL, NULL, &more);
473 		if (!buf)
474 			break;
475 
476 		skb_free_frag(buf);
477 	} while (1);
478 	spin_unlock_bh(&q->lock);
479 
480 	if (!q->rx_page.va)
481 		return;
482 
483 	page = virt_to_page(q->rx_page.va);
484 	__page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
485 	memset(&q->rx_page, 0, sizeof(q->rx_page));
486 }
487 
488 static void
489 mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
490 {
491 	struct mt76_queue *q = &dev->q_rx[qid];
492 	int i;
493 
494 	for (i = 0; i < q->ndesc; i++)
495 		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
496 
497 	mt76_dma_rx_cleanup(dev, q);
498 	mt76_dma_sync_idx(dev, q);
499 	mt76_dma_rx_fill(dev, q);
500 
501 	if (!q->rx_head)
502 		return;
503 
504 	dev_kfree_skb(q->rx_head);
505 	q->rx_head = NULL;
506 }
507 
508 static void
509 mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
510 		  int len, bool more)
511 {
512 	struct page *page = virt_to_head_page(data);
513 	int offset = data - page_address(page);
514 	struct sk_buff *skb = q->rx_head;
515 	struct skb_shared_info *shinfo = skb_shinfo(skb);
516 
517 	if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
518 		offset += q->buf_offset;
519 		skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
520 				q->buf_size);
521 	}
522 
523 	if (more)
524 		return;
525 
526 	q->rx_head = NULL;
527 	dev->drv->rx_skb(dev, q - dev->q_rx, skb);
528 }
529 
530 static int
531 mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
532 {
533 	int len, data_len, done = 0;
534 	struct sk_buff *skb;
535 	unsigned char *data;
536 	bool more;
537 
538 	while (done < budget) {
539 		u32 info;
540 
541 		data = mt76_dma_dequeue(dev, q, false, &len, &info, &more);
542 		if (!data)
543 			break;
544 
545 		if (q->rx_head)
546 			data_len = q->buf_size;
547 		else
548 			data_len = SKB_WITH_OVERHEAD(q->buf_size);
549 
550 		if (data_len < len + q->buf_offset) {
551 			dev_kfree_skb(q->rx_head);
552 			q->rx_head = NULL;
553 
554 			skb_free_frag(data);
555 			continue;
556 		}
557 
558 		if (q->rx_head) {
559 			mt76_add_fragment(dev, q, data, len, more);
560 			continue;
561 		}
562 
563 		skb = build_skb(data, q->buf_size);
564 		if (!skb) {
565 			skb_free_frag(data);
566 			continue;
567 		}
568 		skb_reserve(skb, q->buf_offset);
569 
570 		if (q == &dev->q_rx[MT_RXQ_MCU]) {
571 			u32 *rxfce = (u32 *)skb->cb;
572 			*rxfce = info;
573 		}
574 
575 		__skb_put(skb, len);
576 		done++;
577 
578 		if (more) {
579 			q->rx_head = skb;
580 			continue;
581 		}
582 
583 		dev->drv->rx_skb(dev, q - dev->q_rx, skb);
584 	}
585 
586 	mt76_dma_rx_fill(dev, q);
587 	return done;
588 }
589 
590 static int
591 mt76_dma_rx_poll(struct napi_struct *napi, int budget)
592 {
593 	struct mt76_dev *dev;
594 	int qid, done = 0, cur;
595 
596 	dev = container_of(napi->dev, struct mt76_dev, napi_dev);
597 	qid = napi - dev->napi;
598 
599 	local_bh_disable();
600 	rcu_read_lock();
601 
602 	do {
603 		cur = mt76_dma_rx_process(dev, &dev->q_rx[qid], budget - done);
604 		mt76_rx_poll_complete(dev, qid, napi);
605 		done += cur;
606 	} while (cur && done < budget);
607 
608 	rcu_read_unlock();
609 	local_bh_enable();
610 
611 	if (done < budget && napi_complete(napi))
612 		dev->drv->rx_poll_complete(dev, qid);
613 
614 	return done;
615 }
616 
617 static int
618 mt76_dma_init(struct mt76_dev *dev)
619 {
620 	int i;
621 
622 	init_dummy_netdev(&dev->napi_dev);
623 
624 	mt76_for_each_q_rx(dev, i) {
625 		netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
626 			       64);
627 		mt76_dma_rx_fill(dev, &dev->q_rx[i]);
628 		napi_enable(&dev->napi[i]);
629 	}
630 
631 	return 0;
632 }
633 
634 static const struct mt76_queue_ops mt76_dma_ops = {
635 	.init = mt76_dma_init,
636 	.alloc = mt76_dma_alloc_queue,
637 	.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
638 	.tx_queue_skb = mt76_dma_tx_queue_skb,
639 	.tx_cleanup = mt76_dma_tx_cleanup,
640 	.rx_reset = mt76_dma_rx_reset,
641 	.kick = mt76_dma_kick_queue,
642 };
643 
644 void mt76_dma_attach(struct mt76_dev *dev)
645 {
646 	dev->queue_ops = &mt76_dma_ops;
647 }
648 EXPORT_SYMBOL_GPL(mt76_dma_attach);
649 
650 void mt76_dma_cleanup(struct mt76_dev *dev)
651 {
652 	int i;
653 
654 	mt76_worker_disable(&dev->tx_worker);
655 	netif_napi_del(&dev->tx_napi);
656 
657 	for (i = 0; i < ARRAY_SIZE(dev->phy.q_tx); i++) {
658 		mt76_dma_tx_cleanup(dev, dev->phy.q_tx[i], true);
659 		if (dev->phy2)
660 			mt76_dma_tx_cleanup(dev, dev->phy2->q_tx[i], true);
661 	}
662 
663 	for (i = 0; i < ARRAY_SIZE(dev->q_mcu); i++)
664 		mt76_dma_tx_cleanup(dev, dev->q_mcu[i], true);
665 
666 	mt76_for_each_q_rx(dev, i) {
667 		netif_napi_del(&dev->napi[i]);
668 		mt76_dma_rx_cleanup(dev, &dev->q_rx[i]);
669 	}
670 
671 	mt76_free_pending_txwi(dev);
672 }
673 EXPORT_SYMBOL_GPL(mt76_dma_cleanup);
674