xref: /openbmc/linux/drivers/mailbox/bcm-pdc-mailbox.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright 2016 Broadcom
4   */
5  
6  /*
7   * Broadcom PDC Mailbox Driver
8   * The PDC provides a ring based programming interface to one or more hardware
9   * offload engines. For example, the PDC driver works with both SPU-M and SPU2
10   * cryptographic offload hardware. In some chips the PDC is referred to as MDE,
11   * and in others the FA2/FA+ hardware is used with this PDC driver.
12   *
13   * The PDC driver registers with the Linux mailbox framework as a mailbox
14   * controller, once for each PDC instance. Ring 0 for each PDC is registered as
15   * a mailbox channel. The PDC driver uses interrupts to determine when data
16   * transfers to and from an offload engine are complete. The PDC driver defers
17   * response processing to a tasklet, so response messages are handled outside
18   * of hard interrupt context.
19   *
20   * The PDC driver allows multiple messages to be pending in the descriptor
21   * rings. The tx_msg_start descriptor index indicates where the last message
22   * starts. The txin_numd value at this index indicates how many descriptor
23   * indexes make up the message. Similar state is kept on the receive side. When
24   * an rx interrupt indicates a response is ready, the PDC driver processes numd
25   * descriptors from the tx and rx ring, thus processing one response at a time.
26   */
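/*
 * Editorial sketch (not part of the driver): the request/response path through
 * this file, summarized from the functions below. Names are the driver's own;
 * the ordering is a reading aid, not a definitive call graph.
 *
 *   pdc_send_data()                  mailbox send_data op
 *     pdc_rx_list_init()             post rx descriptor for response metadata
 *     pdc_rx_list_sg_add()           post rx descriptors for response payload
 *     pdc_tx_list_sg_add()           post tx descriptors for the request
 *     pdc_tx_list_final()            write ptr registers to start the DMA
 *
 *   pdc_irq_handler()                hard IRQ: clear/mask ints, schedule tasklet
 *     pdc_tasklet_cb()
 *       pdc_receive()
 *         pdc_receive_one()          reclaim descriptors, then
 *           mbox_chan_received_data()    hand the response to the client
 */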
27  
28  #include <linux/errno.h>
29  #include <linux/module.h>
30  #include <linux/init.h>
31  #include <linux/slab.h>
32  #include <linux/debugfs.h>
33  #include <linux/interrupt.h>
34  #include <linux/wait.h>
35  #include <linux/platform_device.h>
36  #include <linux/io.h>
37  #include <linux/of.h>
38  #include <linux/of_device.h>
39  #include <linux/of_address.h>
40  #include <linux/of_irq.h>
41  #include <linux/mailbox_controller.h>
42  #include <linux/mailbox/brcm-message.h>
43  #include <linux/scatterlist.h>
44  #include <linux/dma-direction.h>
45  #include <linux/dma-mapping.h>
46  #include <linux/dmapool.h>
47  
48  #define PDC_SUCCESS  0
49  
50  #define RING_ENTRY_SIZE   sizeof(struct dma64dd)
51  
52  /* # entries in PDC dma ring */
53  #define PDC_RING_ENTRIES  512
54  /*
55   * Minimum number of ring descriptor entries that must be free to tell mailbox
56   * framework that it can submit another request
57   */
58  #define PDC_RING_SPACE_MIN  15
59  
60  #define PDC_RING_SIZE    (PDC_RING_ENTRIES * RING_ENTRY_SIZE)
61  /* Rings are 8k aligned */
62  #define RING_ALIGN_ORDER  13
63  #define RING_ALIGN        BIT(RING_ALIGN_ORDER)
64  
65  #define RX_BUF_ALIGN_ORDER  5
66  #define RX_BUF_ALIGN	    BIT(RX_BUF_ALIGN_ORDER)
67  
68  /* descriptor bumping macros */
69  #define XXD(x, max_mask)              ((x) & (max_mask))
70  #define TXD(x, max_mask)              XXD((x), (max_mask))
71  #define RXD(x, max_mask)              XXD((x), (max_mask))
72  #define NEXTTXD(i, max_mask)          TXD((i) + 1, (max_mask))
73  #define PREVTXD(i, max_mask)          TXD((i) - 1, (max_mask))
74  #define NEXTRXD(i, max_mask)          RXD((i) + 1, (max_mask))
75  #define PREVRXD(i, max_mask)          RXD((i) - 1, (max_mask))
76  #define NTXDACTIVE(h, t, max_mask)    TXD((t) - (h), (max_mask))
77  #define NRXDACTIVE(h, t, max_mask)    RXD((t) - (h), (max_mask))
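/*
 * Illustrative arithmetic (editor's note): with PDC_RING_ENTRIES = 512, the
 * masks used with these macros are ntxpost = nrxpost = 511 (0x1ff), set in
 * pdc_hw_init(), so they implement modulo-512 ring index math. For example:
 *
 *   NEXTTXD(511, 511)        == 0                       (wraps to ring start)
 *   NTXDACTIVE(510, 5, 511)  == (5 - 510) & 0x1ff == 7  (descriptors in use)
 */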
78  
79  /* Length of BCM header at start of SPU msg, in bytes */
80  #define BCM_HDR_LEN  8
81  
82  /*
83   * PDC driver reserves ringset 0 on each SPU for its own use. The driver does
84   * not currently support use of multiple ringsets on a single PDC engine.
85   */
86  #define PDC_RINGSET  0
87  
88  /*
89   * Interrupt mask and status definitions. Enable interrupts for tx and rx on
90   * ring 0
91   */
92  #define PDC_RCVINT_0         (16 + PDC_RINGSET)
93  #define PDC_RCVINTEN_0       BIT(PDC_RCVINT_0)
94  #define PDC_INTMASK	     (PDC_RCVINTEN_0)
95  #define PDC_LAZY_FRAMECOUNT  1
96  #define PDC_LAZY_TIMEOUT     10000
97  #define PDC_LAZY_INT  (PDC_LAZY_TIMEOUT | (PDC_LAZY_FRAMECOUNT << 24))
98  #define PDC_INTMASK_OFFSET   0x24
99  #define PDC_INTSTATUS_OFFSET 0x20
100  #define PDC_RCVLAZY0_OFFSET  (0x30 + 4 * PDC_RINGSET)
101  #define FA_RCVLAZY0_OFFSET   0x100
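/*
 * Editor's note, worked value: PDC_LAZY_INT = 10000 | (1 << 24) = 0x01002710,
 * i.e. a frame count of 1 in bits 31:24 and a timeout of 10000 (in
 * hardware-defined units) in bits 23:0, so a receive interrupt is raised as
 * soon as one frame has arrived or the timeout expires.
 */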
102  
103  /*
104   * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata
105   * before frame
106   */
107  #define PDC_SPU2_RESP_HDR_LEN  17
108  #define PDC_CKSUM_CTRL         BIT(27)
109  #define PDC_CKSUM_CTRL_OFFSET  0x400
110  
111  #define PDC_SPUM_RESP_HDR_LEN  32
112  
113  /*
114   * Sets the following bits for write to transmit control reg:
115   * 11    - PtyChkDisable - parity check is disabled
116   * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory
117   */
118  #define PDC_TX_CTL		0x000C0800
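/*
 * Editor's note, bit arithmetic: 0x000C0800 = (3 << 18) | BIT(11), i.e.
 * BurstLen = 3 in bits 20:18 plus PtyChkDisable in bit 11, matching the field
 * description above.
 */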
119  
120  /* Bit in tx control reg to enable tx channel */
121  #define PDC_TX_ENABLE		0x1
122  
123  /*
124   * Sets the following bits for write to receive control reg:
125   * 7:1   - RcvOffset - size in bytes of status region at start of rx frame buf
126   * 9     - SepRxHdrDescEn - place start of new frames only in descriptors
127   *                          that have StartOfFrame set
128   * 10    - OflowContinue - on rx FIFO overflow, clear rx fifo, discard all
129   *                         remaining bytes in current frame, report error
130   *                         in rx frame status for current frame
131   * 11    - PtyChkDisable - parity check is disabled
132   * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory
133   */
134  #define PDC_RX_CTL		0x000C0E00
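/*
 * Editor's note, bit arithmetic: 0x000C0E00 = (3 << 18) | BIT(11) | BIT(10) |
 * BIT(9). The RcvOffset field in bits 7:1 is zero in this constant and is
 * filled in at runtime by adding (rx_status_len << 1) when the register is
 * written (see pdc_hw_init() and pdc_ring_init()).
 */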
135  
136  /* Bit in rx control reg to enable rx channel */
137  #define PDC_RX_ENABLE		0x1
138  
139  #define CRYPTO_D64_RS0_CD_MASK   ((PDC_RING_ENTRIES * RING_ENTRY_SIZE) - 1)
140  
141  /* descriptor flags */
142  #define D64_CTRL1_EOT   BIT(28)	/* end of descriptor table */
143  #define D64_CTRL1_IOC   BIT(29)	/* interrupt on complete */
144  #define D64_CTRL1_EOF   BIT(30)	/* end of frame */
145  #define D64_CTRL1_SOF   BIT(31)	/* start of frame */
146  
147  #define RX_STATUS_OVERFLOW       0x00800000
148  #define RX_STATUS_LEN            0x0000FFFF
149  
150  #define PDC_TXREGS_OFFSET  0x200
151  #define PDC_RXREGS_OFFSET  0x220
152  
153  /* Maximum size buffer the DMA engine can handle */
154  #define PDC_DMA_BUF_MAX 16384
155  
156  enum pdc_hw {
157  	FA_HW,		/* FA2/FA+ hardware (i.e. Northstar Plus) */
158  	PDC_HW		/* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */
159  };
160  
161  struct pdc_dma_map {
162  	void *ctx;          /* opaque context associated with frame */
163  };
164  
165  /* dma descriptor */
166  struct dma64dd {
167  	u32 ctrl1;      /* misc control bits */
168  	u32 ctrl2;      /* buffer count and address extension */
169  	u32 addrlow;    /* memory address of the data buffer, bits 31:0 */
170  	u32 addrhigh;   /* memory address of the data buffer, bits 63:32 */
171  };
172  
173  /* dma registers per channel(xmt or rcv) */
174  struct dma64_regs {
175  	u32  control;   /* enable, et al */
176  	u32  ptr;       /* last descriptor posted to chip */
177  	u32  addrlow;   /* descriptor ring base address low 32-bits */
178  	u32  addrhigh;  /* descriptor ring base address bits 63:32 */
179  	u32  status0;   /* last rx descriptor written by hw */
180  	u32  status1;   /* driver does not use */
181  };
182  
183  /* cpp contortions to concatenate w/arg prescan */
184  #ifndef PAD
185  #define _PADLINE(line)  pad ## line
186  #define _XSTR(line)     _PADLINE(line)
187  #define PAD             _XSTR(__LINE__)
188  #endif  /* PAD */
189  
190  /* dma registers. matches hw layout. */
191  struct dma64 {
192  	struct dma64_regs dmaxmt;  /* dma tx */
193  	u32          PAD[2];
194  	struct dma64_regs dmarcv;  /* dma rx */
195  	u32          PAD[2];
196  };
197  
198  /* PDC registers */
199  struct pdc_regs {
200  	u32  devcontrol;             /* 0x000 */
201  	u32  devstatus;              /* 0x004 */
202  	u32  PAD;
203  	u32  biststatus;             /* 0x00c */
204  	u32  PAD[4];
205  	u32  intstatus;              /* 0x020 */
206  	u32  intmask;                /* 0x024 */
207  	u32  gptimer;                /* 0x028 */
208  
209  	u32  PAD;
210  	u32  intrcvlazy_0;           /* 0x030 (Only in PDC, not FA2) */
211  	u32  intrcvlazy_1;           /* 0x034 (Only in PDC, not FA2) */
212  	u32  intrcvlazy_2;           /* 0x038 (Only in PDC, not FA2) */
213  	u32  intrcvlazy_3;           /* 0x03c (Only in PDC, not FA2) */
214  
215  	u32  PAD[48];
216  	u32  fa_intrecvlazy;         /* 0x100 (Only in FA2, not PDC) */
217  	u32  flowctlthresh;          /* 0x104 */
218  	u32  wrrthresh;              /* 0x108 */
219  	u32  gmac_idle_cnt_thresh;   /* 0x10c */
220  
221  	u32  PAD[4];
222  	u32  ifioaccessaddr;         /* 0x120 */
223  	u32  ifioaccessbyte;         /* 0x124 */
224  	u32  ifioaccessdata;         /* 0x128 */
225  
226  	u32  PAD[21];
227  	u32  phyaccess;              /* 0x180 */
228  	u32  PAD;
229  	u32  phycontrol;             /* 0x188 */
230  	u32  txqctl;                 /* 0x18c */
231  	u32  rxqctl;                 /* 0x190 */
232  	u32  gpioselect;             /* 0x194 */
233  	u32  gpio_output_en;         /* 0x198 */
234  	u32  PAD;                    /* 0x19c */
235  	u32  txq_rxq_mem_ctl;        /* 0x1a0 */
236  	u32  memory_ecc_status;      /* 0x1a4 */
237  	u32  serdes_ctl;             /* 0x1a8 */
238  	u32  serdes_status0;         /* 0x1ac */
239  	u32  serdes_status1;         /* 0x1b0 */
240  	u32  PAD[11];                /* 0x1b4-1dc */
241  	u32  clk_ctl_st;             /* 0x1e0 */
242  	u32  hw_war;                 /* 0x1e4 (Only in PDC, not FA2) */
243  	u32  pwrctl;                 /* 0x1e8 */
244  	u32  PAD[5];
245  
246  #define PDC_NUM_DMA_RINGS   4
247  	struct dma64 dmaregs[PDC_NUM_DMA_RINGS];  /* 0x0200 - 0x2fc */
248  
249  	/* more registers follow, but we don't use them */
250  };
251  
252  /* structure for allocating/freeing DMA rings */
253  struct pdc_ring_alloc {
254  	dma_addr_t  dmabase; /* DMA address of start of ring */
255  	void	   *vbase;   /* base kernel virtual address of ring */
256  	u32	    size;    /* ring allocation size in bytes */
257  };
258  
259  /*
260   * context associated with a receive descriptor.
261   * @rxp_ctx: opaque context associated with frame that starts at each
262   *           rx ring index.
263   * @dst_sg:  Scatterlist used to form reply frames beginning at a given ring
264   *           index. Retained in order to unmap each sg after reply is processed.
265   * @rxin_numd: Number of rx descriptors associated with the message that starts
266   *             at a descriptor index. Not set for every index. For example,
267   *             if descriptor index i points to a scatterlist with 4 entries,
268   *             then the next three descriptor indexes don't have a value set.
269   * @resp_hdr: Virtual address of buffer used to catch DMA rx status
270   * @resp_hdr_daddr: physical address of DMA rx status buffer
271   */
272  struct pdc_rx_ctx {
273  	void *rxp_ctx;
274  	struct scatterlist *dst_sg;
275  	u32  rxin_numd;
276  	void *resp_hdr;
277  	dma_addr_t resp_hdr_daddr;
278  };
279  
280  /* PDC state structure */
281  struct pdc_state {
282  	/* Index of the PDC whose state is in this structure instance */
283  	u8 pdc_idx;
284  
285  	/* Platform device for this PDC instance */
286  	struct platform_device *pdev;
287  
288  	/*
289  	 * Each PDC instance has a mailbox controller. PDC receives request
290  	 * messages through mailboxes, and sends response messages through the
291  	 * mailbox framework.
292  	 */
293  	struct mbox_controller mbc;
294  
295  	unsigned int pdc_irq;
296  
297  	/* tasklet for deferred processing after DMA rx interrupt */
298  	struct tasklet_struct rx_tasklet;
299  
300  	/* Number of bytes of receive status prior to each rx frame */
301  	u32 rx_status_len;
302  	/* Whether a BCM header is prepended to each frame */
303  	bool use_bcm_hdr;
304  	/* Sum of length of BCM header and rx status header */
305  	u32 pdc_resp_hdr_len;
306  
307  	/* The base virtual address of DMA hw registers */
308  	void __iomem *pdc_reg_vbase;
309  
310  	/* Pool for allocation of DMA rings */
311  	struct dma_pool *ring_pool;
312  
313  	/* Pool for allocation of metadata buffers for response messages */
314  	struct dma_pool *rx_buf_pool;
315  
316  	/*
317  	 * The base virtual address of DMA tx/rx descriptor rings. Corresponding
318  	 * DMA address and size of ring allocation.
319  	 */
320  	struct pdc_ring_alloc tx_ring_alloc;
321  	struct pdc_ring_alloc rx_ring_alloc;
322  
323  	struct pdc_regs *regs;    /* start of PDC registers */
324  
325  	struct dma64_regs *txregs_64; /* dma tx engine registers */
326  	struct dma64_regs *rxregs_64; /* dma rx engine registers */
327  
328  	/*
329  	 * Arrays of PDC_RING_ENTRIES descriptors
330  	 * To use multiple ringsets, this needs to be extended
331  	 */
332  	struct dma64dd   *txd_64;  /* tx descriptor ring */
333  	struct dma64dd   *rxd_64;  /* rx descriptor ring */
334  
335  	/* descriptor ring sizes */
336  	u32      ntxd;       /* # tx descriptors */
337  	u32      nrxd;       /* # rx descriptors */
338  	u32      nrxpost;    /* # rx buffers to keep posted */
339  	u32      ntxpost;    /* max number of tx buffers that can be posted */
340  
341  	/*
342  	 * Index of next tx descriptor to reclaim. That is, the descriptor
343  	 * index of the oldest tx buffer for which the host has yet to process
344  	 * the corresponding response.
345  	 */
346  	u32  txin;
347  
348  	/*
349  	 * Index of the first transmit descriptor for the sequence of
350  	 * message fragments currently under construction. Used to build up
351  	 * the txin_numd count for a message. Updated to txout when the host
352  	 * starts a new sequence of tx buffers for a new message.
353  	 */
354  	u32  tx_msg_start;
355  
356  	/* Index of next tx descriptor to post. */
357  	u32  txout;
358  
359  	/*
360  	 * Number of tx descriptors associated with the message that starts
361  	 * at this tx descriptor index.
362  	 */
363  	u32      txin_numd[PDC_RING_ENTRIES];
364  
365  	/*
366  	 * Index of next rx descriptor to reclaim. This is the index of
367  	 * the next descriptor whose data has yet to be processed by the host.
368  	 */
369  	u32  rxin;
370  
371  	/*
372  	 * Index of the first receive descriptor for the sequence of
373  	 * message fragments currently under construction. Used to build up
374  	 * the rxin_numd count for a message. Updated to rxout when the host
375  	 * starts a new sequence of rx buffers for a new message.
376  	 */
377  	u32  rx_msg_start;
378  
379  	/*
380  	 * Saved value of current hardware rx descriptor index.
381  	 * The last rx buffer written by the hw is the index previous to
382  	 * this one.
383  	 */
384  	u32  last_rx_curr;
385  
386  	/* Index of next rx descriptor to post. */
387  	u32  rxout;
388  
389  	struct pdc_rx_ctx rx_ctx[PDC_RING_ENTRIES];
390  
391  	/*
392  	 * Scatterlists used to form request and reply frames beginning at a
393  	 * given ring index. Retained in order to unmap each sg after reply
394  	 * is processed
395  	 */
396  	struct scatterlist *src_sg[PDC_RING_ENTRIES];
397  
398  	/* counters */
399  	u32  pdc_requests;     /* number of request messages submitted */
400  	u32  pdc_replies;      /* number of reply messages received */
401  	u32  last_tx_not_done; /* too few tx descriptors to indicate done */
402  	u32  tx_ring_full;     /* unable to accept msg because tx ring full */
403  	u32  rx_ring_full;     /* unable to accept msg because rx ring full */
404  	u32  txnobuf;          /* unable to create tx descriptor */
405  	u32  rxnobuf;          /* unable to create rx descriptor */
406  	u32  rx_oflow;         /* count of rx overflows */
407  
408  	/* hardware type - FA2 or PDC/MDE */
409  	enum pdc_hw hw_type;
410  };
411  
412  /* Global variables */
413  
414  struct pdc_globals {
415  	/* Actual number of SPUs in hardware, as reported by device tree */
416  	u32 num_spu;
417  };
418  
419  static struct pdc_globals pdcg;
420  
421  /* top level debug FS directory for PDC driver */
422  static struct dentry *debugfs_dir;
423  
424  static ssize_t pdc_debugfs_read(struct file *filp, char __user *ubuf,
425  				size_t count, loff_t *offp)
426  {
427  	struct pdc_state *pdcs;
428  	char *buf;
429  	ssize_t ret, out_offset, out_count;
430  
431  	out_count = 512;
432  
433  	buf = kmalloc(out_count, GFP_KERNEL);
434  	if (!buf)
435  		return -ENOMEM;
436  
437  	pdcs = filp->private_data;
438  	out_offset = 0;
439  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
440  			       "SPU %u stats:\n", pdcs->pdc_idx);
441  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
442  			       "PDC requests....................%u\n",
443  			       pdcs->pdc_requests);
444  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
445  			       "PDC responses...................%u\n",
446  			       pdcs->pdc_replies);
447  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
448  			       "Tx not done.....................%u\n",
449  			       pdcs->last_tx_not_done);
450  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
451  			       "Tx ring full....................%u\n",
452  			       pdcs->tx_ring_full);
453  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
454  			       "Rx ring full....................%u\n",
455  			       pdcs->rx_ring_full);
456  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
457  			       "Tx desc write fail. Ring full...%u\n",
458  			       pdcs->txnobuf);
459  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
460  			       "Rx desc write fail. Ring full...%u\n",
461  			       pdcs->rxnobuf);
462  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
463  			       "Receive overflow................%u\n",
464  			       pdcs->rx_oflow);
465  	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
466  			       "Num frags in rx ring............%u\n",
467  			       NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr,
468  					  pdcs->nrxpost));
469  
470  	if (out_offset > out_count)
471  		out_offset = out_count;
472  
473  	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
474  	kfree(buf);
475  	return ret;
476  }
477  
478  static const struct file_operations pdc_debugfs_stats = {
479  	.owner = THIS_MODULE,
480  	.open = simple_open,
481  	.read = pdc_debugfs_read,
482  };
483  
484  /**
485   * pdc_setup_debugfs() - Create the debug FS directories. If the top-level
486   * directory has not yet been created, create it now. Create a stats file in
487   * this directory for a SPU.
488   * @pdcs: PDC state structure
489   */
490  static void pdc_setup_debugfs(struct pdc_state *pdcs)
491  {
492  	char spu_stats_name[16];
493  
494  	if (!debugfs_initialized())
495  		return;
496  
497  	snprintf(spu_stats_name, 16, "pdc%d_stats", pdcs->pdc_idx);
498  	if (!debugfs_dir)
499  		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
500  
501  	/* S_IRUSR == 0400 */
502  	debugfs_create_file(spu_stats_name, 0400, debugfs_dir, pdcs,
503  			    &pdc_debugfs_stats);
504  }
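/*
 * Usage sketch (editor's note): with debugfs mounted in the usual place, the
 * per-PDC counters emitted by pdc_debugfs_read() can be read with something
 * like:
 *
 *   cat /sys/kernel/debug/<KBUILD_MODNAME>/pdc0_stats
 *
 * The directory name is whatever KBUILD_MODNAME expands to for this module;
 * the debugfs mount point may differ on a given system.
 */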
505  
506  static void pdc_free_debugfs(void)
507  {
508  	debugfs_remove_recursive(debugfs_dir);
509  	debugfs_dir = NULL;
510  }
511  
512  /**
513   * pdc_build_rxd() - Build DMA descriptor to receive SPU result.
514   * @pdcs:      PDC state for SPU that will generate result
515   * @dma_addr:  DMA address of buffer that descriptor is being built for
516   * @buf_len:   Length of the receive buffer, in bytes
517   * @flags:     Flags to be stored in descriptor
518   */
519  static inline void
520  pdc_build_rxd(struct pdc_state *pdcs, dma_addr_t dma_addr,
521  	      u32 buf_len, u32 flags)
522  {
523  	struct device *dev = &pdcs->pdev->dev;
524  	struct dma64dd *rxd = &pdcs->rxd_64[pdcs->rxout];
525  
526  	dev_dbg(dev,
527  		"Writing rx descriptor for PDC %u at index %u with length %u. flags %#x\n",
528  		pdcs->pdc_idx, pdcs->rxout, buf_len, flags);
529  
530  	rxd->addrlow = cpu_to_le32(lower_32_bits(dma_addr));
531  	rxd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr));
532  	rxd->ctrl1 = cpu_to_le32(flags);
533  	rxd->ctrl2 = cpu_to_le32(buf_len);
534  
535  	/* bump ring index and return */
536  	pdcs->rxout = NEXTRXD(pdcs->rxout, pdcs->nrxpost);
537  }
538  
539  /**
540   * pdc_build_txd() - Build a DMA descriptor to transmit a SPU request to
541   * hardware.
542   * @pdcs:        PDC state for the SPU that will process this request
543   * @dma_addr:    DMA address of packet to be transmitted
544   * @buf_len:     Length of tx buffer, in bytes
545   * @flags:       Flags to be stored in descriptor
546   */
547  static inline void
548  pdc_build_txd(struct pdc_state *pdcs, dma_addr_t dma_addr, u32 buf_len,
549  	      u32 flags)
550  {
551  	struct device *dev = &pdcs->pdev->dev;
552  	struct dma64dd *txd = &pdcs->txd_64[pdcs->txout];
553  
554  	dev_dbg(dev,
555  		"Writing tx descriptor for PDC %u at index %u with length %u, flags %#x\n",
556  		pdcs->pdc_idx, pdcs->txout, buf_len, flags);
557  
558  	txd->addrlow = cpu_to_le32(lower_32_bits(dma_addr));
559  	txd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr));
560  	txd->ctrl1 = cpu_to_le32(flags);
561  	txd->ctrl2 = cpu_to_le32(buf_len);
562  
563  	/* bump ring index and return */
564  	pdcs->txout = NEXTTXD(pdcs->txout, pdcs->ntxpost);
565  }
566  
567  /**
568   * pdc_receive_one() - Receive a response message from a given SPU.
569   * @pdcs:    PDC state for the SPU to receive from
570   *
571   * When the return code indicates success, the response message is available in
572   * the receive buffers provided prior to submission of the request.
573   *
574   * Return:  PDC_SUCCESS if one or more receive descriptors was processed
575   *          -EAGAIN indicates that no response message is available
576   *          -EIO an error occurred
577   */
578  static int
579  pdc_receive_one(struct pdc_state *pdcs)
580  {
581  	struct device *dev = &pdcs->pdev->dev;
582  	struct mbox_controller *mbc;
583  	struct mbox_chan *chan;
584  	struct brcm_message mssg;
585  	u32 len, rx_status;
586  	u32 num_frags;
587  	u8 *resp_hdr;    /* virtual addr of start of resp message DMA header */
588  	u32 frags_rdy;   /* number of fragments ready to read */
589  	u32 rx_idx;      /* ring index of start of receive frame */
590  	dma_addr_t resp_hdr_daddr;
591  	struct pdc_rx_ctx *rx_ctx;
592  
593  	mbc = &pdcs->mbc;
594  	chan = &mbc->chans[0];
595  	mssg.type = BRCM_MESSAGE_SPU;
596  
597  	/*
598  	 * return if a complete response message is not yet ready.
599  	 * rxin_numd[rxin] is the number of fragments in the next msg
600  	 * to read.
601  	 */
602  	frags_rdy = NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr, pdcs->nrxpost);
603  	if ((frags_rdy == 0) ||
604  	    (frags_rdy < pdcs->rx_ctx[pdcs->rxin].rxin_numd))
605  		/* No response ready */
606  		return -EAGAIN;
607  
608  	num_frags = pdcs->txin_numd[pdcs->txin];
609  	WARN_ON(num_frags == 0);
610  
611  	dma_unmap_sg(dev, pdcs->src_sg[pdcs->txin],
612  		     sg_nents(pdcs->src_sg[pdcs->txin]), DMA_TO_DEVICE);
613  
614  	pdcs->txin = (pdcs->txin + num_frags) & pdcs->ntxpost;
615  
616  	dev_dbg(dev, "PDC %u reclaimed %d tx descriptors",
617  		pdcs->pdc_idx, num_frags);
618  
619  	rx_idx = pdcs->rxin;
620  	rx_ctx = &pdcs->rx_ctx[rx_idx];
621  	num_frags = rx_ctx->rxin_numd;
622  	/* Return opaque context with result */
623  	mssg.ctx = rx_ctx->rxp_ctx;
624  	rx_ctx->rxp_ctx = NULL;
625  	resp_hdr = rx_ctx->resp_hdr;
626  	resp_hdr_daddr = rx_ctx->resp_hdr_daddr;
627  	dma_unmap_sg(dev, rx_ctx->dst_sg, sg_nents(rx_ctx->dst_sg),
628  		     DMA_FROM_DEVICE);
629  
630  	pdcs->rxin = (pdcs->rxin + num_frags) & pdcs->nrxpost;
631  
632  	dev_dbg(dev, "PDC %u reclaimed %d rx descriptors",
633  		pdcs->pdc_idx, num_frags);
634  
635  	dev_dbg(dev,
636  		"PDC %u txin %u, txout %u, rxin %u, rxout %u, last_rx_curr %u\n",
637  		pdcs->pdc_idx, pdcs->txin, pdcs->txout, pdcs->rxin,
638  		pdcs->rxout, pdcs->last_rx_curr);
639  
640  	if (pdcs->pdc_resp_hdr_len == PDC_SPUM_RESP_HDR_LEN) {
641  		/*
642  		 * For SPU-M, get length of response msg and rx overflow status.
643  		 */
644  		rx_status = *((u32 *)resp_hdr);
645  		len = rx_status & RX_STATUS_LEN;
646  		dev_dbg(dev,
647  			"SPU response length %u bytes", len);
648  		if (unlikely(((rx_status & RX_STATUS_OVERFLOW) || (!len)))) {
649  			if (rx_status & RX_STATUS_OVERFLOW) {
650  				dev_err_ratelimited(dev,
651  						    "crypto receive overflow");
652  				pdcs->rx_oflow++;
653  			} else {
654  				dev_info_ratelimited(dev, "crypto rx len = 0");
655  			}
656  			return -EIO;
657  		}
658  	}
659  
660  	dma_pool_free(pdcs->rx_buf_pool, resp_hdr, resp_hdr_daddr);
661  
662  	mbox_chan_received_data(chan, &mssg);
663  
664  	pdcs->pdc_replies++;
665  	return PDC_SUCCESS;
666  }
667  
668  /**
669   * pdc_receive() - Process as many responses as are available in the rx ring.
670   * @pdcs:  PDC state
671   *
672   * Called from the rx tasklet after a DMA receive interrupt.
673   * Return: 0
674   */
675  static int
676  pdc_receive(struct pdc_state *pdcs)
677  {
678  	int rx_status;
679  
680  	/* read last_rx_curr from register once */
681  	pdcs->last_rx_curr =
682  	    (ioread32((const void __iomem *)&pdcs->rxregs_64->status0) &
683  	     CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE;
684  
685  	do {
686  		/* Could be many frames ready */
687  		rx_status = pdc_receive_one(pdcs);
688  	} while (rx_status == PDC_SUCCESS);
689  
690  	return 0;
691  }
692  
693  /**
694   * pdc_tx_list_sg_add() - Add the buffers in a scatterlist to the transmit
695   * descriptors for a given SPU. The scatterlist buffers contain the data for a
696   * SPU request message.
697   * @pdcs:      PDC state for the SPU that will process this request
698   * @sg:        Scatterlist whose buffers contain part of the SPU request
699   *
700   * If a scatterlist buffer is larger than PDC_DMA_BUF_MAX, multiple descriptors
701   * are written for that buffer, each <= PDC_DMA_BUF_MAX bytes in length.
702   *
703   * Return: PDC_SUCCESS if successful
704   *         < 0 otherwise
705   */
706  static int pdc_tx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg)
707  {
708  	u32 flags = 0;
709  	u32 eot;
710  	u32 tx_avail;
711  
712  	/*
713  	 * Num descriptors needed. Conservatively assume we need a descriptor
714  	 * for every entry in sg.
715  	 */
716  	u32 num_desc;
717  	u32 desc_w = 0;	/* Number of tx descriptors written */
718  	u32 bufcnt;	/* Number of bytes of buffer pointed to by descriptor */
719  	dma_addr_t databufptr;	/* DMA address to put in descriptor */
720  
721  	num_desc = (u32)sg_nents(sg);
722  
723  	/* check whether enough tx descriptors are available */
724  	tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout,
725  					      pdcs->ntxpost);
726  	if (unlikely(num_desc > tx_avail)) {
727  		pdcs->txnobuf++;
728  		return -ENOSPC;
729  	}
730  
731  	/* build tx descriptors */
732  	if (pdcs->tx_msg_start == pdcs->txout) {
733  		/* Start of frame */
734  		pdcs->txin_numd[pdcs->tx_msg_start] = 0;
735  		pdcs->src_sg[pdcs->txout] = sg;
736  		flags = D64_CTRL1_SOF;
737  	}
738  
739  	while (sg) {
740  		if (unlikely(pdcs->txout == (pdcs->ntxd - 1)))
741  			eot = D64_CTRL1_EOT;
742  		else
743  			eot = 0;
744  
745  		/*
746  		 * If sg buffer larger than PDC limit, split across
747  		 * multiple descriptors
748  		 */
749  		bufcnt = sg_dma_len(sg);
750  		databufptr = sg_dma_address(sg);
751  		while (bufcnt > PDC_DMA_BUF_MAX) {
752  			pdc_build_txd(pdcs, databufptr, PDC_DMA_BUF_MAX,
753  				      flags | eot);
754  			desc_w++;
755  			bufcnt -= PDC_DMA_BUF_MAX;
756  			databufptr += PDC_DMA_BUF_MAX;
757  			if (unlikely(pdcs->txout == (pdcs->ntxd - 1)))
758  				eot = D64_CTRL1_EOT;
759  			else
760  				eot = 0;
761  		}
762  		sg = sg_next(sg);
763  		if (!sg)
764  			/* Writing last descriptor for frame */
765  			flags |= (D64_CTRL1_EOF | D64_CTRL1_IOC);
766  		pdc_build_txd(pdcs, databufptr, bufcnt, flags | eot);
767  		desc_w++;
768  		/* Clear start of frame after first descriptor */
769  		flags &= ~D64_CTRL1_SOF;
770  	}
771  	pdcs->txin_numd[pdcs->tx_msg_start] += desc_w;
772  
773  	return PDC_SUCCESS;
774  }
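/*
 * Worked example (editor's note): a single scatterlist entry of 40000 bytes
 * with PDC_DMA_BUF_MAX = 16384 is split by the inner loop above into three tx
 * descriptors of 16384, 16384 and 7232 bytes; EOF and IOC are OR'd in only on
 * the last descriptor written for the frame.
 */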
775  
776  /**
777   * pdc_tx_list_final() - Initiate DMA transfer of last frame written to tx
778   * ring.
779   * @pdcs:  PDC state for SPU to process the request
780   *
781   * Sets the index of the last descriptor written in both the rx and tx ring.
782   *
783   * Return: PDC_SUCCESS
784   */
785  static int pdc_tx_list_final(struct pdc_state *pdcs)
786  {
787  	/*
788  	 * write barrier to ensure all register writes are complete
789  	 * before chip starts to process new request
790  	 */
791  	wmb();
792  	iowrite32(pdcs->rxout << 4, &pdcs->rxregs_64->ptr);
793  	iowrite32(pdcs->txout << 4, &pdcs->txregs_64->ptr);
794  	pdcs->pdc_requests++;
795  
796  	return PDC_SUCCESS;
797  }
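/*
 * Editor's note: the << 4 above converts a descriptor index into a byte
 * offset within the ring, since RING_ENTRY_SIZE = sizeof(struct dma64dd) =
 * 16 bytes.
 */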
798  
799  /**
800   * pdc_rx_list_init() - Start a new receive descriptor list for a given PDC.
801   * @pdcs:   PDC state for SPU handling request
802   * @dst_sg: scatterlist providing rx buffers for response to be returned to
803   *	    mailbox client
804   * @ctx:    Opaque context for this request
805   *
806   * Posts a single receive descriptor to hold the metadata that precedes a
807   * response. For example, with SPU-M, the metadata is a 32-byte DMA header and
808   * an 8-byte BCM header. Moves the msg_start descriptor indexes for both tx and
809   * rx to indicate the start of a new message.
810   *
811   * Return:  PDC_SUCCESS if successful
812   *          < 0 if an error (e.g., rx ring is full)
813   */
814  static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg,
815  			    void *ctx)
816  {
817  	u32 flags = 0;
818  	u32 rx_avail;
819  	u32 rx_pkt_cnt = 1;	/* Adding a single rx buffer */
820  	dma_addr_t daddr;
821  	void *vaddr;
822  	struct pdc_rx_ctx *rx_ctx;
823  
824  	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
825  					      pdcs->nrxpost);
826  	if (unlikely(rx_pkt_cnt > rx_avail)) {
827  		pdcs->rxnobuf++;
828  		return -ENOSPC;
829  	}
830  
831  	/* allocate a buffer for the dma rx status */
832  	vaddr = dma_pool_zalloc(pdcs->rx_buf_pool, GFP_ATOMIC, &daddr);
833  	if (unlikely(!vaddr))
834  		return -ENOMEM;
835  
836  	/*
837  	 * Update msg_start indexes for both tx and rx to indicate the start
838  	 * of a new sequence of descriptor indexes that contain the fragments
839  	 * of the same message.
840  	 */
841  	pdcs->rx_msg_start = pdcs->rxout;
842  	pdcs->tx_msg_start = pdcs->txout;
843  
844  	/* This is always the first descriptor in the receive sequence */
845  	flags = D64_CTRL1_SOF;
846  	pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd = 1;
847  
848  	if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
849  		flags |= D64_CTRL1_EOT;
850  
851  	rx_ctx = &pdcs->rx_ctx[pdcs->rxout];
852  	rx_ctx->rxp_ctx = ctx;
853  	rx_ctx->dst_sg = dst_sg;
854  	rx_ctx->resp_hdr = vaddr;
855  	rx_ctx->resp_hdr_daddr = daddr;
856  	pdc_build_rxd(pdcs, daddr, pdcs->pdc_resp_hdr_len, flags);
857  	return PDC_SUCCESS;
858  }
859  
860  /**
861   * pdc_rx_list_sg_add() - Add the buffers in a scatterlist to the receive
862   * descriptors for a given SPU. The caller must have already DMA mapped the
863   * scatterlist.
864   * @pdcs:       PDC state for the SPU that will process this request
865   * @sg:         Scatterlist whose buffers are added to the receive ring
866   *
867   * If a receive buffer in the scatterlist is larger than PDC_DMA_BUF_MAX,
868   * multiple receive descriptors are written, each with a buffer <=
869   * PDC_DMA_BUF_MAX.
870   *
871   * Return: PDC_SUCCESS if successful
872   *         < 0 otherwise (e.g., receive ring is full)
873   */
874  static int pdc_rx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg)
875  {
876  	u32 flags = 0;
877  	u32 rx_avail;
878  
879  	/*
880  	 * Num descriptors needed. Conservatively assume we need a descriptor
881  	 * for every entry from our starting point in the scatterlist.
882  	 */
883  	u32 num_desc;
884  	u32 desc_w = 0;	/* Number of tx descriptors written */
885  	u32 bufcnt;	/* Number of bytes of buffer pointed to by descriptor */
886  	dma_addr_t databufptr;	/* DMA address to put in descriptor */
887  
888  	num_desc = (u32)sg_nents(sg);
889  
890  	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
891  					      pdcs->nrxpost);
892  	if (unlikely(num_desc > rx_avail)) {
893  		pdcs->rxnobuf++;
894  		return -ENOSPC;
895  	}
896  
897  	while (sg) {
898  		if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
899  			flags = D64_CTRL1_EOT;
900  		else
901  			flags = 0;
902  
903  		/*
904  		 * If sg buffer larger than PDC limit, split across
905  		 * multiple descriptors
906  		 */
907  		bufcnt = sg_dma_len(sg);
908  		databufptr = sg_dma_address(sg);
909  		while (bufcnt > PDC_DMA_BUF_MAX) {
910  			pdc_build_rxd(pdcs, databufptr, PDC_DMA_BUF_MAX, flags);
911  			desc_w++;
912  			bufcnt -= PDC_DMA_BUF_MAX;
913  			databufptr += PDC_DMA_BUF_MAX;
914  			if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
915  				flags = D64_CTRL1_EOT;
916  			else
917  				flags = 0;
918  		}
919  		pdc_build_rxd(pdcs, databufptr, bufcnt, flags);
920  		desc_w++;
921  		sg = sg_next(sg);
922  	}
923  	pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd += desc_w;
924  
925  	return PDC_SUCCESS;
926  }
927  
928  /**
929   * pdc_irq_handler() - Interrupt handler called in interrupt context.
930   * @irq:      Interrupt number that has fired
931   * @data:     device struct for DMA engine that generated the interrupt
932   *
933   * Clears and masks the device interrupt status flags, then schedules the
934   * rx tasklet to do the deferred receive processing outside of hard
935   * interrupt context.
936   *
937   * Return: IRQ_HANDLED if the interrupt was ours
938   *         IRQ_NONE otherwise
939   */
940  static irqreturn_t pdc_irq_handler(int irq, void *data)
941  {
942  	struct device *dev = (struct device *)data;
943  	struct pdc_state *pdcs = dev_get_drvdata(dev);
944  	u32 intstatus = ioread32(pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET);
945  
946  	if (unlikely(intstatus == 0))
947  		return IRQ_NONE;
948  
949  	/* Disable interrupts until soft handler runs */
950  	iowrite32(0, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
951  
952  	/* Clear interrupt flags in device */
953  	iowrite32(intstatus, pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET);
954  
955  	/* Wakeup IRQ thread */
956  	tasklet_schedule(&pdcs->rx_tasklet);
957  	return IRQ_HANDLED;
958  }
959  
960  /**
961   * pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after
962   * a DMA receive interrupt. Reenables the receive interrupt.
963   * @t: Pointer to the rx_tasklet embedded in the PDC state structure
964   */
965  static void pdc_tasklet_cb(struct tasklet_struct *t)
966  {
967  	struct pdc_state *pdcs = from_tasklet(pdcs, t, rx_tasklet);
968  
969  	pdc_receive(pdcs);
970  
971  	/* reenable interrupts */
972  	iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
973  }
974  
975  /**
976   * pdc_ring_init() - Allocate DMA rings and initialize constant fields of
977   * descriptors in one ringset.
978   * @pdcs:    PDC instance state
979   * @ringset: index of ringset being used
980   *
981   * Return: PDC_SUCCESS if ring initialized
982   *         < 0 otherwise
983   */
984  static int pdc_ring_init(struct pdc_state *pdcs, int ringset)
985  {
986  	int i;
987  	int err = PDC_SUCCESS;
988  	struct dma64 *dma_reg;
989  	struct device *dev = &pdcs->pdev->dev;
990  	struct pdc_ring_alloc tx;
991  	struct pdc_ring_alloc rx;
992  
993  	/* Allocate tx ring */
994  	tx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &tx.dmabase);
995  	if (unlikely(!tx.vbase)) {
996  		err = -ENOMEM;
997  		goto done;
998  	}
999  
1000  	/* Allocate rx ring */
1001  	rx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &rx.dmabase);
1002  	if (unlikely(!rx.vbase)) {
1003  		err = -ENOMEM;
1004  		goto fail_dealloc;
1005  	}
1006  
1007  	dev_dbg(dev, " - base DMA addr of tx ring      %pad", &tx.dmabase);
1008  	dev_dbg(dev, " - base virtual addr of tx ring  %p", tx.vbase);
1009  	dev_dbg(dev, " - base DMA addr of rx ring      %pad", &rx.dmabase);
1010  	dev_dbg(dev, " - base virtual addr of rx ring  %p", rx.vbase);
1011  
1012  	memcpy(&pdcs->tx_ring_alloc, &tx, sizeof(tx));
1013  	memcpy(&pdcs->rx_ring_alloc, &rx, sizeof(rx));
1014  
1015  	pdcs->rxin = 0;
1016  	pdcs->rx_msg_start = 0;
1017  	pdcs->last_rx_curr = 0;
1018  	pdcs->rxout = 0;
1019  	pdcs->txin = 0;
1020  	pdcs->tx_msg_start = 0;
1021  	pdcs->txout = 0;
1022  
1023  	/* Set descriptor array base addresses */
1024  	pdcs->txd_64 = (struct dma64dd *)pdcs->tx_ring_alloc.vbase;
1025  	pdcs->rxd_64 = (struct dma64dd *)pdcs->rx_ring_alloc.vbase;
1026  
1027  	/* Tell device the base DMA address of each ring */
1028  	dma_reg = &pdcs->regs->dmaregs[ringset];
1029  
1030  	/* But first disable DMA and set curptr to 0 for both TX & RX */
1031  	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
1032  	iowrite32((PDC_RX_CTL + (pdcs->rx_status_len << 1)),
1033  		  &dma_reg->dmarcv.control);
1034  	iowrite32(0, &dma_reg->dmaxmt.ptr);
1035  	iowrite32(0, &dma_reg->dmarcv.ptr);
1036  
1037  	/* Set base DMA addresses */
1038  	iowrite32(lower_32_bits(pdcs->tx_ring_alloc.dmabase),
1039  		  &dma_reg->dmaxmt.addrlow);
1040  	iowrite32(upper_32_bits(pdcs->tx_ring_alloc.dmabase),
1041  		  &dma_reg->dmaxmt.addrhigh);
1042  
1043  	iowrite32(lower_32_bits(pdcs->rx_ring_alloc.dmabase),
1044  		  &dma_reg->dmarcv.addrlow);
1045  	iowrite32(upper_32_bits(pdcs->rx_ring_alloc.dmabase),
1046  		  &dma_reg->dmarcv.addrhigh);
1047  
1048  	/* Re-enable DMA */
1049  	iowrite32(PDC_TX_CTL | PDC_TX_ENABLE, &dma_reg->dmaxmt.control);
1050  	iowrite32((PDC_RX_CTL | PDC_RX_ENABLE | (pdcs->rx_status_len << 1)),
1051  		  &dma_reg->dmarcv.control);
1052  
1053  	/* Initialize descriptors */
1054  	for (i = 0; i < PDC_RING_ENTRIES; i++) {
1055  		/* Every tx descriptor can be used for start of frame. */
1056  		if (i != pdcs->ntxpost) {
1057  			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF,
1058  				  &pdcs->txd_64[i].ctrl1);
1059  		} else {
1060  			/* Last descriptor in ringset. Set End of Table. */
1061  			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF |
1062  				  D64_CTRL1_EOT, &pdcs->txd_64[i].ctrl1);
1063  		}
1064  
1065  		/* Every rx descriptor can be used for start of frame */
1066  		if (i != pdcs->nrxpost) {
1067  			iowrite32(D64_CTRL1_SOF,
1068  				  &pdcs->rxd_64[i].ctrl1);
1069  		} else {
1070  			/* Last descriptor in ringset. Set End of Table. */
1071  			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOT,
1072  				  &pdcs->rxd_64[i].ctrl1);
1073  		}
1074  	}
1075  	return PDC_SUCCESS;
1076  
1077  fail_dealloc:
1078  	dma_pool_free(pdcs->ring_pool, tx.vbase, tx.dmabase);
1079  done:
1080  	return err;
1081  }
1082  
1083  static void pdc_ring_free(struct pdc_state *pdcs)
1084  {
1085  	if (pdcs->tx_ring_alloc.vbase) {
1086  		dma_pool_free(pdcs->ring_pool, pdcs->tx_ring_alloc.vbase,
1087  			      pdcs->tx_ring_alloc.dmabase);
1088  		pdcs->tx_ring_alloc.vbase = NULL;
1089  	}
1090  
1091  	if (pdcs->rx_ring_alloc.vbase) {
1092  		dma_pool_free(pdcs->ring_pool, pdcs->rx_ring_alloc.vbase,
1093  			      pdcs->rx_ring_alloc.dmabase);
1094  		pdcs->rx_ring_alloc.vbase = NULL;
1095  	}
1096  }
1097  
1098  /**
1099   * pdc_desc_count() - Count the number of DMA descriptors that will be required
1100   * for a given scatterlist. Account for the max length of a DMA buffer.
1101   * @sg:    Scatterlist to be DMA'd
1102   * Return: Number of descriptors required
1103   */
1104  static u32 pdc_desc_count(struct scatterlist *sg)
1105  {
1106  	u32 cnt = 0;
1107  
1108  	while (sg) {
1109  		cnt += ((sg->length / PDC_DMA_BUF_MAX) + 1);
1110  		sg = sg_next(sg);
1111  	}
1112  	return cnt;
1113  }
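/*
 * Worked example (editor's note): for a scatterlist entry of 40000 bytes,
 * 40000 / 16384 + 1 = 3 descriptors are counted, matching how
 * pdc_tx_list_sg_add() and pdc_rx_list_sg_add() split such a buffer. The
 * estimate is deliberately conservative: an entry of exactly 16384 bytes is
 * counted as 2 even though only 1 descriptor is written.
 */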
1114  
1115  /**
1116   * pdc_rings_full() - Check whether the tx ring has room for tx_cnt descriptors
1117   * and the rx ring has room for rx_cnt descriptors.
1118   * @pdcs:  PDC state
1119   * @tx_cnt: The number of descriptors required in the tx ring
1120   * @rx_cnt: The number of descriptors required in the rx ring
1121   *
1122   * Return: true if one of the rings does not have enough space
1123   *         false if sufficient space is available in both rings
1124   */
1125  static bool pdc_rings_full(struct pdc_state *pdcs, int tx_cnt, int rx_cnt)
1126  {
1127  	u32 rx_avail;
1128  	u32 tx_avail;
1129  	bool full = false;
1130  
1131  	/* Check if the tx and rx rings are likely to have enough space */
1132  	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
1133  					      pdcs->nrxpost);
1134  	if (unlikely(rx_cnt > rx_avail)) {
1135  		pdcs->rx_ring_full++;
1136  		full = true;
1137  	}
1138  
1139  	if (likely(!full)) {
1140  		tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout,
1141  						      pdcs->ntxpost);
1142  		if (unlikely(tx_cnt > tx_avail)) {
1143  			pdcs->tx_ring_full++;
1144  			full = true;
1145  		}
1146  	}
1147  	return full;
1148  }
1149  
1150  /**
1151   * pdc_last_tx_done() - If both the tx and rx rings have at least
1152   * PDC_RING_SPACE_MIN descriptors available, then indicate that the mailbox
1153   * framework can submit another message.
1154   * @chan:  mailbox channel to check
1155   * Return: true if PDC can accept another message on this channel
1156   */
1157  static bool pdc_last_tx_done(struct mbox_chan *chan)
1158  {
1159  	struct pdc_state *pdcs = chan->con_priv;
1160  	bool ret;
1161  
1162  	if (unlikely(pdc_rings_full(pdcs, PDC_RING_SPACE_MIN,
1163  				    PDC_RING_SPACE_MIN))) {
1164  		pdcs->last_tx_not_done++;
1165  		ret = false;
1166  	} else {
1167  		ret = true;
1168  	}
1169  	return ret;
1170  }
1171  
1172  /**
1173   * pdc_send_data() - mailbox send_data function
1174   * @chan:	The mailbox channel on which the data is sent. The channel
1175   *              corresponds to a DMA ringset.
1176   * @data:	The mailbox message to be sent. The message must be a
1177   *              brcm_message structure.
1178   *
1179   * This function is registered as the send_data function for the mailbox
1180   * controller. From the destination scatterlist in the mailbox message, it
1181   * creates a sequence of receive descriptors in the rx ring. From the source
1182   * scatterlist, it creates a sequence of transmit descriptors in the tx ring.
1183   * After creating the descriptors, it writes the rx ptr and tx ptr registers to
1184   * initiate the DMA transfer.
1185   *
1186   * This function does the DMA map and unmap of the src and dst scatterlists in
1187   * the mailbox message.
1188   *
1189   * Return: 0 if successful
1190   *	   -ENOTSUPP if the mailbox message is a type this driver does not
1191   *			support
1192   *         < 0 if an error
1193   */
1194  static int pdc_send_data(struct mbox_chan *chan, void *data)
1195  {
1196  	struct pdc_state *pdcs = chan->con_priv;
1197  	struct device *dev = &pdcs->pdev->dev;
1198  	struct brcm_message *mssg = data;
1199  	int err = PDC_SUCCESS;
1200  	int src_nent;
1201  	int dst_nent;
1202  	int nent;
1203  	u32 tx_desc_req;
1204  	u32 rx_desc_req;
1205  
1206  	if (unlikely(mssg->type != BRCM_MESSAGE_SPU))
1207  		return -ENOTSUPP;
1208  
1209  	src_nent = sg_nents(mssg->spu.src);
1210  	if (likely(src_nent)) {
1211  		nent = dma_map_sg(dev, mssg->spu.src, src_nent, DMA_TO_DEVICE);
1212  		if (unlikely(nent == 0))
1213  			return -EIO;
1214  	}
1215  
1216  	dst_nent = sg_nents(mssg->spu.dst);
1217  	if (likely(dst_nent)) {
1218  		nent = dma_map_sg(dev, mssg->spu.dst, dst_nent,
1219  				  DMA_FROM_DEVICE);
1220  		if (unlikely(nent == 0)) {
1221  			dma_unmap_sg(dev, mssg->spu.src, src_nent,
1222  				     DMA_TO_DEVICE);
1223  			return -EIO;
1224  		}
1225  	}
1226  
1227  	/*
1228  	 * Check if the tx and rx rings have enough space. Do this prior to
1229  	 * writing any tx or rx descriptors. Need to ensure that we do not write
1230  	 * a partial set of descriptors, or write just rx descriptors but
1231  	 * corresponding tx descriptors don't fit. Note that we want this check
1232  	 * and the entire sequence of descriptor to happen without another
1233  	 * thread getting in. The channel spin lock in the mailbox framework
1234  	 * ensures this.
1235  	 */
1236  	tx_desc_req = pdc_desc_count(mssg->spu.src);
1237  	rx_desc_req = pdc_desc_count(mssg->spu.dst);
1238  	if (unlikely(pdc_rings_full(pdcs, tx_desc_req, rx_desc_req + 1)))
1239  		return -ENOSPC;
1240  
1241  	/* Create rx descriptors to catch the SPU response */
1242  	err = pdc_rx_list_init(pdcs, mssg->spu.dst, mssg->ctx);
1243  	err |= pdc_rx_list_sg_add(pdcs, mssg->spu.dst);
1244  
1245  	/* Create tx descriptors to submit SPU request */
1246  	err |= pdc_tx_list_sg_add(pdcs, mssg->spu.src);
1247  	err |= pdc_tx_list_final(pdcs);	/* initiate transfer */
1248  
1249  	if (unlikely(err))
1250  		dev_err(&pdcs->pdev->dev,
1251  			"%s failed with error %d", __func__, err);
1252  
1253  	return err;
1254  }
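/*
 * Client-side sketch (editor's note, not part of this driver): roughly how a
 * mailbox client such as an SPU crypto driver might hand a request to
 * pdc_send_data(). The scatterlists, channel pointer and context value are
 * placeholders; only the brcm_message fields used by this driver are shown.
 *
 *   struct brcm_message msg = { };
 *
 *   msg.type = BRCM_MESSAGE_SPU;
 *   msg.spu.src = src_sg;   // request, including BCM header if used
 *   msg.spu.dst = dst_sg;   // buffers that will receive the SPU response
 *   msg.ctx = my_request;   // returned via mbox_chan_received_data()
 *
 *   err = mbox_send_message(chan, &msg);  // chan from mbox_request_channel()
 */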
1255  
1256  static int pdc_startup(struct mbox_chan *chan)
1257  {
1258  	return pdc_ring_init(chan->con_priv, PDC_RINGSET);
1259  }
1260  
1261  static void pdc_shutdown(struct mbox_chan *chan)
1262  {
1263  	struct pdc_state *pdcs = chan->con_priv;
1264  
1265  	if (!pdcs)
1266  		return;
1267  
1268  	dev_dbg(&pdcs->pdev->dev,
1269  		"Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
1270  	pdc_ring_free(pdcs);
1271  }
1272  
1273  /**
1274   * pdc_hw_init() - Use the given initialization parameters to initialize the
1275   * state for one of the PDCs.
1276   * @pdcs:  state of the PDC
1277   */
1278  static
1279  void pdc_hw_init(struct pdc_state *pdcs)
1280  {
1281  	struct platform_device *pdev;
1282  	struct device *dev;
1283  	struct dma64 *dma_reg;
1284  	int ringset = PDC_RINGSET;
1285  
1286  	pdev = pdcs->pdev;
1287  	dev = &pdev->dev;
1288  
1289  	dev_dbg(dev, "PDC %u initial values:", pdcs->pdc_idx);
1290  	dev_dbg(dev, "state structure:                   %p",
1291  		pdcs);
1292  	dev_dbg(dev, " - base virtual addr of hw regs    %p",
1293  		pdcs->pdc_reg_vbase);
1294  
1295  	/* initialize data structures */
1296  	pdcs->regs = (struct pdc_regs *)pdcs->pdc_reg_vbase;
1297  	pdcs->txregs_64 = (struct dma64_regs *)
1298  	    (((u8 *)pdcs->pdc_reg_vbase) +
1299  		     PDC_TXREGS_OFFSET + (sizeof(struct dma64) * ringset));
1300  	pdcs->rxregs_64 = (struct dma64_regs *)
1301  	    (((u8 *)pdcs->pdc_reg_vbase) +
1302  		     PDC_RXREGS_OFFSET + (sizeof(struct dma64) * ringset));
1303  
1304  	pdcs->ntxd = PDC_RING_ENTRIES;
1305  	pdcs->nrxd = PDC_RING_ENTRIES;
1306  	pdcs->ntxpost = PDC_RING_ENTRIES - 1;
1307  	pdcs->nrxpost = PDC_RING_ENTRIES - 1;
1308  	iowrite32(0, &pdcs->regs->intmask);
1309  
1310  	dma_reg = &pdcs->regs->dmaregs[ringset];
1311  
1312  	/* Configure DMA but will enable later in pdc_ring_init() */
1313  	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
1314  
1315  	iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1),
1316  		  &dma_reg->dmarcv.control);
1317  
1318  	/* Reset current index pointers after making sure DMA is disabled */
1319  	iowrite32(0, &dma_reg->dmaxmt.ptr);
1320  	iowrite32(0, &dma_reg->dmarcv.ptr);
1321  
1322  	if (pdcs->pdc_resp_hdr_len == PDC_SPU2_RESP_HDR_LEN)
1323  		iowrite32(PDC_CKSUM_CTRL,
1324  			  pdcs->pdc_reg_vbase + PDC_CKSUM_CTRL_OFFSET);
1325  }
1326  
1327  /**
1328   * pdc_hw_disable() - Disable the tx and rx control in the hw.
1329   * @pdcs: PDC state structure
1330   *
1331   */
1332  static void pdc_hw_disable(struct pdc_state *pdcs)
1333  {
1334  	struct dma64 *dma_reg;
1335  
1336  	dma_reg = &pdcs->regs->dmaregs[PDC_RINGSET];
1337  	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
1338  	iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1),
1339  		  &dma_reg->dmarcv.control);
1340  }
1341  
1342  /**
1343   * pdc_rx_buf_pool_create() - Create a pool of receive buffers used to catch
1344   * the metadata header returned with each response message.
1345   * @pdcs: PDC state structure
1346   *
1347   * The metadata is not returned to the mailbox client. So the PDC driver
1348   * manages these buffers.
1349   *
1350   * Return: PDC_SUCCESS
1351   *         -ENOMEM if pool creation fails
1352   */
1353  static int pdc_rx_buf_pool_create(struct pdc_state *pdcs)
1354  {
1355  	struct platform_device *pdev;
1356  	struct device *dev;
1357  
1358  	pdev = pdcs->pdev;
1359  	dev = &pdev->dev;
1360  
1361  	pdcs->pdc_resp_hdr_len = pdcs->rx_status_len;
1362  	if (pdcs->use_bcm_hdr)
1363  		pdcs->pdc_resp_hdr_len += BCM_HDR_LEN;
1364  
1365  	pdcs->rx_buf_pool = dma_pool_create("pdc rx bufs", dev,
1366  					    pdcs->pdc_resp_hdr_len,
1367  					    RX_BUF_ALIGN, 0);
1368  	if (!pdcs->rx_buf_pool)
1369  		return -ENOMEM;
1370  
1371  	return PDC_SUCCESS;
1372  }
1373  
1374  /**
1375   * pdc_interrupts_init() - Initialize the interrupt configuration for a PDC and
1376   * register the IRQ handler, which defers receive processing to a tasklet
1377   * that runs outside of hard interrupt context.
1378   * @pdcs:   PDC state
1379   *
1380   * Set the interrupt mask for transmit and receive done.
1381   * Set the lazy interrupt frame count to generate an interrupt for just one pkt.
1382   *
1383   * Return:  PDC_SUCCESS
1384   *          <0 if threaded irq request fails
1385   */
1386  static int pdc_interrupts_init(struct pdc_state *pdcs)
1387  {
1388  	struct platform_device *pdev = pdcs->pdev;
1389  	struct device *dev = &pdev->dev;
1390  	struct device_node *dn = pdev->dev.of_node;
1391  	int err;
1392  
1393  	/* interrupt configuration */
1394  	iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
1395  
1396  	if (pdcs->hw_type == FA_HW)
1397  		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
1398  			  FA_RCVLAZY0_OFFSET);
1399  	else
1400  		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
1401  			  PDC_RCVLAZY0_OFFSET);
1402  
1403  	/* read irq from device tree */
1404  	pdcs->pdc_irq = irq_of_parse_and_map(dn, 0);
1405  	dev_dbg(dev, "pdc device %s irq %u for pdcs %p",
1406  		dev_name(dev), pdcs->pdc_irq, pdcs);
1407  
1408  	err = devm_request_irq(dev, pdcs->pdc_irq, pdc_irq_handler, 0,
1409  			       dev_name(dev), dev);
1410  	if (err) {
1411  		dev_err(dev, "IRQ %u request failed with err %d\n",
1412  			pdcs->pdc_irq, err);
1413  		return err;
1414  	}
1415  	return PDC_SUCCESS;
1416  }
1417  
1418  static const struct mbox_chan_ops pdc_mbox_chan_ops = {
1419  	.send_data = pdc_send_data,
1420  	.last_tx_done = pdc_last_tx_done,
1421  	.startup = pdc_startup,
1422  	.shutdown = pdc_shutdown
1423  };
1424  
1425  /**
1426   * pdc_mb_init() - Initialize the mailbox controller.
1427   * @pdcs:  PDC state
1428   *
1429   * Each PDC is a mailbox controller. Each ringset is a mailbox channel. Kernel
1430   * driver only uses one ringset and thus one mb channel. The controller is
1431   * configured for tx-done polling: the mailbox framework calls
1432   * pdc_last_tx_done() to determine when another message can be submitted.
1433   *
1434   * Return: 0 on success
1435   *         < 0 if there is an allocation or registration failure
1436   */
1437  static int pdc_mb_init(struct pdc_state *pdcs)
1438  {
1439  	struct device *dev = &pdcs->pdev->dev;
1440  	struct mbox_controller *mbc;
1441  	int chan_index;
1442  	int err;
1443  
1444  	mbc = &pdcs->mbc;
1445  	mbc->dev = dev;
1446  	mbc->ops = &pdc_mbox_chan_ops;
1447  	mbc->num_chans = 1;
1448  	mbc->chans = devm_kcalloc(dev, mbc->num_chans, sizeof(*mbc->chans),
1449  				  GFP_KERNEL);
1450  	if (!mbc->chans)
1451  		return -ENOMEM;
1452  
1453  	mbc->txdone_irq = false;
1454  	mbc->txdone_poll = true;
1455  	mbc->txpoll_period = 1;
1456  	for (chan_index = 0; chan_index < mbc->num_chans; chan_index++)
1457  		mbc->chans[chan_index].con_priv = pdcs;
1458  
1459  	/* Register mailbox controller */
1460  	err = devm_mbox_controller_register(dev, mbc);
1461  	if (err) {
1462  		dev_crit(dev,
1463  			 "Failed to register PDC mailbox controller. Error %d.",
1464  			 err);
1465  		return err;
1466  	}
1467  	return 0;
1468  }
1469  
1470  /* Device tree API */
1471  static const int pdc_hw = PDC_HW;
1472  static const int fa_hw = FA_HW;
1473  
1474  static const struct of_device_id pdc_mbox_of_match[] = {
1475  	{.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw},
1476  	{.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw},
1477  	{ /* sentinel */ }
1478  };
1479  MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
1480  
1481  /**
1482   * pdc_dt_read() - Read application-specific data from device tree.
1483   * @pdev:  Platform device
1484   * @pdcs:  PDC state
1485   *
1486   * Reads the number of bytes of receive status that precede each received frame.
1487   * Reads whether transmitted and received frames should be preceded by an 8-byte
1488   * BCM header.
1489   *
1490   * Return: 0 if successful
1491   *         -ENODEV if device not available
1492   */
1493  static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
1494  {
1495  	struct device *dev = &pdev->dev;
1496  	struct device_node *dn = pdev->dev.of_node;
1497  	const struct of_device_id *match;
1498  	const int *hw_type;
1499  	int err;
1500  
1501  	err = of_property_read_u32(dn, "brcm,rx-status-len",
1502  				   &pdcs->rx_status_len);
1503  	if (err < 0)
1504  		dev_err(dev,
1505  			"%s failed to get DMA receive status length from device tree",
1506  			__func__);
1507  
1508  	pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr");
1509  
1510  	pdcs->hw_type = PDC_HW;
1511  
1512  	match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev);
1513  	if (match != NULL) {
1514  		hw_type = match->data;
1515  		pdcs->hw_type = *hw_type;
1516  	}
1517  
1518  	return 0;
1519  }
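/*
 * Editorial sketch of a matching device tree node (addresses, sizes and
 * interrupt numbers are illustrative placeholders, not taken from a real
 * board file; any additional properties required by the full binding, such as
 * #mbox-cells, are omitted):
 *
 *   pdc0: iproc-pdc0@612c0000 {
 *           compatible = "brcm,iproc-pdc-mbox";
 *           reg = <0x612c0000 0x445>;
 *           interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
 *           brcm,rx-status-len = <32>;
 *           brcm,use-bcm-hdr;
 *   };
 *
 * brcm,rx-status-len and brcm,use-bcm-hdr are the properties read above; reg
 * and interrupts are consumed in pdc_probe() and pdc_interrupts_init().
 */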
1520  
1521  /**
1522   * pdc_probe() - Probe function for PDC driver.
1523   * @pdev:   PDC platform device
1524   *
1525   * Reserve and map register regions defined in device tree.
1526   * Allocate and initialize tx and rx DMA rings.
1527   * Initialize a mailbox controller for each PDC.
1528   *
1529   * Return: 0 if successful
1530   *         < 0 if an error
1531   */
1532  static int pdc_probe(struct platform_device *pdev)
1533  {
1534  	int err = 0;
1535  	struct device *dev = &pdev->dev;
1536  	struct resource *pdc_regs;
1537  	struct pdc_state *pdcs;
1538  
1539  	/* PDC state for one SPU */
1540  	pdcs = devm_kzalloc(dev, sizeof(*pdcs), GFP_KERNEL);
1541  	if (!pdcs) {
1542  		err = -ENOMEM;
1543  		goto cleanup;
1544  	}
1545  
1546  	pdcs->pdev = pdev;
1547  	platform_set_drvdata(pdev, pdcs);
1548  	pdcs->pdc_idx = pdcg.num_spu;
1549  	pdcg.num_spu++;
1550  
1551  	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
1552  	if (err) {
1553  		dev_warn(dev, "PDC device cannot perform DMA. Error %d.", err);
1554  		goto cleanup;
1555  	}
1556  
1557  	/* Create DMA pool for tx ring */
1558  	pdcs->ring_pool = dma_pool_create("pdc rings", dev, PDC_RING_SIZE,
1559  					  RING_ALIGN, 0);
1560  	if (!pdcs->ring_pool) {
1561  		err = -ENOMEM;
1562  		goto cleanup;
1563  	}
1564  
1565  	err = pdc_dt_read(pdev, pdcs);
1566  	if (err)
1567  		goto cleanup_ring_pool;
1568  
1569  	pdcs->pdc_reg_vbase = devm_platform_get_and_ioremap_resource(pdev, 0, &pdc_regs);
1570  	if (IS_ERR(pdcs->pdc_reg_vbase)) {
1571  		err = PTR_ERR(pdcs->pdc_reg_vbase);
1572  		goto cleanup_ring_pool;
1573  	}
1574  	dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
1575  		&pdc_regs->start, &pdc_regs->end);
1576  
1577  	/* create rx buffer pool after dt read to know how big buffers are */
1578  	err = pdc_rx_buf_pool_create(pdcs);
1579  	if (err)
1580  		goto cleanup_ring_pool;
1581  
1582  	pdc_hw_init(pdcs);
1583  
1584  	/* Init tasklet for deferred DMA rx processing */
1585  	tasklet_setup(&pdcs->rx_tasklet, pdc_tasklet_cb);
1586  
1587  	err = pdc_interrupts_init(pdcs);
1588  	if (err)
1589  		goto cleanup_buf_pool;
1590  
1591  	/* Initialize mailbox controller */
1592  	err = pdc_mb_init(pdcs);
1593  	if (err)
1594  		goto cleanup_buf_pool;
1595  
1596  	pdc_setup_debugfs(pdcs);
1597  
1598  	dev_dbg(dev, "pdc_probe() successful");
1599  	return PDC_SUCCESS;
1600  
1601  cleanup_buf_pool:
1602  	tasklet_kill(&pdcs->rx_tasklet);
1603  	dma_pool_destroy(pdcs->rx_buf_pool);
1604  
1605  cleanup_ring_pool:
1606  	dma_pool_destroy(pdcs->ring_pool);
1607  
1608  cleanup:
1609  	return err;
1610  }
1611  
1612  static int pdc_remove(struct platform_device *pdev)
1613  {
1614  	struct pdc_state *pdcs = platform_get_drvdata(pdev);
1615  
1616  	pdc_free_debugfs();
1617  
1618  	tasklet_kill(&pdcs->rx_tasklet);
1619  
1620  	pdc_hw_disable(pdcs);
1621  
1622  	dma_pool_destroy(pdcs->rx_buf_pool);
1623  	dma_pool_destroy(pdcs->ring_pool);
1624  	return 0;
1625  }
1626  
1627  static struct platform_driver pdc_mbox_driver = {
1628  	.probe = pdc_probe,
1629  	.remove = pdc_remove,
1630  	.driver = {
1631  		   .name = "brcm-iproc-pdc-mbox",
1632  		   .of_match_table = pdc_mbox_of_match,
1633  		   },
1634  };
1635  module_platform_driver(pdc_mbox_driver);
1636  
1637  MODULE_AUTHOR("Rob Rice <rob.rice@broadcom.com>");
1638  MODULE_DESCRIPTION("Broadcom PDC mailbox driver");
1639  MODULE_LICENSE("GPL v2");
1640