1 // SPDX-License-Identifier: GPL-2.0-only
2 //
3 // Copyright (C) 2020 NVIDIA CORPORATION.
4 
5 #include <linux/clk.h>
6 #include <linux/completion.h>
7 #include <linux/delay.h>
8 #include <linux/dmaengine.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/dmapool.h>
11 #include <linux/err.h>
12 #include <linux/interrupt.h>
13 #include <linux/io.h>
14 #include <linux/iopoll.h>
15 #include <linux/kernel.h>
16 #include <linux/kthread.h>
17 #include <linux/module.h>
18 #include <linux/platform_device.h>
19 #include <linux/pm_runtime.h>
20 #include <linux/of.h>
21 #include <linux/of_device.h>
22 #include <linux/reset.h>
23 #include <linux/spi/spi.h>
24 
/*
 * Register offsets and bit-field helpers for the QSPI controller.
 *
 * The 0x... offset macros are the values passed as @offset to
 * tegra_qspi_readl()/tegra_qspi_writel(); the macros listed under each
 * offset are presumably the bit fields of that register (grouping follows
 * the original layout of this file).
 */

/* COMMAND1: per-transfer control word (bit length, packing, width, direction, CS, mode) */
#define QSPI_COMMAND1				0x000
#define QSPI_BIT_LENGTH(x)			(((x) & 0x1f) << 0)
#define QSPI_PACKED				BIT(5)
#define QSPI_INTERFACE_WIDTH_MASK		(0x03 << 7)
#define QSPI_INTERFACE_WIDTH(x)			(((x) & 0x03) << 7)
#define QSPI_INTERFACE_WIDTH_SINGLE		QSPI_INTERFACE_WIDTH(0)
#define QSPI_INTERFACE_WIDTH_DUAL		QSPI_INTERFACE_WIDTH(1)
#define QSPI_INTERFACE_WIDTH_QUAD		QSPI_INTERFACE_WIDTH(2)
#define QSPI_SDR_DDR_SEL			BIT(9)
#define QSPI_TX_EN				BIT(11)
#define QSPI_RX_EN				BIT(12)
#define QSPI_CS_SW_VAL				BIT(20)
#define QSPI_CS_SW_HW				BIT(21)
#define QSPI_CONTROL_MODE_0			(0 << 28)
#define QSPI_CONTROL_MODE_3			(3 << 28)
#define QSPI_CONTROL_MODE_MASK			(3 << 28)
#define QSPI_M_S				BIT(30)
/* set in COMMAND1 when a CPU-based (non-DMA) transfer is started */
#define QSPI_PIO				BIT(31)

/* COMMAND2: TX/RX clock tap delays */
#define QSPI_COMMAND2				0x004
#define QSPI_TX_TAP_DELAY(x)			(((x) & 0x3f) << 10)
#define QSPI_RX_TAP_DELAY(x)			(((x) & 0xff) << 0)

/* CS_TIMING1: note that QSPI_SETUP_HOLD() does not mask its arguments */
#define QSPI_CS_TIMING1				0x008
#define QSPI_SETUP_HOLD(setup, hold)		(((setup) << 4) | (hold))

#define QSPI_CS_TIMING2				0x00c
#define CYCLES_BETWEEN_PACKETS_0(x)		(((x) & 0x1f) << 0)
#define CS_ACTIVE_BETWEEN_PACKETS_0		BIT(5)

/* TRANS_STATUS: written back with its own value to clear it */
#define QSPI_TRANS_STATUS			0x010
#define QSPI_BLK_CNT(val)			(((val) >> 0) & 0xffff)
#define QSPI_RDY				BIT(30)

/* FIFO_STATUS: empty/full flags, error flags, flush requests and fill counts */
#define QSPI_FIFO_STATUS			0x014
#define QSPI_RX_FIFO_EMPTY			BIT(0)
#define QSPI_RX_FIFO_FULL			BIT(1)
#define QSPI_TX_FIFO_EMPTY			BIT(2)
#define QSPI_TX_FIFO_FULL			BIT(3)
#define QSPI_RX_FIFO_UNF			BIT(4)
#define QSPI_RX_FIFO_OVF			BIT(5)
#define QSPI_TX_FIFO_UNF			BIT(6)
#define QSPI_TX_FIFO_OVF			BIT(7)
#define QSPI_ERR				BIT(8)
#define QSPI_TX_FIFO_FLUSH			BIT(14)
#define QSPI_RX_FIFO_FLUSH			BIT(15)
/* extract the free TX slot count / filled RX slot count from a FIFO_STATUS value */
#define QSPI_TX_FIFO_EMPTY_COUNT(val)		(((val) >> 16) & 0x7f)
#define QSPI_RX_FIFO_FULL_COUNT(val)		(((val) >> 23) & 0x7f)

/* all four FIFO underflow/overflow flags */
#define QSPI_FIFO_ERROR				(QSPI_RX_FIFO_UNF | \
						 QSPI_RX_FIFO_OVF | \
						 QSPI_TX_FIFO_UNF | \
						 QSPI_TX_FIFO_OVF)
/* both FIFOs empty; tegra_qspi_flush_fifos() polls for this state */
#define QSPI_FIFO_EMPTY				(QSPI_RX_FIFO_EMPTY | \
						 QSPI_TX_FIFO_EMPTY)

#define QSPI_TX_DATA				0x018
#define QSPI_RX_DATA				0x01c

/* DMA_CTL: FIFO trigger levels and DMA enable */
#define QSPI_DMA_CTL				0x020
#define QSPI_TX_TRIG(n)				(((n) & 0x3) << 15)
#define QSPI_TX_TRIG_1				QSPI_TX_TRIG(0)
#define QSPI_TX_TRIG_4				QSPI_TX_TRIG(1)
#define QSPI_TX_TRIG_8				QSPI_TX_TRIG(2)
#define QSPI_TX_TRIG_16				QSPI_TX_TRIG(3)

#define QSPI_RX_TRIG(n)				(((n) & 0x3) << 19)
#define QSPI_RX_TRIG_1				QSPI_RX_TRIG(0)
#define QSPI_RX_TRIG_4				QSPI_RX_TRIG(1)
#define QSPI_RX_TRIG_8				QSPI_RX_TRIG(2)
#define QSPI_RX_TRIG_16				QSPI_RX_TRIG(3)

#define QSPI_DMA_EN				BIT(31)

/* DMA_BLK: programmed with (number of words - 1) before each transfer chunk */
#define QSPI_DMA_BLK				0x024
#define QSPI_DMA_BLK_SET(x)			(((x) & 0xffff) << 0)

/* FIFO data ports, used for PIO access and as DMA slave addresses */
#define QSPI_TX_FIFO				0x108
#define QSPI_RX_FIFO				0x188

/* chunks needing more FIFO words than this are sent by DMA when DMA is usable */
#define QSPI_FIFO_DEPTH				64

/* INTR_MASK: a set bit masks (disables) the corresponding interrupt */
#define QSPI_INTR_MASK				0x18c
#define QSPI_INTR_RX_FIFO_UNF_MASK		BIT(25)
#define QSPI_INTR_RX_FIFO_OVF_MASK		BIT(26)
#define QSPI_INTR_TX_FIFO_UNF_MASK		BIT(27)
#define QSPI_INTR_TX_FIFO_OVF_MASK		BIT(28)
#define QSPI_INTR_RDY_MASK			BIT(29)
#define QSPI_INTR_RX_TX_FIFO_ERR		(QSPI_INTR_RX_FIFO_UNF_MASK | \
						 QSPI_INTR_RX_FIFO_OVF_MASK | \
						 QSPI_INTR_TX_FIFO_UNF_MASK | \
						 QSPI_INTR_TX_FIFO_OVF_MASK)

/* MISC: number of dummy clock cycles appended to a transfer */
#define QSPI_MISC_REG                           0x194
#define QSPI_NUM_DUMMY_CYCLE(x)			(((x) & 0xff) << 0)
#define QSPI_DUMMY_CYCLES_MAX			0xff

/* direction flags stored in tegra_qspi::cur_direction */
#define DATA_DIR_TX				BIT(0)
#define DATA_DIR_RX				BIT(1)

/* timeout (1000 ms, in jiffies) used when waiting for a transfer to complete */
#define QSPI_DMA_TIMEOUT			(msecs_to_jiffies(1000))
/* presumably the default size of the DMA bounce buffers; not referenced in this part of the file */
#define DEFAULT_QSPI_DMA_BUF_LEN		(64 * 1024)
127 
/*
 * Per-client tuning data, filled from the client's device tree node.
 *
 * The fields are unsigned 32-bit values because their addresses are handed
 * to of_property_read_u32(), whose out-parameter is a u32 pointer
 * (u32 is unsigned int in the kernel). A value of 0 means "no delay".
 */
struct tegra_qspi_client_data {
	unsigned int tx_clk_tap_delay;	/* "nvidia,tx-clk-tap-delay" */
	unsigned int rx_clk_tap_delay;	/* "nvidia,rx-clk-tap-delay" */
};
132 
/*
 * Driver state for one QSPI controller instance.
 *
 * dev:              device used for logging, DMA mapping and runtime PM
 * clk:              clock whose rate is set to the transfer speed
 * rst:              reset line pulsed by the error handler
 * base / phys:      mapped register base / physical base used to form the
 *                   FIFO addresses given to the DMA engine
 * cur_speed:        last rate passed to clk_set_rate()
 * cur_pos, cur_rx_pos, cur_tx_pos:
 *                   byte offsets into the current transfer; all reset to 0
 *                   when a transfer is set up
 * words_per_32bit, bytes_per_word, curr_dma_words:
 *                   packing parameters and word count of the current chunk,
 *                   computed by tegra_qspi_calculate_curr_xfer_param()
 * cur_direction:    DATA_DIR_TX / DATA_DIR_RX flags of the current transfer
 * dma_buf_size:     size of each coherent DMA bounce buffer
 * max_buf_size:     upper bound (bytes) for one chunk of a transfer
 * is_curr_dma_xfer: true when the current chunk was started in DMA mode
 * tx_status, rx_status:
 *                   non-zero values are treated as a transfer error
 * status_reg:       value printed as "fifo status" by the error handler
 * is_packed:        current transfer uses packed mode
 * use_dma:          set once both DMA channels and buffers are available
 * command1_reg:     COMMAND1 value of the current transfer
 * dma_control_reg:  last DMA_CTL value programmed (without QSPI_DMA_EN)
 * def_command1_reg, def_command2_reg:
 *                   default COMMAND1/COMMAND2 values
 * dummy_cycles:     dummy clock cycles programmed into QSPI_MISC_REG
 * xfer_completion:  completed when the current transfer finishes
 *                   (presumably by the interrupt handler, outside this view)
 * curr_xfer:        transfer currently being processed
 * rx_dma_* / tx_dma_*:
 *                   DMA channel, bounce buffer, its bus address and the
 *                   most recently prepared descriptor for each direction
 *
 * master, irq, spi_cs_timing1 and spi_cs_timing2 are not referenced in this
 * part of the file.
 */
struct tegra_qspi {
	struct device				*dev;
	struct spi_master			*master;
	/* lock to protect data accessed by irq */
	spinlock_t				lock;

	struct clk				*clk;
	struct reset_control			*rst;
	void __iomem				*base;
	phys_addr_t				phys;
	unsigned int				irq;

	u32					cur_speed;
	unsigned int				cur_pos;
	unsigned int				words_per_32bit;
	unsigned int				bytes_per_word;
	unsigned int				curr_dma_words;
	unsigned int				cur_direction;

	unsigned int				cur_rx_pos;
	unsigned int				cur_tx_pos;

	unsigned int				dma_buf_size;
	unsigned int				max_buf_size;
	bool					is_curr_dma_xfer;

	struct completion			rx_dma_complete;
	struct completion			tx_dma_complete;

	u32					tx_status;
	u32					rx_status;
	u32					status_reg;
	bool					is_packed;
	bool					use_dma;

	u32					command1_reg;
	u32					dma_control_reg;
	u32					def_command1_reg;
	u32					def_command2_reg;
	u32					spi_cs_timing1;
	u32					spi_cs_timing2;
	u8					dummy_cycles;

	struct completion			xfer_completion;
	struct spi_transfer			*curr_xfer;

	struct dma_chan				*rx_dma_chan;
	u32					*rx_dma_buf;
	dma_addr_t				rx_dma_phys;
	struct dma_async_tx_descriptor		*rx_dma_desc;

	struct dma_chan				*tx_dma_chan;
	u32					*tx_dma_buf;
	dma_addr_t				tx_dma_phys;
	struct dma_async_tx_descriptor		*tx_dma_desc;
};
189 
190 static inline u32 tegra_qspi_readl(struct tegra_qspi *tqspi, unsigned long offset)
191 {
192 	return readl(tqspi->base + offset);
193 }
194 
195 static inline void tegra_qspi_writel(struct tegra_qspi *tqspi, u32 value, unsigned long offset)
196 {
197 	writel(value, tqspi->base + offset);
198 
199 	/* read back register to make sure that register writes completed */
200 	if (offset != QSPI_TX_FIFO)
201 		readl(tqspi->base + QSPI_COMMAND1);
202 }
203 
204 static void tegra_qspi_mask_clear_irq(struct tegra_qspi *tqspi)
205 {
206 	u32 value;
207 
208 	/* write 1 to clear status register */
209 	value = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
210 	tegra_qspi_writel(tqspi, value, QSPI_TRANS_STATUS);
211 
212 	value = tegra_qspi_readl(tqspi, QSPI_INTR_MASK);
213 	if (!(value & QSPI_INTR_RDY_MASK)) {
214 		value |= (QSPI_INTR_RDY_MASK | QSPI_INTR_RX_TX_FIFO_ERR);
215 		tegra_qspi_writel(tqspi, value, QSPI_INTR_MASK);
216 	}
217 
218 	/* clear fifo status error if any */
219 	value = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
220 	if (value & QSPI_ERR)
221 		tegra_qspi_writel(tqspi, QSPI_ERR | QSPI_FIFO_ERROR, QSPI_FIFO_STATUS);
222 }
223 
224 static unsigned int
225 tegra_qspi_calculate_curr_xfer_param(struct tegra_qspi *tqspi, struct spi_transfer *t)
226 {
227 	unsigned int max_word, max_len, total_fifo_words;
228 	unsigned int remain_len = t->len - tqspi->cur_pos;
229 	unsigned int bits_per_word = t->bits_per_word;
230 
231 	tqspi->bytes_per_word = DIV_ROUND_UP(bits_per_word, 8);
232 
233 	/*
234 	 * Tegra QSPI controller supports packed or unpacked mode transfers.
235 	 * Packed mode is used for data transfers using 8, 16, or 32 bits per
236 	 * word with a minimum transfer of 1 word and for all other transfers
237 	 * unpacked mode will be used.
238 	 */
239 
240 	if ((bits_per_word == 8 || bits_per_word == 16 ||
241 	     bits_per_word == 32) && t->len > 3) {
242 		tqspi->is_packed = true;
243 		tqspi->words_per_32bit = 32 / bits_per_word;
244 	} else {
245 		tqspi->is_packed = false;
246 		tqspi->words_per_32bit = 1;
247 	}
248 
249 	if (tqspi->is_packed) {
250 		max_len = min(remain_len, tqspi->max_buf_size);
251 		tqspi->curr_dma_words = max_len / tqspi->bytes_per_word;
252 		total_fifo_words = (max_len + 3) / 4;
253 	} else {
254 		max_word = (remain_len - 1) / tqspi->bytes_per_word + 1;
255 		max_word = min(max_word, tqspi->max_buf_size / 4);
256 		tqspi->curr_dma_words = max_word;
257 		total_fifo_words = max_word;
258 	}
259 
260 	return total_fifo_words;
261 }
262 
263 static unsigned int
264 tegra_qspi_fill_tx_fifo_from_client_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
265 {
266 	unsigned int written_words, fifo_words_left, count;
267 	unsigned int len, tx_empty_count, max_n_32bit, i;
268 	u8 *tx_buf = (u8 *)t->tx_buf + tqspi->cur_tx_pos;
269 	u32 fifo_status;
270 
271 	fifo_status = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
272 	tx_empty_count = QSPI_TX_FIFO_EMPTY_COUNT(fifo_status);
273 
274 	if (tqspi->is_packed) {
275 		fifo_words_left = tx_empty_count * tqspi->words_per_32bit;
276 		written_words = min(fifo_words_left, tqspi->curr_dma_words);
277 		len = written_words * tqspi->bytes_per_word;
278 		max_n_32bit = DIV_ROUND_UP(len, 4);
279 		for (count = 0; count < max_n_32bit; count++) {
280 			u32 x = 0;
281 
282 			for (i = 0; (i < 4) && len; i++, len--)
283 				x |= (u32)(*tx_buf++) << (i * 8);
284 			tegra_qspi_writel(tqspi, x, QSPI_TX_FIFO);
285 		}
286 
287 		tqspi->cur_tx_pos += written_words * tqspi->bytes_per_word;
288 	} else {
289 		unsigned int write_bytes;
290 		u8 bytes_per_word = tqspi->bytes_per_word;
291 
292 		max_n_32bit = min(tqspi->curr_dma_words, tx_empty_count);
293 		written_words = max_n_32bit;
294 		len = written_words * tqspi->bytes_per_word;
295 		if (len > t->len - tqspi->cur_pos)
296 			len = t->len - tqspi->cur_pos;
297 		write_bytes = len;
298 		for (count = 0; count < max_n_32bit; count++) {
299 			u32 x = 0;
300 
301 			for (i = 0; len && (i < bytes_per_word); i++, len--)
302 				x |= (u32)(*tx_buf++) << (i * 8);
303 			tegra_qspi_writel(tqspi, x, QSPI_TX_FIFO);
304 		}
305 
306 		tqspi->cur_tx_pos += write_bytes;
307 	}
308 
309 	return written_words;
310 }
311 
312 static unsigned int
313 tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
314 {
315 	u8 *rx_buf = (u8 *)t->rx_buf + tqspi->cur_rx_pos;
316 	unsigned int len, rx_full_count, count, i;
317 	unsigned int read_words = 0;
318 	u32 fifo_status, x;
319 
320 	fifo_status = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
321 	rx_full_count = QSPI_RX_FIFO_FULL_COUNT(fifo_status);
322 	if (tqspi->is_packed) {
323 		len = tqspi->curr_dma_words * tqspi->bytes_per_word;
324 		for (count = 0; count < rx_full_count; count++) {
325 			x = tegra_qspi_readl(tqspi, QSPI_RX_FIFO);
326 
327 			for (i = 0; len && (i < 4); i++, len--)
328 				*rx_buf++ = (x >> i * 8) & 0xff;
329 		}
330 
331 		read_words += tqspi->curr_dma_words;
332 		tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word;
333 	} else {
334 		u32 rx_mask = ((u32)1 << t->bits_per_word) - 1;
335 		u8 bytes_per_word = tqspi->bytes_per_word;
336 		unsigned int read_bytes;
337 
338 		len = rx_full_count * bytes_per_word;
339 		if (len > t->len - tqspi->cur_pos)
340 			len = t->len - tqspi->cur_pos;
341 		read_bytes = len;
342 		for (count = 0; count < rx_full_count; count++) {
343 			x = tegra_qspi_readl(tqspi, QSPI_RX_FIFO) & rx_mask;
344 
345 			for (i = 0; len && (i < bytes_per_word); i++, len--)
346 				*rx_buf++ = (x >> (i * 8)) & 0xff;
347 		}
348 
349 		read_words += rx_full_count;
350 		tqspi->cur_rx_pos += read_bytes;
351 	}
352 
353 	return read_words;
354 }
355 
356 static void
357 tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
358 {
359 	dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys,
360 				tqspi->dma_buf_size, DMA_TO_DEVICE);
361 
362 	/*
363 	 * In packed mode, each word in FIFO may contain multiple packets
364 	 * based on bits per word. So all bytes in each FIFO word are valid.
365 	 *
366 	 * In unpacked mode, each word in FIFO contains single packet and
367 	 * based on bits per word any remaining bits in FIFO word will be
368 	 * ignored by the hardware and are invalid bits.
369 	 */
370 	if (tqspi->is_packed) {
371 		tqspi->cur_tx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word;
372 	} else {
373 		u8 *tx_buf = (u8 *)t->tx_buf + tqspi->cur_tx_pos;
374 		unsigned int i, count, consume, write_bytes;
375 
376 		/*
377 		 * Fill tx_dma_buf to contain single packet in each word based
378 		 * on bits per word from SPI core tx_buf.
379 		 */
380 		consume = tqspi->curr_dma_words * tqspi->bytes_per_word;
381 		if (consume > t->len - tqspi->cur_pos)
382 			consume = t->len - tqspi->cur_pos;
383 		write_bytes = consume;
384 		for (count = 0; count < tqspi->curr_dma_words; count++) {
385 			u32 x = 0;
386 
387 			for (i = 0; consume && (i < tqspi->bytes_per_word); i++, consume--)
388 				x |= (u32)(*tx_buf++) << (i * 8);
389 			tqspi->tx_dma_buf[count] = x;
390 		}
391 
392 		tqspi->cur_tx_pos += write_bytes;
393 	}
394 
395 	dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys,
396 				   tqspi->dma_buf_size, DMA_TO_DEVICE);
397 }
398 
399 static void
400 tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t)
401 {
402 	dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys,
403 				tqspi->dma_buf_size, DMA_FROM_DEVICE);
404 
405 	if (tqspi->is_packed) {
406 		tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word;
407 	} else {
408 		unsigned char *rx_buf = t->rx_buf + tqspi->cur_rx_pos;
409 		u32 rx_mask = ((u32)1 << t->bits_per_word) - 1;
410 		unsigned int i, count, consume, read_bytes;
411 
412 		/*
413 		 * Each FIFO word contains single data packet.
414 		 * Skip invalid bits in each FIFO word based on bits per word
415 		 * and align bytes while filling in SPI core rx_buf.
416 		 */
417 		consume = tqspi->curr_dma_words * tqspi->bytes_per_word;
418 		if (consume > t->len - tqspi->cur_pos)
419 			consume = t->len - tqspi->cur_pos;
420 		read_bytes = consume;
421 		for (count = 0; count < tqspi->curr_dma_words; count++) {
422 			u32 x = tqspi->rx_dma_buf[count] & rx_mask;
423 
424 			for (i = 0; consume && (i < tqspi->bytes_per_word); i++, consume--)
425 				*rx_buf++ = (x >> (i * 8)) & 0xff;
426 		}
427 
428 		tqspi->cur_rx_pos += read_bytes;
429 	}
430 
431 	dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
432 				   tqspi->dma_buf_size, DMA_FROM_DEVICE);
433 }
434 
/* DMA descriptor callback: @args is the completion to signal. */
static void tegra_qspi_dma_complete(void *args)
{
	complete((struct completion *)args);
}
441 
442 static int tegra_qspi_start_tx_dma(struct tegra_qspi *tqspi, struct spi_transfer *t, int len)
443 {
444 	dma_addr_t tx_dma_phys;
445 
446 	reinit_completion(&tqspi->tx_dma_complete);
447 
448 	if (tqspi->is_packed)
449 		tx_dma_phys = t->tx_dma;
450 	else
451 		tx_dma_phys = tqspi->tx_dma_phys;
452 
453 	tqspi->tx_dma_desc = dmaengine_prep_slave_single(tqspi->tx_dma_chan, tx_dma_phys,
454 							 len, DMA_MEM_TO_DEV,
455 							 DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
456 
457 	if (!tqspi->tx_dma_desc) {
458 		dev_err(tqspi->dev, "Unable to get TX descriptor\n");
459 		return -EIO;
460 	}
461 
462 	tqspi->tx_dma_desc->callback = tegra_qspi_dma_complete;
463 	tqspi->tx_dma_desc->callback_param = &tqspi->tx_dma_complete;
464 	dmaengine_submit(tqspi->tx_dma_desc);
465 	dma_async_issue_pending(tqspi->tx_dma_chan);
466 
467 	return 0;
468 }
469 
470 static int tegra_qspi_start_rx_dma(struct tegra_qspi *tqspi, struct spi_transfer *t, int len)
471 {
472 	dma_addr_t rx_dma_phys;
473 
474 	reinit_completion(&tqspi->rx_dma_complete);
475 
476 	if (tqspi->is_packed)
477 		rx_dma_phys = t->rx_dma;
478 	else
479 		rx_dma_phys = tqspi->rx_dma_phys;
480 
481 	tqspi->rx_dma_desc = dmaengine_prep_slave_single(tqspi->rx_dma_chan, rx_dma_phys,
482 							 len, DMA_DEV_TO_MEM,
483 							 DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
484 
485 	if (!tqspi->rx_dma_desc) {
486 		dev_err(tqspi->dev, "Unable to get RX descriptor\n");
487 		return -EIO;
488 	}
489 
490 	tqspi->rx_dma_desc->callback = tegra_qspi_dma_complete;
491 	tqspi->rx_dma_desc->callback_param = &tqspi->rx_dma_complete;
492 	dmaengine_submit(tqspi->rx_dma_desc);
493 	dma_async_issue_pending(tqspi->rx_dma_chan);
494 
495 	return 0;
496 }
497 
498 static int tegra_qspi_flush_fifos(struct tegra_qspi *tqspi, bool atomic)
499 {
500 	void __iomem *addr = tqspi->base + QSPI_FIFO_STATUS;
501 	u32 val;
502 
503 	val = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
504 	if ((val & QSPI_FIFO_EMPTY) == QSPI_FIFO_EMPTY)
505 		return 0;
506 
507 	val |= QSPI_RX_FIFO_FLUSH | QSPI_TX_FIFO_FLUSH;
508 	tegra_qspi_writel(tqspi, val, QSPI_FIFO_STATUS);
509 
510 	if (!atomic)
511 		return readl_relaxed_poll_timeout(addr, val,
512 						  (val & QSPI_FIFO_EMPTY) == QSPI_FIFO_EMPTY,
513 						  1000, 1000000);
514 
515 	return readl_relaxed_poll_timeout_atomic(addr, val,
516 						 (val & QSPI_FIFO_EMPTY) == QSPI_FIFO_EMPTY,
517 						 1000, 1000000);
518 }
519 
520 static void tegra_qspi_unmask_irq(struct tegra_qspi *tqspi)
521 {
522 	u32 intr_mask;
523 
524 	intr_mask = tegra_qspi_readl(tqspi, QSPI_INTR_MASK);
525 	intr_mask &= ~(QSPI_INTR_RDY_MASK | QSPI_INTR_RX_TX_FIFO_ERR);
526 	tegra_qspi_writel(tqspi, intr_mask, QSPI_INTR_MASK);
527 }
528 
529 static int tegra_qspi_dma_map_xfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
530 {
531 	u8 *tx_buf = (u8 *)t->tx_buf + tqspi->cur_tx_pos;
532 	u8 *rx_buf = (u8 *)t->rx_buf + tqspi->cur_rx_pos;
533 	unsigned int len;
534 
535 	len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
536 
537 	if (t->tx_buf) {
538 		t->tx_dma = dma_map_single(tqspi->dev, (void *)tx_buf, len, DMA_TO_DEVICE);
539 		if (dma_mapping_error(tqspi->dev, t->tx_dma))
540 			return -ENOMEM;
541 	}
542 
543 	if (t->rx_buf) {
544 		t->rx_dma = dma_map_single(tqspi->dev, (void *)rx_buf, len, DMA_FROM_DEVICE);
545 		if (dma_mapping_error(tqspi->dev, t->rx_dma)) {
546 			dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
547 			return -ENOMEM;
548 		}
549 	}
550 
551 	return 0;
552 }
553 
554 static void tegra_qspi_dma_unmap_xfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
555 {
556 	unsigned int len;
557 
558 	len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
559 
560 	dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE);
561 	dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE);
562 }
563 
564 static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct spi_transfer *t)
565 {
566 	struct dma_slave_config dma_sconfig = { 0 };
567 	unsigned int len;
568 	u8 dma_burst;
569 	int ret = 0;
570 	u32 val;
571 
572 	if (tqspi->is_packed) {
573 		ret = tegra_qspi_dma_map_xfer(tqspi, t);
574 		if (ret < 0)
575 			return ret;
576 	}
577 
578 	val = QSPI_DMA_BLK_SET(tqspi->curr_dma_words - 1);
579 	tegra_qspi_writel(tqspi, val, QSPI_DMA_BLK);
580 
581 	tegra_qspi_unmask_irq(tqspi);
582 
583 	if (tqspi->is_packed)
584 		len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
585 	else
586 		len = tqspi->curr_dma_words * 4;
587 
588 	/* set attention level based on length of transfer */
589 	val = 0;
590 	if (len & 0xf) {
591 		val |= QSPI_TX_TRIG_1 | QSPI_RX_TRIG_1;
592 		dma_burst = 1;
593 	} else if (((len) >> 4) & 0x1) {
594 		val |= QSPI_TX_TRIG_4 | QSPI_RX_TRIG_4;
595 		dma_burst = 4;
596 	} else {
597 		val |= QSPI_TX_TRIG_8 | QSPI_RX_TRIG_8;
598 		dma_burst = 8;
599 	}
600 
601 	tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
602 	tqspi->dma_control_reg = val;
603 
604 	dma_sconfig.device_fc = true;
605 	if (tqspi->cur_direction & DATA_DIR_TX) {
606 		dma_sconfig.dst_addr = tqspi->phys + QSPI_TX_FIFO;
607 		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
608 		dma_sconfig.dst_maxburst = dma_burst;
609 		ret = dmaengine_slave_config(tqspi->tx_dma_chan, &dma_sconfig);
610 		if (ret < 0) {
611 			dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
612 			return ret;
613 		}
614 
615 		tegra_qspi_copy_client_txbuf_to_qspi_txbuf(tqspi, t);
616 		ret = tegra_qspi_start_tx_dma(tqspi, t, len);
617 		if (ret < 0) {
618 			dev_err(tqspi->dev, "failed to starting TX DMA: %d\n", ret);
619 			return ret;
620 		}
621 	}
622 
623 	if (tqspi->cur_direction & DATA_DIR_RX) {
624 		dma_sconfig.src_addr = tqspi->phys + QSPI_RX_FIFO;
625 		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
626 		dma_sconfig.src_maxburst = dma_burst;
627 		ret = dmaengine_slave_config(tqspi->rx_dma_chan, &dma_sconfig);
628 		if (ret < 0) {
629 			dev_err(tqspi->dev, "failed DMA slave config: %d\n", ret);
630 			return ret;
631 		}
632 
633 		dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys,
634 					   tqspi->dma_buf_size,
635 					   DMA_FROM_DEVICE);
636 
637 		ret = tegra_qspi_start_rx_dma(tqspi, t, len);
638 		if (ret < 0) {
639 			dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret);
640 			if (tqspi->cur_direction & DATA_DIR_TX)
641 				dmaengine_terminate_all(tqspi->tx_dma_chan);
642 			return ret;
643 		}
644 	}
645 
646 	tegra_qspi_writel(tqspi, tqspi->command1_reg, QSPI_COMMAND1);
647 
648 	tqspi->is_curr_dma_xfer = true;
649 	tqspi->dma_control_reg = val;
650 	val |= QSPI_DMA_EN;
651 	tegra_qspi_writel(tqspi, val, QSPI_DMA_CTL);
652 
653 	return ret;
654 }
655 
656 static int tegra_qspi_start_cpu_based_transfer(struct tegra_qspi *qspi, struct spi_transfer *t)
657 {
658 	u32 val;
659 	unsigned int cur_words;
660 
661 	if (qspi->cur_direction & DATA_DIR_TX)
662 		cur_words = tegra_qspi_fill_tx_fifo_from_client_txbuf(qspi, t);
663 	else
664 		cur_words = qspi->curr_dma_words;
665 
666 	val = QSPI_DMA_BLK_SET(cur_words - 1);
667 	tegra_qspi_writel(qspi, val, QSPI_DMA_BLK);
668 
669 	tegra_qspi_unmask_irq(qspi);
670 
671 	qspi->is_curr_dma_xfer = false;
672 	val = qspi->command1_reg;
673 	val |= QSPI_PIO;
674 	tegra_qspi_writel(qspi, val, QSPI_COMMAND1);
675 
676 	return 0;
677 }
678 
679 static void tegra_qspi_deinit_dma(struct tegra_qspi *tqspi)
680 {
681 	if (tqspi->tx_dma_buf) {
682 		dma_free_coherent(tqspi->dev, tqspi->dma_buf_size,
683 				  tqspi->tx_dma_buf, tqspi->tx_dma_phys);
684 		tqspi->tx_dma_buf = NULL;
685 	}
686 
687 	if (tqspi->tx_dma_chan) {
688 		dma_release_channel(tqspi->tx_dma_chan);
689 		tqspi->tx_dma_chan = NULL;
690 	}
691 
692 	if (tqspi->rx_dma_buf) {
693 		dma_free_coherent(tqspi->dev, tqspi->dma_buf_size,
694 				  tqspi->rx_dma_buf, tqspi->rx_dma_phys);
695 		tqspi->rx_dma_buf = NULL;
696 	}
697 
698 	if (tqspi->rx_dma_chan) {
699 		dma_release_channel(tqspi->rx_dma_chan);
700 		tqspi->rx_dma_chan = NULL;
701 	}
702 }
703 
704 static int tegra_qspi_init_dma(struct tegra_qspi *tqspi)
705 {
706 	struct dma_chan *dma_chan;
707 	dma_addr_t dma_phys;
708 	u32 *dma_buf;
709 	int err;
710 
711 	dma_chan = dma_request_chan(tqspi->dev, "rx");
712 	if (IS_ERR(dma_chan)) {
713 		err = PTR_ERR(dma_chan);
714 		goto err_out;
715 	}
716 
717 	tqspi->rx_dma_chan = dma_chan;
718 
719 	dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
720 	if (!dma_buf) {
721 		err = -ENOMEM;
722 		goto err_out;
723 	}
724 
725 	tqspi->rx_dma_buf = dma_buf;
726 	tqspi->rx_dma_phys = dma_phys;
727 
728 	dma_chan = dma_request_chan(tqspi->dev, "tx");
729 	if (IS_ERR(dma_chan)) {
730 		err = PTR_ERR(dma_chan);
731 		goto err_out;
732 	}
733 
734 	tqspi->tx_dma_chan = dma_chan;
735 
736 	dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL);
737 	if (!dma_buf) {
738 		err = -ENOMEM;
739 		goto err_out;
740 	}
741 
742 	tqspi->tx_dma_buf = dma_buf;
743 	tqspi->tx_dma_phys = dma_phys;
744 	tqspi->use_dma = true;
745 
746 	return 0;
747 
748 err_out:
749 	tegra_qspi_deinit_dma(tqspi);
750 
751 	if (err != -EPROBE_DEFER) {
752 		dev_err(tqspi->dev, "cannot use DMA: %d\n", err);
753 		dev_err(tqspi->dev, "falling back to PIO\n");
754 		return 0;
755 	}
756 
757 	return err;
758 }
759 
760 static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_transfer *t,
761 					 bool is_first_of_msg)
762 {
763 	struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master);
764 	struct tegra_qspi_client_data *cdata = spi->controller_data;
765 	u32 command1, command2, speed = t->speed_hz;
766 	u8 bits_per_word = t->bits_per_word;
767 	u32 tx_tap = 0, rx_tap = 0;
768 	int req_mode;
769 
770 	if (speed != tqspi->cur_speed) {
771 		clk_set_rate(tqspi->clk, speed);
772 		tqspi->cur_speed = speed;
773 	}
774 
775 	tqspi->cur_pos = 0;
776 	tqspi->cur_rx_pos = 0;
777 	tqspi->cur_tx_pos = 0;
778 	tqspi->curr_xfer = t;
779 
780 	if (is_first_of_msg) {
781 		tegra_qspi_mask_clear_irq(tqspi);
782 
783 		command1 = tqspi->def_command1_reg;
784 		command1 |= QSPI_BIT_LENGTH(bits_per_word - 1);
785 
786 		command1 &= ~QSPI_CONTROL_MODE_MASK;
787 		req_mode = spi->mode & 0x3;
788 		if (req_mode == SPI_MODE_3)
789 			command1 |= QSPI_CONTROL_MODE_3;
790 		else
791 			command1 |= QSPI_CONTROL_MODE_0;
792 
793 		if (spi->mode & SPI_CS_HIGH)
794 			command1 |= QSPI_CS_SW_VAL;
795 		else
796 			command1 &= ~QSPI_CS_SW_VAL;
797 		tegra_qspi_writel(tqspi, command1, QSPI_COMMAND1);
798 
799 		if (cdata && cdata->tx_clk_tap_delay)
800 			tx_tap = cdata->tx_clk_tap_delay;
801 
802 		if (cdata && cdata->rx_clk_tap_delay)
803 			rx_tap = cdata->rx_clk_tap_delay;
804 
805 		command2 = QSPI_TX_TAP_DELAY(tx_tap) | QSPI_RX_TAP_DELAY(rx_tap);
806 		if (command2 != tqspi->def_command2_reg)
807 			tegra_qspi_writel(tqspi, command2, QSPI_COMMAND2);
808 
809 	} else {
810 		command1 = tqspi->command1_reg;
811 		command1 &= ~QSPI_BIT_LENGTH(~0);
812 		command1 |= QSPI_BIT_LENGTH(bits_per_word - 1);
813 	}
814 
815 	command1 &= ~QSPI_SDR_DDR_SEL;
816 
817 	return command1;
818 }
819 
820 static int tegra_qspi_start_transfer_one(struct spi_device *spi,
821 					 struct spi_transfer *t, u32 command1)
822 {
823 	struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master);
824 	unsigned int total_fifo_words;
825 	u8 bus_width = 0;
826 	int ret;
827 
828 	total_fifo_words = tegra_qspi_calculate_curr_xfer_param(tqspi, t);
829 
830 	command1 &= ~QSPI_PACKED;
831 	if (tqspi->is_packed)
832 		command1 |= QSPI_PACKED;
833 	tegra_qspi_writel(tqspi, command1, QSPI_COMMAND1);
834 
835 	tqspi->cur_direction = 0;
836 
837 	command1 &= ~(QSPI_TX_EN | QSPI_RX_EN);
838 	if (t->rx_buf) {
839 		command1 |= QSPI_RX_EN;
840 		tqspi->cur_direction |= DATA_DIR_RX;
841 		bus_width = t->rx_nbits;
842 	}
843 
844 	if (t->tx_buf) {
845 		command1 |= QSPI_TX_EN;
846 		tqspi->cur_direction |= DATA_DIR_TX;
847 		bus_width = t->tx_nbits;
848 	}
849 
850 	command1 &= ~QSPI_INTERFACE_WIDTH_MASK;
851 
852 	if (bus_width == SPI_NBITS_QUAD)
853 		command1 |= QSPI_INTERFACE_WIDTH_QUAD;
854 	else if (bus_width == SPI_NBITS_DUAL)
855 		command1 |= QSPI_INTERFACE_WIDTH_DUAL;
856 	else
857 		command1 |= QSPI_INTERFACE_WIDTH_SINGLE;
858 
859 	tqspi->command1_reg = command1;
860 
861 	tegra_qspi_writel(tqspi, QSPI_NUM_DUMMY_CYCLE(tqspi->dummy_cycles), QSPI_MISC_REG);
862 
863 	ret = tegra_qspi_flush_fifos(tqspi, false);
864 	if (ret < 0)
865 		return ret;
866 
867 	if (tqspi->use_dma && total_fifo_words > QSPI_FIFO_DEPTH)
868 		ret = tegra_qspi_start_dma_based_transfer(tqspi, t);
869 	else
870 		ret = tegra_qspi_start_cpu_based_transfer(tqspi, t);
871 
872 	return ret;
873 }
874 
875 static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi)
876 {
877 	struct tegra_qspi_client_data *cdata;
878 	struct device_node *slave_np = spi->dev.of_node;
879 
880 	cdata = kzalloc(sizeof(*cdata), GFP_KERNEL);
881 	if (!cdata)
882 		return NULL;
883 
884 	of_property_read_u32(slave_np, "nvidia,tx-clk-tap-delay",
885 			     &cdata->tx_clk_tap_delay);
886 	of_property_read_u32(slave_np, "nvidia,rx-clk-tap-delay",
887 			     &cdata->rx_clk_tap_delay);
888 	return cdata;
889 }
890 
891 static void tegra_qspi_cleanup(struct spi_device *spi)
892 {
893 	struct tegra_qspi_client_data *cdata = spi->controller_data;
894 
895 	spi->controller_data = NULL;
896 	kfree(cdata);
897 }
898 
899 static int tegra_qspi_setup(struct spi_device *spi)
900 {
901 	struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master);
902 	struct tegra_qspi_client_data *cdata = spi->controller_data;
903 	unsigned long flags;
904 	u32 val;
905 	int ret;
906 
907 	ret = pm_runtime_resume_and_get(tqspi->dev);
908 	if (ret < 0) {
909 		dev_err(tqspi->dev, "failed to get runtime PM: %d\n", ret);
910 		return ret;
911 	}
912 
913 	if (!cdata) {
914 		cdata = tegra_qspi_parse_cdata_dt(spi);
915 		spi->controller_data = cdata;
916 	}
917 
918 	spin_lock_irqsave(&tqspi->lock, flags);
919 
920 	/* keep default cs state to inactive */
921 	val = tqspi->def_command1_reg;
922 	if (spi->mode & SPI_CS_HIGH)
923 		val &= ~QSPI_CS_SW_VAL;
924 	else
925 		val |= QSPI_CS_SW_VAL;
926 
927 	tqspi->def_command1_reg = val;
928 	tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
929 
930 	spin_unlock_irqrestore(&tqspi->lock, flags);
931 
932 	pm_runtime_put(tqspi->dev);
933 
934 	return 0;
935 }
936 
937 static void tegra_qspi_dump_regs(struct tegra_qspi *tqspi)
938 {
939 	dev_dbg(tqspi->dev, "============ QSPI REGISTER DUMP ============\n");
940 	dev_dbg(tqspi->dev, "Command1:    0x%08x | Command2:    0x%08x\n",
941 		tegra_qspi_readl(tqspi, QSPI_COMMAND1),
942 		tegra_qspi_readl(tqspi, QSPI_COMMAND2));
943 	dev_dbg(tqspi->dev, "DMA_CTL:     0x%08x | DMA_BLK:     0x%08x\n",
944 		tegra_qspi_readl(tqspi, QSPI_DMA_CTL),
945 		tegra_qspi_readl(tqspi, QSPI_DMA_BLK));
946 	dev_dbg(tqspi->dev, "INTR_MASK:  0x%08x | MISC: 0x%08x\n",
947 		tegra_qspi_readl(tqspi, QSPI_INTR_MASK),
948 		tegra_qspi_readl(tqspi, QSPI_MISC_REG));
949 	dev_dbg(tqspi->dev, "TRANS_STAT:  0x%08x | FIFO_STATUS: 0x%08x\n",
950 		tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS),
951 		tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS));
952 }
953 
954 static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
955 {
956 	dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
957 	tegra_qspi_dump_regs(tqspi);
958 	tegra_qspi_flush_fifos(tqspi, true);
959 	reset_control_assert(tqspi->rst);
960 	udelay(2);
961 	reset_control_deassert(tqspi->rst);
962 }
963 
964 static void tegra_qspi_transfer_end(struct spi_device *spi)
965 {
966 	struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master);
967 	int cs_val = (spi->mode & SPI_CS_HIGH) ? 0 : 1;
968 
969 	if (cs_val)
970 		tqspi->command1_reg |= QSPI_CS_SW_VAL;
971 	else
972 		tqspi->command1_reg &= ~QSPI_CS_SW_VAL;
973 	tegra_qspi_writel(tqspi, tqspi->command1_reg, QSPI_COMMAND1);
974 	tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
975 }
976 
977 static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi_message *msg)
978 {
979 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
980 	struct spi_device *spi = msg->spi;
981 	struct spi_transfer *transfer;
982 	bool is_first_msg = true;
983 	int ret;
984 
985 	msg->status = 0;
986 	msg->actual_length = 0;
987 	tqspi->tx_status = 0;
988 	tqspi->rx_status = 0;
989 
990 	list_for_each_entry(transfer, &msg->transfers, transfer_list) {
991 		struct spi_transfer *xfer = transfer;
992 		u8 dummy_bytes = 0;
993 		u32 cmd1;
994 
995 		tqspi->dummy_cycles = 0;
996 		/*
997 		 * Tegra QSPI hardware supports dummy bytes transfer after actual transfer
998 		 * bytes based on programmed dummy clock cycles in the QSPI_MISC register.
999 		 * So, check if the next transfer is dummy data transfer and program dummy
1000 		 * clock cycles along with the current transfer and skip next transfer.
1001 		 */
1002 		if (!list_is_last(&xfer->transfer_list, &msg->transfers)) {
1003 			struct spi_transfer *next_xfer;
1004 
1005 			next_xfer = list_next_entry(xfer, transfer_list);
1006 			if (next_xfer->dummy_data) {
1007 				u32 dummy_cycles = next_xfer->len * 8 / next_xfer->tx_nbits;
1008 
1009 				if (dummy_cycles <= QSPI_DUMMY_CYCLES_MAX) {
1010 					tqspi->dummy_cycles = dummy_cycles;
1011 					dummy_bytes = next_xfer->len;
1012 					transfer = next_xfer;
1013 				}
1014 			}
1015 		}
1016 
1017 		reinit_completion(&tqspi->xfer_completion);
1018 
1019 		cmd1 = tegra_qspi_setup_transfer_one(spi, xfer, is_first_msg);
1020 
1021 		ret = tegra_qspi_start_transfer_one(spi, xfer, cmd1);
1022 		if (ret < 0) {
1023 			dev_err(tqspi->dev, "failed to start transfer: %d\n", ret);
1024 			goto complete_xfer;
1025 		}
1026 
1027 		is_first_msg = false;
1028 		ret = wait_for_completion_timeout(&tqspi->xfer_completion,
1029 						  QSPI_DMA_TIMEOUT);
1030 		if (WARN_ON(ret == 0)) {
1031 			dev_err(tqspi->dev, "transfer timeout: %d\n", ret);
1032 			if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX))
1033 				dmaengine_terminate_all(tqspi->tx_dma_chan);
1034 			if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_RX))
1035 				dmaengine_terminate_all(tqspi->rx_dma_chan);
1036 			tegra_qspi_handle_error(tqspi);
1037 			ret = -EIO;
1038 			goto complete_xfer;
1039 		}
1040 
1041 		if (tqspi->tx_status ||  tqspi->rx_status) {
1042 			tegra_qspi_handle_error(tqspi);
1043 			ret = -EIO;
1044 			goto complete_xfer;
1045 		}
1046 
1047 		msg->actual_length += xfer->len + dummy_bytes;
1048 
1049 complete_xfer:
1050 		if (ret < 0) {
1051 			tegra_qspi_transfer_end(spi);
1052 			spi_transfer_delay_exec(xfer);
1053 			goto exit;
1054 		}
1055 
1056 		if (list_is_last(&xfer->transfer_list, &msg->transfers)) {
1057 			/* de-activate CS after last transfer only when cs_change is not set */
1058 			if (!xfer->cs_change) {
1059 				tegra_qspi_transfer_end(spi);
1060 				spi_transfer_delay_exec(xfer);
1061 			}
1062 		} else if (xfer->cs_change) {
1063 			 /* de-activated CS between the transfers only when cs_change is set */
1064 			tegra_qspi_transfer_end(spi);
1065 			spi_transfer_delay_exec(xfer);
1066 		}
1067 	}
1068 
1069 	ret = 0;
1070 exit:
1071 	msg->status = ret;
1072 	spi_finalize_current_message(master);
1073 	return ret;
1074 }
1075 
1076 static irqreturn_t handle_cpu_based_xfer(struct tegra_qspi *tqspi)
1077 {
1078 	struct spi_transfer *t = tqspi->curr_xfer;
1079 	unsigned long flags;
1080 
1081 	spin_lock_irqsave(&tqspi->lock, flags);
1082 
1083 	if (tqspi->tx_status ||  tqspi->rx_status) {
1084 		tegra_qspi_handle_error(tqspi);
1085 		complete(&tqspi->xfer_completion);
1086 		goto exit;
1087 	}
1088 
1089 	if (tqspi->cur_direction & DATA_DIR_RX)
1090 		tegra_qspi_read_rx_fifo_to_client_rxbuf(tqspi, t);
1091 
1092 	if (tqspi->cur_direction & DATA_DIR_TX)
1093 		tqspi->cur_pos = tqspi->cur_tx_pos;
1094 	else
1095 		tqspi->cur_pos = tqspi->cur_rx_pos;
1096 
1097 	if (tqspi->cur_pos == t->len) {
1098 		complete(&tqspi->xfer_completion);
1099 		goto exit;
1100 	}
1101 
1102 	tegra_qspi_calculate_curr_xfer_param(tqspi, t);
1103 	tegra_qspi_start_cpu_based_transfer(tqspi, t);
1104 exit:
1105 	spin_unlock_irqrestore(&tqspi->lock, flags);
1106 	return IRQ_HANDLED;
1107 }
1108 
1109 static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi)
1110 {
1111 	struct spi_transfer *t = tqspi->curr_xfer;
1112 	unsigned int total_fifo_words;
1113 	unsigned long flags;
1114 	long wait_status;
1115 	int err = 0;
1116 
1117 	if (tqspi->cur_direction & DATA_DIR_TX) {
1118 		if (tqspi->tx_status) {
1119 			dmaengine_terminate_all(tqspi->tx_dma_chan);
1120 			err += 1;
1121 		} else {
1122 			wait_status = wait_for_completion_interruptible_timeout(
1123 				&tqspi->tx_dma_complete, QSPI_DMA_TIMEOUT);
1124 			if (wait_status <= 0) {
1125 				dmaengine_terminate_all(tqspi->tx_dma_chan);
1126 				dev_err(tqspi->dev, "failed TX DMA transfer\n");
1127 				err += 1;
1128 			}
1129 		}
1130 	}
1131 
1132 	if (tqspi->cur_direction & DATA_DIR_RX) {
1133 		if (tqspi->rx_status) {
1134 			dmaengine_terminate_all(tqspi->rx_dma_chan);
1135 			err += 2;
1136 		} else {
1137 			wait_status = wait_for_completion_interruptible_timeout(
1138 				&tqspi->rx_dma_complete, QSPI_DMA_TIMEOUT);
1139 			if (wait_status <= 0) {
1140 				dmaengine_terminate_all(tqspi->rx_dma_chan);
1141 				dev_err(tqspi->dev, "failed RX DMA transfer\n");
1142 				err += 2;
1143 			}
1144 		}
1145 	}
1146 
1147 	spin_lock_irqsave(&tqspi->lock, flags);
1148 
1149 	if (err) {
1150 		tegra_qspi_dma_unmap_xfer(tqspi, t);
1151 		tegra_qspi_handle_error(tqspi);
1152 		complete(&tqspi->xfer_completion);
1153 		goto exit;
1154 	}
1155 
1156 	if (tqspi->cur_direction & DATA_DIR_RX)
1157 		tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(tqspi, t);
1158 
1159 	if (tqspi->cur_direction & DATA_DIR_TX)
1160 		tqspi->cur_pos = tqspi->cur_tx_pos;
1161 	else
1162 		tqspi->cur_pos = tqspi->cur_rx_pos;
1163 
1164 	if (tqspi->cur_pos == t->len) {
1165 		tegra_qspi_dma_unmap_xfer(tqspi, t);
1166 		complete(&tqspi->xfer_completion);
1167 		goto exit;
1168 	}
1169 
1170 	tegra_qspi_dma_unmap_xfer(tqspi, t);
1171 
1172 	/* continue transfer in current message */
1173 	total_fifo_words = tegra_qspi_calculate_curr_xfer_param(tqspi, t);
1174 	if (total_fifo_words > QSPI_FIFO_DEPTH)
1175 		err = tegra_qspi_start_dma_based_transfer(tqspi, t);
1176 	else
1177 		err = tegra_qspi_start_cpu_based_transfer(tqspi, t);
1178 
1179 exit:
1180 	spin_unlock_irqrestore(&tqspi->lock, flags);
1181 	return IRQ_HANDLED;
1182 }
1183 
1184 static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
1185 {
1186 	struct tegra_qspi *tqspi = context_data;
1187 
1188 	tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
1189 
1190 	if (tqspi->cur_direction & DATA_DIR_TX)
1191 		tqspi->tx_status = tqspi->status_reg & (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF);
1192 
1193 	if (tqspi->cur_direction & DATA_DIR_RX)
1194 		tqspi->rx_status = tqspi->status_reg & (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF);
1195 
1196 	tegra_qspi_mask_clear_irq(tqspi);
1197 
1198 	if (!tqspi->is_curr_dma_xfer)
1199 		return handle_cpu_based_xfer(tqspi);
1200 
1201 	return handle_dma_based_xfer(tqspi);
1202 }
1203 
1204 static const struct of_device_id tegra_qspi_of_match[] = {
1205 	{ .compatible = "nvidia,tegra210-qspi", },
1206 	{ .compatible = "nvidia,tegra186-qspi", },
1207 	{ .compatible = "nvidia,tegra194-qspi", },
1208 	{}
1209 };
1210 
1211 MODULE_DEVICE_TABLE(of, tegra_qspi_of_match);
1212 
1213 static int tegra_qspi_probe(struct platform_device *pdev)
1214 {
1215 	struct spi_master	*master;
1216 	struct tegra_qspi	*tqspi;
1217 	struct resource		*r;
1218 	int ret, qspi_irq;
1219 	int bus_num;
1220 
1221 	master = devm_spi_alloc_master(&pdev->dev, sizeof(*tqspi));
1222 	if (!master)
1223 		return -ENOMEM;
1224 
1225 	platform_set_drvdata(pdev, master);
1226 	tqspi = spi_master_get_devdata(master);
1227 
1228 	master->mode_bits = SPI_MODE_0 | SPI_MODE_3 | SPI_CS_HIGH |
1229 			    SPI_TX_DUAL | SPI_RX_DUAL | SPI_TX_QUAD | SPI_RX_QUAD;
1230 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
1231 	master->setup = tegra_qspi_setup;
1232 	master->cleanup = tegra_qspi_cleanup;
1233 	master->transfer_one_message = tegra_qspi_transfer_one_message;
1234 	master->num_chipselect = 1;
1235 	master->auto_runtime_pm = true;
1236 
1237 	bus_num = of_alias_get_id(pdev->dev.of_node, "spi");
1238 	if (bus_num >= 0)
1239 		master->bus_num = bus_num;
1240 
1241 	tqspi->master = master;
1242 	tqspi->dev = &pdev->dev;
1243 	spin_lock_init(&tqspi->lock);
1244 
1245 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1246 	tqspi->base = devm_ioremap_resource(&pdev->dev, r);
1247 	if (IS_ERR(tqspi->base))
1248 		return PTR_ERR(tqspi->base);
1249 
1250 	tqspi->phys = r->start;
1251 	qspi_irq = platform_get_irq(pdev, 0);
1252 	tqspi->irq = qspi_irq;
1253 
1254 	tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
1255 	if (IS_ERR(tqspi->clk)) {
1256 		ret = PTR_ERR(tqspi->clk);
1257 		dev_err(&pdev->dev, "failed to get clock: %d\n", ret);
1258 		return ret;
1259 	}
1260 
1261 	tqspi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
1262 	if (IS_ERR(tqspi->rst)) {
1263 		ret = PTR_ERR(tqspi->rst);
1264 		dev_err(&pdev->dev, "failed to get reset control: %d\n", ret);
1265 		return ret;
1266 	}
1267 
1268 	tqspi->max_buf_size = QSPI_FIFO_DEPTH << 2;
1269 	tqspi->dma_buf_size = DEFAULT_QSPI_DMA_BUF_LEN;
1270 
1271 	ret = tegra_qspi_init_dma(tqspi);
1272 	if (ret < 0)
1273 		return ret;
1274 
1275 	if (tqspi->use_dma)
1276 		tqspi->max_buf_size = tqspi->dma_buf_size;
1277 
1278 	init_completion(&tqspi->tx_dma_complete);
1279 	init_completion(&tqspi->rx_dma_complete);
1280 	init_completion(&tqspi->xfer_completion);
1281 
1282 	pm_runtime_enable(&pdev->dev);
1283 	ret = pm_runtime_resume_and_get(&pdev->dev);
1284 	if (ret < 0) {
1285 		dev_err(&pdev->dev, "failed to get runtime PM: %d\n", ret);
1286 		goto exit_pm_disable;
1287 	}
1288 
1289 	reset_control_assert(tqspi->rst);
1290 	udelay(2);
1291 	reset_control_deassert(tqspi->rst);
1292 
1293 	tqspi->def_command1_reg = QSPI_M_S | QSPI_CS_SW_HW |  QSPI_CS_SW_VAL;
1294 	tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
1295 	tqspi->spi_cs_timing1 = tegra_qspi_readl(tqspi, QSPI_CS_TIMING1);
1296 	tqspi->spi_cs_timing2 = tegra_qspi_readl(tqspi, QSPI_CS_TIMING2);
1297 	tqspi->def_command2_reg = tegra_qspi_readl(tqspi, QSPI_COMMAND2);
1298 
1299 	pm_runtime_put(&pdev->dev);
1300 
1301 	ret = request_threaded_irq(tqspi->irq, NULL,
1302 				   tegra_qspi_isr_thread, IRQF_ONESHOT,
1303 				   dev_name(&pdev->dev), tqspi);
1304 	if (ret < 0) {
1305 		dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", tqspi->irq, ret);
1306 		goto exit_pm_disable;
1307 	}
1308 
1309 	master->dev.of_node = pdev->dev.of_node;
1310 	ret = spi_register_master(master);
1311 	if (ret < 0) {
1312 		dev_err(&pdev->dev, "failed to register master: %d\n", ret);
1313 		goto exit_free_irq;
1314 	}
1315 
1316 	return 0;
1317 
1318 exit_free_irq:
1319 	free_irq(qspi_irq, tqspi);
1320 exit_pm_disable:
1321 	pm_runtime_disable(&pdev->dev);
1322 	tegra_qspi_deinit_dma(tqspi);
1323 	return ret;
1324 }
1325 
1326 static int tegra_qspi_remove(struct platform_device *pdev)
1327 {
1328 	struct spi_master *master = platform_get_drvdata(pdev);
1329 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
1330 
1331 	spi_unregister_master(master);
1332 	free_irq(tqspi->irq, tqspi);
1333 	pm_runtime_disable(&pdev->dev);
1334 	tegra_qspi_deinit_dma(tqspi);
1335 
1336 	return 0;
1337 }
1338 
1339 static int __maybe_unused tegra_qspi_suspend(struct device *dev)
1340 {
1341 	struct spi_master *master = dev_get_drvdata(dev);
1342 
1343 	return spi_master_suspend(master);
1344 }
1345 
1346 static int __maybe_unused tegra_qspi_resume(struct device *dev)
1347 {
1348 	struct spi_master *master = dev_get_drvdata(dev);
1349 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
1350 	int ret;
1351 
1352 	ret = pm_runtime_resume_and_get(dev);
1353 	if (ret < 0) {
1354 		dev_err(dev, "failed to get runtime PM: %d\n", ret);
1355 		return ret;
1356 	}
1357 
1358 	tegra_qspi_writel(tqspi, tqspi->command1_reg, QSPI_COMMAND1);
1359 	tegra_qspi_writel(tqspi, tqspi->def_command2_reg, QSPI_COMMAND2);
1360 	pm_runtime_put(dev);
1361 
1362 	return spi_master_resume(master);
1363 }
1364 
1365 static int __maybe_unused tegra_qspi_runtime_suspend(struct device *dev)
1366 {
1367 	struct spi_master *master = dev_get_drvdata(dev);
1368 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
1369 
1370 	/* flush all write which are in PPSB queue by reading back */
1371 	tegra_qspi_readl(tqspi, QSPI_COMMAND1);
1372 
1373 	clk_disable_unprepare(tqspi->clk);
1374 
1375 	return 0;
1376 }
1377 
1378 static int __maybe_unused tegra_qspi_runtime_resume(struct device *dev)
1379 {
1380 	struct spi_master *master = dev_get_drvdata(dev);
1381 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
1382 	int ret;
1383 
1384 	ret = clk_prepare_enable(tqspi->clk);
1385 	if (ret < 0)
1386 		dev_err(tqspi->dev, "failed to enable clock: %d\n", ret);
1387 
1388 	return ret;
1389 }
1390 
/*
 * Power-management callbacks: runtime suspend/resume (no idle callback) and
 * system sleep suspend/resume.
 */
static const struct dev_pm_ops tegra_qspi_pm_ops = {
	SET_RUNTIME_PM_OPS(tegra_qspi_runtime_suspend, tegra_qspi_runtime_resume, NULL)
	SET_SYSTEM_SLEEP_PM_OPS(tegra_qspi_suspend, tegra_qspi_resume)
};
1395 
/* Platform driver glue: ties probe/remove, PM ops and the OF match table together. */
static struct platform_driver tegra_qspi_driver = {
	.driver = {
		.name		= "tegra-qspi",
		.pm		= &tegra_qspi_pm_ops,
		.of_match_table	= tegra_qspi_of_match,
	},
	.probe =	tegra_qspi_probe,
	.remove =	tegra_qspi_remove,
};
module_platform_driver(tegra_qspi_driver);

/* NOTE(review): alias is "qspi-tegra" while the driver .name is "tegra-qspi" - confirm intended */
MODULE_ALIAS("platform:qspi-tegra");
MODULE_DESCRIPTION("NVIDIA Tegra QSPI Controller Driver");
MODULE_AUTHOR("Sowjanya Komatineni <skomatineni@nvidia.com>");
MODULE_LICENSE("GPL v2");
1411