1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/iopoll.h>
24 #include <linux/module.h>
25 #include <linux/msi.h>
26 #include <linux/of.h>
27 #include <linux/of_address.h>
28 #include <linux/of_iommu.h>
29 #include <linux/of_platform.h>
30 #include <linux/pci.h>
31 #include <linux/pci-ats.h>
32 #include <linux/platform_device.h>
33 
34 #include <linux/amba/bus.h>
35 
36 /* MMIO registers */
37 #define ARM_SMMU_IDR0			0x0
38 #define IDR0_ST_LVL			GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL		1
40 #define IDR0_STALL_MODEL		GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL		0
42 #define IDR0_STALL_MODEL_FORCE		2
43 #define IDR0_TTENDIAN			GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED		0
45 #define IDR0_TTENDIAN_LE		2
46 #define IDR0_TTENDIAN_BE		3
47 #define IDR0_CD2L			(1 << 19)
48 #define IDR0_VMID16			(1 << 18)
49 #define IDR0_PRI			(1 << 16)
50 #define IDR0_SEV			(1 << 14)
51 #define IDR0_MSI			(1 << 13)
52 #define IDR0_ASID16			(1 << 12)
53 #define IDR0_ATS			(1 << 10)
54 #define IDR0_HYP			(1 << 9)
55 #define IDR0_COHACC			(1 << 4)
56 #define IDR0_TTF			GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64		2
58 #define IDR0_TTF_AARCH32_64		3
59 #define IDR0_S1P			(1 << 1)
60 #define IDR0_S2P			(1 << 0)
61 
62 #define ARM_SMMU_IDR1			0x4
63 #define IDR1_TABLES_PRESET		(1 << 30)
64 #define IDR1_QUEUES_PRESET		(1 << 29)
65 #define IDR1_REL			(1 << 28)
66 #define IDR1_CMDQS			GENMASK(25, 21)
67 #define IDR1_EVTQS			GENMASK(20, 16)
68 #define IDR1_PRIQS			GENMASK(15, 11)
69 #define IDR1_SSIDSIZE			GENMASK(10, 6)
70 #define IDR1_SIDSIZE			GENMASK(5, 0)
71 
72 #define ARM_SMMU_IDR3			0xc
73 #define IDR3_RIL			(1 << 10)
74 
75 #define ARM_SMMU_IDR5			0x14
76 #define IDR5_STALL_MAX			GENMASK(31, 16)
77 #define IDR5_GRAN64K			(1 << 6)
78 #define IDR5_GRAN16K			(1 << 5)
79 #define IDR5_GRAN4K			(1 << 4)
80 #define IDR5_OAS			GENMASK(2, 0)
81 #define IDR5_OAS_32_BIT			0
82 #define IDR5_OAS_36_BIT			1
83 #define IDR5_OAS_40_BIT			2
84 #define IDR5_OAS_42_BIT			3
85 #define IDR5_OAS_44_BIT			4
86 #define IDR5_OAS_48_BIT			5
87 #define IDR5_OAS_52_BIT			6
88 #define IDR5_VAX			GENMASK(11, 10)
89 #define IDR5_VAX_52_BIT			1
90 
91 #define ARM_SMMU_CR0			0x20
92 #define CR0_ATSCHK			(1 << 4)
93 #define CR0_CMDQEN			(1 << 3)
94 #define CR0_EVTQEN			(1 << 2)
95 #define CR0_PRIQEN			(1 << 1)
96 #define CR0_SMMUEN			(1 << 0)
97 
98 #define ARM_SMMU_CR0ACK			0x24
99 
100 #define ARM_SMMU_CR1			0x28
101 #define CR1_TABLE_SH			GENMASK(11, 10)
102 #define CR1_TABLE_OC			GENMASK(9, 8)
103 #define CR1_TABLE_IC			GENMASK(7, 6)
104 #define CR1_QUEUE_SH			GENMASK(5, 4)
105 #define CR1_QUEUE_OC			GENMASK(3, 2)
106 #define CR1_QUEUE_IC			GENMASK(1, 0)
107 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
108 #define CR1_CACHE_NC			0
109 #define CR1_CACHE_WB			1
110 #define CR1_CACHE_WT			2
111 
112 #define ARM_SMMU_CR2			0x2c
113 #define CR2_PTM				(1 << 2)
114 #define CR2_RECINVSID			(1 << 1)
115 #define CR2_E2H				(1 << 0)
116 
117 #define ARM_SMMU_GBPA			0x44
118 #define GBPA_UPDATE			(1 << 31)
119 #define GBPA_ABORT			(1 << 20)
120 
121 #define ARM_SMMU_IRQ_CTRL		0x50
122 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
123 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
124 #define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
125 
126 #define ARM_SMMU_IRQ_CTRLACK		0x54
127 
128 #define ARM_SMMU_GERROR			0x60
129 #define GERROR_SFM_ERR			(1 << 8)
130 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
131 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
132 #define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
133 #define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
134 #define GERROR_PRIQ_ABT_ERR		(1 << 3)
135 #define GERROR_EVTQ_ABT_ERR		(1 << 2)
136 #define GERROR_CMDQ_ERR			(1 << 0)
137 #define GERROR_ERR_MASK			0xfd
138 
139 #define ARM_SMMU_GERRORN		0x64
140 
141 #define ARM_SMMU_GERROR_IRQ_CFG0	0x68
142 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
143 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
144 
145 #define ARM_SMMU_STRTAB_BASE		0x80
146 #define STRTAB_BASE_RA			(1UL << 62)
147 #define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
148 
149 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
150 #define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
151 #define STRTAB_BASE_CFG_FMT_LINEAR	0
152 #define STRTAB_BASE_CFG_FMT_2LVL	1
153 #define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
154 #define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)
155 
156 #define ARM_SMMU_CMDQ_BASE		0x90
157 #define ARM_SMMU_CMDQ_PROD		0x98
158 #define ARM_SMMU_CMDQ_CONS		0x9c
159 
160 #define ARM_SMMU_EVTQ_BASE		0xa0
161 #define ARM_SMMU_EVTQ_PROD		0x100a8
162 #define ARM_SMMU_EVTQ_CONS		0x100ac
163 #define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
164 #define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
165 #define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
166 
167 #define ARM_SMMU_PRIQ_BASE		0xc0
168 #define ARM_SMMU_PRIQ_PROD		0x100c8
169 #define ARM_SMMU_PRIQ_CONS		0x100cc
170 #define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
171 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
172 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
173 
174 #define ARM_SMMU_REG_SZ			0xe00
175 
176 /* Common MSI config fields */
177 #define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
178 #define MSI_CFG2_SH			GENMASK(5, 4)
179 #define MSI_CFG2_MEMATTR		GENMASK(3, 0)
180 
181 /* Common memory attribute values */
182 #define ARM_SMMU_SH_NSH			0
183 #define ARM_SMMU_SH_OSH			2
184 #define ARM_SMMU_SH_ISH			3
185 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
186 #define ARM_SMMU_MEMATTR_OIWB		0xf
187 
188 #define Q_IDX(llq, p)			((p) & ((1 << (llq)->max_n_shift) - 1))
189 #define Q_WRP(llq, p)			((p) & (1 << (llq)->max_n_shift))
190 #define Q_OVERFLOW_FLAG			(1U << 31)
191 #define Q_OVF(p)			((p) & Q_OVERFLOW_FLAG)
192 #define Q_ENT(q, p)			((q)->base +			\
193 					 Q_IDX(&((q)->llq), p) *	\
194 					 (q)->ent_dwords)
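
/*
 * Worked example (illustrative only): with max_n_shift == 8, Q_IDX()
 * masks bits [7:0] of the pointer, Q_WRP() extracts the wrap bit (bit 8)
 * and Q_OVF() the overflow flag (bit 31), so a prod value of 0x105
 * decodes to index 5 with the wrap bit set.
 */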
195 
196 #define Q_BASE_RWA			(1UL << 62)
197 #define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
198 #define Q_BASE_LOG2SIZE			GENMASK(4, 0)
199 
200 /* Ensure DMA allocations are naturally aligned */
201 #ifdef CONFIG_CMA_ALIGNMENT
202 #define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
203 #else
204 #define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + MAX_ORDER - 1)
205 #endif
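
/*
 * This bounds each queue to the largest allocation the DMA layer can
 * provide in one contiguous chunk. With common defaults (4K pages and
 * CONFIG_CMA_ALIGNMENT == 8) that works out to a 1MB queue, though the
 * exact limit depends on the kernel configuration.
 */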
206 
207 /*
208  * Stream table.
209  *
210  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
211  * 2lvl: 128k L1 entries,
212  *       256 lazy entries per table (each table covers a PCI bus)
213  */
214 #define STRTAB_L1_SZ_SHIFT		20
215 #define STRTAB_SPLIT			8
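
/*
 * Illustrative arithmetic: L1 descriptors are 8 bytes each, so a
 * (1 << STRTAB_L1_SZ_SHIFT) byte L1 table holds the 128k entries noted
 * above, and STRTAB_SPLIT == 8 gives 256 STEs per L2 table, i.e. one
 * lazily-allocated table per PCI bus.
 */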
216 
217 #define STRTAB_L1_DESC_DWORDS		1
218 #define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
219 #define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
220 
221 #define STRTAB_STE_DWORDS		8
222 #define STRTAB_STE_0_V			(1UL << 0)
223 #define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
224 #define STRTAB_STE_0_CFG_ABORT		0
225 #define STRTAB_STE_0_CFG_BYPASS		4
226 #define STRTAB_STE_0_CFG_S1_TRANS	5
227 #define STRTAB_STE_0_CFG_S2_TRANS	6
228 
229 #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
230 #define STRTAB_STE_0_S1FMT_LINEAR	0
231 #define STRTAB_STE_0_S1FMT_64K_L2	2
232 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
233 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
234 
235 #define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
236 #define STRTAB_STE_1_S1DSS_TERMINATE	0x0
237 #define STRTAB_STE_1_S1DSS_BYPASS	0x1
238 #define STRTAB_STE_1_S1DSS_SSID0	0x2
239 
240 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
241 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
242 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
243 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
244 #define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
245 #define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
246 #define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)
247 
248 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
249 
250 #define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
251 #define STRTAB_STE_1_EATS_ABT		0UL
252 #define STRTAB_STE_1_EATS_TRANS		1UL
253 #define STRTAB_STE_1_EATS_S1CHK		2UL
254 
255 #define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
256 #define STRTAB_STE_1_STRW_NSEL1		0UL
257 #define STRTAB_STE_1_STRW_EL2		2UL
258 
259 #define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
260 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
261 
262 #define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
263 #define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
264 #define STRTAB_STE_2_VTCR_S2T0SZ	GENMASK_ULL(5, 0)
265 #define STRTAB_STE_2_VTCR_S2SL0		GENMASK_ULL(7, 6)
266 #define STRTAB_STE_2_VTCR_S2IR0		GENMASK_ULL(9, 8)
267 #define STRTAB_STE_2_VTCR_S2OR0		GENMASK_ULL(11, 10)
268 #define STRTAB_STE_2_VTCR_S2SH0		GENMASK_ULL(13, 12)
269 #define STRTAB_STE_2_VTCR_S2TG		GENMASK_ULL(15, 14)
270 #define STRTAB_STE_2_VTCR_S2PS		GENMASK_ULL(18, 16)
271 #define STRTAB_STE_2_S2AA64		(1UL << 51)
272 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
273 #define STRTAB_STE_2_S2PTW		(1UL << 54)
274 #define STRTAB_STE_2_S2R		(1UL << 58)
275 
276 #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
277 
278 /*
279  * Context descriptors.
280  *
281  * Linear: when less than 1024 SSIDs are supported
282  * 2lvl: at most 1024 L1 entries,
283  *       1024 lazy entries per table.
284  */
285 #define CTXDESC_SPLIT			10
286 #define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
287 
288 #define CTXDESC_L1_DESC_DWORDS		1
289 #define CTXDESC_L1_DESC_V		(1UL << 0)
290 #define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
291 
292 #define CTXDESC_CD_DWORDS		8
293 #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
294 #define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
295 #define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
296 #define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
297 #define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
298 #define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
299 #define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
300 
301 #define CTXDESC_CD_0_ENDI		(1UL << 15)
302 #define CTXDESC_CD_0_V			(1UL << 31)
303 
304 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
305 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
306 
307 #define CTXDESC_CD_0_AA64		(1UL << 41)
308 #define CTXDESC_CD_0_S			(1UL << 44)
309 #define CTXDESC_CD_0_R			(1UL << 45)
310 #define CTXDESC_CD_0_A			(1UL << 46)
311 #define CTXDESC_CD_0_ASET		(1UL << 47)
312 #define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)
313 
314 #define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
315 
316 /*
317  * When the SMMU only supports linear context descriptor tables, pick a
318  * reasonable size limit (64kB).
319  */
320 #define CTXDESC_LINEAR_CDMAX		ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
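
/*
 * Illustrative arithmetic: a CD occupies CTXDESC_CD_DWORDS << 3 == 64
 * bytes, so the 64kB cap above corresponds to 1024 linear entries and
 * hence an s1cdmax of 10 bits.
 */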
321 
322 /* Command queue */
323 #define CMDQ_ENT_SZ_SHIFT		4
324 #define CMDQ_ENT_DWORDS			((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
325 #define CMDQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
326 
327 #define CMDQ_CONS_ERR			GENMASK(30, 24)
328 #define CMDQ_ERR_CERROR_NONE_IDX	0
329 #define CMDQ_ERR_CERROR_ILL_IDX		1
330 #define CMDQ_ERR_CERROR_ABT_IDX		2
331 #define CMDQ_ERR_CERROR_ATC_INV_IDX	3
332 
333 #define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG
334 
335 /*
336  * This is used to size the command queue and therefore must be at least
337  * BITS_PER_LONG so that the valid_map works correctly (it relies on the
338  * total number of queue entries being a multiple of BITS_PER_LONG).
339  */
340 #define CMDQ_BATCH_ENTRIES		BITS_PER_LONG
341 
342 #define CMDQ_0_OP			GENMASK_ULL(7, 0)
343 #define CMDQ_0_SSV			(1UL << 11)
344 
345 #define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
346 #define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
347 #define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
348 
349 #define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
350 #define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
351 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
352 #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
353 
354 #define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
355 #define CMDQ_TLBI_RANGE_NUM_MAX		31
356 #define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
357 #define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
358 #define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
359 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
360 #define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
361 #define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
362 #define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
363 #define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
364 
365 #define CMDQ_ATC_0_SSID			GENMASK_ULL(31, 12)
366 #define CMDQ_ATC_0_SID			GENMASK_ULL(63, 32)
367 #define CMDQ_ATC_0_GLOBAL		(1UL << 9)
368 #define CMDQ_ATC_1_SIZE			GENMASK_ULL(5, 0)
369 #define CMDQ_ATC_1_ADDR_MASK		GENMASK_ULL(63, 12)
370 
371 #define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
372 #define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
373 #define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
374 #define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
375 
376 #define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
377 #define CMDQ_SYNC_0_CS_NONE		0
378 #define CMDQ_SYNC_0_CS_IRQ		1
379 #define CMDQ_SYNC_0_CS_SEV		2
380 #define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
381 #define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
382 #define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
383 #define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
384 
385 /* Event queue */
386 #define EVTQ_ENT_SZ_SHIFT		5
387 #define EVTQ_ENT_DWORDS			((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
388 #define EVTQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
389 
390 #define EVTQ_0_ID			GENMASK_ULL(7, 0)
391 
392 /* PRI queue */
393 #define PRIQ_ENT_SZ_SHIFT		4
394 #define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
395 #define PRIQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
396 
397 #define PRIQ_0_SID			GENMASK_ULL(31, 0)
398 #define PRIQ_0_SSID			GENMASK_ULL(51, 32)
399 #define PRIQ_0_PERM_PRIV		(1UL << 58)
400 #define PRIQ_0_PERM_EXEC		(1UL << 59)
401 #define PRIQ_0_PERM_READ		(1UL << 60)
402 #define PRIQ_0_PERM_WRITE		(1UL << 61)
403 #define PRIQ_0_PRG_LAST			(1UL << 62)
404 #define PRIQ_0_SSID_V			(1UL << 63)
405 
406 #define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
407 #define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
408 
409 /* High-level queue structures */
410 #define ARM_SMMU_POLL_TIMEOUT_US	1000000 /* 1s! */
411 #define ARM_SMMU_POLL_SPIN_COUNT	10
412 
413 #define MSI_IOVA_BASE			0x8000000
414 #define MSI_IOVA_LENGTH			0x100000
415 
416 static bool disable_bypass = 1;
417 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
418 MODULE_PARM_DESC(disable_bypass,
419 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
420 
421 enum pri_resp {
422 	PRI_RESP_DENY = 0,
423 	PRI_RESP_FAIL = 1,
424 	PRI_RESP_SUCC = 2,
425 };
426 
427 enum arm_smmu_msi_index {
428 	EVTQ_MSI_INDEX,
429 	GERROR_MSI_INDEX,
430 	PRIQ_MSI_INDEX,
431 	ARM_SMMU_MAX_MSIS,
432 };
433 
434 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
435 	[EVTQ_MSI_INDEX] = {
436 		ARM_SMMU_EVTQ_IRQ_CFG0,
437 		ARM_SMMU_EVTQ_IRQ_CFG1,
438 		ARM_SMMU_EVTQ_IRQ_CFG2,
439 	},
440 	[GERROR_MSI_INDEX] = {
441 		ARM_SMMU_GERROR_IRQ_CFG0,
442 		ARM_SMMU_GERROR_IRQ_CFG1,
443 		ARM_SMMU_GERROR_IRQ_CFG2,
444 	},
445 	[PRIQ_MSI_INDEX] = {
446 		ARM_SMMU_PRIQ_IRQ_CFG0,
447 		ARM_SMMU_PRIQ_IRQ_CFG1,
448 		ARM_SMMU_PRIQ_IRQ_CFG2,
449 	},
450 };
451 
452 struct arm_smmu_cmdq_ent {
453 	/* Common fields */
454 	u8				opcode;
455 	bool				substream_valid;
456 
457 	/* Command-specific fields */
458 	union {
459 		#define CMDQ_OP_PREFETCH_CFG	0x1
460 		struct {
461 			u32			sid;
462 			u8			size;
463 			u64			addr;
464 		} prefetch;
465 
466 		#define CMDQ_OP_CFGI_STE	0x3
467 		#define CMDQ_OP_CFGI_ALL	0x4
468 		#define CMDQ_OP_CFGI_CD		0x5
469 		#define CMDQ_OP_CFGI_CD_ALL	0x6
470 		struct {
471 			u32			sid;
472 			u32			ssid;
473 			union {
474 				bool		leaf;
475 				u8		span;
476 			};
477 		} cfgi;
478 
479 		#define CMDQ_OP_TLBI_NH_ASID	0x11
480 		#define CMDQ_OP_TLBI_NH_VA	0x12
481 		#define CMDQ_OP_TLBI_EL2_ALL	0x20
482 		#define CMDQ_OP_TLBI_S12_VMALL	0x28
483 		#define CMDQ_OP_TLBI_S2_IPA	0x2a
484 		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
485 		struct {
486 			u8			num;
487 			u8			scale;
488 			u16			asid;
489 			u16			vmid;
490 			bool			leaf;
491 			u8			ttl;
492 			u8			tg;
493 			u64			addr;
494 		} tlbi;
495 
496 		#define CMDQ_OP_ATC_INV		0x40
497 		#define ATC_INV_SIZE_ALL	52
498 		struct {
499 			u32			sid;
500 			u32			ssid;
501 			u64			addr;
502 			u8			size;
503 			bool			global;
504 		} atc;
505 
506 		#define CMDQ_OP_PRI_RESP	0x41
507 		struct {
508 			u32			sid;
509 			u32			ssid;
510 			u16			grpid;
511 			enum pri_resp		resp;
512 		} pri;
513 
514 		#define CMDQ_OP_CMD_SYNC	0x46
515 		struct {
516 			u64			msiaddr;
517 		} sync;
518 	};
519 };
520 
521 struct arm_smmu_ll_queue {
522 	union {
523 		u64			val;
524 		struct {
525 			u32		prod;
526 			u32		cons;
527 		};
528 		struct {
529 			atomic_t	prod;
530 			atomic_t	cons;
531 		} atomic;
532 		u8			__pad[SMP_CACHE_BYTES];
533 	} ____cacheline_aligned_in_smp;
534 	u32				max_n_shift;
535 };
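
/*
 * Note that the union above lets prod and cons be snapshotted and
 * updated together as a single 64-bit "val" (see the cmpxchg() loop in
 * arm_smmu_cmdq_issue_cmdlist()), while the atomic view lets the queue
 * owner clear the OWNED flag from prod without touching cons.
 */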
536 
537 struct arm_smmu_queue {
538 	struct arm_smmu_ll_queue	llq;
539 	int				irq; /* Wired interrupt */
540 
541 	__le64				*base;
542 	dma_addr_t			base_dma;
543 	u64				q_base;
544 
545 	size_t				ent_dwords;
546 
547 	u32 __iomem			*prod_reg;
548 	u32 __iomem			*cons_reg;
549 };
550 
551 struct arm_smmu_queue_poll {
552 	ktime_t				timeout;
553 	unsigned int			delay;
554 	unsigned int			spin_cnt;
555 	bool				wfe;
556 };
557 
558 struct arm_smmu_cmdq {
559 	struct arm_smmu_queue		q;
560 	atomic_long_t			*valid_map;
561 	atomic_t			owner_prod;
562 	atomic_t			lock;
563 };
564 
565 struct arm_smmu_cmdq_batch {
566 	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
567 	int				num;
568 };
569 
570 struct arm_smmu_evtq {
571 	struct arm_smmu_queue		q;
572 	u32				max_stalls;
573 };
574 
575 struct arm_smmu_priq {
576 	struct arm_smmu_queue		q;
577 };
578 
579 /* High-level stream table and context descriptor structures */
580 struct arm_smmu_strtab_l1_desc {
581 	u8				span;
582 
583 	__le64				*l2ptr;
584 	dma_addr_t			l2ptr_dma;
585 };
586 
587 struct arm_smmu_ctx_desc {
588 	u16				asid;
589 	u64				ttbr;
590 	u64				tcr;
591 	u64				mair;
592 };
593 
594 struct arm_smmu_l1_ctx_desc {
595 	__le64				*l2ptr;
596 	dma_addr_t			l2ptr_dma;
597 };
598 
599 struct arm_smmu_ctx_desc_cfg {
600 	__le64				*cdtab;
601 	dma_addr_t			cdtab_dma;
602 	struct arm_smmu_l1_ctx_desc	*l1_desc;
603 	unsigned int			num_l1_ents;
604 };
605 
606 struct arm_smmu_s1_cfg {
607 	struct arm_smmu_ctx_desc_cfg	cdcfg;
608 	struct arm_smmu_ctx_desc	cd;
609 	u8				s1fmt;
610 	u8				s1cdmax;
611 };
612 
613 struct arm_smmu_s2_cfg {
614 	u16				vmid;
615 	u64				vttbr;
616 	u64				vtcr;
617 };
618 
619 struct arm_smmu_strtab_cfg {
620 	__le64				*strtab;
621 	dma_addr_t			strtab_dma;
622 	struct arm_smmu_strtab_l1_desc	*l1_desc;
623 	unsigned int			num_l1_ents;
624 
625 	u64				strtab_base;
626 	u32				strtab_base_cfg;
627 };
628 
629 /* An SMMUv3 instance */
630 struct arm_smmu_device {
631 	struct device			*dev;
632 	void __iomem			*base;
633 	void __iomem			*page1;
634 
635 #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
636 #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
637 #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
638 #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
639 #define ARM_SMMU_FEAT_PRI		(1 << 4)
640 #define ARM_SMMU_FEAT_ATS		(1 << 5)
641 #define ARM_SMMU_FEAT_SEV		(1 << 6)
642 #define ARM_SMMU_FEAT_MSI		(1 << 7)
643 #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
644 #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
645 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
646 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
647 #define ARM_SMMU_FEAT_HYP		(1 << 12)
648 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
649 #define ARM_SMMU_FEAT_VAX		(1 << 14)
650 #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
651 	u32				features;
652 
653 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
654 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
655 	u32				options;
656 
657 	struct arm_smmu_cmdq		cmdq;
658 	struct arm_smmu_evtq		evtq;
659 	struct arm_smmu_priq		priq;
660 
661 	int				gerr_irq;
662 	int				combined_irq;
663 
664 	unsigned long			ias; /* IPA */
665 	unsigned long			oas; /* PA */
666 	unsigned long			pgsize_bitmap;
667 
668 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
669 	unsigned int			asid_bits;
670 
671 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
672 	unsigned int			vmid_bits;
673 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
674 
675 	unsigned int			ssid_bits;
676 	unsigned int			sid_bits;
677 
678 	struct arm_smmu_strtab_cfg	strtab_cfg;
679 
680 	/* IOMMU core code handle */
681 	struct iommu_device		iommu;
682 };
683 
684 /* SMMU private data for each master */
685 struct arm_smmu_master {
686 	struct arm_smmu_device		*smmu;
687 	struct device			*dev;
688 	struct arm_smmu_domain		*domain;
689 	struct list_head		domain_head;
690 	u32				*sids;
691 	unsigned int			num_sids;
692 	bool				ats_enabled;
693 	unsigned int			ssid_bits;
694 };
695 
696 /* SMMU private data for an IOMMU domain */
697 enum arm_smmu_domain_stage {
698 	ARM_SMMU_DOMAIN_S1 = 0,
699 	ARM_SMMU_DOMAIN_S2,
700 	ARM_SMMU_DOMAIN_NESTED,
701 	ARM_SMMU_DOMAIN_BYPASS,
702 };
703 
704 struct arm_smmu_domain {
705 	struct arm_smmu_device		*smmu;
706 	struct mutex			init_mutex; /* Protects smmu pointer */
707 
708 	struct io_pgtable_ops		*pgtbl_ops;
709 	bool				non_strict;
710 	atomic_t			nr_ats_masters;
711 
712 	enum arm_smmu_domain_stage	stage;
713 	union {
714 		struct arm_smmu_s1_cfg	s1_cfg;
715 		struct arm_smmu_s2_cfg	s2_cfg;
716 	};
717 
718 	struct iommu_domain		domain;
719 
720 	struct list_head		devices;
721 	spinlock_t			devices_lock;
722 };
723 
724 struct arm_smmu_option_prop {
725 	u32 opt;
726 	const char *prop;
727 };
728 
729 static DEFINE_XARRAY_ALLOC1(asid_xa);
730 
731 static struct arm_smmu_option_prop arm_smmu_options[] = {
732 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
733 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
734 	{ 0, NULL},
735 };
736 
737 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
738 						 struct arm_smmu_device *smmu)
739 {
740 	if (offset > SZ_64K)
741 		return smmu->page1 + offset - SZ_64K;
742 
743 	return smmu->base + offset;
744 }
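
/*
 * For example, ARM_SMMU_EVTQ_PROD (0x100a8) lies above 64kB and so
 * resolves to page1 + 0xa8, whereas ARM_SMMU_CMDQ_PROD (0x98) is
 * accessed through page 0.
 */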
745 
746 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
747 {
748 	return container_of(dom, struct arm_smmu_domain, domain);
749 }
750 
751 static void parse_driver_options(struct arm_smmu_device *smmu)
752 {
753 	int i = 0;
754 
755 	do {
756 		if (of_property_read_bool(smmu->dev->of_node,
757 						arm_smmu_options[i].prop)) {
758 			smmu->options |= arm_smmu_options[i].opt;
759 			dev_notice(smmu->dev, "option %s\n",
760 				arm_smmu_options[i].prop);
761 		}
762 	} while (arm_smmu_options[++i].opt);
763 }
764 
765 /* Low-level queue manipulation functions */
766 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
767 {
768 	u32 space, prod, cons;
769 
770 	prod = Q_IDX(q, q->prod);
771 	cons = Q_IDX(q, q->cons);
772 
773 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
774 		space = (1 << q->max_n_shift) - (prod - cons);
775 	else
776 		space = cons - prod;
777 
778 	return space >= n;
779 }
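
/*
 * Illustrative example: with max_n_shift == 8 (256 entries), prod index
 * 250 and cons index 10 on the same wrap leave space for 16 more
 * commands; once prod has wrapped, the space is simply cons - prod.
 */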
780 
781 static bool queue_full(struct arm_smmu_ll_queue *q)
782 {
783 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
784 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
785 }
786 
787 static bool queue_empty(struct arm_smmu_ll_queue *q)
788 {
789 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
790 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
791 }
792 
793 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
794 {
795 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
796 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
797 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
798 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
799 }
800 
801 static void queue_sync_cons_out(struct arm_smmu_queue *q)
802 {
803 	/*
804 	 * Ensure that all CPU accesses (reads and writes) to the queue
805 	 * are complete before we update the cons pointer.
806 	 */
807 	mb();
808 	writel_relaxed(q->llq.cons, q->cons_reg);
809 }
810 
811 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
812 {
813 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
814 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
815 }
816 
817 static int queue_sync_prod_in(struct arm_smmu_queue *q)
818 {
819 	int ret = 0;
820 	u32 prod = readl_relaxed(q->prod_reg);
821 
822 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
823 		ret = -EOVERFLOW;
824 
825 	q->llq.prod = prod;
826 	return ret;
827 }
828 
829 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
830 {
831 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
832 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
833 }
834 
835 static void queue_poll_init(struct arm_smmu_device *smmu,
836 			    struct arm_smmu_queue_poll *qp)
837 {
838 	qp->delay = 1;
839 	qp->spin_cnt = 0;
840 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
841 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
842 }
843 
844 static int queue_poll(struct arm_smmu_queue_poll *qp)
845 {
846 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
847 		return -ETIMEDOUT;
848 
849 	if (qp->wfe) {
850 		wfe();
851 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
852 		cpu_relax();
853 	} else {
854 		udelay(qp->delay);
855 		qp->delay *= 2;
856 		qp->spin_cnt = 0;
857 	}
858 
859 	return 0;
860 }
861 
862 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
863 {
864 	int i;
865 
866 	for (i = 0; i < n_dwords; ++i)
867 		*dst++ = cpu_to_le64(*src++);
868 }
869 
870 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
871 {
872 	int i;
873 
874 	for (i = 0; i < n_dwords; ++i)
875 		*dst++ = le64_to_cpu(*src++);
876 }
877 
878 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
879 {
880 	if (queue_empty(&q->llq))
881 		return -EAGAIN;
882 
883 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
884 	queue_inc_cons(&q->llq);
885 	queue_sync_cons_out(q);
886 	return 0;
887 }
888 
889 /* High-level queue accessors */
890 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
891 {
892 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
893 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
894 
895 	switch (ent->opcode) {
896 	case CMDQ_OP_TLBI_EL2_ALL:
897 	case CMDQ_OP_TLBI_NSNH_ALL:
898 		break;
899 	case CMDQ_OP_PREFETCH_CFG:
900 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
901 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
902 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
903 		break;
904 	case CMDQ_OP_CFGI_CD:
905 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
906 		fallthrough;
907 	case CMDQ_OP_CFGI_STE:
908 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
909 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
910 		break;
911 	case CMDQ_OP_CFGI_CD_ALL:
912 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
913 		break;
914 	case CMDQ_OP_CFGI_ALL:
915 		/* Cover the entire SID range */
916 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
917 		break;
918 	case CMDQ_OP_TLBI_NH_VA:
919 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
920 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
921 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
922 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
923 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
924 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
925 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
926 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
927 		break;
928 	case CMDQ_OP_TLBI_S2_IPA:
929 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
930 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
931 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
932 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
933 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
934 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
935 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
936 		break;
937 	case CMDQ_OP_TLBI_NH_ASID:
938 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
939 		fallthrough;
940 	case CMDQ_OP_TLBI_S12_VMALL:
941 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
942 		break;
943 	case CMDQ_OP_ATC_INV:
944 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
945 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
946 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
947 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
948 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
949 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
950 		break;
951 	case CMDQ_OP_PRI_RESP:
952 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
953 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
954 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
955 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
956 		switch (ent->pri.resp) {
957 		case PRI_RESP_DENY:
958 		case PRI_RESP_FAIL:
959 		case PRI_RESP_SUCC:
960 			break;
961 		default:
962 			return -EINVAL;
963 		}
964 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
965 		break;
966 	case CMDQ_OP_CMD_SYNC:
967 		if (ent->sync.msiaddr) {
968 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
969 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
970 		} else {
971 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
972 		}
973 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
974 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
975 		break;
976 	default:
977 		return -ENOENT;
978 	}
979 
980 	return 0;
981 }
982 
983 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
984 					 u32 prod)
985 {
986 	struct arm_smmu_queue *q = &smmu->cmdq.q;
987 	struct arm_smmu_cmdq_ent ent = {
988 		.opcode = CMDQ_OP_CMD_SYNC,
989 	};
990 
991 	/*
992 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
993 	 * payload, so the write will zero the entire command on that platform.
994 	 */
995 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
996 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
997 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
998 				   q->ent_dwords * 8;
999 	}
1000 
1001 	arm_smmu_cmdq_build_cmd(cmd, &ent);
1002 }
1003 
1004 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
1005 {
1006 	static const char *cerror_str[] = {
1007 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
1008 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
1009 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
1010 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
1011 	};
1012 
1013 	int i;
1014 	u64 cmd[CMDQ_ENT_DWORDS];
1015 	struct arm_smmu_queue *q = &smmu->cmdq.q;
1016 	u32 cons = readl_relaxed(q->cons_reg);
1017 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
1018 	struct arm_smmu_cmdq_ent cmd_sync = {
1019 		.opcode = CMDQ_OP_CMD_SYNC,
1020 	};
1021 
1022 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
1023 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
1024 
1025 	switch (idx) {
1026 	case CMDQ_ERR_CERROR_ABT_IDX:
1027 		dev_err(smmu->dev, "retrying command fetch\n");
		fallthrough;
1028 	case CMDQ_ERR_CERROR_NONE_IDX:
1029 		return;
1030 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
1031 		/*
1032 		 * ATC Invalidation Completion timeout. CONS is still pointing
1033 		 * at the CMD_SYNC. Attempt to complete other pending commands
1034 		 * by repeating the CMD_SYNC, though we might well end up back
1035 		 * here since the ATC invalidation may still be pending.
1036 		 */
1037 		return;
1038 	case CMDQ_ERR_CERROR_ILL_IDX:
1039 	default:
1040 		break;
1041 	}
1042 
1043 	/*
1044 	 * We may have concurrent producers, so we need to be careful
1045 	 * not to touch any of the shadow cmdq state.
1046 	 */
1047 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
1048 	dev_err(smmu->dev, "skipping command in error state:\n");
1049 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1050 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1051 
1052 	/* Convert the erroneous command into a CMD_SYNC */
1053 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1054 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1055 		return;
1056 	}
1057 
1058 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
1059 }
1060 
1061 /*
1062  * Command queue locking.
1063  * This is a form of bastardised rwlock with the following major changes:
1064  *
1065  * - The only LOCK routines are exclusive_trylock() and shared_lock().
1066  *   Neither have barrier semantics, and instead provide only a control
1067  *   dependency.
1068  *
1069  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1070  *   fails if the caller appears to be the last lock holder (yes, this is
1071  *   racy). All successful UNLOCK routines have RELEASE semantics.
1072  */
1073 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
1074 {
1075 	int val;
1076 
1077 	/*
1078 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
1079 	 * lock counter. When held in exclusive state, the lock counter is set
1080 	 * to INT_MIN so these increments won't hurt as the value will remain
1081 	 * negative.
1082 	 */
1083 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1084 		return;
1085 
1086 	do {
1087 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1088 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1089 }
1090 
1091 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1092 {
1093 	(void)atomic_dec_return_release(&cmdq->lock);
1094 }
1095 
1096 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1097 {
1098 	if (atomic_read(&cmdq->lock) == 1)
1099 		return false;
1100 
1101 	arm_smmu_cmdq_shared_unlock(cmdq);
1102 	return true;
1103 }
1104 
1105 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
1106 ({									\
1107 	bool __ret;							\
1108 	local_irq_save(flags);						\
1109 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
1110 	if (!__ret)							\
1111 		local_irq_restore(flags);				\
1112 	__ret;								\
1113 })
1114 
1115 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
1116 ({									\
1117 	atomic_set_release(&cmdq->lock, 0);				\
1118 	local_irq_restore(flags);					\
1119 })
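
/*
 * Usage sketch (see arm_smmu_cmdq_poll_until_not_full() and
 * arm_smmu_cmdq_issue_cmdlist() below): CMD_SYNC issuers take the lock
 * shared so that the queue cannot wrap twice without them noticing,
 * while a CPU that wants to refresh the cached cons pointer takes it
 * exclusively via the trylock above.
 */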
1120 
1121 
1122 /*
1123  * Command queue insertion.
1124  * This is made fiddly by our attempts to achieve some sort of scalability
1125  * since there is one queue shared amongst all of the CPUs in the system.  If
1126  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1127  * then you'll *love* this monstrosity.
1128  *
1129  * The basic idea is to split the queue up into ranges of commands that are
1130  * owned by a given CPU; the owner may not have written all of the commands
1131  * itself, but is responsible for advancing the hardware prod pointer when
1132  * the time comes. The algorithm is roughly:
1133  *
1134  * 	1. Allocate some space in the queue. At this point we also discover
1135  *	   whether the head of the queue is currently owned by another CPU,
1136  *	   or whether we are the owner.
1137  *
1138  *	2. Write our commands into our allocated slots in the queue.
1139  *
1140  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1141  *
1142  *	4. If we are an owner:
1143  *		a. Wait for the previous owner to finish.
1144  *		b. Mark the queue head as unowned, which tells us the range
1145  *		   that we are responsible for publishing.
1146  *		c. Wait for all commands in our owned range to become valid.
1147  *		d. Advance the hardware prod pointer.
1148  *		e. Tell the next owner we've finished.
1149  *
1150  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
1151  *	   owner), then we need to stick around until it has completed:
1152  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1153  *		   to clear the first 4 bytes.
1154  *		b. Otherwise, we spin waiting for the hardware cons pointer to
1155  *		   advance past our command.
1156  *
1157  * The devil is in the details, particularly the use of locking for handling
1158  * SYNC completion and freeing up space in the queue before we think that it is
1159  * full.
1160  */
1161 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1162 					       u32 sprod, u32 eprod, bool set)
1163 {
1164 	u32 swidx, sbidx, ewidx, ebidx;
1165 	struct arm_smmu_ll_queue llq = {
1166 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1167 		.prod		= sprod,
1168 	};
1169 
1170 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1171 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
1172 
1173 	while (llq.prod != eprod) {
1174 		unsigned long mask;
1175 		atomic_long_t *ptr;
1176 		u32 limit = BITS_PER_LONG;
1177 
1178 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1179 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1180 
1181 		ptr = &cmdq->valid_map[swidx];
1182 
1183 		if ((swidx == ewidx) && (sbidx < ebidx))
1184 			limit = ebidx;
1185 
1186 		mask = GENMASK(limit - 1, sbidx);
1187 
1188 		/*
1189 		 * The valid bit is the inverse of the wrap bit. This means
1190 		 * that a zero-initialised queue is invalid and, after marking
1191 		 * all entries as valid, they become invalid again when we
1192 		 * wrap.
1193 		 */
1194 		if (set) {
1195 			atomic_long_xor(mask, ptr);
1196 		} else { /* Poll */
1197 			unsigned long valid;
1198 
1199 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1200 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1201 		}
1202 
1203 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
1204 	}
1205 }
1206 
1207 /* Mark all entries in the range [sprod, eprod) as valid */
1208 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1209 					u32 sprod, u32 eprod)
1210 {
1211 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1212 }
1213 
1214 /* Wait for all entries in the range [sprod, eprod) to become valid */
1215 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1216 					 u32 sprod, u32 eprod)
1217 {
1218 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
1219 }
1220 
1221 /* Wait for the command queue to become non-full */
1222 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1223 					     struct arm_smmu_ll_queue *llq)
1224 {
1225 	unsigned long flags;
1226 	struct arm_smmu_queue_poll qp;
1227 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1228 	int ret = 0;
1229 
1230 	/*
1231 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1232 	 * that fails, spin until somebody else updates it for us.
1233 	 */
1234 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1235 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1236 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1237 		llq->val = READ_ONCE(cmdq->q.llq.val);
1238 		return 0;
1239 	}
1240 
1241 	queue_poll_init(smmu, &qp);
1242 	do {
1243 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1244 		if (!queue_full(llq))
1245 			break;
1246 
1247 		ret = queue_poll(&qp);
1248 	} while (!ret);
1249 
1250 	return ret;
1251 }
1252 
1253 /*
1254  * Wait until the SMMU signals a CMD_SYNC completion MSI.
1255  * Must be called with the cmdq lock held in some capacity.
1256  */
1257 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1258 					  struct arm_smmu_ll_queue *llq)
1259 {
1260 	int ret = 0;
1261 	struct arm_smmu_queue_poll qp;
1262 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1263 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
1264 
1265 	queue_poll_init(smmu, &qp);
1266 
1267 	/*
1268 	 * The MSI won't generate an event, since it's being written back
1269 	 * into the command queue.
1270 	 */
1271 	qp.wfe = false;
1272 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1273 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1274 	return ret;
1275 }
1276 
1277 /*
1278  * Wait until the SMMU cons index passes llq->prod.
1279  * Must be called with the cmdq lock held in some capacity.
1280  */
1281 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1282 					       struct arm_smmu_ll_queue *llq)
1283 {
1284 	struct arm_smmu_queue_poll qp;
1285 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1286 	u32 prod = llq->prod;
1287 	int ret = 0;
1288 
1289 	queue_poll_init(smmu, &qp);
1290 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1291 	do {
1292 		if (queue_consumed(llq, prod))
1293 			break;
1294 
1295 		ret = queue_poll(&qp);
1296 
1297 		/*
1298 		 * This needs to be a readl() so that our subsequent call
1299 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1300 		 *
1301 		 * Specifically, we need to ensure that we observe all
1302 		 * shared_lock()s by other CMD_SYNCs that share our owner,
1303 		 * so that a failing call to tryunlock() means that we're
1304 		 * the last one out and therefore we can safely advance
1305 		 * cmdq->q.llq.cons. Roughly speaking:
1306 		 *
1307 		 * CPU 0		CPU1			CPU2 (us)
1308 		 *
1309 		 * if (sync)
1310 		 * 	shared_lock();
1311 		 *
1312 		 * dma_wmb();
1313 		 * set_valid_map();
1314 		 *
1315 		 * 			if (owner) {
1316 		 *				poll_valid_map();
1317 		 *				<control dependency>
1318 		 *				writel(prod_reg);
1319 		 *
1320 		 *						readl(cons_reg);
1321 		 *						tryunlock();
1322 		 *
1323 		 * Requires us to see CPU 0's shared_lock() acquisition.
1324 		 */
1325 		llq->cons = readl(cmdq->q.cons_reg);
1326 	} while (!ret);
1327 
1328 	return ret;
1329 }
1330 
1331 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1332 					 struct arm_smmu_ll_queue *llq)
1333 {
1334 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
1335 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
1336 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1337 
1338 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1339 }
1340 
1341 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1342 					u32 prod, int n)
1343 {
1344 	int i;
1345 	struct arm_smmu_ll_queue llq = {
1346 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1347 		.prod		= prod,
1348 	};
1349 
1350 	for (i = 0; i < n; ++i) {
1351 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1352 
1353 		prod = queue_inc_prod_n(&llq, i);
1354 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1355 	}
1356 }
1357 
1358 /*
1359  * This is the actual insertion function, and provides the following
1360  * ordering guarantees to callers:
1361  *
1362  * - There is a dma_wmb() before publishing any commands to the queue.
1363  *   This can be relied upon to order prior writes to data structures
1364  *   in memory (such as a CD or an STE) before the command.
1365  *
1366  * - On completion of a CMD_SYNC, there is a control dependency.
1367  *   This can be relied upon to order subsequent writes to memory (e.g.
1368  *   freeing an IOVA) after completion of the CMD_SYNC.
1369  *
1370  * - Command insertion is totally ordered, so if two CPUs each race to
1371  *   insert their own list of commands then all of the commands from one
1372  *   CPU will appear before any of the commands from the other CPU.
1373  */
1374 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1375 				       u64 *cmds, int n, bool sync)
1376 {
1377 	u64 cmd_sync[CMDQ_ENT_DWORDS];
1378 	u32 prod;
1379 	unsigned long flags;
1380 	bool owner;
1381 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1382 	struct arm_smmu_ll_queue llq = {
1383 		.max_n_shift = cmdq->q.llq.max_n_shift,
1384 	}, head = llq;
1385 	int ret = 0;
1386 
1387 	/* 1. Allocate some space in the queue */
1388 	local_irq_save(flags);
1389 	llq.val = READ_ONCE(cmdq->q.llq.val);
1390 	do {
1391 		u64 old;
1392 
1393 		while (!queue_has_space(&llq, n + sync)) {
1394 			local_irq_restore(flags);
1395 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1396 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1397 			local_irq_save(flags);
1398 		}
1399 
1400 		head.cons = llq.cons;
1401 		head.prod = queue_inc_prod_n(&llq, n + sync) |
1402 					     CMDQ_PROD_OWNED_FLAG;
1403 
1404 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1405 		if (old == llq.val)
1406 			break;
1407 
1408 		llq.val = old;
1409 	} while (1);
1410 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1411 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1412 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1413 
1414 	/*
1415 	 * 2. Write our commands into the queue
1416 	 * Dependency ordering from the cmpxchg() loop above.
1417 	 */
1418 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1419 	if (sync) {
1420 		prod = queue_inc_prod_n(&llq, n);
1421 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1422 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1423 
1424 		/*
1425 		 * In order to determine completion of our CMD_SYNC, we must
1426 		 * ensure that the queue can't wrap twice without us noticing.
1427 		 * We achieve that by taking the cmdq lock as shared before
1428 		 * marking our slot as valid.
1429 		 */
1430 		arm_smmu_cmdq_shared_lock(cmdq);
1431 	}
1432 
1433 	/* 3. Mark our slots as valid, ensuring commands are visible first */
1434 	dma_wmb();
1435 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1436 
1437 	/* 4. If we are the owner, take control of the SMMU hardware */
1438 	if (owner) {
1439 		/* a. Wait for previous owner to finish */
1440 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1441 
1442 		/* b. Stop gathering work by clearing the owned flag */
1443 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1444 						   &cmdq->q.llq.atomic.prod);
1445 		prod &= ~CMDQ_PROD_OWNED_FLAG;
1446 
1447 		/*
1448 		 * c. Wait for any gathered work to be written to the queue.
1449 		 * Note that we read our own entries so that we have the control
1450 		 * dependency required by (d).
1451 		 */
1452 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1453 
1454 		/*
1455 		 * d. Advance the hardware prod pointer
1456 		 * Control dependency ordering from the entries becoming valid.
1457 		 */
1458 		writel_relaxed(prod, cmdq->q.prod_reg);
1459 
1460 		/*
1461 		 * e. Tell the next owner we're done
1462 		 * Make sure we've updated the hardware first, so that we don't
1463 		 * race to update prod and potentially move it backwards.
1464 		 */
1465 		atomic_set_release(&cmdq->owner_prod, prod);
1466 	}
1467 
1468 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1469 	if (sync) {
1470 		llq.prod = queue_inc_prod_n(&llq, n);
1471 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1472 		if (ret) {
1473 			dev_err_ratelimited(smmu->dev,
1474 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1475 					    llq.prod,
1476 					    readl_relaxed(cmdq->q.prod_reg),
1477 					    readl_relaxed(cmdq->q.cons_reg));
1478 		}
1479 
1480 		/*
1481 		 * Try to unlock the cmdq lock. This will fail if we're the last
1482 		 * reader, in which case we can safely update cmdq->q.llq.cons
1483 		 */
1484 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1485 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1486 			arm_smmu_cmdq_shared_unlock(cmdq);
1487 		}
1488 	}
1489 
1490 	local_irq_restore(flags);
1491 	return ret;
1492 }
1493 
1494 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1495 				   struct arm_smmu_cmdq_ent *ent)
1496 {
1497 	u64 cmd[CMDQ_ENT_DWORDS];
1498 
1499 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1500 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1501 			 ent->opcode);
1502 		return -EINVAL;
1503 	}
1504 
1505 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1506 }
1507 
1508 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1509 {
1510 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
1511 }
1512 
1513 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
1514 				    struct arm_smmu_cmdq_batch *cmds,
1515 				    struct arm_smmu_cmdq_ent *cmd)
1516 {
1517 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
1518 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
1519 		cmds->num = 0;
1520 	}
1521 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
1522 	cmds->num++;
1523 }
1524 
1525 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
1526 				      struct arm_smmu_cmdq_batch *cmds)
1527 {
1528 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
1529 }
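
/*
 * Typical batch usage, as in arm_smmu_sync_cd() below (sketch only):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each target:
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * A full batch is flushed (without a sync) from batch_add(); the final
 * submit appends the CMD_SYNC.
 */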
1530 
1531 /* Context descriptor manipulation functions */
1532 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1533 			     int ssid, bool leaf)
1534 {
1535 	size_t i;
1536 	unsigned long flags;
1537 	struct arm_smmu_master *master;
1538 	struct arm_smmu_cmdq_batch cmds = {};
1539 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1540 	struct arm_smmu_cmdq_ent cmd = {
1541 		.opcode	= CMDQ_OP_CFGI_CD,
1542 		.cfgi	= {
1543 			.ssid	= ssid,
1544 			.leaf	= leaf,
1545 		},
1546 	};
1547 
1548 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1549 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1550 		for (i = 0; i < master->num_sids; i++) {
1551 			cmd.cfgi.sid = master->sids[i];
1552 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1553 		}
1554 	}
1555 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1556 
1557 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1558 }
1559 
1560 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1561 					struct arm_smmu_l1_ctx_desc *l1_desc)
1562 {
1563 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1564 
1565 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1566 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1567 	if (!l1_desc->l2ptr) {
1568 		dev_warn(smmu->dev,
1569 			 "failed to allocate context descriptor table\n");
1570 		return -ENOMEM;
1571 	}
1572 	return 0;
1573 }
1574 
1575 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1576 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1577 {
1578 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1579 		  CTXDESC_L1_DESC_V;
1580 
1581 	/* See comment in arm_smmu_write_ctx_desc() */
1582 	WRITE_ONCE(*dst, cpu_to_le64(val));
1583 }
1584 
1585 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1586 				   u32 ssid)
1587 {
1588 	__le64 *l1ptr;
1589 	unsigned int idx;
1590 	struct arm_smmu_l1_ctx_desc *l1_desc;
1591 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1592 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1593 
1594 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1595 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1596 
1597 	idx = ssid >> CTXDESC_SPLIT;
1598 	l1_desc = &cdcfg->l1_desc[idx];
1599 	if (!l1_desc->l2ptr) {
1600 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1601 			return NULL;
1602 
1603 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1604 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1605 		/* An invalid L1CD can be cached */
1606 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1607 	}
1608 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1609 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1610 }
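
/*
 * Illustrative split: with CTXDESC_SPLIT == 10, SSID 0x1234 selects L1
 * entry 0x4 and slot 0x234 of the corresponding 1024-entry L2 table.
 */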
1611 
1612 static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1613 				   int ssid, struct arm_smmu_ctx_desc *cd)
1614 {
1615 	/*
1616 	 * This function handles the following cases:
1617 	 *
1618 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1619 	 * (2) Install a secondary CD, for SID+SSID traffic.
1620 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1621 	 *     CD, then invalidate the old entry and mappings.
1622 	 * (4) Remove a secondary CD.
1623 	 */
1624 	u64 val;
1625 	bool cd_live;
1626 	__le64 *cdptr;
1627 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1628 
1629 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1630 		return -E2BIG;
1631 
1632 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1633 	if (!cdptr)
1634 		return -ENOMEM;
1635 
1636 	val = le64_to_cpu(cdptr[0]);
1637 	cd_live = !!(val & CTXDESC_CD_0_V);
1638 
1639 	if (!cd) { /* (4) */
1640 		val = 0;
1641 	} else if (cd_live) { /* (3) */
1642 		val &= ~CTXDESC_CD_0_ASID;
1643 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1644 		/*
1645 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1646 		 * this substream's traffic
1647 		 */
1648 	} else { /* (1) and (2) */
1649 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1650 		cdptr[2] = 0;
1651 		cdptr[3] = cpu_to_le64(cd->mair);
1652 
1653 		/*
1654 		 * STE is live, and the SMMU might read dwords of this CD in any
1655 		 * order. Ensure that it observes valid values before reading
1656 		 * V=1.
1657 		 */
1658 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1659 
1660 		val = cd->tcr |
1661 #ifdef __BIG_ENDIAN
1662 			CTXDESC_CD_0_ENDI |
1663 #endif
1664 			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1665 			CTXDESC_CD_0_AA64 |
1666 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1667 			CTXDESC_CD_0_V;
1668 
1669 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1670 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1671 			val |= CTXDESC_CD_0_S;
1672 	}
1673 
1674 	/*
1675 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1676 	 * "Configuration structures and configuration invalidation completion"
1677 	 *
1678 	 *   The size of single-copy atomic reads made by the SMMU is
1679 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1680 	 *   field within an aligned 64-bit span of a structure can be altered
1681 	 *   without first making the structure invalid.
1682 	 */
1683 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1684 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1685 	return 0;
1686 }
1687 
1688 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1689 {
1690 	int ret;
1691 	size_t l1size;
1692 	size_t max_contexts;
1693 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1694 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1695 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1696 
1697 	max_contexts = 1 << cfg->s1cdmax;
1698 
1699 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1700 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1701 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1702 		cdcfg->num_l1_ents = max_contexts;
1703 
1704 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1705 	} else {
1706 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1707 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1708 						  CTXDESC_L2_ENTRIES);
1709 
1710 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1711 					      sizeof(*cdcfg->l1_desc),
1712 					      GFP_KERNEL);
1713 		if (!cdcfg->l1_desc)
1714 			return -ENOMEM;
1715 
1716 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1717 	}
1718 
1719 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1720 					   GFP_KERNEL);
1721 	if (!cdcfg->cdtab) {
1722 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1723 		ret = -ENOMEM;
1724 		goto err_free_l1;
1725 	}
1726 
1727 	return 0;
1728 
1729 err_free_l1:
1730 	if (cdcfg->l1_desc) {
1731 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1732 		cdcfg->l1_desc = NULL;
1733 	}
1734 	return ret;
1735 }
1736 
1737 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1738 {
1739 	int i;
1740 	size_t size, l1size;
1741 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1742 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1743 
1744 	if (cdcfg->l1_desc) {
1745 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1746 
1747 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1748 			if (!cdcfg->l1_desc[i].l2ptr)
1749 				continue;
1750 
1751 			dmam_free_coherent(smmu->dev, size,
1752 					   cdcfg->l1_desc[i].l2ptr,
1753 					   cdcfg->l1_desc[i].l2ptr_dma);
1754 		}
1755 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1756 		cdcfg->l1_desc = NULL;
1757 
1758 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1759 	} else {
1760 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1761 	}
1762 
1763 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1764 	cdcfg->cdtab_dma = 0;
1765 	cdcfg->cdtab = NULL;
1766 }
1767 
1768 static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1769 {
1770 	if (!cd->asid)
1771 		return;
1772 
1773 	xa_erase(&asid_xa, cd->asid);
1774 }
1775 
1776 /* Stream table manipulation functions */
1777 static void
1778 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1779 {
1780 	u64 val = 0;
1781 
1782 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
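	/* SPAN encodes the number of STEs in the L2 table as 2^(span - 1) */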
1783 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1784 
1785 	/* See comment in arm_smmu_write_ctx_desc() */
1786 	WRITE_ONCE(*dst, cpu_to_le64(val));
1787 }
1788 
1789 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1790 {
1791 	struct arm_smmu_cmdq_ent cmd = {
1792 		.opcode	= CMDQ_OP_CFGI_STE,
1793 		.cfgi	= {
1794 			.sid	= sid,
1795 			.leaf	= true,
1796 		},
1797 	};
1798 
1799 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1800 	arm_smmu_cmdq_issue_sync(smmu);
1801 }
1802 
1803 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1804 				      __le64 *dst)
1805 {
1806 	/*
1807 	 * This is hideously complicated, but we only really care about
1808 	 * three cases at the moment:
1809 	 *
1810 	 * 1. Invalid (all zero) -> bypass/fault (init)
1811 	 * 2. Bypass/fault -> translation/bypass (attach)
1812 	 * 3. Translation/bypass -> bypass/fault (detach)
1813 	 *
1814 	 * Given that we can't update the STE atomically and the SMMU
1815 	 * doesn't read the thing in a defined order, that leaves us
1816 	 * with the following maintenance requirements:
1817 	 *
1818 	 * 1. Update Config, return (init time STEs aren't live)
1819 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1820 	 * 3. Update Config, sync
1821 	 */
1822 	u64 val = le64_to_cpu(dst[0]);
1823 	bool ste_live = false;
1824 	struct arm_smmu_device *smmu = NULL;
1825 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1826 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1827 	struct arm_smmu_domain *smmu_domain = NULL;
1828 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1829 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1830 		.prefetch	= {
1831 			.sid	= sid,
1832 		},
1833 	};
1834 
1835 	if (master) {
1836 		smmu_domain = master->domain;
1837 		smmu = master->smmu;
1838 	}
1839 
1840 	if (smmu_domain) {
1841 		switch (smmu_domain->stage) {
1842 		case ARM_SMMU_DOMAIN_S1:
1843 			s1_cfg = &smmu_domain->s1_cfg;
1844 			break;
1845 		case ARM_SMMU_DOMAIN_S2:
1846 		case ARM_SMMU_DOMAIN_NESTED:
1847 			s2_cfg = &smmu_domain->s2_cfg;
1848 			break;
1849 		default:
1850 			break;
1851 		}
1852 	}
1853 
1854 	if (val & STRTAB_STE_0_V) {
1855 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1856 		case STRTAB_STE_0_CFG_BYPASS:
1857 			break;
1858 		case STRTAB_STE_0_CFG_S1_TRANS:
1859 		case STRTAB_STE_0_CFG_S2_TRANS:
1860 			ste_live = true;
1861 			break;
1862 		case STRTAB_STE_0_CFG_ABORT:
1863 			BUG_ON(!disable_bypass);
1864 			break;
1865 		default:
1866 			BUG(); /* STE corruption */
1867 		}
1868 	}
1869 
1870 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1871 	val = STRTAB_STE_0_V;
1872 
1873 	/* Bypass/fault */
1874 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1875 		if (!smmu_domain && disable_bypass)
1876 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1877 		else
1878 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1879 
1880 		dst[0] = cpu_to_le64(val);
1881 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1882 						STRTAB_STE_1_SHCFG_INCOMING));
1883 		dst[2] = 0; /* Nuke the VMID */
1884 		/*
1885 		 * The SMMU can perform negative caching, so we must sync
1886 		 * the STE regardless of whether the old value was live.
1887 		 */
1888 		if (smmu)
1889 			arm_smmu_sync_ste_for_sid(smmu, sid);
1890 		return;
1891 	}
1892 
1893 	if (s1_cfg) {
1894 		BUG_ON(ste_live);
1895 		dst[1] = cpu_to_le64(
1896 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1897 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1898 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1899 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1900 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1901 
1902 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1903 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1904 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1905 
1906 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1907 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1908 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1909 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1910 	}
1911 
1912 	if (s2_cfg) {
1913 		BUG_ON(ste_live);
1914 		dst[2] = cpu_to_le64(
1915 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1916 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1917 #ifdef __BIG_ENDIAN
1918 			 STRTAB_STE_2_S2ENDI |
1919 #endif
1920 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1921 			 STRTAB_STE_2_S2R);
1922 
1923 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1924 
1925 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1926 	}
1927 
1928 	if (master->ats_enabled)
1929 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1930 						 STRTAB_STE_1_EATS_TRANS));
1931 
1932 	arm_smmu_sync_ste_for_sid(smmu, sid);
1933 	/* See comment in arm_smmu_write_ctx_desc() */
1934 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1935 	arm_smmu_sync_ste_for_sid(smmu, sid);
1936 
1937 	/* It's likely that we'll want to use the new STE soon */
1938 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1939 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1940 }
1941 
1942 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1943 {
1944 	unsigned int i;
1945 
1946 	for (i = 0; i < nent; ++i) {
1947 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1948 		strtab += STRTAB_STE_DWORDS;
1949 	}
1950 }
1951 
1952 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1953 {
1954 	size_t size;
1955 	void *strtab;
1956 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1957 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1958 
1959 	if (desc->l2ptr)
1960 		return 0;
1961 
1962 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
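	/*
	 * Each L2 table holds 1 << STRTAB_SPLIT STEs of STRTAB_STE_DWORDS
	 * 64-bit words each: 256 64-byte STEs (16KB) with the defaults used
	 * by this driver.
	 */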
1963 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1964 
1965 	desc->span = STRTAB_SPLIT + 1;
1966 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1967 					  GFP_KERNEL);
1968 	if (!desc->l2ptr) {
1969 		dev_err(smmu->dev,
1970 			"failed to allocate l2 stream table for SID %u\n",
1971 			sid);
1972 		return -ENOMEM;
1973 	}
1974 
1975 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1976 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1977 	return 0;
1978 }
1979 
1980 /* IRQ and event handlers */
1981 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1982 {
1983 	int i;
1984 	struct arm_smmu_device *smmu = dev;
1985 	struct arm_smmu_queue *q = &smmu->evtq.q;
1986 	struct arm_smmu_ll_queue *llq = &q->llq;
1987 	u64 evt[EVTQ_ENT_DWORDS];
1988 
1989 	do {
1990 		while (!queue_remove_raw(q, evt)) {
1991 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1992 
1993 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1994 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1995 				dev_info(smmu->dev, "\t0x%016llx\n",
1996 					 (unsigned long long)evt[i]);
1997 
1998 		}
1999 
2000 		/*
2001 		 * Not much we can do on overflow, so scream and pretend we're
2002 		 * trying harder.
2003 		 */
2004 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2005 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
2006 	} while (!queue_empty(llq));
2007 
2008 	/* Sync our overflow flag, as we believe we're up to speed */
2009 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2010 		    Q_IDX(llq, llq->cons);
2011 	return IRQ_HANDLED;
2012 }
2013 
2014 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
2015 {
2016 	u32 sid, ssid;
2017 	u16 grpid;
2018 	bool ssv, last;
2019 
2020 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
2021 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
2022 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
2023 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
2024 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
2025 
2026 	dev_info(smmu->dev, "unexpected PRI request received:\n");
2027 	dev_info(smmu->dev,
2028 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
2029 		 sid, ssid, grpid, last ? "L" : "",
2030 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
2031 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
2032 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
2033 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
2034 		 evt[1] & PRIQ_1_ADDR_MASK);
2035 
2036 	if (last) {
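		/*
		 * We have no PRI request handler, but the last request in a
		 * Page Request Group still needs a response, so explicitly
		 * deny it on behalf of the device.
		 */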
2037 		struct arm_smmu_cmdq_ent cmd = {
2038 			.opcode			= CMDQ_OP_PRI_RESP,
2039 			.substream_valid	= ssv,
2040 			.pri			= {
2041 				.sid	= sid,
2042 				.ssid	= ssid,
2043 				.grpid	= grpid,
2044 				.resp	= PRI_RESP_DENY,
2045 			},
2046 		};
2047 
2048 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2049 	}
2050 }
2051 
2052 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2053 {
2054 	struct arm_smmu_device *smmu = dev;
2055 	struct arm_smmu_queue *q = &smmu->priq.q;
2056 	struct arm_smmu_ll_queue *llq = &q->llq;
2057 	u64 evt[PRIQ_ENT_DWORDS];
2058 
2059 	do {
2060 		while (!queue_remove_raw(q, evt))
2061 			arm_smmu_handle_ppr(smmu, evt);
2062 
2063 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2064 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2065 	} while (!queue_empty(llq));
2066 
2067 	/* Sync our overflow flag, as we believe we're up to speed */
2068 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2069 		      Q_IDX(llq, llq->cons);
2070 	queue_sync_cons_out(q);
2071 	return IRQ_HANDLED;
2072 }
2073 
2074 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2075 
2076 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
2077 {
2078 	u32 gerror, gerrorn, active;
2079 	struct arm_smmu_device *smmu = dev;
2080 
2081 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
2082 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
2083 
2084 	active = gerror ^ gerrorn;
2085 	if (!(active & GERROR_ERR_MASK))
2086 		return IRQ_NONE; /* No errors pending */
2087 
2088 	dev_warn(smmu->dev,
2089 		 "unexpected global error reported (0x%08x), this could be serious\n",
2090 		 active);
2091 
2092 	if (active & GERROR_SFM_ERR) {
2093 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
2094 		arm_smmu_device_disable(smmu);
2095 	}
2096 
2097 	if (active & GERROR_MSI_GERROR_ABT_ERR)
2098 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
2099 
2100 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
2101 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
2102 
2103 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
2104 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
2105 
2106 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
2107 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
2108 
2109 	if (active & GERROR_PRIQ_ABT_ERR)
2110 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
2111 
2112 	if (active & GERROR_EVTQ_ABT_ERR)
2113 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
2114 
2115 	if (active & GERROR_CMDQ_ERR)
2116 		arm_smmu_cmdq_skip_err(smmu);
2117 
2118 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
2119 	return IRQ_HANDLED;
2120 }
2121 
2122 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
2123 {
2124 	struct arm_smmu_device *smmu = dev;
2125 
2126 	arm_smmu_evtq_thread(irq, dev);
2127 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2128 		arm_smmu_priq_thread(irq, dev);
2129 
2130 	return IRQ_HANDLED;
2131 }
2132 
2133 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
2134 {
2135 	arm_smmu_gerror_handler(irq, dev);
2136 	return IRQ_WAKE_THREAD;
2137 }
2138 
2139 static void
2140 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2141 			struct arm_smmu_cmdq_ent *cmd)
2142 {
2143 	size_t log2_span;
2144 	size_t span_mask;
	/* ATC invalidates are always on 4096-byte pages */
2146 	size_t inval_grain_shift = 12;
2147 	unsigned long page_start, page_end;
2148 
2149 	*cmd = (struct arm_smmu_cmdq_ent) {
2150 		.opcode			= CMDQ_OP_ATC_INV,
2151 		.substream_valid	= !!ssid,
2152 		.atc.ssid		= ssid,
2153 	};
2154 
2155 	if (!size) {
2156 		cmd->atc.size = ATC_INV_SIZE_ALL;
2157 		return;
2158 	}
2159 
2160 	page_start	= iova >> inval_grain_shift;
2161 	page_end	= (iova + size - 1) >> inval_grain_shift;
2162 
2163 	/*
	 * In an ATS Invalidate Request, the address must be aligned to the
	 * range size, which must be a power-of-two number of pages. We
2166 	 * thus have to choose between grossly over-invalidating the region, or
2167 	 * splitting the invalidation into multiple commands. For simplicity
2168 	 * we'll go with the first solution, but should refine it in the future
2169 	 * if multiple commands are shown to be more efficient.
2170 	 *
2171 	 * Find the smallest power of two that covers the range. The most
2172 	 * significant differing bit between the start and end addresses,
2173 	 * fls(start ^ end), indicates the required span. For example:
2174 	 *
2175 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2176 	 *		x = 0b1000 ^ 0b1011 = 0b11
2177 	 *		span = 1 << fls(x) = 4
2178 	 *
2179 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2180 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2181 	 *		span = 1 << fls(x) = 16
2182 	 */
2183 	log2_span	= fls_long(page_start ^ page_end);
2184 	span_mask	= (1ULL << log2_span) - 1;
2185 
2186 	page_start	&= ~span_mask;
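	/*
	 * Continuing the example above: to invalidate pages [7; 10],
	 * log2_span is fls(0b1101) = 4, so page_start is rounded down to 0
	 * and the resulting command covers a 16-page range from IOVA 0.
	 */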
2187 
2188 	cmd->atc.addr	= page_start << inval_grain_shift;
2189 	cmd->atc.size	= log2_span;
2190 }
2191 
2192 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
2193 {
2194 	int i;
2195 	struct arm_smmu_cmdq_ent cmd;
2196 
2197 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2198 
2199 	for (i = 0; i < master->num_sids; i++) {
2200 		cmd.atc.sid = master->sids[i];
2201 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
2202 	}
2203 
2204 	return arm_smmu_cmdq_issue_sync(master->smmu);
2205 }
2206 
2207 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2208 				   int ssid, unsigned long iova, size_t size)
2209 {
2210 	int i;
2211 	unsigned long flags;
2212 	struct arm_smmu_cmdq_ent cmd;
2213 	struct arm_smmu_master *master;
2214 	struct arm_smmu_cmdq_batch cmds = {};
2215 
2216 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2217 		return 0;
2218 
2219 	/*
2220 	 * Ensure that we've completed prior invalidation of the main TLBs
2221 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2222 	 * arm_smmu_enable_ats():
2223 	 *
2224 	 *	// unmap()			// arm_smmu_enable_ats()
2225 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2226 	 *	smp_mb();			[...]
2227 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2228 	 *
2229 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2230 	 * ATS was enabled at the PCI device before completion of the TLBI.
2231 	 */
2232 	smp_mb();
2233 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2234 		return 0;
2235 
2236 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2237 
2238 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2239 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
2240 		if (!master->ats_enabled)
2241 			continue;
2242 
2243 		for (i = 0; i < master->num_sids; i++) {
2244 			cmd.atc.sid = master->sids[i];
2245 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2246 		}
2247 	}
2248 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2249 
2250 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2251 }
2252 
2253 /* IO_PGTABLE API */
2254 static void arm_smmu_tlb_inv_context(void *cookie)
2255 {
2256 	struct arm_smmu_domain *smmu_domain = cookie;
2257 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2258 	struct arm_smmu_cmdq_ent cmd;
2259 
2260 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2261 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
2262 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2263 		cmd.tlbi.vmid	= 0;
2264 	} else {
2265 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2266 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2267 	}
2268 
2269 	/*
2270 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2271 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2272 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2273 	 * insertion to guarantee those are observed before the TLBI. Do be
2274 	 * careful, 007.
2275 	 */
2276 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2277 	arm_smmu_cmdq_issue_sync(smmu);
2278 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2279 }
2280 
2281 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
2282 				   size_t granule, bool leaf,
2283 				   struct arm_smmu_domain *smmu_domain)
2284 {
2285 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2286 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
2287 	size_t inv_range = granule;
2288 	struct arm_smmu_cmdq_batch cmds = {};
2289 	struct arm_smmu_cmdq_ent cmd = {
2290 		.tlbi = {
2291 			.leaf	= leaf,
2292 		},
2293 	};
2294 
2295 	if (!size)
2296 		return;
2297 
2298 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2299 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
2300 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2301 	} else {
2302 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2303 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2304 	}
2305 
2306 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2307 		/* Get the leaf page size */
2308 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2309 
2310 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2311 		cmd.tlbi.tg = (tg - 10) / 2;
2312 
2313 		/* Determine what level the granule is at */
2314 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
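		/*
		 * For example, with 4KB pages (tg = 12) the TG field above is
		 * (12 - 10) / 2 = 1; a 4KB leaf granule then gives
		 * ttl = 4 - (12 - 3) / (12 - 3) = 3, while a 2MB block gives
		 * ttl = 4 - (21 - 3) / (12 - 3) = 2.
		 */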
2315 
2316 		num_pages = size >> tg;
2317 	}
2318 
2319 	while (iova < end) {
2320 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2321 			/*
2322 			 * On each iteration of the loop, the range is 5 bits
2323 			 * worth of the aligned size remaining.
2324 			 * The range in pages is:
2325 			 *
2326 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2327 			 */
2328 			unsigned long scale, num;
2329 
2330 			/* Determine the power of 2 multiple number of pages */
2331 			scale = __ffs(num_pages);
2332 			cmd.tlbi.scale = scale;
2333 
2334 			/* Determine how many chunks of 2^scale size we have */
2335 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2336 			cmd.tlbi.num = num - 1;
2337 
2338 			/* range is num * 2^scale * pgsize */
2339 			inv_range = num << (scale + tg);
2340 
2341 			/* Clear out the lower order bits for the next iteration */
2342 			num_pages -= num << scale;
2343 		}
2344 
2345 		cmd.tlbi.addr = iova;
2346 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
2347 		iova += inv_range;
2348 	}
2349 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2350 
2351 	/*
2352 	 * Unfortunately, this can't be leaf-only since we may have
2353 	 * zapped an entire table.
2354 	 */
2355 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
2356 }
2357 
2358 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2359 					 unsigned long iova, size_t granule,
2360 					 void *cookie)
2361 {
2362 	struct arm_smmu_domain *smmu_domain = cookie;
2363 	struct iommu_domain *domain = &smmu_domain->domain;
2364 
2365 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2366 }
2367 
2368 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2369 				  size_t granule, void *cookie)
2370 {
2371 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
2372 }
2373 
2374 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
2375 				  size_t granule, void *cookie)
2376 {
2377 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
2378 }
2379 
2380 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2381 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2382 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2383 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
2384 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2385 };
2386 
2387 /* IOMMU API */
2388 static bool arm_smmu_capable(enum iommu_cap cap)
2389 {
2390 	switch (cap) {
2391 	case IOMMU_CAP_CACHE_COHERENCY:
2392 		return true;
2393 	case IOMMU_CAP_NOEXEC:
2394 		return true;
2395 	default:
2396 		return false;
2397 	}
2398 }
2399 
2400 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2401 {
2402 	struct arm_smmu_domain *smmu_domain;
2403 
2404 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2405 	    type != IOMMU_DOMAIN_DMA &&
2406 	    type != IOMMU_DOMAIN_IDENTITY)
2407 		return NULL;
2408 
2409 	/*
2410 	 * Allocate the domain and initialise some of its data structures.
2411 	 * We can't really do anything meaningful until we've added a
2412 	 * master.
2413 	 */
2414 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2415 	if (!smmu_domain)
2416 		return NULL;
2417 
2418 	if (type == IOMMU_DOMAIN_DMA &&
2419 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
2420 		kfree(smmu_domain);
2421 		return NULL;
2422 	}
2423 
2424 	mutex_init(&smmu_domain->init_mutex);
2425 	INIT_LIST_HEAD(&smmu_domain->devices);
2426 	spin_lock_init(&smmu_domain->devices_lock);
2427 
2428 	return &smmu_domain->domain;
2429 }
2430 
2431 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2432 {
2433 	int idx, size = 1 << span;
2434 
2435 	do {
2436 		idx = find_first_zero_bit(map, size);
2437 		if (idx == size)
2438 			return -ENOSPC;
2439 	} while (test_and_set_bit(idx, map));
2440 
2441 	return idx;
2442 }
2443 
2444 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2445 {
2446 	clear_bit(idx, map);
2447 }
2448 
2449 static void arm_smmu_domain_free(struct iommu_domain *domain)
2450 {
2451 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2452 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2453 
2454 	iommu_put_dma_cookie(domain);
2455 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2456 
2457 	/* Free the CD and ASID, if we allocated them */
2458 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2459 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2460 
2461 		if (cfg->cdcfg.cdtab)
2462 			arm_smmu_free_cd_tables(smmu_domain);
2463 		arm_smmu_free_asid(&cfg->cd);
2464 	} else {
2465 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2466 		if (cfg->vmid)
2467 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2468 	}
2469 
2470 	kfree(smmu_domain);
2471 }
2472 
2473 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2474 				       struct arm_smmu_master *master,
2475 				       struct io_pgtable_cfg *pgtbl_cfg)
2476 {
2477 	int ret;
2478 	u32 asid;
2479 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2480 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2481 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2482 
2483 	ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
2484 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2485 	if (ret)
2486 		return ret;
2487 
2488 	cfg->s1cdmax = master->ssid_bits;
2489 
2490 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2491 	if (ret)
2492 		goto out_free_asid;
2493 
2494 	cfg->cd.asid	= (u16)asid;
2495 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2496 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2497 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2498 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2499 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2500 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2501 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2502 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2503 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2504 
2505 	/*
2506 	 * Note that this will end up calling arm_smmu_sync_cd() before
2507 	 * the master has been added to the devices list for this domain.
2508 	 * This isn't an issue because the STE hasn't been installed yet.
2509 	 */
2510 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2511 	if (ret)
2512 		goto out_free_cd_tables;
2513 
2514 	return 0;
2515 
2516 out_free_cd_tables:
2517 	arm_smmu_free_cd_tables(smmu_domain);
2518 out_free_asid:
2519 	arm_smmu_free_asid(&cfg->cd);
2520 	return ret;
2521 }
2522 
2523 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2524 				       struct arm_smmu_master *master,
2525 				       struct io_pgtable_cfg *pgtbl_cfg)
2526 {
2527 	int vmid;
2528 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2529 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2530 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2531 
2532 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2533 	if (vmid < 0)
2534 		return vmid;
2535 
2536 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2537 	cfg->vmid	= (u16)vmid;
2538 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2539 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2540 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2541 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2542 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2543 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2544 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2545 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2546 	return 0;
2547 }
2548 
2549 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2550 				    struct arm_smmu_master *master)
2551 {
2552 	int ret;
2553 	unsigned long ias, oas;
2554 	enum io_pgtable_fmt fmt;
2555 	struct io_pgtable_cfg pgtbl_cfg;
2556 	struct io_pgtable_ops *pgtbl_ops;
2557 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2558 				 struct arm_smmu_master *,
2559 				 struct io_pgtable_cfg *);
2560 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2561 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2562 
2563 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2564 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2565 		return 0;
2566 	}
2567 
2568 	/* Restrict the stage to what we can actually support */
2569 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2570 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2571 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2572 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2573 
2574 	switch (smmu_domain->stage) {
2575 	case ARM_SMMU_DOMAIN_S1:
2576 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2577 		ias = min_t(unsigned long, ias, VA_BITS);
2578 		oas = smmu->ias;
2579 		fmt = ARM_64_LPAE_S1;
2580 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2581 		break;
2582 	case ARM_SMMU_DOMAIN_NESTED:
2583 	case ARM_SMMU_DOMAIN_S2:
2584 		ias = smmu->ias;
2585 		oas = smmu->oas;
2586 		fmt = ARM_64_LPAE_S2;
2587 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2588 		break;
2589 	default:
2590 		return -EINVAL;
2591 	}
2592 
2593 	pgtbl_cfg = (struct io_pgtable_cfg) {
2594 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2595 		.ias		= ias,
2596 		.oas		= oas,
2597 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2598 		.tlb		= &arm_smmu_flush_ops,
2599 		.iommu_dev	= smmu->dev,
2600 	};
2601 
2602 	if (smmu_domain->non_strict)
2603 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2604 
2605 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2606 	if (!pgtbl_ops)
2607 		return -ENOMEM;
2608 
2609 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2610 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2611 	domain->geometry.force_aperture = true;
2612 
2613 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2614 	if (ret < 0) {
2615 		free_io_pgtable_ops(pgtbl_ops);
2616 		return ret;
2617 	}
2618 
2619 	smmu_domain->pgtbl_ops = pgtbl_ops;
2620 	return 0;
2621 }
2622 
2623 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2624 {
2625 	__le64 *step;
2626 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2627 
2628 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2629 		struct arm_smmu_strtab_l1_desc *l1_desc;
2630 		int idx;
2631 
2632 		/* Two-level walk */
2633 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2634 		l1_desc = &cfg->l1_desc[idx];
2635 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2636 		step = &l1_desc->l2ptr[idx];
2637 	} else {
2638 		/* Simple linear lookup */
2639 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2640 	}
2641 
2642 	return step;
2643 }
2644 
2645 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2646 {
2647 	int i, j;
2648 	struct arm_smmu_device *smmu = master->smmu;
2649 
2650 	for (i = 0; i < master->num_sids; ++i) {
2651 		u32 sid = master->sids[i];
2652 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2653 
2654 		/* Bridged PCI devices may end up with duplicated IDs */
2655 		for (j = 0; j < i; j++)
2656 			if (master->sids[j] == sid)
2657 				break;
2658 		if (j < i)
2659 			continue;
2660 
2661 		arm_smmu_write_strtab_ent(master, sid, step);
2662 	}
2663 }
2664 
2665 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2666 {
2667 	struct device *dev = master->dev;
2668 	struct arm_smmu_device *smmu = master->smmu;
2669 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2670 
2671 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2672 		return false;
2673 
2674 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2675 		return false;
2676 
2677 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2678 }
2679 
2680 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2681 {
2682 	size_t stu;
2683 	struct pci_dev *pdev;
2684 	struct arm_smmu_device *smmu = master->smmu;
2685 	struct arm_smmu_domain *smmu_domain = master->domain;
2686 
2687 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2688 	if (!master->ats_enabled)
2689 		return;
2690 
2691 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2692 	stu = __ffs(smmu->pgsize_bitmap);
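	/* e.g. stu = 12 when the smallest supported page size is 4KB */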
2693 	pdev = to_pci_dev(master->dev);
2694 
2695 	atomic_inc(&smmu_domain->nr_ats_masters);
2696 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2697 	if (pci_enable_ats(pdev, stu))
2698 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2699 }
2700 
2701 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2702 {
2703 	struct arm_smmu_domain *smmu_domain = master->domain;
2704 
2705 	if (!master->ats_enabled)
2706 		return;
2707 
2708 	pci_disable_ats(to_pci_dev(master->dev));
2709 	/*
2710 	 * Ensure ATS is disabled at the endpoint before we issue the
2711 	 * ATC invalidation via the SMMU.
2712 	 */
2713 	wmb();
2714 	arm_smmu_atc_inv_master(master);
2715 	atomic_dec(&smmu_domain->nr_ats_masters);
2716 }
2717 
2718 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2719 {
2720 	int ret;
2721 	int features;
2722 	int num_pasids;
2723 	struct pci_dev *pdev;
2724 
2725 	if (!dev_is_pci(master->dev))
2726 		return -ENODEV;
2727 
2728 	pdev = to_pci_dev(master->dev);
2729 
2730 	features = pci_pasid_features(pdev);
2731 	if (features < 0)
2732 		return features;
2733 
2734 	num_pasids = pci_max_pasids(pdev);
2735 	if (num_pasids <= 0)
2736 		return num_pasids;
2737 
2738 	ret = pci_enable_pasid(pdev, features);
2739 	if (ret) {
2740 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2741 		return ret;
2742 	}
2743 
2744 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2745 				  master->smmu->ssid_bits);
2746 	return 0;
2747 }
2748 
2749 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2750 {
2751 	struct pci_dev *pdev;
2752 
2753 	if (!dev_is_pci(master->dev))
2754 		return;
2755 
2756 	pdev = to_pci_dev(master->dev);
2757 
2758 	if (!pdev->pasid_enabled)
2759 		return;
2760 
2761 	master->ssid_bits = 0;
2762 	pci_disable_pasid(pdev);
2763 }
2764 
2765 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2766 {
2767 	unsigned long flags;
2768 	struct arm_smmu_domain *smmu_domain = master->domain;
2769 
2770 	if (!smmu_domain)
2771 		return;
2772 
2773 	arm_smmu_disable_ats(master);
2774 
2775 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2776 	list_del(&master->domain_head);
2777 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2778 
2779 	master->domain = NULL;
2780 	master->ats_enabled = false;
2781 	arm_smmu_install_ste_for_dev(master);
2782 }
2783 
2784 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2785 {
2786 	int ret = 0;
2787 	unsigned long flags;
2788 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2789 	struct arm_smmu_device *smmu;
2790 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2791 	struct arm_smmu_master *master;
2792 
2793 	if (!fwspec)
2794 		return -ENOENT;
2795 
2796 	master = dev_iommu_priv_get(dev);
2797 	smmu = master->smmu;
2798 
2799 	arm_smmu_detach_dev(master);
2800 
2801 	mutex_lock(&smmu_domain->init_mutex);
2802 
2803 	if (!smmu_domain->smmu) {
2804 		smmu_domain->smmu = smmu;
2805 		ret = arm_smmu_domain_finalise(domain, master);
2806 		if (ret) {
2807 			smmu_domain->smmu = NULL;
2808 			goto out_unlock;
2809 		}
2810 	} else if (smmu_domain->smmu != smmu) {
2811 		dev_err(dev,
2812 			"cannot attach to SMMU %s (upstream of %s)\n",
2813 			dev_name(smmu_domain->smmu->dev),
2814 			dev_name(smmu->dev));
2815 		ret = -ENXIO;
2816 		goto out_unlock;
2817 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2818 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2819 		dev_err(dev,
2820 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2821 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2822 		ret = -EINVAL;
2823 		goto out_unlock;
2824 	}
2825 
2826 	master->domain = smmu_domain;
2827 
2828 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2829 		master->ats_enabled = arm_smmu_ats_supported(master);
2830 
2831 	arm_smmu_install_ste_for_dev(master);
2832 
2833 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2834 	list_add(&master->domain_head, &smmu_domain->devices);
2835 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2836 
2837 	arm_smmu_enable_ats(master);
2838 
2839 out_unlock:
2840 	mutex_unlock(&smmu_domain->init_mutex);
2841 	return ret;
2842 }
2843 
2844 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2845 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2846 {
2847 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2848 
2849 	if (!ops)
2850 		return -ENODEV;
2851 
2852 	return ops->map(ops, iova, paddr, size, prot, gfp);
2853 }
2854 
2855 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2856 			     size_t size, struct iommu_iotlb_gather *gather)
2857 {
2858 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2859 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2860 
2861 	if (!ops)
2862 		return 0;
2863 
2864 	return ops->unmap(ops, iova, size, gather);
2865 }
2866 
2867 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2868 {
2869 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2870 
2871 	if (smmu_domain->smmu)
2872 		arm_smmu_tlb_inv_context(smmu_domain);
2873 }
2874 
2875 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2876 				struct iommu_iotlb_gather *gather)
2877 {
2878 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2879 
2880 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2881 			       gather->pgsize, true, smmu_domain);
2882 }
2883 
2884 static phys_addr_t
2885 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2886 {
2887 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2888 
2889 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2890 		return iova;
2891 
2892 	if (!ops)
2893 		return 0;
2894 
2895 	return ops->iova_to_phys(ops, iova);
2896 }
2897 
2898 static struct platform_driver arm_smmu_driver;
2899 
2900 static
2901 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2902 {
2903 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2904 							  fwnode);
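	/*
	 * driver_find_device_by_fwnode() takes a reference on the device it
	 * returns; we only need its drvdata, so drop that reference at once.
	 */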
2905 	put_device(dev);
2906 	return dev ? dev_get_drvdata(dev) : NULL;
2907 }
2908 
2909 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2910 {
2911 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2912 
2913 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2914 		limit *= 1UL << STRTAB_SPLIT;
2915 
2916 	return sid < limit;
2917 }
2918 
2919 static struct iommu_ops arm_smmu_ops;
2920 
2921 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2922 {
2923 	int i, ret;
2924 	struct arm_smmu_device *smmu;
2925 	struct arm_smmu_master *master;
2926 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2927 
2928 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2929 		return ERR_PTR(-ENODEV);
2930 
2931 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2932 		return ERR_PTR(-EBUSY);
2933 
2934 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2935 	if (!smmu)
2936 		return ERR_PTR(-ENODEV);
2937 
2938 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2939 	if (!master)
2940 		return ERR_PTR(-ENOMEM);
2941 
2942 	master->dev = dev;
2943 	master->smmu = smmu;
2944 	master->sids = fwspec->ids;
2945 	master->num_sids = fwspec->num_ids;
2946 	dev_iommu_priv_set(dev, master);
2947 
2948 	/* Check the SIDs are in range of the SMMU and our stream table */
2949 	for (i = 0; i < master->num_sids; i++) {
2950 		u32 sid = master->sids[i];
2951 
2952 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2953 			ret = -ERANGE;
2954 			goto err_free_master;
2955 		}
2956 
2957 		/* Ensure l2 strtab is initialised */
2958 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2959 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2960 			if (ret)
2961 				goto err_free_master;
2962 		}
2963 	}
2964 
2965 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2966 
2967 	/*
2968 	 * Note that PASID must be enabled before, and disabled after ATS:
2969 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2970 	 *
2971 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2972 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2973 	 *   are changed.
2974 	 */
2975 	arm_smmu_enable_pasid(master);
2976 
2977 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2978 		master->ssid_bits = min_t(u8, master->ssid_bits,
2979 					  CTXDESC_LINEAR_CDMAX);
2980 
2981 	return &smmu->iommu;
2982 
2983 err_free_master:
2984 	kfree(master);
2985 	dev_iommu_priv_set(dev, NULL);
2986 	return ERR_PTR(ret);
2987 }
2988 
2989 static void arm_smmu_release_device(struct device *dev)
2990 {
2991 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2992 	struct arm_smmu_master *master;
2993 
2994 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2995 		return;
2996 
2997 	master = dev_iommu_priv_get(dev);
2998 	arm_smmu_detach_dev(master);
2999 	arm_smmu_disable_pasid(master);
3000 	kfree(master);
3001 	iommu_fwspec_free(dev);
3002 }
3003 
3004 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3005 {
3006 	struct iommu_group *group;
3007 
3008 	/*
3009 	 * We don't support devices sharing stream IDs other than PCI RID
3010 	 * aliases, since the necessary ID-to-device lookup becomes rather
3011 	 * impractical given a potential sparse 32-bit stream ID space.
3012 	 */
3013 	if (dev_is_pci(dev))
3014 		group = pci_device_group(dev);
3015 	else
3016 		group = generic_device_group(dev);
3017 
3018 	return group;
3019 }
3020 
3021 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
3022 				    enum iommu_attr attr, void *data)
3023 {
3024 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3025 
3026 	switch (domain->type) {
3027 	case IOMMU_DOMAIN_UNMANAGED:
3028 		switch (attr) {
3029 		case DOMAIN_ATTR_NESTING:
3030 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
3031 			return 0;
3032 		default:
3033 			return -ENODEV;
3034 		}
3035 		break;
3036 	case IOMMU_DOMAIN_DMA:
3037 		switch (attr) {
3038 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3039 			*(int *)data = smmu_domain->non_strict;
3040 			return 0;
3041 		default:
3042 			return -ENODEV;
3043 		}
3044 		break;
3045 	default:
3046 		return -EINVAL;
3047 	}
3048 }
3049 
3050 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
3051 				    enum iommu_attr attr, void *data)
3052 {
3053 	int ret = 0;
3054 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3055 
3056 	mutex_lock(&smmu_domain->init_mutex);
3057 
3058 	switch (domain->type) {
3059 	case IOMMU_DOMAIN_UNMANAGED:
3060 		switch (attr) {
3061 		case DOMAIN_ATTR_NESTING:
3062 			if (smmu_domain->smmu) {
3063 				ret = -EPERM;
3064 				goto out_unlock;
3065 			}
3066 
3067 			if (*(int *)data)
3068 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
3069 			else
3070 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3071 			break;
3072 		default:
3073 			ret = -ENODEV;
3074 		}
3075 		break;
3076 	case IOMMU_DOMAIN_DMA:
3077 		switch(attr) {
3078 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3079 			smmu_domain->non_strict = *(int *)data;
3080 			break;
3081 		default:
3082 			ret = -ENODEV;
3083 		}
3084 		break;
3085 	default:
3086 		ret = -EINVAL;
3087 	}
3088 
3089 out_unlock:
3090 	mutex_unlock(&smmu_domain->init_mutex);
3091 	return ret;
3092 }
3093 
3094 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
3095 {
3096 	return iommu_fwspec_add_ids(dev, args->args, 1);
3097 }
3098 
3099 static void arm_smmu_get_resv_regions(struct device *dev,
3100 				      struct list_head *head)
3101 {
3102 	struct iommu_resv_region *region;
3103 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3104 
3105 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3106 					 prot, IOMMU_RESV_SW_MSI);
3107 	if (!region)
3108 		return;
3109 
3110 	list_add_tail(&region->list, head);
3111 
3112 	iommu_dma_get_resv_regions(dev, head);
3113 }
3114 
3115 static struct iommu_ops arm_smmu_ops = {
3116 	.capable		= arm_smmu_capable,
3117 	.domain_alloc		= arm_smmu_domain_alloc,
3118 	.domain_free		= arm_smmu_domain_free,
3119 	.attach_dev		= arm_smmu_attach_dev,
3120 	.map			= arm_smmu_map,
3121 	.unmap			= arm_smmu_unmap,
3122 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3123 	.iotlb_sync		= arm_smmu_iotlb_sync,
3124 	.iova_to_phys		= arm_smmu_iova_to_phys,
3125 	.probe_device		= arm_smmu_probe_device,
3126 	.release_device		= arm_smmu_release_device,
3127 	.device_group		= arm_smmu_device_group,
3128 	.domain_get_attr	= arm_smmu_domain_get_attr,
3129 	.domain_set_attr	= arm_smmu_domain_set_attr,
3130 	.of_xlate		= arm_smmu_of_xlate,
3131 	.get_resv_regions	= arm_smmu_get_resv_regions,
3132 	.put_resv_regions	= generic_iommu_put_resv_regions,
3133 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3134 };
3135 
3136 /* Probing and initialisation functions */
3137 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3138 				   struct arm_smmu_queue *q,
3139 				   unsigned long prod_off,
3140 				   unsigned long cons_off,
3141 				   size_t dwords, const char *name)
3142 {
3143 	size_t qsz;
3144 
3145 	do {
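		/*
		 * If the allocation fails, halve the number of entries (by
		 * decrementing max_n_shift below) and retry, until either it
		 * succeeds or the queue is already no bigger than a page.
		 */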
3146 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3147 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3148 					      GFP_KERNEL);
3149 		if (q->base || qsz < PAGE_SIZE)
3150 			break;
3151 
3152 		q->llq.max_n_shift--;
3153 	} while (1);
3154 
3155 	if (!q->base) {
3156 		dev_err(smmu->dev,
3157 			"failed to allocate queue (0x%zx bytes) for %s\n",
3158 			qsz, name);
3159 		return -ENOMEM;
3160 	}
3161 
3162 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3163 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3164 			 1 << q->llq.max_n_shift, name);
3165 	}
3166 
3167 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
3168 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
3169 	q->ent_dwords	= dwords;
3170 
3171 	q->q_base  = Q_BASE_RWA;
3172 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3173 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3174 
3175 	q->llq.prod = q->llq.cons = 0;
3176 	return 0;
3177 }
3178 
3179 static void arm_smmu_cmdq_free_bitmap(void *data)
3180 {
3181 	unsigned long *bitmap = data;
3182 	bitmap_free(bitmap);
3183 }
3184 
3185 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3186 {
3187 	int ret = 0;
3188 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3189 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3190 	atomic_long_t *bitmap;
3191 
3192 	atomic_set(&cmdq->owner_prod, 0);
3193 	atomic_set(&cmdq->lock, 0);
3194 
3195 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
3196 	if (!bitmap) {
3197 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
3198 		ret = -ENOMEM;
3199 	} else {
3200 		cmdq->valid_map = bitmap;
3201 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
3202 	}
3203 
3204 	return ret;
3205 }
3206 
3207 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3208 {
3209 	int ret;
3210 
3211 	/* cmdq */
3212 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
3213 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
3214 				      "cmdq");
3215 	if (ret)
3216 		return ret;
3217 
3218 	ret = arm_smmu_cmdq_init(smmu);
3219 	if (ret)
3220 		return ret;
3221 
3222 	/* evtq */
3223 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
3224 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
3225 				      "evtq");
3226 	if (ret)
3227 		return ret;
3228 
3229 	/* priq */
3230 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3231 		return 0;
3232 
3233 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
3234 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
3235 				       "priq");
3236 }
3237 
3238 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3239 {
3240 	unsigned int i;
3241 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3242 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3243 	void *strtab = smmu->strtab_cfg.strtab;
3244 
3245 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3246 	if (!cfg->l1_desc) {
3247 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
3248 		return -ENOMEM;
3249 	}
3250 
3251 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3252 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3253 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3254 	}
3255 
3256 	return 0;
3257 }
3258 
3259 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3260 {
3261 	void *strtab;
3262 	u64 reg;
3263 	u32 size, l1size;
3264 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3265 
3266 	/* Calculate the L1 size, capped to the SIDSIZE. */
3267 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3268 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3269 	cfg->num_l1_ents = 1 << size;
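	/*
	 * With the defaults used by this driver (a 1MB L1 table of one-dword
	 * descriptors and STRTAB_SPLIT == 8), this caps the L1 at 128k
	 * entries, i.e. coverage of up to a 25-bit SID space.
	 */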
3270 
3271 	size += STRTAB_SPLIT;
3272 	if (size < smmu->sid_bits)
3273 		dev_warn(smmu->dev,
3274 			 "2-level strtab only covers %u/%u bits of SID\n",
3275 			 size, smmu->sid_bits);
3276 
3277 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3278 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3279 				     GFP_KERNEL);
3280 	if (!strtab) {
3281 		dev_err(smmu->dev,
3282 			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
3284 		return -ENOMEM;
3285 	}
3286 	cfg->strtab = strtab;
3287 
3288 	/* Configure strtab_base_cfg for 2 levels */
3289 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3290 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3291 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3292 	cfg->strtab_base_cfg = reg;
3293 
3294 	return arm_smmu_init_l1_strtab(smmu);
3295 }
3296 
3297 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3298 {
3299 	void *strtab;
3300 	u64 reg;
3301 	u32 size;
3302 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3303 
3304 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
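	/* i.e. one 64-byte STE (STRTAB_STE_DWORDS dwords) per possible SID */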
3305 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3306 				     GFP_KERNEL);
3307 	if (!strtab) {
3308 		dev_err(smmu->dev,
3309 			"failed to allocate linear stream table (%u bytes)\n",
3310 			size);
3311 		return -ENOMEM;
3312 	}
3313 	cfg->strtab = strtab;
3314 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3315 
3316 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3317 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3318 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3319 	cfg->strtab_base_cfg = reg;
3320 
3321 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3322 	return 0;
3323 }
3324 
3325 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3326 {
3327 	u64 reg;
3328 	int ret;
3329 
3330 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3331 		ret = arm_smmu_init_strtab_2lvl(smmu);
3332 	else
3333 		ret = arm_smmu_init_strtab_linear(smmu);
3334 
3335 	if (ret)
3336 		return ret;
3337 
3338 	/* Set the strtab base address */
3339 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3340 	reg |= STRTAB_BASE_RA;
3341 	smmu->strtab_cfg.strtab_base = reg;
3342 
3343 	/* Allocate the first VMID for stage-2 bypass STEs */
3344 	set_bit(0, smmu->vmid_map);
3345 	return 0;
3346 }
3347 
3348 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3349 {
3350 	int ret;
3351 
3352 	ret = arm_smmu_init_queues(smmu);
3353 	if (ret)
3354 		return ret;
3355 
3356 	return arm_smmu_init_strtab(smmu);
3357 }
3358 
3359 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3360 				   unsigned int reg_off, unsigned int ack_off)
3361 {
3362 	u32 reg;
3363 
3364 	writel_relaxed(val, smmu->base + reg_off);
3365 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3366 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3367 }
3368 
/* GBPA is "special": updates are handshaked via the GBPA.Update bit */
3370 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3371 {
3372 	int ret;
3373 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3374 
3375 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3376 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3377 	if (ret)
3378 		return ret;
3379 
3380 	reg &= ~clr;
3381 	reg |= set;
3382 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3383 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3384 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3385 
3386 	if (ret)
3387 		dev_err(smmu->dev, "GBPA not responding to update\n");
3388 	return ret;
3389 }
3390 
3391 static void arm_smmu_free_msis(void *data)
3392 {
3393 	struct device *dev = data;
3394 	platform_msi_domain_free_irqs(dev);
3395 }
3396 
3397 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3398 {
3399 	phys_addr_t doorbell;
3400 	struct device *dev = msi_desc_to_dev(desc);
3401 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3402 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3403 
3404 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3405 	doorbell &= MSI_CFG0_ADDR_MASK;
3406 
3407 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3408 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3409 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3410 }
3411 
3412 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3413 {
3414 	struct msi_desc *desc;
3415 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3416 	struct device *dev = smmu->dev;
3417 
3418 	/* Clear the MSI address regs */
3419 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3420 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3421 
3422 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3423 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3424 	else
3425 		nvec--;
3426 
3427 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3428 		return;
3429 
3430 	if (!dev->msi_domain) {
3431 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3432 		return;
3433 	}
3434 
3435 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3436 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3437 	if (ret) {
3438 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3439 		return;
3440 	}
3441 
3442 	for_each_msi_entry(desc, dev) {
3443 		switch (desc->platform.msi_index) {
3444 		case EVTQ_MSI_INDEX:
3445 			smmu->evtq.q.irq = desc->irq;
3446 			break;
3447 		case GERROR_MSI_INDEX:
3448 			smmu->gerr_irq = desc->irq;
3449 			break;
3450 		case PRIQ_MSI_INDEX:
3451 			smmu->priq.q.irq = desc->irq;
3452 			break;
3453 		default:	/* Unknown */
3454 			continue;
3455 		}
3456 	}
3457 
3458 	/* Add callback to free MSIs on teardown */
3459 	devm_add_action(dev, arm_smmu_free_msis, dev);
3460 }
3461 
3462 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3463 {
3464 	int irq, ret;
3465 
3466 	arm_smmu_setup_msis(smmu);
3467 
3468 	/* Request interrupt lines */
3469 	irq = smmu->evtq.q.irq;
3470 	if (irq) {
3471 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3472 						arm_smmu_evtq_thread,
3473 						IRQF_ONESHOT,
3474 						"arm-smmu-v3-evtq", smmu);
3475 		if (ret < 0)
3476 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3477 	} else {
3478 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3479 	}
3480 
3481 	irq = smmu->gerr_irq;
3482 	if (irq) {
3483 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3484 				       0, "arm-smmu-v3-gerror", smmu);
3485 		if (ret < 0)
3486 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3487 	} else {
3488 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3489 	}
3490 
3491 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3492 		irq = smmu->priq.q.irq;
3493 		if (irq) {
3494 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3495 							arm_smmu_priq_thread,
3496 							IRQF_ONESHOT,
3497 							"arm-smmu-v3-priq",
3498 							smmu);
3499 			if (ret < 0)
3500 				dev_warn(smmu->dev,
3501 					 "failed to enable priq irq\n");
3502 		} else {
3503 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3504 		}
3505 	}
3506 }
3507 
3508 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3509 {
3510 	int ret, irq;
3511 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3512 
3513 	/* Disable IRQs first */
3514 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3515 				      ARM_SMMU_IRQ_CTRLACK);
3516 	if (ret) {
3517 		dev_err(smmu->dev, "failed to disable irqs\n");
3518 		return ret;
3519 	}
3520 
3521 	irq = smmu->combined_irq;
3522 	if (irq) {
3523 		/*
3524 		 * Cavium ThunderX2 implementation doesn't support unique irq
3525 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3526 		 */
3527 		ret = devm_request_threaded_irq(smmu->dev, irq,
3528 					arm_smmu_combined_irq_handler,
3529 					arm_smmu_combined_irq_thread,
3530 					IRQF_ONESHOT,
3531 					"arm-smmu-v3-combined-irq", smmu);
3532 		if (ret < 0)
3533 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3534 	} else
3535 		arm_smmu_setup_unique_irqs(smmu);
3536 
3537 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3538 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3539 
3540 	/* Enable interrupt generation on the SMMU */
3541 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3542 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3543 	if (ret)
3544 		dev_warn(smmu->dev, "failed to enable irqs\n");
3545 
3546 	return 0;
3547 }
3548 
3549 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3550 {
3551 	int ret;
3552 
3553 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3554 	if (ret)
3555 		dev_err(smmu->dev, "failed to clear cr0\n");
3556 
3557 	return ret;
3558 }
3559 
3560 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3561 {
3562 	int ret;
3563 	u32 reg, enables;
3564 	struct arm_smmu_cmdq_ent cmd;
3565 
3566 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3567 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3568 	if (reg & CR0_SMMUEN) {
3569 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3570 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3571 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3572 	}
3573 
3574 	ret = arm_smmu_device_disable(smmu);
3575 	if (ret)
3576 		return ret;
3577 
3578 	/* CR1 (table and queue memory attributes) */
3579 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3580 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3581 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3582 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3583 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3584 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3585 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3586 
3587 	/* CR2 (random crap) */
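	/* Private TLB maintenance, record C_BAD_STREAMID, enable E2H */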
3588 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3589 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3590 
3591 	/* Stream table */
3592 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3593 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3594 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3595 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3596 
3597 	/* Command queue */
3598 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3599 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3600 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3601 
3602 	enables = CR0_CMDQEN;
3603 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3604 				      ARM_SMMU_CR0ACK);
3605 	if (ret) {
3606 		dev_err(smmu->dev, "failed to enable command queue\n");
3607 		return ret;
3608 	}
3609 
3610 	/* Invalidate any cached configuration */
3611 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3612 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3613 	arm_smmu_cmdq_issue_sync(smmu);
3614 
3615 	/* Invalidate any stale TLB entries */
3616 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3617 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3618 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3619 	}
3620 
3621 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3622 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3623 	arm_smmu_cmdq_issue_sync(smmu);
3624 
3625 	/* Event queue */
3626 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3627 	writel_relaxed(smmu->evtq.q.llq.prod,
3628 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3629 	writel_relaxed(smmu->evtq.q.llq.cons,
3630 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3631 
3632 	enables |= CR0_EVTQEN;
3633 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3634 				      ARM_SMMU_CR0ACK);
3635 	if (ret) {
3636 		dev_err(smmu->dev, "failed to enable event queue\n");
3637 		return ret;
3638 	}
3639 
3640 	/* PRI queue */
3641 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3642 		writeq_relaxed(smmu->priq.q.q_base,
3643 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3644 		writel_relaxed(smmu->priq.q.llq.prod,
3645 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3646 		writel_relaxed(smmu->priq.q.llq.cons,
3647 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3648 
3649 		enables |= CR0_PRIQEN;
3650 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3651 					      ARM_SMMU_CR0ACK);
3652 		if (ret) {
3653 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3654 			return ret;
3655 		}
3656 	}
3657 
3658 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3659 		enables |= CR0_ATSCHK;
3660 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3661 					      ARM_SMMU_CR0ACK);
3662 		if (ret) {
3663 			dev_err(smmu->dev, "failed to enable ATS check\n");
3664 			return ret;
3665 		}
3666 	}
3667 
3668 	ret = arm_smmu_setup_irqs(smmu);
3669 	if (ret) {
		dev_err(smmu->dev, "failed to set up irqs\n");
3671 		return ret;
3672 	}
3673 
3674 	if (is_kdump_kernel())
3675 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3676 
3677 	/* Enable the SMMU interface, or ensure bypass */
3678 	if (!bypass || disable_bypass) {
3679 		enables |= CR0_SMMUEN;
3680 	} else {
3681 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3682 		if (ret)
3683 			return ret;
3684 	}
3685 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3686 				      ARM_SMMU_CR0ACK);
3687 	if (ret) {
3688 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3689 		return ret;
3690 	}
3691 
3692 	return 0;
3693 }
3694 
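/*
 * Probe the ID registers (IDR0/1/3/5) to discover which optional features
 * the implementation supports, the queue and StreamID/SubstreamID sizes,
 * the supported translation granules and the input/output address sizes.
 */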
3695 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3696 {
3697 	u32 reg;
3698 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3699 
3700 	/* IDR0 */
3701 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3702 
3703 	/* 2-level structures */
3704 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3705 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3706 
3707 	if (reg & IDR0_CD2L)
3708 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3709 
3710 	/*
3711 	 * Translation table endianness.
3712 	 * We currently require the same endianness as the CPU, but this
3713 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3714 	 */
3715 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3716 	case IDR0_TTENDIAN_MIXED:
3717 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3718 		break;
3719 #ifdef __BIG_ENDIAN
3720 	case IDR0_TTENDIAN_BE:
3721 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3722 		break;
3723 #else
3724 	case IDR0_TTENDIAN_LE:
3725 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3726 		break;
3727 #endif
3728 	default:
3729 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3730 		return -ENXIO;
3731 	}
3732 
3733 	/* Boolean feature flags */
3734 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3735 		smmu->features |= ARM_SMMU_FEAT_PRI;
3736 
3737 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3738 		smmu->features |= ARM_SMMU_FEAT_ATS;
3739 
3740 	if (reg & IDR0_SEV)
3741 		smmu->features |= ARM_SMMU_FEAT_SEV;
3742 
3743 	if (reg & IDR0_MSI)
3744 		smmu->features |= ARM_SMMU_FEAT_MSI;
3745 
3746 	if (reg & IDR0_HYP)
3747 		smmu->features |= ARM_SMMU_FEAT_HYP;
3748 
3749 	/*
3750 	 * The coherency feature as set by FW is used in preference to the ID
3751 	 * register, but warn on mismatch.
3752 	 */
3753 	if (!!(reg & IDR0_COHACC) != coherent)
3754 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3755 			 coherent ? "true" : "false");
3756 
3757 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3758 	case IDR0_STALL_MODEL_FORCE:
3759 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3760 		fallthrough;
3761 	case IDR0_STALL_MODEL_STALL:
3762 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3763 	}
3764 
3765 	if (reg & IDR0_S1P)
3766 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3767 
3768 	if (reg & IDR0_S2P)
3769 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3770 
3771 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3772 		dev_err(smmu->dev, "no translation support!\n");
3773 		return -ENXIO;
3774 	}
3775 
3776 	/* We only support the AArch64 table format at present */
3777 	switch (FIELD_GET(IDR0_TTF, reg)) {
3778 	case IDR0_TTF_AARCH32_64:
3779 		smmu->ias = 40;
3780 		fallthrough;
3781 	case IDR0_TTF_AARCH64:
3782 		break;
3783 	default:
3784 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3785 		return -ENXIO;
3786 	}
3787 
3788 	/* ASID/VMID sizes */
3789 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3790 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3791 
3792 	/* IDR1 */
3793 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3794 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3795 		dev_err(smmu->dev, "embedded implementation not supported\n");
3796 		return -ENXIO;
3797 	}
3798 
3799 	/* Queue sizes, capped to ensure natural alignment */
3800 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3801 					     FIELD_GET(IDR1_CMDQS, reg));
3802 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3803 		/*
3804 		 * We don't support splitting up batches, so one batch of
3805 		 * commands plus an extra sync needs to fit inside the command
3806 		 * queue. There's also no way we can handle the weird alignment
3807 		 * restrictions on the base pointer for a unit-length queue.
3808 		 */
3809 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3810 			CMDQ_BATCH_ENTRIES);
3811 		return -ENXIO;
3812 	}
3813 
3814 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3815 					     FIELD_GET(IDR1_EVTQS, reg));
3816 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3817 					     FIELD_GET(IDR1_PRIQS, reg));
3818 
3819 	/* SID/SSID sizes */
3820 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3821 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3822 
3823 	/*
3824 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3825 	 * table, use a linear table instead.
3826 	 */
3827 	if (smmu->sid_bits <= STRTAB_SPLIT)
3828 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3829 
3830 	/* IDR3 */
3831 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3832 	if (FIELD_GET(IDR3_RIL, reg))
3833 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3834 
3835 	/* IDR5 */
3836 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3837 
3838 	/* Maximum number of outstanding stalls */
3839 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3840 
3841 	/* Page sizes */
3842 	if (reg & IDR5_GRAN64K)
3843 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3844 	if (reg & IDR5_GRAN16K)
3845 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3846 	if (reg & IDR5_GRAN4K)
3847 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3848 
3849 	/* Input address size */
3850 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3851 		smmu->features |= ARM_SMMU_FEAT_VAX;
3852 
3853 	/* Output address size */
3854 	switch (FIELD_GET(IDR5_OAS, reg)) {
3855 	case IDR5_OAS_32_BIT:
3856 		smmu->oas = 32;
3857 		break;
3858 	case IDR5_OAS_36_BIT:
3859 		smmu->oas = 36;
3860 		break;
3861 	case IDR5_OAS_40_BIT:
3862 		smmu->oas = 40;
3863 		break;
3864 	case IDR5_OAS_42_BIT:
3865 		smmu->oas = 42;
3866 		break;
3867 	case IDR5_OAS_44_BIT:
3868 		smmu->oas = 44;
3869 		break;
3870 	case IDR5_OAS_52_BIT:
3871 		smmu->oas = 52;
3872 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3873 		break;
3874 	default:
3875 		dev_info(smmu->dev,
3876 			"unknown output address size. Truncating to 48-bit\n");
3877 		fallthrough;
3878 	case IDR5_OAS_48_BIT:
3879 		smmu->oas = 48;
3880 	}
3881 
3882 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3883 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3884 	else
3885 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3886 
3887 	/* Set the DMA mask for our table walker */
3888 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3889 		dev_warn(smmu->dev,
3890 			 "failed to set DMA mask for table walker\n");
3891 
3892 	smmu->ias = max(smmu->ias, smmu->oas);
3893 
3894 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3895 		 smmu->ias, smmu->oas, smmu->features);
3896 	return 0;
3897 }
3898 
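/*
 * Firmware probing via ACPI: the SMMUv3 node comes from the IORT table,
 * which also carries model-specific quirks and the coherency override.
 */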
3899 #ifdef CONFIG_ACPI
3900 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3901 {
3902 	switch (model) {
3903 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3904 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3905 		break;
3906 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3907 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3908 		break;
3909 	}
3910 
3911 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3912 }
3913 
3914 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3915 				      struct arm_smmu_device *smmu)
3916 {
3917 	struct acpi_iort_smmu_v3 *iort_smmu;
3918 	struct device *dev = smmu->dev;
3919 	struct acpi_iort_node *node;
3920 
3921 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3922 
3923 	/* Retrieve SMMUv3 specific data */
3924 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3925 
3926 	acpi_smmu_get_options(iort_smmu->model, smmu);
3927 
3928 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3929 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3930 
3931 	return 0;
3932 }
3933 #else
3934 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3935 					     struct arm_smmu_device *smmu)
3936 {
3937 	return -ENODEV;
3938 }
3939 #endif
3940 
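/*
 * Firmware probing via device-tree: the binding requires #iommu-cells = 1
 * (one StreamID per phandle argument), and coherency is taken from the
 * "dma-coherent" property.
 *
 * Purely illustrative node shape (addresses, sizes and the StreamID below
 * are placeholders, not taken from this file); the interrupt names match
 * those looked up in arm_smmu_device_probe():
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupt-names = "eventq", "gerror", "priq";
 *		#iommu-cells = <1>;
 *	};
 *
 *	// A master then references it as: iommus = <&smmu 0x100>;
 */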
3941 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3942 				    struct arm_smmu_device *smmu)
3943 {
3944 	struct device *dev = &pdev->dev;
3945 	u32 cells;
3946 	int ret = -EINVAL;
3947 
3948 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3949 		dev_err(dev, "missing #iommu-cells property\n");
3950 	else if (cells != 1)
3951 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3952 	else
3953 		ret = 0;
3954 
3955 	parse_driver_options(smmu);
3956 
3957 	if (of_dma_is_coherent(dev->of_node))
3958 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3959 
3960 	return ret;
3961 }
3962 
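/*
 * Size of the architected register space: normally two 64K pages, but only
 * the first page on implementations flagged ARM_SMMU_OPT_PAGE0_REGS_ONLY.
 */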
3963 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3964 {
3965 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3966 		return SZ_64K;
3967 	else
3968 		return SZ_128K;
3969 }
3970 
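/*
 * Install (or remove, when ops is NULL) the SMMUv3 iommu_ops on the PCI,
 * AMBA and platform bus types, unwinding earlier buses on failure.
 */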
3971 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3972 {
3973 	int err;
3974 
3975 #ifdef CONFIG_PCI
3976 	if (pci_bus_type.iommu_ops != ops) {
3977 		err = bus_set_iommu(&pci_bus_type, ops);
3978 		if (err)
3979 			return err;
3980 	}
3981 #endif
3982 #ifdef CONFIG_ARM_AMBA
3983 	if (amba_bustype.iommu_ops != ops) {
3984 		err = bus_set_iommu(&amba_bustype, ops);
3985 		if (err)
3986 			goto err_reset_pci_ops;
3987 	}
3988 #endif
3989 	if (platform_bus_type.iommu_ops != ops) {
3990 		err = bus_set_iommu(&platform_bus_type, ops);
3991 		if (err)
3992 			goto err_reset_amba_ops;
3993 	}
3994 
3995 	return 0;
3996 
3997 err_reset_amba_ops:
3998 #ifdef CONFIG_ARM_AMBA
3999 	bus_set_iommu(&amba_bustype, NULL);
4000 #endif
4001 err_reset_pci_ops: __maybe_unused;
4002 #ifdef CONFIG_PCI
4003 	bus_set_iommu(&pci_bus_type, NULL);
4004 #endif
4005 	return err;
4006 }
4007 
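/*
 * Map a sub-region of the SMMU MMIO space by hand-building a resource, so
 * that devm_ioremap_resource() claims only the architected pages (see the
 * IMPLEMENTATION DEFINED / PMCG note in arm_smmu_device_probe() below).
 */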
4008 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4009 				      resource_size_t size)
4010 {
4011 	struct resource res = {
4012 		.flags = IORESOURCE_MEM,
4013 		.start = start,
4014 		.end = start + size - 1,
4015 	};
4016 
4017 	return devm_ioremap_resource(dev, &res);
4018 }
4019 
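/*
 * Main probe path: query firmware (DT or ACPI/IORT), map the registers,
 * collect the wired IRQs, read the hardware ID registers, allocate the
 * stream table and queues, reset the device, and register with the IOMMU
 * core and the bus types.
 */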
4020 static int arm_smmu_device_probe(struct platform_device *pdev)
4021 {
4022 	int irq, ret;
4023 	struct resource *res;
4024 	resource_size_t ioaddr;
4025 	struct arm_smmu_device *smmu;
4026 	struct device *dev = &pdev->dev;
4027 	bool bypass;
4028 
4029 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4030 	if (!smmu) {
4031 		dev_err(dev, "failed to allocate arm_smmu_device\n");
4032 		return -ENOMEM;
4033 	}
4034 	smmu->dev = dev;
4035 
4036 	if (dev->of_node) {
4037 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4038 	} else {
4039 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4040 		if (ret == -ENODEV)
4041 			return ret;
4042 	}
4043 
4044 	/* Set bypass mode according to firmware probing result */
4045 	bypass = !!ret;
4046 
4047 	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4050 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4051 		return -EINVAL;
4052 	}
4053 	ioaddr = res->start;
4054 
4055 	/*
4056 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4057 	 * the PMCG registers which are reserved by the PMU driver.
4058 	 */
4059 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4060 	if (IS_ERR(smmu->base))
4061 		return PTR_ERR(smmu->base);
4062 
4063 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4064 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4065 					       ARM_SMMU_REG_SZ);
4066 		if (IS_ERR(smmu->page1))
4067 			return PTR_ERR(smmu->page1);
4068 	} else {
4069 		smmu->page1 = smmu->base;
4070 	}
4071 
4072 	/* Interrupt lines */
4073 
4074 	irq = platform_get_irq_byname_optional(pdev, "combined");
4075 	if (irq > 0)
4076 		smmu->combined_irq = irq;
4077 	else {
4078 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4079 		if (irq > 0)
4080 			smmu->evtq.q.irq = irq;
4081 
4082 		irq = platform_get_irq_byname_optional(pdev, "priq");
4083 		if (irq > 0)
4084 			smmu->priq.q.irq = irq;
4085 
4086 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4087 		if (irq > 0)
4088 			smmu->gerr_irq = irq;
4089 	}
4090 	/* Probe the h/w */
4091 	ret = arm_smmu_device_hw_probe(smmu);
4092 	if (ret)
4093 		return ret;
4094 
4095 	/* Initialise in-memory data structures */
4096 	ret = arm_smmu_init_structures(smmu);
4097 	if (ret)
4098 		return ret;
4099 
4100 	/* Record our private device structure */
4101 	platform_set_drvdata(pdev, smmu);
4102 
4103 	/* Reset the device */
4104 	ret = arm_smmu_device_reset(smmu, bypass);
4105 	if (ret)
4106 		return ret;
4107 
4108 	/* And we're up. Go go go! */
4109 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4110 				     "smmu3.%pa", &ioaddr);
4111 	if (ret)
4112 		return ret;
4113 
4114 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
4115 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
4116 
4117 	ret = iommu_device_register(&smmu->iommu);
4118 	if (ret) {
4119 		dev_err(dev, "Failed to register iommu\n");
4120 		return ret;
4121 	}
4122 
4123 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
4124 }
4125 
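/*
 * Teardown mirrors probe: detach the bus ops, unregister from the IOMMU
 * core and sysfs, then clear CR0 so the SMMU stops translating.
 */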
4126 static int arm_smmu_device_remove(struct platform_device *pdev)
4127 {
4128 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4129 
4130 	arm_smmu_set_bus_ops(NULL);
4131 	iommu_device_unregister(&smmu->iommu);
4132 	iommu_device_sysfs_remove(&smmu->iommu);
4133 	arm_smmu_device_disable(smmu);
4134 
4135 	return 0;
4136 }
4137 
4138 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4139 {
4140 	arm_smmu_device_remove(pdev);
4141 }
4142 
4143 static const struct of_device_id arm_smmu_of_match[] = {
4144 	{ .compatible = "arm,smmu-v3", },
4145 	{ },
4146 };
4147 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4148 
4149 static struct platform_driver arm_smmu_driver = {
4150 	.driver	= {
4151 		.name			= "arm-smmu-v3",
4152 		.of_match_table		= arm_smmu_of_match,
4153 		.suppress_bind_attrs	= true,
4154 	},
4155 	.probe	= arm_smmu_device_probe,
4156 	.remove	= arm_smmu_device_remove,
4157 	.shutdown = arm_smmu_device_shutdown,
4158 };
4159 module_platform_driver(arm_smmu_driver);
4160 
4161 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4162 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4163 MODULE_ALIAS("platform:arm-smmu-v3");
4164 MODULE_LICENSE("GPL v2");
4165