1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/iopoll.h>
24 #include <linux/module.h>
25 #include <linux/msi.h>
26 #include <linux/of.h>
27 #include <linux/of_address.h>
28 #include <linux/of_iommu.h>
29 #include <linux/of_platform.h>
30 #include <linux/pci.h>
31 #include <linux/pci-ats.h>
32 #include <linux/platform_device.h>
33 
34 #include <linux/amba/bus.h>
35 
36 /* MMIO registers */
37 #define ARM_SMMU_IDR0			0x0
38 #define IDR0_ST_LVL			GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL		1
40 #define IDR0_STALL_MODEL		GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL		0
42 #define IDR0_STALL_MODEL_FORCE		2
43 #define IDR0_TTENDIAN			GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED		0
45 #define IDR0_TTENDIAN_LE		2
46 #define IDR0_TTENDIAN_BE		3
47 #define IDR0_CD2L			(1 << 19)
48 #define IDR0_VMID16			(1 << 18)
49 #define IDR0_PRI			(1 << 16)
50 #define IDR0_SEV			(1 << 14)
51 #define IDR0_MSI			(1 << 13)
52 #define IDR0_ASID16			(1 << 12)
53 #define IDR0_ATS			(1 << 10)
54 #define IDR0_HYP			(1 << 9)
55 #define IDR0_COHACC			(1 << 4)
56 #define IDR0_TTF			GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64		2
58 #define IDR0_TTF_AARCH32_64		3
59 #define IDR0_S1P			(1 << 1)
60 #define IDR0_S2P			(1 << 0)
61 
62 #define ARM_SMMU_IDR1			0x4
63 #define IDR1_TABLES_PRESET		(1 << 30)
64 #define IDR1_QUEUES_PRESET		(1 << 29)
65 #define IDR1_REL			(1 << 28)
66 #define IDR1_CMDQS			GENMASK(25, 21)
67 #define IDR1_EVTQS			GENMASK(20, 16)
68 #define IDR1_PRIQS			GENMASK(15, 11)
69 #define IDR1_SSIDSIZE			GENMASK(10, 6)
70 #define IDR1_SIDSIZE			GENMASK(5, 0)
71 
72 #define ARM_SMMU_IDR3			0xc
73 #define IDR3_RIL			(1 << 10)
74 
75 #define ARM_SMMU_IDR5			0x14
76 #define IDR5_STALL_MAX			GENMASK(31, 16)
77 #define IDR5_GRAN64K			(1 << 6)
78 #define IDR5_GRAN16K			(1 << 5)
79 #define IDR5_GRAN4K			(1 << 4)
80 #define IDR5_OAS			GENMASK(2, 0)
81 #define IDR5_OAS_32_BIT			0
82 #define IDR5_OAS_36_BIT			1
83 #define IDR5_OAS_40_BIT			2
84 #define IDR5_OAS_42_BIT			3
85 #define IDR5_OAS_44_BIT			4
86 #define IDR5_OAS_48_BIT			5
87 #define IDR5_OAS_52_BIT			6
88 #define IDR5_VAX			GENMASK(11, 10)
89 #define IDR5_VAX_52_BIT			1
90 
91 #define ARM_SMMU_CR0			0x20
92 #define CR0_ATSCHK			(1 << 4)
93 #define CR0_CMDQEN			(1 << 3)
94 #define CR0_EVTQEN			(1 << 2)
95 #define CR0_PRIQEN			(1 << 1)
96 #define CR0_SMMUEN			(1 << 0)
97 
98 #define ARM_SMMU_CR0ACK			0x24
99 
100 #define ARM_SMMU_CR1			0x28
101 #define CR1_TABLE_SH			GENMASK(11, 10)
102 #define CR1_TABLE_OC			GENMASK(9, 8)
103 #define CR1_TABLE_IC			GENMASK(7, 6)
104 #define CR1_QUEUE_SH			GENMASK(5, 4)
105 #define CR1_QUEUE_OC			GENMASK(3, 2)
106 #define CR1_QUEUE_IC			GENMASK(1, 0)
107 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
108 #define CR1_CACHE_NC			0
109 #define CR1_CACHE_WB			1
110 #define CR1_CACHE_WT			2
111 
112 #define ARM_SMMU_CR2			0x2c
113 #define CR2_PTM				(1 << 2)
114 #define CR2_RECINVSID			(1 << 1)
115 #define CR2_E2H				(1 << 0)
116 
117 #define ARM_SMMU_GBPA			0x44
118 #define GBPA_UPDATE			(1 << 31)
119 #define GBPA_ABORT			(1 << 20)
120 
121 #define ARM_SMMU_IRQ_CTRL		0x50
122 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
123 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
124 #define IRQ_CTRL_GERROR_IRQEN		(1 << 0)
125 
126 #define ARM_SMMU_IRQ_CTRLACK		0x54
127 
128 #define ARM_SMMU_GERROR			0x60
129 #define GERROR_SFM_ERR			(1 << 8)
130 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
131 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
132 #define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
133 #define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
134 #define GERROR_PRIQ_ABT_ERR		(1 << 3)
135 #define GERROR_EVTQ_ABT_ERR		(1 << 2)
136 #define GERROR_CMDQ_ERR			(1 << 0)
137 #define GERROR_ERR_MASK			0xfd
138 
139 #define ARM_SMMU_GERRORN		0x64
140 
141 #define ARM_SMMU_GERROR_IRQ_CFG0	0x68
142 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
143 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
144 
145 #define ARM_SMMU_STRTAB_BASE		0x80
146 #define STRTAB_BASE_RA			(1UL << 62)
147 #define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
148 
149 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
150 #define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
151 #define STRTAB_BASE_CFG_FMT_LINEAR	0
152 #define STRTAB_BASE_CFG_FMT_2LVL	1
153 #define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
154 #define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)
155 
156 #define ARM_SMMU_CMDQ_BASE		0x90
157 #define ARM_SMMU_CMDQ_PROD		0x98
158 #define ARM_SMMU_CMDQ_CONS		0x9c
159 
160 #define ARM_SMMU_EVTQ_BASE		0xa0
161 #define ARM_SMMU_EVTQ_PROD		0x100a8
162 #define ARM_SMMU_EVTQ_CONS		0x100ac
163 #define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
164 #define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
165 #define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc
166 
167 #define ARM_SMMU_PRIQ_BASE		0xc0
168 #define ARM_SMMU_PRIQ_PROD		0x100c8
169 #define ARM_SMMU_PRIQ_CONS		0x100cc
170 #define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
171 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
172 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
173 
174 #define ARM_SMMU_REG_SZ			0xe00
175 
176 /* Common MSI config fields */
177 #define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
178 #define MSI_CFG2_SH			GENMASK(5, 4)
179 #define MSI_CFG2_MEMATTR		GENMASK(3, 0)
180 
181 /* Common memory attribute values */
182 #define ARM_SMMU_SH_NSH			0
183 #define ARM_SMMU_SH_OSH			2
184 #define ARM_SMMU_SH_ISH			3
185 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
186 #define ARM_SMMU_MEMATTR_OIWB		0xf
187 
188 #define Q_IDX(llq, p)			((p) & ((1 << (llq)->max_n_shift) - 1))
189 #define Q_WRP(llq, p)			((p) & (1 << (llq)->max_n_shift))
190 #define Q_OVERFLOW_FLAG			(1U << 31)
191 #define Q_OVF(p)			((p) & Q_OVERFLOW_FLAG)
192 #define Q_ENT(q, p)			((q)->base +			\
193 					 Q_IDX(&((q)->llq), p) *	\
194 					 (q)->ent_dwords)
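
/*
 * A worked example of the prod/cons encoding, assuming max_n_shift == 8:
 * bits [7:0] hold the queue index, bit 8 is the wrap flag and bit 31 is the
 * overflow flag. The queue is full when prod and cons have equal indices but
 * different wrap flags, and empty when both fields match (see queue_full()
 * and queue_empty() below).
 */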
195 
196 #define Q_BASE_RWA			(1UL << 62)
197 #define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
198 #define Q_BASE_LOG2SIZE			GENMASK(4, 0)
199 
200 /* Ensure DMA allocations are naturally aligned */
201 #ifdef CONFIG_CMA_ALIGNMENT
202 #define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
203 #else
204 #define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + MAX_ORDER - 1)
205 #endif
206 
207 /*
208  * Stream table.
209  *
210  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
211  * 2lvl: 128k L1 entries,
212  *       256 lazy entries per table (each table covers a PCI bus)
213  */
214 #define STRTAB_L1_SZ_SHIFT		20
215 #define STRTAB_SPLIT			8
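
/*
 * With STRTAB_SPLIT == 8, a StreamID is decomposed as SID[N:8] to index the
 * L1 descriptor array and SID[7:0] to index the 256-entry L2 table that the
 * descriptor points to, so each lazily-allocated L2 table covers one PCI bus
 * worth of RequesterIDs.
 */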
216 
217 #define STRTAB_L1_DESC_DWORDS		1
218 #define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
219 #define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
220 
221 #define STRTAB_STE_DWORDS		8
222 #define STRTAB_STE_0_V			(1UL << 0)
223 #define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
224 #define STRTAB_STE_0_CFG_ABORT		0
225 #define STRTAB_STE_0_CFG_BYPASS		4
226 #define STRTAB_STE_0_CFG_S1_TRANS	5
227 #define STRTAB_STE_0_CFG_S2_TRANS	6
228 
229 #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
230 #define STRTAB_STE_0_S1FMT_LINEAR	0
231 #define STRTAB_STE_0_S1FMT_64K_L2	2
232 #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
233 #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
234 
235 #define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
236 #define STRTAB_STE_1_S1DSS_TERMINATE	0x0
237 #define STRTAB_STE_1_S1DSS_BYPASS	0x1
238 #define STRTAB_STE_1_S1DSS_SSID0	0x2
239 
240 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
241 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
242 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
243 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
244 #define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
245 #define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
246 #define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)
247 
248 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
249 
250 #define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
251 #define STRTAB_STE_1_EATS_ABT		0UL
252 #define STRTAB_STE_1_EATS_TRANS		1UL
253 #define STRTAB_STE_1_EATS_S1CHK		2UL
254 
255 #define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
256 #define STRTAB_STE_1_STRW_NSEL1		0UL
257 #define STRTAB_STE_1_STRW_EL2		2UL
258 
259 #define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
260 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
261 
262 #define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
263 #define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
264 #define STRTAB_STE_2_VTCR_S2T0SZ	GENMASK_ULL(5, 0)
265 #define STRTAB_STE_2_VTCR_S2SL0		GENMASK_ULL(7, 6)
266 #define STRTAB_STE_2_VTCR_S2IR0		GENMASK_ULL(9, 8)
267 #define STRTAB_STE_2_VTCR_S2OR0		GENMASK_ULL(11, 10)
268 #define STRTAB_STE_2_VTCR_S2SH0		GENMASK_ULL(13, 12)
269 #define STRTAB_STE_2_VTCR_S2TG		GENMASK_ULL(15, 14)
270 #define STRTAB_STE_2_VTCR_S2PS		GENMASK_ULL(18, 16)
271 #define STRTAB_STE_2_S2AA64		(1UL << 51)
272 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
273 #define STRTAB_STE_2_S2PTW		(1UL << 54)
274 #define STRTAB_STE_2_S2R		(1UL << 58)
275 
276 #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
277 
278 /*
279  * Context descriptors.
280  *
281  * Linear: when less than 1024 SSIDs are supported
282  * 2lvl: at most 1024 L1 entries,
283  *       1024 lazy entries per table.
284  */
285 #define CTXDESC_SPLIT			10
286 #define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)
287 
288 #define CTXDESC_L1_DESC_DWORDS		1
289 #define CTXDESC_L1_DESC_V		(1UL << 0)
290 #define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)
291 
292 #define CTXDESC_CD_DWORDS		8
293 #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
294 #define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
295 #define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
296 #define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
297 #define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
298 #define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
299 #define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
300 
301 #define CTXDESC_CD_0_ENDI		(1UL << 15)
302 #define CTXDESC_CD_0_V			(1UL << 31)
303 
304 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
305 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
306 
307 #define CTXDESC_CD_0_AA64		(1UL << 41)
308 #define CTXDESC_CD_0_S			(1UL << 44)
309 #define CTXDESC_CD_0_R			(1UL << 45)
310 #define CTXDESC_CD_0_A			(1UL << 46)
311 #define CTXDESC_CD_0_ASET		(1UL << 47)
312 #define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)
313 
314 #define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
315 
316 /*
317  * When the SMMU only supports linear context descriptor tables, pick a
318  * reasonable size limit (64kB).
319  */
320 #define CTXDESC_LINEAR_CDMAX		ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
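
/*
 * Each CD occupies CTXDESC_CD_DWORDS * 8 = 64 bytes, so the 64kB limit above
 * corresponds to 1024 CDs and CTXDESC_LINEAR_CDMAX evaluates to 10.
 */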
321 
322 /* Command queue */
323 #define CMDQ_ENT_SZ_SHIFT		4
324 #define CMDQ_ENT_DWORDS			((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
325 #define CMDQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
326 
327 #define CMDQ_CONS_ERR			GENMASK(30, 24)
328 #define CMDQ_ERR_CERROR_NONE_IDX	0
329 #define CMDQ_ERR_CERROR_ILL_IDX		1
330 #define CMDQ_ERR_CERROR_ABT_IDX		2
331 #define CMDQ_ERR_CERROR_ATC_INV_IDX	3
332 
333 #define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG
334 
335 /*
336  * This is used to size the command queue and therefore must be at least
337  * BITS_PER_LONG so that the valid_map works correctly (it relies on the
338  * total number of queue entries being a multiple of BITS_PER_LONG).
339  */
340 #define CMDQ_BATCH_ENTRIES		BITS_PER_LONG
341 
342 #define CMDQ_0_OP			GENMASK_ULL(7, 0)
343 #define CMDQ_0_SSV			(1UL << 11)
344 
345 #define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
346 #define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
347 #define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
348 
349 #define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
350 #define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
351 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
352 #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
353 
354 #define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
355 #define CMDQ_TLBI_RANGE_NUM_MAX		31
356 #define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
357 #define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
358 #define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
359 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
360 #define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
361 #define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
362 #define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
363 #define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
364 
365 #define CMDQ_ATC_0_SSID			GENMASK_ULL(31, 12)
366 #define CMDQ_ATC_0_SID			GENMASK_ULL(63, 32)
367 #define CMDQ_ATC_0_GLOBAL		(1UL << 9)
368 #define CMDQ_ATC_1_SIZE			GENMASK_ULL(5, 0)
369 #define CMDQ_ATC_1_ADDR_MASK		GENMASK_ULL(63, 12)
370 
371 #define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
372 #define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
373 #define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
374 #define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
375 
376 #define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
377 #define CMDQ_SYNC_0_CS_NONE		0
378 #define CMDQ_SYNC_0_CS_IRQ		1
379 #define CMDQ_SYNC_0_CS_SEV		2
380 #define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
381 #define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
382 #define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
383 #define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
384 
385 /* Event queue */
386 #define EVTQ_ENT_SZ_SHIFT		5
387 #define EVTQ_ENT_DWORDS			((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
388 #define EVTQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
389 
390 #define EVTQ_0_ID			GENMASK_ULL(7, 0)
391 
392 /* PRI queue */
393 #define PRIQ_ENT_SZ_SHIFT		4
394 #define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
395 #define PRIQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
396 
397 #define PRIQ_0_SID			GENMASK_ULL(31, 0)
398 #define PRIQ_0_SSID			GENMASK_ULL(51, 32)
399 #define PRIQ_0_PERM_PRIV		(1UL << 58)
400 #define PRIQ_0_PERM_EXEC		(1UL << 59)
401 #define PRIQ_0_PERM_READ		(1UL << 60)
402 #define PRIQ_0_PERM_WRITE		(1UL << 61)
403 #define PRIQ_0_PRG_LAST			(1UL << 62)
404 #define PRIQ_0_SSID_V			(1UL << 63)
405 
406 #define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
407 #define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
408 
409 /* High-level queue structures */
410 #define ARM_SMMU_POLL_TIMEOUT_US	1000000 /* 1s! */
411 #define ARM_SMMU_POLL_SPIN_COUNT	10
412 
413 #define MSI_IOVA_BASE			0x8000000
414 #define MSI_IOVA_LENGTH			0x100000
415 
416 static bool disable_bypass = 1;
417 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
418 MODULE_PARM_DESC(disable_bypass,
419 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
420 
421 enum pri_resp {
422 	PRI_RESP_DENY = 0,
423 	PRI_RESP_FAIL = 1,
424 	PRI_RESP_SUCC = 2,
425 };
426 
427 enum arm_smmu_msi_index {
428 	EVTQ_MSI_INDEX,
429 	GERROR_MSI_INDEX,
430 	PRIQ_MSI_INDEX,
431 	ARM_SMMU_MAX_MSIS,
432 };
433 
434 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
435 	[EVTQ_MSI_INDEX] = {
436 		ARM_SMMU_EVTQ_IRQ_CFG0,
437 		ARM_SMMU_EVTQ_IRQ_CFG1,
438 		ARM_SMMU_EVTQ_IRQ_CFG2,
439 	},
440 	[GERROR_MSI_INDEX] = {
441 		ARM_SMMU_GERROR_IRQ_CFG0,
442 		ARM_SMMU_GERROR_IRQ_CFG1,
443 		ARM_SMMU_GERROR_IRQ_CFG2,
444 	},
445 	[PRIQ_MSI_INDEX] = {
446 		ARM_SMMU_PRIQ_IRQ_CFG0,
447 		ARM_SMMU_PRIQ_IRQ_CFG1,
448 		ARM_SMMU_PRIQ_IRQ_CFG2,
449 	},
450 };
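
/*
 * Each row above names the {ADDR, DATA, ATTR} register triplet used to
 * program one MSI: CFG0 takes the doorbell address, CFG1 the 32-bit payload
 * and CFG2 the shareability/memory attributes (MSI_CFG2_SH/MSI_CFG2_MEMATTR).
 */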
451 
452 struct arm_smmu_cmdq_ent {
453 	/* Common fields */
454 	u8				opcode;
455 	bool				substream_valid;
456 
457 	/* Command-specific fields */
458 	union {
459 		#define CMDQ_OP_PREFETCH_CFG	0x1
460 		struct {
461 			u32			sid;
462 			u8			size;
463 			u64			addr;
464 		} prefetch;
465 
466 		#define CMDQ_OP_CFGI_STE	0x3
467 		#define CMDQ_OP_CFGI_ALL	0x4
468 		#define CMDQ_OP_CFGI_CD		0x5
469 		#define CMDQ_OP_CFGI_CD_ALL	0x6
470 		struct {
471 			u32			sid;
472 			u32			ssid;
473 			union {
474 				bool		leaf;
475 				u8		span;
476 			};
477 		} cfgi;
478 
479 		#define CMDQ_OP_TLBI_NH_ASID	0x11
480 		#define CMDQ_OP_TLBI_NH_VA	0x12
481 		#define CMDQ_OP_TLBI_EL2_ALL	0x20
482 		#define CMDQ_OP_TLBI_S12_VMALL	0x28
483 		#define CMDQ_OP_TLBI_S2_IPA	0x2a
484 		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
485 		struct {
486 			u8			num;
487 			u8			scale;
488 			u16			asid;
489 			u16			vmid;
490 			bool			leaf;
491 			u8			ttl;
492 			u8			tg;
493 			u64			addr;
494 		} tlbi;
495 
496 		#define CMDQ_OP_ATC_INV		0x40
497 		#define ATC_INV_SIZE_ALL	52
498 		struct {
499 			u32			sid;
500 			u32			ssid;
501 			u64			addr;
502 			u8			size;
503 			bool			global;
504 		} atc;
505 
506 		#define CMDQ_OP_PRI_RESP	0x41
507 		struct {
508 			u32			sid;
509 			u32			ssid;
510 			u16			grpid;
511 			enum pri_resp		resp;
512 		} pri;
513 
514 		#define CMDQ_OP_CMD_SYNC	0x46
515 		struct {
516 			u64			msiaddr;
517 		} sync;
518 	};
519 };
520 
521 struct arm_smmu_ll_queue {
522 	union {
523 		u64			val;
524 		struct {
525 			u32		prod;
526 			u32		cons;
527 		};
528 		struct {
529 			atomic_t	prod;
530 			atomic_t	cons;
531 		} atomic;
532 		u8			__pad[SMP_CACHE_BYTES];
533 	} ____cacheline_aligned_in_smp;
534 	u32				max_n_shift;
535 };
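
/*
 * The union above allows the 32-bit prod and cons pointers to be snapshotted
 * and updated together as a single 64-bit 'val'; the cmpxchg() loop in
 * arm_smmu_cmdq_issue_cmdlist() relies on this when allocating queue space.
 */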
536 
537 struct arm_smmu_queue {
538 	struct arm_smmu_ll_queue	llq;
539 	int				irq; /* Wired interrupt */
540 
541 	__le64				*base;
542 	dma_addr_t			base_dma;
543 	u64				q_base;
544 
545 	size_t				ent_dwords;
546 
547 	u32 __iomem			*prod_reg;
548 	u32 __iomem			*cons_reg;
549 };
550 
551 struct arm_smmu_queue_poll {
552 	ktime_t				timeout;
553 	unsigned int			delay;
554 	unsigned int			spin_cnt;
555 	bool				wfe;
556 };
557 
558 struct arm_smmu_cmdq {
559 	struct arm_smmu_queue		q;
560 	atomic_long_t			*valid_map;
561 	atomic_t			owner_prod;
562 	atomic_t			lock;
563 };
564 
565 struct arm_smmu_cmdq_batch {
566 	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
567 	int				num;
568 };
569 
570 struct arm_smmu_evtq {
571 	struct arm_smmu_queue		q;
572 	u32				max_stalls;
573 };
574 
575 struct arm_smmu_priq {
576 	struct arm_smmu_queue		q;
577 };
578 
579 /* High-level stream table and context descriptor structures */
580 struct arm_smmu_strtab_l1_desc {
581 	u8				span;
582 
583 	__le64				*l2ptr;
584 	dma_addr_t			l2ptr_dma;
585 };
586 
587 struct arm_smmu_ctx_desc {
588 	u16				asid;
589 	u64				ttbr;
590 	u64				tcr;
591 	u64				mair;
592 };
593 
594 struct arm_smmu_l1_ctx_desc {
595 	__le64				*l2ptr;
596 	dma_addr_t			l2ptr_dma;
597 };
598 
599 struct arm_smmu_ctx_desc_cfg {
600 	__le64				*cdtab;
601 	dma_addr_t			cdtab_dma;
602 	struct arm_smmu_l1_ctx_desc	*l1_desc;
603 	unsigned int			num_l1_ents;
604 };
605 
606 struct arm_smmu_s1_cfg {
607 	struct arm_smmu_ctx_desc_cfg	cdcfg;
608 	struct arm_smmu_ctx_desc	cd;
609 	u8				s1fmt;
610 	u8				s1cdmax;
611 };
612 
613 struct arm_smmu_s2_cfg {
614 	u16				vmid;
615 	u64				vttbr;
616 	u64				vtcr;
617 };
618 
619 struct arm_smmu_strtab_cfg {
620 	__le64				*strtab;
621 	dma_addr_t			strtab_dma;
622 	struct arm_smmu_strtab_l1_desc	*l1_desc;
623 	unsigned int			num_l1_ents;
624 
625 	u64				strtab_base;
626 	u32				strtab_base_cfg;
627 };
628 
629 /* An SMMUv3 instance */
630 struct arm_smmu_device {
631 	struct device			*dev;
632 	void __iomem			*base;
633 	void __iomem			*page1;
634 
635 #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
636 #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
637 #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
638 #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
639 #define ARM_SMMU_FEAT_PRI		(1 << 4)
640 #define ARM_SMMU_FEAT_ATS		(1 << 5)
641 #define ARM_SMMU_FEAT_SEV		(1 << 6)
642 #define ARM_SMMU_FEAT_MSI		(1 << 7)
643 #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
644 #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
645 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
646 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
647 #define ARM_SMMU_FEAT_HYP		(1 << 12)
648 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
649 #define ARM_SMMU_FEAT_VAX		(1 << 14)
650 #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
651 	u32				features;
652 
653 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
654 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
655 	u32				options;
656 
657 	struct arm_smmu_cmdq		cmdq;
658 	struct arm_smmu_evtq		evtq;
659 	struct arm_smmu_priq		priq;
660 
661 	int				gerr_irq;
662 	int				combined_irq;
663 
664 	unsigned long			ias; /* IPA */
665 	unsigned long			oas; /* PA */
666 	unsigned long			pgsize_bitmap;
667 
668 #define ARM_SMMU_MAX_ASIDS		(1 << 16)
669 	unsigned int			asid_bits;
670 
671 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
672 	unsigned int			vmid_bits;
673 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
674 
675 	unsigned int			ssid_bits;
676 	unsigned int			sid_bits;
677 
678 	struct arm_smmu_strtab_cfg	strtab_cfg;
679 
680 	/* IOMMU core code handle */
681 	struct iommu_device		iommu;
682 };
683 
684 /* SMMU private data for each master */
685 struct arm_smmu_master {
686 	struct arm_smmu_device		*smmu;
687 	struct device			*dev;
688 	struct arm_smmu_domain		*domain;
689 	struct list_head		domain_head;
690 	u32				*sids;
691 	unsigned int			num_sids;
692 	bool				ats_enabled;
693 	unsigned int			ssid_bits;
694 };
695 
696 /* SMMU private data for an IOMMU domain */
697 enum arm_smmu_domain_stage {
698 	ARM_SMMU_DOMAIN_S1 = 0,
699 	ARM_SMMU_DOMAIN_S2,
700 	ARM_SMMU_DOMAIN_NESTED,
701 	ARM_SMMU_DOMAIN_BYPASS,
702 };
703 
704 struct arm_smmu_domain {
705 	struct arm_smmu_device		*smmu;
706 	struct mutex			init_mutex; /* Protects smmu pointer */
707 
708 	struct io_pgtable_ops		*pgtbl_ops;
709 	bool				non_strict;
710 	atomic_t			nr_ats_masters;
711 
712 	enum arm_smmu_domain_stage	stage;
713 	union {
714 		struct arm_smmu_s1_cfg	s1_cfg;
715 		struct arm_smmu_s2_cfg	s2_cfg;
716 	};
717 
718 	struct iommu_domain		domain;
719 
720 	struct list_head		devices;
721 	spinlock_t			devices_lock;
722 };
723 
724 struct arm_smmu_option_prop {
725 	u32 opt;
726 	const char *prop;
727 };
728 
729 static DEFINE_XARRAY_ALLOC1(asid_xa);
730 
731 static struct arm_smmu_option_prop arm_smmu_options[] = {
732 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
733 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
734 	{ 0, NULL},
735 };
736 
737 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
738 						 struct arm_smmu_device *smmu)
739 {
740 	if (offset > SZ_64K)
741 		return smmu->page1 + offset - SZ_64K;
742 
743 	return smmu->base + offset;
744 }
745 
746 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
747 {
748 	return container_of(dom, struct arm_smmu_domain, domain);
749 }
750 
751 static void parse_driver_options(struct arm_smmu_device *smmu)
752 {
753 	int i = 0;
754 
755 	do {
756 		if (of_property_read_bool(smmu->dev->of_node,
757 						arm_smmu_options[i].prop)) {
758 			smmu->options |= arm_smmu_options[i].opt;
759 			dev_notice(smmu->dev, "option %s\n",
760 				arm_smmu_options[i].prop);
761 		}
762 	} while (arm_smmu_options[++i].opt);
763 }
764 
765 /* Low-level queue manipulation functions */
766 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
767 {
768 	u32 space, prod, cons;
769 
770 	prod = Q_IDX(q, q->prod);
771 	cons = Q_IDX(q, q->cons);
772 
773 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
774 		space = (1 << q->max_n_shift) - (prod - cons);
775 	else
776 		space = cons - prod;
777 
778 	return space >= n;
779 }
780 
781 static bool queue_full(struct arm_smmu_ll_queue *q)
782 {
783 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
784 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
785 }
786 
787 static bool queue_empty(struct arm_smmu_ll_queue *q)
788 {
789 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
790 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
791 }
792 
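/*
 * Has the cons pointer, taking the wrap flag into account, advanced past
 * 'prod'? Used when spinning for CMD_SYNC completion without MSIs.
 */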
793 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
794 {
795 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
796 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
797 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
798 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
799 }
800 
801 static void queue_sync_cons_out(struct arm_smmu_queue *q)
802 {
803 	/*
804 	 * Ensure that all CPU accesses (reads and writes) to the queue
805 	 * are complete before we update the cons pointer.
806 	 */
807 	mb();
808 	writel_relaxed(q->llq.cons, q->cons_reg);
809 }
810 
811 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
812 {
813 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
814 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
815 }
816 
817 static int queue_sync_prod_in(struct arm_smmu_queue *q)
818 {
819 	int ret = 0;
820 	u32 prod = readl_relaxed(q->prod_reg);
821 
822 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
823 		ret = -EOVERFLOW;
824 
825 	q->llq.prod = prod;
826 	return ret;
827 }
828 
829 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
830 {
831 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
832 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
833 }
834 
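/*
 * Queue polling helpers: wait with WFE when the SMMU can issue SEV on queue
 * updates, otherwise spin briefly and then back off exponentially, giving up
 * after ARM_SMMU_POLL_TIMEOUT_US.
 */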
835 static void queue_poll_init(struct arm_smmu_device *smmu,
836 			    struct arm_smmu_queue_poll *qp)
837 {
838 	qp->delay = 1;
839 	qp->spin_cnt = 0;
840 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
841 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
842 }
843 
844 static int queue_poll(struct arm_smmu_queue_poll *qp)
845 {
846 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
847 		return -ETIMEDOUT;
848 
849 	if (qp->wfe) {
850 		wfe();
851 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
852 		cpu_relax();
853 	} else {
854 		udelay(qp->delay);
855 		qp->delay *= 2;
856 		qp->spin_cnt = 0;
857 	}
858 
859 	return 0;
860 }
861 
862 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
863 {
864 	int i;
865 
866 	for (i = 0; i < n_dwords; ++i)
867 		*dst++ = cpu_to_le64(*src++);
868 }
869 
870 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
871 {
872 	int i;
873 
874 	for (i = 0; i < n_dwords; ++i)
875 		*dst++ = le64_to_cpu(*src++);
876 }
877 
878 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
879 {
880 	if (queue_empty(&q->llq))
881 		return -EAGAIN;
882 
883 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
884 	queue_inc_cons(&q->llq);
885 	queue_sync_cons_out(q);
886 	return 0;
887 }
888 
889 /* High-level queue accessors */
890 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
891 {
892 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
893 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
894 
895 	switch (ent->opcode) {
896 	case CMDQ_OP_TLBI_EL2_ALL:
897 	case CMDQ_OP_TLBI_NSNH_ALL:
898 		break;
899 	case CMDQ_OP_PREFETCH_CFG:
900 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
901 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
902 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
903 		break;
904 	case CMDQ_OP_CFGI_CD:
905 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
906 		/* Fallthrough */
907 	case CMDQ_OP_CFGI_STE:
908 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
909 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
910 		break;
911 	case CMDQ_OP_CFGI_CD_ALL:
912 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
913 		break;
914 	case CMDQ_OP_CFGI_ALL:
915 		/* Cover the entire SID range */
916 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
917 		break;
918 	case CMDQ_OP_TLBI_NH_VA:
919 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
920 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
921 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
922 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
923 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
924 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
925 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
926 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
927 		break;
928 	case CMDQ_OP_TLBI_S2_IPA:
929 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
930 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
931 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
932 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
933 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
934 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
935 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
936 		break;
937 	case CMDQ_OP_TLBI_NH_ASID:
938 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
939 		/* Fallthrough */
940 	case CMDQ_OP_TLBI_S12_VMALL:
941 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
942 		break;
943 	case CMDQ_OP_ATC_INV:
944 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
945 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
946 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
947 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
948 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
949 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
950 		break;
951 	case CMDQ_OP_PRI_RESP:
952 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
953 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
954 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
955 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
956 		switch (ent->pri.resp) {
957 		case PRI_RESP_DENY:
958 		case PRI_RESP_FAIL:
959 		case PRI_RESP_SUCC:
960 			break;
961 		default:
962 			return -EINVAL;
963 		}
964 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
965 		break;
966 	case CMDQ_OP_CMD_SYNC:
967 		if (ent->sync.msiaddr) {
968 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
969 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
970 		} else {
971 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
972 		}
973 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
974 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
975 		break;
976 	default:
977 		return -ENOENT;
978 	}
979 
980 	return 0;
981 }
982 
983 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
984 					 u32 prod)
985 {
986 	struct arm_smmu_queue *q = &smmu->cmdq.q;
987 	struct arm_smmu_cmdq_ent ent = {
988 		.opcode = CMDQ_OP_CMD_SYNC,
989 	};
990 
991 	/*
992 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
993 	 * payload, so the write will zero the entire command on that platform.
994 	 */
995 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
996 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
997 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
998 				   q->ent_dwords * 8;
999 	}
1000 
1001 	arm_smmu_cmdq_build_cmd(cmd, &ent);
1002 }
1003 
1004 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
1005 {
1006 	static const char *cerror_str[] = {
1007 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
1008 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
1009 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
1010 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
1011 	};
1012 
1013 	int i;
1014 	u64 cmd[CMDQ_ENT_DWORDS];
1015 	struct arm_smmu_queue *q = &smmu->cmdq.q;
1016 	u32 cons = readl_relaxed(q->cons_reg);
1017 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
1018 	struct arm_smmu_cmdq_ent cmd_sync = {
1019 		.opcode = CMDQ_OP_CMD_SYNC,
1020 	};
1021 
1022 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
1023 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
1024 
1025 	switch (idx) {
1026 	case CMDQ_ERR_CERROR_ABT_IDX:
1027 		dev_err(smmu->dev, "retrying command fetch\n");
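		/* Fallthrough */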
1028 	case CMDQ_ERR_CERROR_NONE_IDX:
1029 		return;
1030 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
1031 		/*
1032 		 * ATC Invalidation Completion timeout. CONS is still pointing
1033 		 * at the CMD_SYNC. Attempt to complete other pending commands
1034 		 * by repeating the CMD_SYNC, though we might well end up back
1035 		 * here since the ATC invalidation may still be pending.
1036 		 */
1037 		return;
1038 	case CMDQ_ERR_CERROR_ILL_IDX:
1039 		/* Fallthrough */
1040 	default:
1041 		break;
1042 	}
1043 
1044 	/*
1045 	 * We may have concurrent producers, so we need to be careful
1046 	 * not to touch any of the shadow cmdq state.
1047 	 */
1048 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
1049 	dev_err(smmu->dev, "skipping command in error state:\n");
1050 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1051 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1052 
1053 	/* Convert the erroneous command into a CMD_SYNC */
1054 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1055 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1056 		return;
1057 	}
1058 
1059 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
1060 }
1061 
1062 /*
1063  * Command queue locking.
1064  * This is a form of bastardised rwlock with the following major changes:
1065  *
1066  * - The only LOCK routines are exclusive_trylock() and shared_lock().
1067  *   Neither have barrier semantics, and instead provide only a control
1068  *   dependency.
1069  *
1070  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1071  *   fails if the caller appears to be the last lock holder (yes, this is
1072  *   racy). All successful UNLOCK routines have RELEASE semantics.
1073  */
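/*
 * The lock word encodes: 0 == unlocked, n > 0 == n shared holders, and a
 * negative value (INT_MIN plus any pending shared increments) == held
 * exclusively.
 */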
1074 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
1075 {
1076 	int val;
1077 
1078 	/*
1079 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
1080 	 * lock counter. When held in exclusive state, the lock counter is set
1081 	 * to INT_MIN so these increments won't hurt as the value will remain
1082 	 * negative.
1083 	 */
1084 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1085 		return;
1086 
1087 	do {
1088 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1089 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1090 }
1091 
1092 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1093 {
1094 	(void)atomic_dec_return_release(&cmdq->lock);
1095 }
1096 
1097 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1098 {
1099 	if (atomic_read(&cmdq->lock) == 1)
1100 		return false;
1101 
1102 	arm_smmu_cmdq_shared_unlock(cmdq);
1103 	return true;
1104 }
1105 
1106 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
1107 ({									\
1108 	bool __ret;							\
1109 	local_irq_save(flags);						\
1110 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
1111 	if (!__ret)							\
1112 		local_irq_restore(flags);				\
1113 	__ret;								\
1114 })
1115 
1116 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
1117 ({									\
1118 	atomic_set_release(&cmdq->lock, 0);				\
1119 	local_irq_restore(flags);					\
1120 })
1121 
1122 
1123 /*
1124  * Command queue insertion.
1125  * This is made fiddly by our attempts to achieve some sort of scalability
1126  * since there is one queue shared amongst all of the CPUs in the system.  If
1127  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1128  * then you'll *love* this monstrosity.
1129  *
1130  * The basic idea is to split the queue up into ranges of commands that are
1131  * owned by a given CPU; the owner may not have written all of the commands
1132  * itself, but is responsible for advancing the hardware prod pointer when
1133  * the time comes. The algorithm is roughly:
1134  *
1135  * 	1. Allocate some space in the queue. At this point we also discover
1136  *	   whether the head of the queue is currently owned by another CPU,
1137  *	   or whether we are the owner.
1138  *
1139  *	2. Write our commands into our allocated slots in the queue.
1140  *
1141  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1142  *
1143  *	4. If we are an owner:
1144  *		a. Wait for the previous owner to finish.
1145  *		b. Mark the queue head as unowned, which tells us the range
1146  *		   that we are responsible for publishing.
1147  *		c. Wait for all commands in our owned range to become valid.
1148  *		d. Advance the hardware prod pointer.
1149  *		e. Tell the next owner we've finished.
1150  *
1151  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
1152  *	   owner), then we need to stick around until it has completed:
1153  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1154  *		   to clear the first 4 bytes.
1155  *		b. Otherwise, we spin waiting for the hardware cons pointer to
1156  *		   advance past our command.
1157  *
1158  * The devil is in the details, particularly the use of locking for handling
1159  * SYNC completion and freeing up space in the queue before we think that it is
1160  * full.
1161  */
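
/*
 * The valid_map holds one bit per command slot, packed into longs: writers
 * toggle their bits with an XOR once their commands are written, and the
 * owner polls for the expected pattern (all ones while the wrap flag is
 * clear, all zeroes once it is set) before publishing the new prod pointer.
 */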
1162 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1163 					       u32 sprod, u32 eprod, bool set)
1164 {
1165 	u32 swidx, sbidx, ewidx, ebidx;
1166 	struct arm_smmu_ll_queue llq = {
1167 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1168 		.prod		= sprod,
1169 	};
1170 
1171 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1172 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
1173 
1174 	while (llq.prod != eprod) {
1175 		unsigned long mask;
1176 		atomic_long_t *ptr;
1177 		u32 limit = BITS_PER_LONG;
1178 
1179 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1180 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1181 
1182 		ptr = &cmdq->valid_map[swidx];
1183 
1184 		if ((swidx == ewidx) && (sbidx < ebidx))
1185 			limit = ebidx;
1186 
1187 		mask = GENMASK(limit - 1, sbidx);
1188 
1189 		/*
1190 		 * The valid bit is the inverse of the wrap bit. This means
1191 		 * that a zero-initialised queue is invalid and, after marking
1192 		 * all entries as valid, they become invalid again when we
1193 		 * wrap.
1194 		 */
1195 		if (set) {
1196 			atomic_long_xor(mask, ptr);
1197 		} else { /* Poll */
1198 			unsigned long valid;
1199 
1200 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1201 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1202 		}
1203 
1204 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
1205 	}
1206 }
1207 
1208 /* Mark all entries in the range [sprod, eprod) as valid */
1209 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1210 					u32 sprod, u32 eprod)
1211 {
1212 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1213 }
1214 
1215 /* Wait for all entries in the range [sprod, eprod) to become valid */
1216 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1217 					 u32 sprod, u32 eprod)
1218 {
1219 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
1220 }
1221 
1222 /* Wait for the command queue to become non-full */
1223 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1224 					     struct arm_smmu_ll_queue *llq)
1225 {
1226 	unsigned long flags;
1227 	struct arm_smmu_queue_poll qp;
1228 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1229 	int ret = 0;
1230 
1231 	/*
1232 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1233 	 * that fails, spin until somebody else updates it for us.
1234 	 */
1235 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1236 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1237 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1238 		llq->val = READ_ONCE(cmdq->q.llq.val);
1239 		return 0;
1240 	}
1241 
1242 	queue_poll_init(smmu, &qp);
1243 	do {
1244 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1245 		if (!queue_full(llq))
1246 			break;
1247 
1248 		ret = queue_poll(&qp);
1249 	} while (!ret);
1250 
1251 	return ret;
1252 }
1253 
1254 /*
1255  * Wait until the SMMU signals a CMD_SYNC completion MSI.
1256  * Must be called with the cmdq lock held in some capacity.
1257  */
1258 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1259 					  struct arm_smmu_ll_queue *llq)
1260 {
1261 	int ret = 0;
1262 	struct arm_smmu_queue_poll qp;
1263 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1264 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
1265 
1266 	queue_poll_init(smmu, &qp);
1267 
1268 	/*
1269 	 * The MSI won't generate an event, since it's being written back
1270 	 * into the command queue.
1271 	 */
1272 	qp.wfe = false;
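	/* The all-zero MSI payload clears the first word of our CMD_SYNC. */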
1273 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1274 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1275 	return ret;
1276 }
1277 
1278 /*
1279  * Wait until the SMMU cons index passes llq->prod.
1280  * Must be called with the cmdq lock held in some capacity.
1281  */
1282 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1283 					       struct arm_smmu_ll_queue *llq)
1284 {
1285 	struct arm_smmu_queue_poll qp;
1286 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1287 	u32 prod = llq->prod;
1288 	int ret = 0;
1289 
1290 	queue_poll_init(smmu, &qp);
1291 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1292 	do {
1293 		if (queue_consumed(llq, prod))
1294 			break;
1295 
1296 		ret = queue_poll(&qp);
1297 
1298 		/*
1299 		 * This needs to be a readl() so that our subsequent call
1300 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1301 		 *
1302 		 * Specifically, we need to ensure that we observe all
1303 		 * shared_lock()s by other CMD_SYNCs that share our owner,
1304 		 * so that a failing call to tryunlock() means that we're
1305 		 * the last one out and therefore we can safely advance
1306 		 * cmdq->q.llq.cons. Roughly speaking:
1307 		 *
1308 		 * CPU 0		CPU1			CPU2 (us)
1309 		 *
1310 		 * if (sync)
1311 		 * 	shared_lock();
1312 		 *
1313 		 * dma_wmb();
1314 		 * set_valid_map();
1315 		 *
1316 		 * 			if (owner) {
1317 		 *				poll_valid_map();
1318 		 *				<control dependency>
1319 		 *				writel(prod_reg);
1320 		 *
1321 		 *						readl(cons_reg);
1322 		 *						tryunlock();
1323 		 *
1324 		 * Requires us to see CPU 0's shared_lock() acquisition.
1325 		 */
1326 		llq->cons = readl(cmdq->q.cons_reg);
1327 	} while (!ret);
1328 
1329 	return ret;
1330 }
1331 
1332 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1333 					 struct arm_smmu_ll_queue *llq)
1334 {
1335 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
1336 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
1337 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1338 
1339 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1340 }
1341 
1342 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1343 					u32 prod, int n)
1344 {
1345 	int i;
1346 	struct arm_smmu_ll_queue llq = {
1347 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1348 		.prod		= prod,
1349 	};
1350 
1351 	for (i = 0; i < n; ++i) {
1352 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1353 
1354 		prod = queue_inc_prod_n(&llq, i);
1355 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1356 	}
1357 }
1358 
1359 /*
1360  * This is the actual insertion function, and provides the following
1361  * ordering guarantees to callers:
1362  *
1363  * - There is a dma_wmb() before publishing any commands to the queue.
1364  *   This can be relied upon to order prior writes to data structures
1365  *   in memory (such as a CD or an STE) before the command.
1366  *
1367  * - On completion of a CMD_SYNC, there is a control dependency.
1368  *   This can be relied upon to order subsequent writes to memory (e.g.
1369  *   freeing an IOVA) after completion of the CMD_SYNC.
1370  *
1371  * - Command insertion is totally ordered, so if two CPUs each race to
1372  *   insert their own list of commands then all of the commands from one
1373  *   CPU will appear before any of the commands from the other CPU.
1374  */
1375 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1376 				       u64 *cmds, int n, bool sync)
1377 {
1378 	u64 cmd_sync[CMDQ_ENT_DWORDS];
1379 	u32 prod;
1380 	unsigned long flags;
1381 	bool owner;
1382 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1383 	struct arm_smmu_ll_queue llq = {
1384 		.max_n_shift = cmdq->q.llq.max_n_shift,
1385 	}, head = llq;
1386 	int ret = 0;
1387 
1388 	/* 1. Allocate some space in the queue */
1389 	local_irq_save(flags);
1390 	llq.val = READ_ONCE(cmdq->q.llq.val);
1391 	do {
1392 		u64 old;
1393 
1394 		while (!queue_has_space(&llq, n + sync)) {
1395 			local_irq_restore(flags);
1396 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1397 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1398 			local_irq_save(flags);
1399 		}
1400 
1401 		head.cons = llq.cons;
1402 		head.prod = queue_inc_prod_n(&llq, n + sync) |
1403 					     CMDQ_PROD_OWNED_FLAG;
1404 
1405 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1406 		if (old == llq.val)
1407 			break;
1408 
1409 		llq.val = old;
1410 	} while (1);
1411 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1412 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1413 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1414 
1415 	/*
1416 	 * 2. Write our commands into the queue
1417 	 * Dependency ordering from the cmpxchg() loop above.
1418 	 */
1419 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1420 	if (sync) {
1421 		prod = queue_inc_prod_n(&llq, n);
1422 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1423 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1424 
1425 		/*
1426 		 * In order to determine completion of our CMD_SYNC, we must
1427 		 * ensure that the queue can't wrap twice without us noticing.
1428 		 * We achieve that by taking the cmdq lock as shared before
1429 		 * marking our slot as valid.
1430 		 */
1431 		arm_smmu_cmdq_shared_lock(cmdq);
1432 	}
1433 
1434 	/* 3. Mark our slots as valid, ensuring commands are visible first */
1435 	dma_wmb();
1436 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1437 
1438 	/* 4. If we are the owner, take control of the SMMU hardware */
1439 	if (owner) {
1440 		/* a. Wait for previous owner to finish */
1441 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1442 
1443 		/* b. Stop gathering work by clearing the owned flag */
1444 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1445 						   &cmdq->q.llq.atomic.prod);
1446 		prod &= ~CMDQ_PROD_OWNED_FLAG;
1447 
1448 		/*
1449 		 * c. Wait for any gathered work to be written to the queue.
1450 		 * Note that we read our own entries so that we have the control
1451 		 * dependency required by (d).
1452 		 */
1453 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1454 
1455 		/*
1456 		 * d. Advance the hardware prod pointer
1457 		 * Control dependency ordering from the entries becoming valid.
1458 		 */
1459 		writel_relaxed(prod, cmdq->q.prod_reg);
1460 
1461 		/*
1462 		 * e. Tell the next owner we're done
1463 		 * Make sure we've updated the hardware first, so that we don't
1464 		 * race to update prod and potentially move it backwards.
1465 		 */
1466 		atomic_set_release(&cmdq->owner_prod, prod);
1467 	}
1468 
1469 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1470 	if (sync) {
1471 		llq.prod = queue_inc_prod_n(&llq, n);
1472 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1473 		if (ret) {
1474 			dev_err_ratelimited(smmu->dev,
1475 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1476 					    llq.prod,
1477 					    readl_relaxed(cmdq->q.prod_reg),
1478 					    readl_relaxed(cmdq->q.cons_reg));
1479 		}
1480 
1481 		/*
1482 		 * Try to unlock the cmdq lock. This will fail if we're the last
1483 		 * reader, in which case we can safely update cmdq->q.llq.cons
1484 		 */
1485 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1486 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1487 			arm_smmu_cmdq_shared_unlock(cmdq);
1488 		}
1489 	}
1490 
1491 	local_irq_restore(flags);
1492 	return ret;
1493 }
1494 
1495 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1496 				   struct arm_smmu_cmdq_ent *ent)
1497 {
1498 	u64 cmd[CMDQ_ENT_DWORDS];
1499 
1500 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1501 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1502 			 ent->opcode);
1503 		return -EINVAL;
1504 	}
1505 
1506 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1507 }
1508 
1509 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1510 {
1511 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
1512 }
1513 
1514 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
1515 				    struct arm_smmu_cmdq_batch *cmds,
1516 				    struct arm_smmu_cmdq_ent *cmd)
1517 {
1518 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
1519 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
1520 		cmds->num = 0;
1521 	}
1522 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
1523 	cmds->num++;
1524 }
1525 
1526 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
1527 				      struct arm_smmu_cmdq_batch *cmds)
1528 {
1529 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
1530 }
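
/*
 * Typical batching pattern, as used by arm_smmu_sync_cd() below: callers
 * accumulate commands with arm_smmu_cmdq_batch_add(), which transparently
 * flushes a full batch of CMDQ_BATCH_ENTRIES commands, and finish with
 * arm_smmu_cmdq_batch_submit() to issue the remainder plus a CMD_SYNC.
 */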
1531 
1532 /* Context descriptor manipulation functions */
1533 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1534 			     int ssid, bool leaf)
1535 {
1536 	size_t i;
1537 	unsigned long flags;
1538 	struct arm_smmu_master *master;
1539 	struct arm_smmu_cmdq_batch cmds = {};
1540 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1541 	struct arm_smmu_cmdq_ent cmd = {
1542 		.opcode	= CMDQ_OP_CFGI_CD,
1543 		.cfgi	= {
1544 			.ssid	= ssid,
1545 			.leaf	= leaf,
1546 		},
1547 	};
1548 
1549 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1550 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1551 		for (i = 0; i < master->num_sids; i++) {
1552 			cmd.cfgi.sid = master->sids[i];
1553 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1554 		}
1555 	}
1556 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1557 
1558 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1559 }
1560 
1561 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1562 					struct arm_smmu_l1_ctx_desc *l1_desc)
1563 {
1564 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1565 
1566 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1567 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1568 	if (!l1_desc->l2ptr) {
1569 		dev_warn(smmu->dev,
1570 			 "failed to allocate context descriptor table\n");
1571 		return -ENOMEM;
1572 	}
1573 	return 0;
1574 }
1575 
1576 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1577 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1578 {
1579 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1580 		  CTXDESC_L1_DESC_V;
1581 
1582 	/* See comment in arm_smmu_write_ctx_desc() */
1583 	WRITE_ONCE(*dst, cpu_to_le64(val));
1584 }
1585 
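/*
 * Return a pointer to the CD for @ssid; with a two-level table, the upper
 * SSID bits select the L1 descriptor and SSID[9:0] the entry within its
 * 1024-CD leaf, which is allocated (and the L1 descriptor synced) on first
 * use. Returns NULL on allocation failure.
 */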
1586 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1587 				   u32 ssid)
1588 {
1589 	__le64 *l1ptr;
1590 	unsigned int idx;
1591 	struct arm_smmu_l1_ctx_desc *l1_desc;
1592 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1593 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1594 
1595 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1596 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1597 
1598 	idx = ssid >> CTXDESC_SPLIT;
1599 	l1_desc = &cdcfg->l1_desc[idx];
1600 	if (!l1_desc->l2ptr) {
1601 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1602 			return NULL;
1603 
1604 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1605 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1606 		/* An invalid L1CD can be cached */
1607 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1608 	}
1609 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1610 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1611 }
1612 
1613 static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1614 				   int ssid, struct arm_smmu_ctx_desc *cd)
1615 {
1616 	/*
1617 	 * This function handles the following cases:
1618 	 *
1619 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1620 	 * (2) Install a secondary CD, for SID+SSID traffic.
1621 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1622 	 *     CD, then invalidate the old entry and mappings.
1623 	 * (4) Remove a secondary CD.
1624 	 */
1625 	u64 val;
1626 	bool cd_live;
1627 	__le64 *cdptr;
1628 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1629 
1630 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1631 		return -E2BIG;
1632 
1633 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1634 	if (!cdptr)
1635 		return -ENOMEM;
1636 
1637 	val = le64_to_cpu(cdptr[0]);
1638 	cd_live = !!(val & CTXDESC_CD_0_V);
1639 
1640 	if (!cd) { /* (4) */
1641 		val = 0;
1642 	} else if (cd_live) { /* (3) */
1643 		val &= ~CTXDESC_CD_0_ASID;
1644 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1645 		/*
1646 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1647 		 * this substream's traffic
1648 		 */
1649 	} else { /* (1) and (2) */
1650 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1651 		cdptr[2] = 0;
1652 		cdptr[3] = cpu_to_le64(cd->mair);
1653 
1654 		/*
1655 		 * STE is live, and the SMMU might read dwords of this CD in any
1656 		 * order. Ensure that it observes valid values before reading
1657 		 * V=1.
1658 		 */
1659 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1660 
1661 		val = cd->tcr |
1662 #ifdef __BIG_ENDIAN
1663 			CTXDESC_CD_0_ENDI |
1664 #endif
1665 			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1666 			CTXDESC_CD_0_AA64 |
1667 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1668 			CTXDESC_CD_0_V;
1669 
1670 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1671 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1672 			val |= CTXDESC_CD_0_S;
1673 	}
1674 
1675 	/*
1676 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1677 	 * "Configuration structures and configuration invalidation completion"
1678 	 *
1679 	 *   The size of single-copy atomic reads made by the SMMU is
1680 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1681 	 *   field within an aligned 64-bit span of a structure can be altered
1682 	 *   without first making the structure invalid.
1683 	 */
1684 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1685 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1686 	return 0;
1687 }
1688 
1689 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1690 {
1691 	int ret;
1692 	size_t l1size;
1693 	size_t max_contexts;
1694 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1695 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1696 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1697 
1698 	max_contexts = 1 << cfg->s1cdmax;
1699 
1700 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1701 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1702 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1703 		cdcfg->num_l1_ents = max_contexts;
1704 
1705 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1706 	} else {
1707 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1708 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1709 						  CTXDESC_L2_ENTRIES);
1710 
1711 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1712 					      sizeof(*cdcfg->l1_desc),
1713 					      GFP_KERNEL);
1714 		if (!cdcfg->l1_desc)
1715 			return -ENOMEM;
1716 
1717 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1718 	}
1719 
1720 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1721 					   GFP_KERNEL);
1722 	if (!cdcfg->cdtab) {
1723 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1724 		ret = -ENOMEM;
1725 		goto err_free_l1;
1726 	}
1727 
1728 	return 0;
1729 
1730 err_free_l1:
1731 	if (cdcfg->l1_desc) {
1732 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1733 		cdcfg->l1_desc = NULL;
1734 	}
1735 	return ret;
1736 }
1737 
1738 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1739 {
1740 	int i;
1741 	size_t size, l1size;
1742 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1743 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1744 
1745 	if (cdcfg->l1_desc) {
1746 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1747 
1748 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1749 			if (!cdcfg->l1_desc[i].l2ptr)
1750 				continue;
1751 
1752 			dmam_free_coherent(smmu->dev, size,
1753 					   cdcfg->l1_desc[i].l2ptr,
1754 					   cdcfg->l1_desc[i].l2ptr_dma);
1755 		}
1756 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1757 		cdcfg->l1_desc = NULL;
1758 
1759 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1760 	} else {
1761 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1762 	}
1763 
1764 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1765 	cdcfg->cdtab_dma = 0;
1766 	cdcfg->cdtab = NULL;
1767 }
1768 
1769 static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1770 {
1771 	if (!cd->asid)
1772 		return;
1773 
1774 	xa_erase(&asid_xa, cd->asid);
1775 }
1776 
1777 /* Stream table manipulation functions */
1778 static void
1779 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1780 {
1781 	u64 val = 0;
1782 
1783 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1784 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1785 
1786 	/* See comment in arm_smmu_write_ctx_desc() */
1787 	WRITE_ONCE(*dst, cpu_to_le64(val));
1788 }
1789 
1790 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1791 {
1792 	struct arm_smmu_cmdq_ent cmd = {
1793 		.opcode	= CMDQ_OP_CFGI_STE,
1794 		.cfgi	= {
1795 			.sid	= sid,
1796 			.leaf	= true,
1797 		},
1798 	};
1799 
1800 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1801 	arm_smmu_cmdq_issue_sync(smmu);
1802 }
1803 
1804 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1805 				      __le64 *dst)
1806 {
1807 	/*
1808 	 * This is hideously complicated, but we only really care about
1809 	 * three cases at the moment:
1810 	 *
1811 	 * 1. Invalid (all zero) -> bypass/fault (init)
1812 	 * 2. Bypass/fault -> translation/bypass (attach)
1813 	 * 3. Translation/bypass -> bypass/fault (detach)
1814 	 *
1815 	 * Given that we can't update the STE atomically and the SMMU
1816 	 * doesn't read the thing in a defined order, that leaves us
1817 	 * with the following maintenance requirements:
1818 	 *
1819 	 * 1. Update Config, return (init time STEs aren't live)
1820 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1821 	 * 3. Update Config, sync
1822 	 */
1823 	u64 val = le64_to_cpu(dst[0]);
1824 	bool ste_live = false;
1825 	struct arm_smmu_device *smmu = NULL;
1826 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1827 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1828 	struct arm_smmu_domain *smmu_domain = NULL;
1829 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1830 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1831 		.prefetch	= {
1832 			.sid	= sid,
1833 		},
1834 	};
1835 
1836 	if (master) {
1837 		smmu_domain = master->domain;
1838 		smmu = master->smmu;
1839 	}
1840 
1841 	if (smmu_domain) {
1842 		switch (smmu_domain->stage) {
1843 		case ARM_SMMU_DOMAIN_S1:
1844 			s1_cfg = &smmu_domain->s1_cfg;
1845 			break;
1846 		case ARM_SMMU_DOMAIN_S2:
1847 		case ARM_SMMU_DOMAIN_NESTED:
1848 			s2_cfg = &smmu_domain->s2_cfg;
1849 			break;
1850 		default:
1851 			break;
1852 		}
1853 	}
1854 
1855 	if (val & STRTAB_STE_0_V) {
1856 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1857 		case STRTAB_STE_0_CFG_BYPASS:
1858 			break;
1859 		case STRTAB_STE_0_CFG_S1_TRANS:
1860 		case STRTAB_STE_0_CFG_S2_TRANS:
1861 			ste_live = true;
1862 			break;
1863 		case STRTAB_STE_0_CFG_ABORT:
1864 			BUG_ON(!disable_bypass);
1865 			break;
1866 		default:
1867 			BUG(); /* STE corruption */
1868 		}
1869 	}
1870 
1871 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1872 	val = STRTAB_STE_0_V;
1873 
1874 	/* Bypass/fault */
1875 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1876 		if (!smmu_domain && disable_bypass)
1877 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1878 		else
1879 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1880 
1881 		dst[0] = cpu_to_le64(val);
1882 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1883 						STRTAB_STE_1_SHCFG_INCOMING));
1884 		dst[2] = 0; /* Nuke the VMID */
1885 		/*
1886 		 * The SMMU can perform negative caching, so we must sync
1887 		 * the STE regardless of whether the old value was live.
1888 		 */
1889 		if (smmu)
1890 			arm_smmu_sync_ste_for_sid(smmu, sid);
1891 		return;
1892 	}
1893 
1894 	if (s1_cfg) {
1895 		BUG_ON(ste_live);
1896 		dst[1] = cpu_to_le64(
1897 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1898 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1899 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1900 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1901 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1902 
1903 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1904 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1905 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1906 
1907 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1908 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1909 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1910 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1911 	}
1912 
1913 	if (s2_cfg) {
1914 		BUG_ON(ste_live);
1915 		dst[2] = cpu_to_le64(
1916 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1917 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1918 #ifdef __BIG_ENDIAN
1919 			 STRTAB_STE_2_S2ENDI |
1920 #endif
1921 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1922 			 STRTAB_STE_2_S2R);
1923 
1924 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1925 
1926 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1927 	}
1928 
1929 	if (master->ats_enabled)
1930 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1931 						 STRTAB_STE_1_EATS_TRANS));
1932 
1933 	arm_smmu_sync_ste_for_sid(smmu, sid);
1934 	/* See comment in arm_smmu_write_ctx_desc() */
1935 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1936 	arm_smmu_sync_ste_for_sid(smmu, sid);
1937 
1938 	/* It's likely that we'll want to use the new STE soon */
1939 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1940 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1941 }
1942 
1943 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1944 {
1945 	unsigned int i;
1946 
1947 	for (i = 0; i < nent; ++i) {
1948 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1949 		strtab += STRTAB_STE_DWORDS;
1950 	}
1951 }
1952 
1953 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1954 {
1955 	size_t size;
1956 	void *strtab;
1957 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1958 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1959 
1960 	if (desc->l2ptr)
1961 		return 0;
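
	/*
	 * Rough sizing of the allocation below, assuming this driver's usual
	 * STRTAB_SPLIT of 8 and 8-dword (64-byte) STEs: each lazily allocated
	 * L2 table spans 2^8 = 256 StreamIDs, i.e. 256 * 64 = 16KiB of
	 * DMA-coherent memory per table.
	 */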
1962 
1963 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1964 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1965 
1966 	desc->span = STRTAB_SPLIT + 1;
1967 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1968 					  GFP_KERNEL);
1969 	if (!desc->l2ptr) {
1970 		dev_err(smmu->dev,
1971 			"failed to allocate l2 stream table for SID %u\n",
1972 			sid);
1973 		return -ENOMEM;
1974 	}
1975 
1976 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1977 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1978 	return 0;
1979 }
1980 
1981 /* IRQ and event handlers */
1982 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1983 {
1984 	int i;
1985 	struct arm_smmu_device *smmu = dev;
1986 	struct arm_smmu_queue *q = &smmu->evtq.q;
1987 	struct arm_smmu_ll_queue *llq = &q->llq;
1988 	u64 evt[EVTQ_ENT_DWORDS];
1989 
1990 	do {
1991 		while (!queue_remove_raw(q, evt)) {
1992 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1993 
1994 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1995 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1996 				dev_info(smmu->dev, "\t0x%016llx\n",
1997 					 (unsigned long long)evt[i]);
1998 
1999 		}
2000 
2001 		/*
2002 		 * Not much we can do on overflow, so scream and pretend we're
2003 		 * trying harder.
2004 		 */
2005 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2006 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
2007 	} while (!queue_empty(llq));
2008 
2009 	/* Sync our overflow flag, as we believe we're up to speed */
2010 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2011 		    Q_IDX(llq, llq->cons);
2012 	return IRQ_HANDLED;
2013 }
2014 
2015 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
2016 {
2017 	u32 sid, ssid;
2018 	u16 grpid;
2019 	bool ssv, last;
2020 
2021 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
2022 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
2023 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
2024 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
2025 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
2026 
2027 	dev_info(smmu->dev, "unexpected PRI request received:\n");
2028 	dev_info(smmu->dev,
2029 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
2030 		 sid, ssid, grpid, last ? "L" : "",
2031 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
2032 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
2033 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
2034 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
2035 		 evt[1] & PRIQ_1_ADDR_MASK);
2036 
2037 	if (last) {
2038 		struct arm_smmu_cmdq_ent cmd = {
2039 			.opcode			= CMDQ_OP_PRI_RESP,
2040 			.substream_valid	= ssv,
2041 			.pri			= {
2042 				.sid	= sid,
2043 				.ssid	= ssid,
2044 				.grpid	= grpid,
2045 				.resp	= PRI_RESP_DENY,
2046 			},
2047 		};
2048 
2049 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2050 	}
2051 }
2052 
2053 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2054 {
2055 	struct arm_smmu_device *smmu = dev;
2056 	struct arm_smmu_queue *q = &smmu->priq.q;
2057 	struct arm_smmu_ll_queue *llq = &q->llq;
2058 	u64 evt[PRIQ_ENT_DWORDS];
2059 
2060 	do {
2061 		while (!queue_remove_raw(q, evt))
2062 			arm_smmu_handle_ppr(smmu, evt);
2063 
2064 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2065 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2066 	} while (!queue_empty(llq));
2067 
2068 	/* Sync our overflow flag, as we believe we're up to speed */
2069 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2070 		      Q_IDX(llq, llq->cons);
2071 	queue_sync_cons_out(q);
2072 	return IRQ_HANDLED;
2073 }
2074 
2075 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2076 
2077 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
2078 {
2079 	u32 gerror, gerrorn, active;
2080 	struct arm_smmu_device *smmu = dev;
2081 
2082 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
2083 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
2084 
2085 	active = gerror ^ gerrorn;
2086 	if (!(active & GERROR_ERR_MASK))
2087 		return IRQ_NONE; /* No errors pending */
2088 
2089 	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x) -- this could be serious\n",
2091 		 active);
2092 
2093 	if (active & GERROR_SFM_ERR) {
2094 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
2095 		arm_smmu_device_disable(smmu);
2096 	}
2097 
2098 	if (active & GERROR_MSI_GERROR_ABT_ERR)
2099 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
2100 
2101 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
2102 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
2103 
2104 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
2105 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
2106 
2107 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
2108 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
2109 
2110 	if (active & GERROR_PRIQ_ABT_ERR)
2111 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
2112 
2113 	if (active & GERROR_EVTQ_ABT_ERR)
2114 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
2115 
2116 	if (active & GERROR_CMDQ_ERR)
2117 		arm_smmu_cmdq_skip_err(smmu);
2118 
2119 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
2120 	return IRQ_HANDLED;
2121 }
2122 
2123 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
2124 {
2125 	struct arm_smmu_device *smmu = dev;
2126 
2127 	arm_smmu_evtq_thread(irq, dev);
2128 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2129 		arm_smmu_priq_thread(irq, dev);
2130 
2131 	return IRQ_HANDLED;
2132 }
2133 
2134 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
2135 {
2136 	arm_smmu_gerror_handler(irq, dev);
2137 	return IRQ_WAKE_THREAD;
2138 }
2139 
2140 static void
2141 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2142 			struct arm_smmu_cmdq_ent *cmd)
2143 {
2144 	size_t log2_span;
2145 	size_t span_mask;
	/* ATC invalidates are always on 4096-byte pages */
2147 	size_t inval_grain_shift = 12;
2148 	unsigned long page_start, page_end;
2149 
2150 	*cmd = (struct arm_smmu_cmdq_ent) {
2151 		.opcode			= CMDQ_OP_ATC_INV,
2152 		.substream_valid	= !!ssid,
2153 		.atc.ssid		= ssid,
2154 	};
2155 
2156 	if (!size) {
2157 		cmd->atc.size = ATC_INV_SIZE_ALL;
2158 		return;
2159 	}
2160 
2161 	page_start	= iova >> inval_grain_shift;
2162 	page_end	= (iova + size - 1) >> inval_grain_shift;
2163 
2164 	/*
2165 	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power-of-two number of pages. We
2167 	 * thus have to choose between grossly over-invalidating the region, or
2168 	 * splitting the invalidation into multiple commands. For simplicity
2169 	 * we'll go with the first solution, but should refine it in the future
2170 	 * if multiple commands are shown to be more efficient.
2171 	 *
2172 	 * Find the smallest power of two that covers the range. The most
2173 	 * significant differing bit between the start and end addresses,
2174 	 * fls(start ^ end), indicates the required span. For example:
2175 	 *
2176 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2177 	 *		x = 0b1000 ^ 0b1011 = 0b11
2178 	 *		span = 1 << fls(x) = 4
2179 	 *
2180 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2181 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2182 	 *		span = 1 << fls(x) = 16
2183 	 */
2184 	log2_span	= fls_long(page_start ^ page_end);
2185 	span_mask	= (1ULL << log2_span) - 1;
2186 
2187 	page_start	&= ~span_mask;
2188 
2189 	cmd->atc.addr	= page_start << inval_grain_shift;
2190 	cmd->atc.size	= log2_span;
2191 }
2192 
2193 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
2194 {
2195 	int i;
2196 	struct arm_smmu_cmdq_ent cmd;
2197 
2198 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2199 
2200 	for (i = 0; i < master->num_sids; i++) {
2201 		cmd.atc.sid = master->sids[i];
2202 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
2203 	}
2204 
2205 	return arm_smmu_cmdq_issue_sync(master->smmu);
2206 }
2207 
2208 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2209 				   int ssid, unsigned long iova, size_t size)
2210 {
2211 	int i;
2212 	unsigned long flags;
2213 	struct arm_smmu_cmdq_ent cmd;
2214 	struct arm_smmu_master *master;
2215 	struct arm_smmu_cmdq_batch cmds = {};
2216 
2217 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2218 		return 0;
2219 
2220 	/*
2221 	 * Ensure that we've completed prior invalidation of the main TLBs
2222 	 * before we read 'nr_ats_masters' in case of a concurrent call to
2223 	 * arm_smmu_enable_ats():
2224 	 *
2225 	 *	// unmap()			// arm_smmu_enable_ats()
2226 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
2227 	 *	smp_mb();			[...]
2228 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
2229 	 *
2230 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
2231 	 * ATS was enabled at the PCI device before completion of the TLBI.
2232 	 */
2233 	smp_mb();
2234 	if (!atomic_read(&smmu_domain->nr_ats_masters))
2235 		return 0;
2236 
2237 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2238 
2239 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2240 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
2241 		if (!master->ats_enabled)
2242 			continue;
2243 
2244 		for (i = 0; i < master->num_sids; i++) {
2245 			cmd.atc.sid = master->sids[i];
2246 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2247 		}
2248 	}
2249 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2250 
2251 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2252 }
2253 
2254 /* IO_PGTABLE API */
2255 static void arm_smmu_tlb_inv_context(void *cookie)
2256 {
2257 	struct arm_smmu_domain *smmu_domain = cookie;
2258 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2259 	struct arm_smmu_cmdq_ent cmd;
2260 
2261 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2262 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
2263 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2264 		cmd.tlbi.vmid	= 0;
2265 	} else {
2266 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2267 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2268 	}
2269 
2270 	/*
2271 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2272 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2273 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2274 	 * insertion to guarantee those are observed before the TLBI. Do be
2275 	 * careful, 007.
2276 	 */
2277 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2278 	arm_smmu_cmdq_issue_sync(smmu);
2279 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2280 }
2281 
2282 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
2283 				   size_t granule, bool leaf,
2284 				   struct arm_smmu_domain *smmu_domain)
2285 {
2286 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2287 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
2288 	size_t inv_range = granule;
2289 	struct arm_smmu_cmdq_batch cmds = {};
2290 	struct arm_smmu_cmdq_ent cmd = {
2291 		.tlbi = {
2292 			.leaf	= leaf,
2293 		},
2294 	};
2295 
2296 	if (!size)
2297 		return;
2298 
2299 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2300 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
2301 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2302 	} else {
2303 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2304 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2305 	}
2306 
2307 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2308 		/* Get the leaf page size */
2309 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2310 
		/* Convert the log2 page size (12,14,16) to the TG encoding (1,2,3) */
2312 		cmd.tlbi.tg = (tg - 10) / 2;
2313 
2314 		/* Determine what level the granule is at */
2315 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2316 
2317 		num_pages = size >> tg;
2318 	}
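
	/*
	 * Worked example of the range encoding below (illustrative only,
	 * assuming a 4KiB leaf page size, i.e. tg = 12 and cmd.tlbi.tg = 1).
	 * Invalidating 35 pages gives num_pages = 0b100011:
	 *
	 *   1st iteration: scale = 0, num = 35 & 0x1f = 3 ->  3 pages
	 *   2nd iteration: scale = 5, num = 32 >> 5   = 1 -> 32 pages
	 *
	 * i.e. the whole 35-page range is covered by two commands.
	 */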
2319 
2320 	while (iova < end) {
2321 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range covered is
			 * taken from the low 5 significant bits of the
			 * remaining page count, starting at its lowest set
			 * bit. In pages:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
2329 			unsigned long scale, num;
2330 
2331 			/* Determine the power of 2 multiple number of pages */
2332 			scale = __ffs(num_pages);
2333 			cmd.tlbi.scale = scale;
2334 
2335 			/* Determine how many chunks of 2^scale size we have */
2336 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2337 			cmd.tlbi.num = num - 1;
2338 
2339 			/* range is num * 2^scale * pgsize */
2340 			inv_range = num << (scale + tg);
2341 
2342 			/* Clear out the lower order bits for the next iteration */
2343 			num_pages -= num << scale;
2344 		}
2345 
2346 		cmd.tlbi.addr = iova;
2347 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
2348 		iova += inv_range;
2349 	}
2350 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2351 
2352 	/*
2353 	 * Unfortunately, this can't be leaf-only since we may have
2354 	 * zapped an entire table.
2355 	 */
2356 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
2357 }
2358 
2359 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2360 					 unsigned long iova, size_t granule,
2361 					 void *cookie)
2362 {
2363 	struct arm_smmu_domain *smmu_domain = cookie;
2364 	struct iommu_domain *domain = &smmu_domain->domain;
2365 
2366 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2367 }
2368 
2369 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2370 				  size_t granule, void *cookie)
2371 {
2372 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
2373 }
2374 
2375 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
2376 				  size_t granule, void *cookie)
2377 {
2378 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
2379 }
2380 
2381 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2382 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2383 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2384 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
2385 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2386 };
2387 
2388 /* IOMMU API */
2389 static bool arm_smmu_capable(enum iommu_cap cap)
2390 {
2391 	switch (cap) {
2392 	case IOMMU_CAP_CACHE_COHERENCY:
2393 		return true;
2394 	case IOMMU_CAP_NOEXEC:
2395 		return true;
2396 	default:
2397 		return false;
2398 	}
2399 }
2400 
2401 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2402 {
2403 	struct arm_smmu_domain *smmu_domain;
2404 
2405 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2406 	    type != IOMMU_DOMAIN_DMA &&
2407 	    type != IOMMU_DOMAIN_IDENTITY)
2408 		return NULL;
2409 
2410 	/*
2411 	 * Allocate the domain and initialise some of its data structures.
2412 	 * We can't really do anything meaningful until we've added a
2413 	 * master.
2414 	 */
2415 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2416 	if (!smmu_domain)
2417 		return NULL;
2418 
2419 	if (type == IOMMU_DOMAIN_DMA &&
2420 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
2421 		kfree(smmu_domain);
2422 		return NULL;
2423 	}
2424 
2425 	mutex_init(&smmu_domain->init_mutex);
2426 	INIT_LIST_HEAD(&smmu_domain->devices);
2427 	spin_lock_init(&smmu_domain->devices_lock);
2428 
2429 	return &smmu_domain->domain;
2430 }
2431 
2432 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2433 {
2434 	int idx, size = 1 << span;
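
	/*
	 * Lock-free allocation: if another CPU claims the same bit between
	 * find_first_zero_bit() and test_and_set_bit(), simply retry the
	 * search rather than serialising callers with a lock.
	 */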
2435 
2436 	do {
2437 		idx = find_first_zero_bit(map, size);
2438 		if (idx == size)
2439 			return -ENOSPC;
2440 	} while (test_and_set_bit(idx, map));
2441 
2442 	return idx;
2443 }
2444 
2445 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2446 {
2447 	clear_bit(idx, map);
2448 }
2449 
2450 static void arm_smmu_domain_free(struct iommu_domain *domain)
2451 {
2452 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2453 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2454 
2455 	iommu_put_dma_cookie(domain);
2456 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2457 
2458 	/* Free the CD and ASID, if we allocated them */
2459 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2460 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2461 
2462 		if (cfg->cdcfg.cdtab)
2463 			arm_smmu_free_cd_tables(smmu_domain);
2464 		arm_smmu_free_asid(&cfg->cd);
2465 	} else {
2466 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2467 		if (cfg->vmid)
2468 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2469 	}
2470 
2471 	kfree(smmu_domain);
2472 }
2473 
2474 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2475 				       struct arm_smmu_master *master,
2476 				       struct io_pgtable_cfg *pgtbl_cfg)
2477 {
2478 	int ret;
2479 	u32 asid;
2480 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2481 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2482 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2483 
2484 	ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
2485 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2486 	if (ret)
2487 		return ret;
2488 
2489 	cfg->s1cdmax = master->ssid_bits;
2490 
2491 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2492 	if (ret)
2493 		goto out_free_asid;
2494 
2495 	cfg->cd.asid	= (u16)asid;
2496 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2497 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2498 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2499 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2500 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2501 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2502 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2503 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2504 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2505 
2506 	/*
2507 	 * Note that this will end up calling arm_smmu_sync_cd() before
2508 	 * the master has been added to the devices list for this domain.
2509 	 * This isn't an issue because the STE hasn't been installed yet.
2510 	 */
2511 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2512 	if (ret)
2513 		goto out_free_cd_tables;
2514 
2515 	return 0;
2516 
2517 out_free_cd_tables:
2518 	arm_smmu_free_cd_tables(smmu_domain);
2519 out_free_asid:
2520 	arm_smmu_free_asid(&cfg->cd);
2521 	return ret;
2522 }
2523 
2524 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2525 				       struct arm_smmu_master *master,
2526 				       struct io_pgtable_cfg *pgtbl_cfg)
2527 {
2528 	int vmid;
2529 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2530 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2531 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2532 
2533 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2534 	if (vmid < 0)
2535 		return vmid;
2536 
2537 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2538 	cfg->vmid	= (u16)vmid;
2539 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2540 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2541 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2542 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2543 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2544 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2545 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2546 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2547 	return 0;
2548 }
2549 
2550 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2551 				    struct arm_smmu_master *master)
2552 {
2553 	int ret;
2554 	unsigned long ias, oas;
2555 	enum io_pgtable_fmt fmt;
2556 	struct io_pgtable_cfg pgtbl_cfg;
2557 	struct io_pgtable_ops *pgtbl_ops;
2558 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2559 				 struct arm_smmu_master *,
2560 				 struct io_pgtable_cfg *);
2561 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2562 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2563 
2564 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2565 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2566 		return 0;
2567 	}
2568 
2569 	/* Restrict the stage to what we can actually support */
2570 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2571 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2572 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2573 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2574 
2575 	switch (smmu_domain->stage) {
2576 	case ARM_SMMU_DOMAIN_S1:
2577 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2578 		ias = min_t(unsigned long, ias, VA_BITS);
2579 		oas = smmu->ias;
2580 		fmt = ARM_64_LPAE_S1;
2581 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2582 		break;
2583 	case ARM_SMMU_DOMAIN_NESTED:
2584 	case ARM_SMMU_DOMAIN_S2:
2585 		ias = smmu->ias;
2586 		oas = smmu->oas;
2587 		fmt = ARM_64_LPAE_S2;
2588 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2589 		break;
2590 	default:
2591 		return -EINVAL;
2592 	}
2593 
2594 	pgtbl_cfg = (struct io_pgtable_cfg) {
2595 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2596 		.ias		= ias,
2597 		.oas		= oas,
2598 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2599 		.tlb		= &arm_smmu_flush_ops,
2600 		.iommu_dev	= smmu->dev,
2601 	};
2602 
2603 	if (smmu_domain->non_strict)
2604 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2605 
2606 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2607 	if (!pgtbl_ops)
2608 		return -ENOMEM;
2609 
2610 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2611 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2612 	domain->geometry.force_aperture = true;
2613 
2614 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2615 	if (ret < 0) {
2616 		free_io_pgtable_ops(pgtbl_ops);
2617 		return ret;
2618 	}
2619 
2620 	smmu_domain->pgtbl_ops = pgtbl_ops;
2621 	return 0;
2622 }
2623 
2624 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2625 {
2626 	__le64 *step;
2627 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2628 
2629 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2630 		struct arm_smmu_strtab_l1_desc *l1_desc;
2631 		int idx;
2632 
2633 		/* Two-level walk */
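		/*
		 * For example, assuming this driver's usual STRTAB_SPLIT of 8,
		 * 1-dword L1 descriptors and 8-dword STEs, sid 0x1234 resolves
		 * to l1_desc[0x12] and then to dword offset 0x34 * 8 = 0x1a0
		 * within that descriptor's l2ptr array.
		 */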
2634 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2635 		l1_desc = &cfg->l1_desc[idx];
2636 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2637 		step = &l1_desc->l2ptr[idx];
2638 	} else {
2639 		/* Simple linear lookup */
2640 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2641 	}
2642 
2643 	return step;
2644 }
2645 
2646 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2647 {
2648 	int i, j;
2649 	struct arm_smmu_device *smmu = master->smmu;
2650 
2651 	for (i = 0; i < master->num_sids; ++i) {
2652 		u32 sid = master->sids[i];
2653 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2654 
2655 		/* Bridged PCI devices may end up with duplicated IDs */
2656 		for (j = 0; j < i; j++)
2657 			if (master->sids[j] == sid)
2658 				break;
2659 		if (j < i)
2660 			continue;
2661 
2662 		arm_smmu_write_strtab_ent(master, sid, step);
2663 	}
2664 }
2665 
2666 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2667 {
2668 	struct device *dev = master->dev;
2669 	struct arm_smmu_device *smmu = master->smmu;
2670 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2671 
2672 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2673 		return false;
2674 
2675 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2676 		return false;
2677 
2678 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2679 }
2680 
2681 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2682 {
2683 	size_t stu;
2684 	struct pci_dev *pdev;
2685 	struct arm_smmu_device *smmu = master->smmu;
2686 	struct arm_smmu_domain *smmu_domain = master->domain;
2687 
2688 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2689 	if (!master->ats_enabled)
2690 		return;
2691 
2692 	/* Smallest Translation Unit: log2 of the smallest supported granule */
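	/*
	 * e.g. a (hypothetical) pgsize_bitmap of SZ_4K | SZ_2M | SZ_1G gives
	 * __ffs() == 12, i.e. a 4KiB smallest translation unit.
	 */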
2693 	stu = __ffs(smmu->pgsize_bitmap);
2694 	pdev = to_pci_dev(master->dev);
2695 
2696 	atomic_inc(&smmu_domain->nr_ats_masters);
2697 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2698 	if (pci_enable_ats(pdev, stu))
2699 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2700 }
2701 
2702 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2703 {
2704 	struct arm_smmu_domain *smmu_domain = master->domain;
2705 
2706 	if (!master->ats_enabled)
2707 		return;
2708 
2709 	pci_disable_ats(to_pci_dev(master->dev));
2710 	/*
2711 	 * Ensure ATS is disabled at the endpoint before we issue the
2712 	 * ATC invalidation via the SMMU.
2713 	 */
2714 	wmb();
2715 	arm_smmu_atc_inv_master(master);
2716 	atomic_dec(&smmu_domain->nr_ats_masters);
2717 }
2718 
2719 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2720 {
2721 	int ret;
2722 	int features;
2723 	int num_pasids;
2724 	struct pci_dev *pdev;
2725 
2726 	if (!dev_is_pci(master->dev))
2727 		return -ENODEV;
2728 
2729 	pdev = to_pci_dev(master->dev);
2730 
2731 	features = pci_pasid_features(pdev);
2732 	if (features < 0)
2733 		return features;
2734 
2735 	num_pasids = pci_max_pasids(pdev);
2736 	if (num_pasids <= 0)
2737 		return num_pasids;
2738 
2739 	ret = pci_enable_pasid(pdev, features);
2740 	if (ret) {
2741 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2742 		return ret;
2743 	}
2744 
2745 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2746 				  master->smmu->ssid_bits);
2747 	return 0;
2748 }
2749 
2750 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2751 {
2752 	struct pci_dev *pdev;
2753 
2754 	if (!dev_is_pci(master->dev))
2755 		return;
2756 
2757 	pdev = to_pci_dev(master->dev);
2758 
2759 	if (!pdev->pasid_enabled)
2760 		return;
2761 
2762 	master->ssid_bits = 0;
2763 	pci_disable_pasid(pdev);
2764 }
2765 
2766 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2767 {
2768 	unsigned long flags;
2769 	struct arm_smmu_domain *smmu_domain = master->domain;
2770 
2771 	if (!smmu_domain)
2772 		return;
2773 
2774 	arm_smmu_disable_ats(master);
2775 
2776 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2777 	list_del(&master->domain_head);
2778 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2779 
2780 	master->domain = NULL;
2781 	master->ats_enabled = false;
2782 	arm_smmu_install_ste_for_dev(master);
2783 }
2784 
2785 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2786 {
2787 	int ret = 0;
2788 	unsigned long flags;
2789 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2790 	struct arm_smmu_device *smmu;
2791 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2792 	struct arm_smmu_master *master;
2793 
2794 	if (!fwspec)
2795 		return -ENOENT;
2796 
2797 	master = dev_iommu_priv_get(dev);
2798 	smmu = master->smmu;
2799 
2800 	arm_smmu_detach_dev(master);
2801 
2802 	mutex_lock(&smmu_domain->init_mutex);
2803 
2804 	if (!smmu_domain->smmu) {
2805 		smmu_domain->smmu = smmu;
2806 		ret = arm_smmu_domain_finalise(domain, master);
2807 		if (ret) {
2808 			smmu_domain->smmu = NULL;
2809 			goto out_unlock;
2810 		}
2811 	} else if (smmu_domain->smmu != smmu) {
2812 		dev_err(dev,
2813 			"cannot attach to SMMU %s (upstream of %s)\n",
2814 			dev_name(smmu_domain->smmu->dev),
2815 			dev_name(smmu->dev));
2816 		ret = -ENXIO;
2817 		goto out_unlock;
2818 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2819 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2820 		dev_err(dev,
2821 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2822 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2823 		ret = -EINVAL;
2824 		goto out_unlock;
2825 	}
2826 
2827 	master->domain = smmu_domain;
2828 
2829 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2830 		master->ats_enabled = arm_smmu_ats_supported(master);
2831 
2832 	arm_smmu_install_ste_for_dev(master);
2833 
2834 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2835 	list_add(&master->domain_head, &smmu_domain->devices);
2836 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2837 
2838 	arm_smmu_enable_ats(master);
2839 
2840 out_unlock:
2841 	mutex_unlock(&smmu_domain->init_mutex);
2842 	return ret;
2843 }
2844 
2845 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2846 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2847 {
2848 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2849 
2850 	if (!ops)
2851 		return -ENODEV;
2852 
2853 	return ops->map(ops, iova, paddr, size, prot, gfp);
2854 }
2855 
2856 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2857 			     size_t size, struct iommu_iotlb_gather *gather)
2858 {
2859 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2860 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2861 
2862 	if (!ops)
2863 		return 0;
2864 
2865 	return ops->unmap(ops, iova, size, gather);
2866 }
2867 
2868 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2869 {
2870 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2871 
2872 	if (smmu_domain->smmu)
2873 		arm_smmu_tlb_inv_context(smmu_domain);
2874 }
2875 
2876 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2877 				struct iommu_iotlb_gather *gather)
2878 {
2879 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2880 
2881 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2882 			       gather->pgsize, true, smmu_domain);
2883 }
2884 
2885 static phys_addr_t
2886 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2887 {
2888 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2889 
2890 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2891 		return iova;
2892 
2893 	if (!ops)
2894 		return 0;
2895 
2896 	return ops->iova_to_phys(ops, iova);
2897 }
2898 
2899 static struct platform_driver arm_smmu_driver;
2900 
2901 static
2902 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2903 {
2904 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2905 							  fwnode);
2906 	put_device(dev);
2907 	return dev ? dev_get_drvdata(dev) : NULL;
2908 }
2909 
2910 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2911 {
2912 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2913 
2914 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2915 		limit *= 1UL << STRTAB_SPLIT;
2916 
2917 	return sid < limit;
2918 }
2919 
2920 static struct iommu_ops arm_smmu_ops;
2921 
2922 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2923 {
2924 	int i, ret;
2925 	struct arm_smmu_device *smmu;
2926 	struct arm_smmu_master *master;
2927 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2928 
2929 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2930 		return ERR_PTR(-ENODEV);
2931 
2932 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2933 		return ERR_PTR(-EBUSY);
2934 
2935 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2936 	if (!smmu)
2937 		return ERR_PTR(-ENODEV);
2938 
2939 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2940 	if (!master)
2941 		return ERR_PTR(-ENOMEM);
2942 
2943 	master->dev = dev;
2944 	master->smmu = smmu;
2945 	master->sids = fwspec->ids;
2946 	master->num_sids = fwspec->num_ids;
2947 	dev_iommu_priv_set(dev, master);
2948 
2949 	/* Check the SIDs are in range of the SMMU and our stream table */
2950 	for (i = 0; i < master->num_sids; i++) {
2951 		u32 sid = master->sids[i];
2952 
2953 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2954 			ret = -ERANGE;
2955 			goto err_free_master;
2956 		}
2957 
2958 		/* Ensure l2 strtab is initialised */
2959 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2960 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2961 			if (ret)
2962 				goto err_free_master;
2963 		}
2964 	}
2965 
2966 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2967 
2968 	/*
2969 	 * Note that PASID must be enabled before, and disabled after ATS:
2970 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2971 	 *
2972 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2973 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2974 	 *   are changed.
2975 	 */
2976 	arm_smmu_enable_pasid(master);
2977 
2978 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2979 		master->ssid_bits = min_t(u8, master->ssid_bits,
2980 					  CTXDESC_LINEAR_CDMAX);
2981 
2982 	return &smmu->iommu;
2983 
2984 err_free_master:
2985 	kfree(master);
2986 	dev_iommu_priv_set(dev, NULL);
2987 	return ERR_PTR(ret);
2988 }
2989 
2990 static void arm_smmu_release_device(struct device *dev)
2991 {
2992 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2993 	struct arm_smmu_master *master;
2994 
2995 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2996 		return;
2997 
2998 	master = dev_iommu_priv_get(dev);
2999 	arm_smmu_detach_dev(master);
3000 	arm_smmu_disable_pasid(master);
3001 	kfree(master);
3002 	iommu_fwspec_free(dev);
3003 }
3004 
3005 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3006 {
3007 	struct iommu_group *group;
3008 
3009 	/*
3010 	 * We don't support devices sharing stream IDs other than PCI RID
3011 	 * aliases, since the necessary ID-to-device lookup becomes rather
3012 	 * impractical given a potential sparse 32-bit stream ID space.
3013 	 */
3014 	if (dev_is_pci(dev))
3015 		group = pci_device_group(dev);
3016 	else
3017 		group = generic_device_group(dev);
3018 
3019 	return group;
3020 }
3021 
3022 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
3023 				    enum iommu_attr attr, void *data)
3024 {
3025 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3026 
3027 	switch (domain->type) {
3028 	case IOMMU_DOMAIN_UNMANAGED:
3029 		switch (attr) {
3030 		case DOMAIN_ATTR_NESTING:
3031 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
3032 			return 0;
3033 		default:
3034 			return -ENODEV;
3035 		}
3036 		break;
3037 	case IOMMU_DOMAIN_DMA:
3038 		switch (attr) {
3039 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3040 			*(int *)data = smmu_domain->non_strict;
3041 			return 0;
3042 		default:
3043 			return -ENODEV;
3044 		}
3045 		break;
3046 	default:
3047 		return -EINVAL;
3048 	}
3049 }
3050 
3051 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
3052 				    enum iommu_attr attr, void *data)
3053 {
3054 	int ret = 0;
3055 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3056 
3057 	mutex_lock(&smmu_domain->init_mutex);
3058 
3059 	switch (domain->type) {
3060 	case IOMMU_DOMAIN_UNMANAGED:
3061 		switch (attr) {
3062 		case DOMAIN_ATTR_NESTING:
3063 			if (smmu_domain->smmu) {
3064 				ret = -EPERM;
3065 				goto out_unlock;
3066 			}
3067 
3068 			if (*(int *)data)
3069 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
3070 			else
3071 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3072 			break;
3073 		default:
3074 			ret = -ENODEV;
3075 		}
3076 		break;
3077 	case IOMMU_DOMAIN_DMA:
		switch (attr) {
3079 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3080 			smmu_domain->non_strict = *(int *)data;
3081 			break;
3082 		default:
3083 			ret = -ENODEV;
3084 		}
3085 		break;
3086 	default:
3087 		ret = -EINVAL;
3088 	}
3089 
3090 out_unlock:
3091 	mutex_unlock(&smmu_domain->init_mutex);
3092 	return ret;
3093 }
3094 
3095 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
3096 {
3097 	return iommu_fwspec_add_ids(dev, args->args, 1);
3098 }
3099 
3100 static void arm_smmu_get_resv_regions(struct device *dev,
3101 				      struct list_head *head)
3102 {
3103 	struct iommu_resv_region *region;
3104 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3105 
3106 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3107 					 prot, IOMMU_RESV_SW_MSI);
3108 	if (!region)
3109 		return;
3110 
3111 	list_add_tail(&region->list, head);
3112 
3113 	iommu_dma_get_resv_regions(dev, head);
3114 }
3115 
3116 static struct iommu_ops arm_smmu_ops = {
3117 	.capable		= arm_smmu_capable,
3118 	.domain_alloc		= arm_smmu_domain_alloc,
3119 	.domain_free		= arm_smmu_domain_free,
3120 	.attach_dev		= arm_smmu_attach_dev,
3121 	.map			= arm_smmu_map,
3122 	.unmap			= arm_smmu_unmap,
3123 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3124 	.iotlb_sync		= arm_smmu_iotlb_sync,
3125 	.iova_to_phys		= arm_smmu_iova_to_phys,
3126 	.probe_device		= arm_smmu_probe_device,
3127 	.release_device		= arm_smmu_release_device,
3128 	.device_group		= arm_smmu_device_group,
3129 	.domain_get_attr	= arm_smmu_domain_get_attr,
3130 	.domain_set_attr	= arm_smmu_domain_set_attr,
3131 	.of_xlate		= arm_smmu_of_xlate,
3132 	.get_resv_regions	= arm_smmu_get_resv_regions,
3133 	.put_resv_regions	= generic_iommu_put_resv_regions,
3134 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3135 };
3136 
3137 /* Probing and initialisation functions */
3138 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3139 				   struct arm_smmu_queue *q,
3140 				   unsigned long prod_off,
3141 				   unsigned long cons_off,
3142 				   size_t dwords, const char *name)
3143 {
3144 	size_t qsz;
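
	/*
	 * Start from the queue size requested by the caller and, if a
	 * contiguous allocation of that size fails, retry with progressively
	 * smaller power-of-two sizes, giving up once the size drops below a
	 * page. For example (illustrative), a 2^20-entry command queue that
	 * cannot be allocated in one chunk may end up as a 2^19-entry queue.
	 */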
3145 
3146 	do {
3147 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3148 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3149 					      GFP_KERNEL);
3150 		if (q->base || qsz < PAGE_SIZE)
3151 			break;
3152 
3153 		q->llq.max_n_shift--;
3154 	} while (1);
3155 
3156 	if (!q->base) {
3157 		dev_err(smmu->dev,
3158 			"failed to allocate queue (0x%zx bytes) for %s\n",
3159 			qsz, name);
3160 		return -ENOMEM;
3161 	}
3162 
3163 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3164 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3165 			 1 << q->llq.max_n_shift, name);
3166 	}
3167 
3168 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
3169 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
3170 	q->ent_dwords	= dwords;
3171 
3172 	q->q_base  = Q_BASE_RWA;
3173 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3174 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3175 
3176 	q->llq.prod = q->llq.cons = 0;
3177 	return 0;
3178 }
3179 
3180 static void arm_smmu_cmdq_free_bitmap(void *data)
3181 {
3182 	unsigned long *bitmap = data;
3183 	bitmap_free(bitmap);
3184 }
3185 
3186 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3187 {
3188 	int ret = 0;
3189 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3190 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3191 	atomic_long_t *bitmap;
3192 
3193 	atomic_set(&cmdq->owner_prod, 0);
3194 	atomic_set(&cmdq->lock, 0);
3195 
3196 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
3197 	if (!bitmap) {
3198 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
3199 		ret = -ENOMEM;
3200 	} else {
3201 		cmdq->valid_map = bitmap;
3202 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
3203 	}
3204 
3205 	return ret;
3206 }
3207 
3208 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3209 {
3210 	int ret;
3211 
3212 	/* cmdq */
3213 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
3214 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
3215 				      "cmdq");
3216 	if (ret)
3217 		return ret;
3218 
3219 	ret = arm_smmu_cmdq_init(smmu);
3220 	if (ret)
3221 		return ret;
3222 
3223 	/* evtq */
3224 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
3225 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
3226 				      "evtq");
3227 	if (ret)
3228 		return ret;
3229 
3230 	/* priq */
3231 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3232 		return 0;
3233 
3234 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
3235 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
3236 				       "priq");
3237 }
3238 
3239 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3240 {
3241 	unsigned int i;
3242 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3243 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3244 	void *strtab = smmu->strtab_cfg.strtab;
3245 
3246 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3247 	if (!cfg->l1_desc) {
3248 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
3249 		return -ENOMEM;
3250 	}
3251 
3252 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3253 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3254 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3255 	}
3256 
3257 	return 0;
3258 }
3259 
3260 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3261 {
3262 	void *strtab;
3263 	u64 reg;
3264 	u32 size, l1size;
3265 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3266 
3267 	/* Calculate the L1 size, capped to the SIDSIZE. */
3268 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3269 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3270 	cfg->num_l1_ents = 1 << size;
3271 
3272 	size += STRTAB_SPLIT;
3273 	if (size < smmu->sid_bits)
3274 		dev_warn(smmu->dev,
3275 			 "2-level strtab only covers %u/%u bits of SID\n",
3276 			 size, smmu->sid_bits);
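
	/*
	 * For example, assuming the driver's usual STRTAB_L1_SZ_SHIFT of 20
	 * and STRTAB_SPLIT of 8: an SMMU reporting 32-bit StreamIDs gets
	 * 2^17 L1 descriptors (a 1MiB L1 table) covering 2^25 StreamIDs, and
	 * the warning above reports "25/32 bits".
	 */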
3277 
3278 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3279 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3280 				     GFP_KERNEL);
3281 	if (!strtab) {
3282 		dev_err(smmu->dev,
3283 			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
3285 		return -ENOMEM;
3286 	}
3287 	cfg->strtab = strtab;
3288 
3289 	/* Configure strtab_base_cfg for 2 levels */
3290 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3291 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3292 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3293 	cfg->strtab_base_cfg = reg;
3294 
3295 	return arm_smmu_init_l1_strtab(smmu);
3296 }
3297 
3298 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3299 {
3300 	void *strtab;
3301 	u64 reg;
3302 	u32 size;
3303 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3304 
3305 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3306 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3307 				     GFP_KERNEL);
3308 	if (!strtab) {
3309 		dev_err(smmu->dev,
3310 			"failed to allocate linear stream table (%u bytes)\n",
3311 			size);
3312 		return -ENOMEM;
3313 	}
3314 	cfg->strtab = strtab;
3315 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3316 
3317 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3318 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3319 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3320 	cfg->strtab_base_cfg = reg;
3321 
3322 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3323 	return 0;
3324 }
3325 
3326 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3327 {
3328 	u64 reg;
3329 	int ret;
3330 
3331 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3332 		ret = arm_smmu_init_strtab_2lvl(smmu);
3333 	else
3334 		ret = arm_smmu_init_strtab_linear(smmu);
3335 
3336 	if (ret)
3337 		return ret;
3338 
3339 	/* Set the strtab base address */
3340 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3341 	reg |= STRTAB_BASE_RA;
3342 	smmu->strtab_cfg.strtab_base = reg;
3343 
3344 	/* Allocate the first VMID for stage-2 bypass STEs */
3345 	set_bit(0, smmu->vmid_map);
3346 	return 0;
3347 }
3348 
3349 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3350 {
3351 	int ret;
3352 
3353 	ret = arm_smmu_init_queues(smmu);
3354 	if (ret)
3355 		return ret;
3356 
3357 	return arm_smmu_init_strtab(smmu);
3358 }
3359 
3360 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3361 				   unsigned int reg_off, unsigned int ack_off)
3362 {
3363 	u32 reg;
3364 
3365 	writel_relaxed(val, smmu->base + reg_off);
3366 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3367 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3368 }
3369 
3370 /* GBPA is "special" */
3371 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3372 {
3373 	int ret;
3374 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3375 
3376 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3377 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3378 	if (ret)
3379 		return ret;
3380 
3381 	reg &= ~clr;
3382 	reg |= set;
3383 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3384 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3385 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3386 
3387 	if (ret)
3388 		dev_err(smmu->dev, "GBPA not responding to update\n");
3389 	return ret;
3390 }
3391 
3392 static void arm_smmu_free_msis(void *data)
3393 {
3394 	struct device *dev = data;
3395 	platform_msi_domain_free_irqs(dev);
3396 }
3397 
3398 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3399 {
3400 	phys_addr_t doorbell;
3401 	struct device *dev = msi_desc_to_dev(desc);
3402 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3403 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3404 
3405 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3406 	doorbell &= MSI_CFG0_ADDR_MASK;
3407 
3408 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3409 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3410 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3411 }
3412 
3413 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3414 {
3415 	struct msi_desc *desc;
3416 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3417 	struct device *dev = smmu->dev;
3418 
3419 	/* Clear the MSI address regs */
3420 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3421 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3422 
3423 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3424 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3425 	else
3426 		nvec--;
3427 
3428 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3429 		return;
3430 
3431 	if (!dev->msi_domain) {
3432 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3433 		return;
3434 	}
3435 
3436 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3437 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3438 	if (ret) {
3439 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3440 		return;
3441 	}
3442 
3443 	for_each_msi_entry(desc, dev) {
3444 		switch (desc->platform.msi_index) {
3445 		case EVTQ_MSI_INDEX:
3446 			smmu->evtq.q.irq = desc->irq;
3447 			break;
3448 		case GERROR_MSI_INDEX:
3449 			smmu->gerr_irq = desc->irq;
3450 			break;
3451 		case PRIQ_MSI_INDEX:
3452 			smmu->priq.q.irq = desc->irq;
3453 			break;
3454 		default:	/* Unknown */
3455 			continue;
3456 		}
3457 	}
3458 
3459 	/* Add callback to free MSIs on teardown */
3460 	devm_add_action(dev, arm_smmu_free_msis, dev);
3461 }
3462 
3463 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3464 {
3465 	int irq, ret;
3466 
3467 	arm_smmu_setup_msis(smmu);
3468 
3469 	/* Request interrupt lines */
3470 	irq = smmu->evtq.q.irq;
3471 	if (irq) {
3472 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3473 						arm_smmu_evtq_thread,
3474 						IRQF_ONESHOT,
3475 						"arm-smmu-v3-evtq", smmu);
3476 		if (ret < 0)
3477 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3478 	} else {
3479 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3480 	}
3481 
3482 	irq = smmu->gerr_irq;
3483 	if (irq) {
3484 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3485 				       0, "arm-smmu-v3-gerror", smmu);
3486 		if (ret < 0)
3487 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3488 	} else {
3489 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3490 	}
3491 
3492 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3493 		irq = smmu->priq.q.irq;
3494 		if (irq) {
3495 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3496 							arm_smmu_priq_thread,
3497 							IRQF_ONESHOT,
3498 							"arm-smmu-v3-priq",
3499 							smmu);
3500 			if (ret < 0)
3501 				dev_warn(smmu->dev,
3502 					 "failed to enable priq irq\n");
3503 		} else {
3504 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3505 		}
3506 	}
3507 }
3508 
3509 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3510 {
3511 	int ret, irq;
3512 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3513 
3514 	/* Disable IRQs first */
3515 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3516 				      ARM_SMMU_IRQ_CTRLACK);
3517 	if (ret) {
3518 		dev_err(smmu->dev, "failed to disable irqs\n");
3519 		return ret;
3520 	}
3521 
3522 	irq = smmu->combined_irq;
3523 	if (irq) {
3524 		/*
3525 		 * Cavium ThunderX2 implementation doesn't support unique irq
3526 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3527 		 */
3528 		ret = devm_request_threaded_irq(smmu->dev, irq,
3529 					arm_smmu_combined_irq_handler,
3530 					arm_smmu_combined_irq_thread,
3531 					IRQF_ONESHOT,
3532 					"arm-smmu-v3-combined-irq", smmu);
3533 		if (ret < 0)
3534 			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else {
		arm_smmu_setup_unique_irqs(smmu);
	}
3537 
3538 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3539 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3540 
3541 	/* Enable interrupt generation on the SMMU */
3542 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3543 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3544 	if (ret)
3545 		dev_warn(smmu->dev, "failed to enable irqs\n");
3546 
3547 	return 0;
3548 }
3549 
3550 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3551 {
3552 	int ret;
3553 
3554 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3555 	if (ret)
3556 		dev_err(smmu->dev, "failed to clear cr0\n");
3557 
3558 	return ret;
3559 }
3560 
3561 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3562 {
3563 	int ret;
3564 	u32 reg, enables;
3565 	struct arm_smmu_cmdq_ent cmd;
3566 
3567 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3568 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3569 	if (reg & CR0_SMMUEN) {
3570 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3571 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3572 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3573 	}
3574 
3575 	ret = arm_smmu_device_disable(smmu);
3576 	if (ret)
3577 		return ret;
3578 
3579 	/* CR1 (table and queue memory attributes) */
3580 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3581 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3582 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3583 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3584 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3585 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3586 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3587 
3588 	/* CR2 (random crap) */
3589 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3590 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3591 
3592 	/* Stream table */
3593 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3594 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3595 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3596 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3597 
3598 	/* Command queue */
3599 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3600 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3601 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3602 
3603 	enables = CR0_CMDQEN;
3604 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3605 				      ARM_SMMU_CR0ACK);
3606 	if (ret) {
3607 		dev_err(smmu->dev, "failed to enable command queue\n");
3608 		return ret;
3609 	}
3610 
3611 	/* Invalidate any cached configuration */
3612 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3613 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3614 	arm_smmu_cmdq_issue_sync(smmu);
3615 
3616 	/* Invalidate any stale TLB entries */
3617 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3618 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3619 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3620 	}
3621 
3622 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3623 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3624 	arm_smmu_cmdq_issue_sync(smmu);
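	/*
	 * The CMD_SYNC issued above ensures the preceding configuration-cache
	 * and TLB invalidations have completed before the remaining queues
	 * and translation are enabled below.
	 */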
3625 
3626 	/* Event queue */
3627 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3628 	writel_relaxed(smmu->evtq.q.llq.prod,
3629 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3630 	writel_relaxed(smmu->evtq.q.llq.cons,
3631 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3632 
3633 	enables |= CR0_EVTQEN;
3634 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3635 				      ARM_SMMU_CR0ACK);
3636 	if (ret) {
3637 		dev_err(smmu->dev, "failed to enable event queue\n");
3638 		return ret;
3639 	}
3640 
3641 	/* PRI queue */
3642 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3643 		writeq_relaxed(smmu->priq.q.q_base,
3644 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3645 		writel_relaxed(smmu->priq.q.llq.prod,
3646 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3647 		writel_relaxed(smmu->priq.q.llq.cons,
3648 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3649 
3650 		enables |= CR0_PRIQEN;
3651 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3652 					      ARM_SMMU_CR0ACK);
3653 		if (ret) {
3654 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3655 			return ret;
3656 		}
3657 	}
3658 
3659 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3660 		enables |= CR0_ATSCHK;
3661 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3662 					      ARM_SMMU_CR0ACK);
3663 		if (ret) {
3664 			dev_err(smmu->dev, "failed to enable ATS check\n");
3665 			return ret;
3666 		}
3667 	}
3668 
3669 	ret = arm_smmu_setup_irqs(smmu);
3670 	if (ret) {
3671 		dev_err(smmu->dev, "failed to setup irqs\n");
3672 		return ret;
3673 	}
3674 
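	/*
	 * In a kdump kernel, turn the event and PRI queues back off before
	 * enabling translation: devices left running by the crashed kernel
	 * could otherwise flood them while the dump is being taken.
	 */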
3675 	if (is_kdump_kernel())
3676 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3677 
3678 	/* Enable the SMMU interface, or ensure bypass */
3679 	if (!bypass || disable_bypass) {
3680 		enables |= CR0_SMMUEN;
3681 	} else {
3682 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3683 		if (ret)
3684 			return ret;
3685 	}
3686 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3687 				      ARM_SMMU_CR0ACK);
3688 	if (ret) {
3689 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3690 		return ret;
3691 	}
3692 
3693 	return 0;
3694 }
3695 
3696 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3697 {
3698 	u32 reg;
3699 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3700 
3701 	/* IDR0 */
3702 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3703 
3704 	/* 2-level structures */
3705 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3706 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3707 
3708 	if (reg & IDR0_CD2L)
3709 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3710 
3711 	/*
3712 	 * Translation table endianness.
3713 	 * We currently require the same endianness as the CPU, but this
3714 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3715 	 */
3716 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3717 	case IDR0_TTENDIAN_MIXED:
3718 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3719 		break;
3720 #ifdef __BIG_ENDIAN
3721 	case IDR0_TTENDIAN_BE:
3722 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3723 		break;
3724 #else
3725 	case IDR0_TTENDIAN_LE:
3726 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3727 		break;
3728 #endif
3729 	default:
3730 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3731 		return -ENXIO;
3732 	}
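	/*
	 * FIELD_GET() is the inverse of FIELD_PREP(): it extracts and
	 * right-shifts the field named by the mask, e.g.
	 * FIELD_GET(GENMASK(7, 4), 0x30) evaluates to 0x3. The ID register
	 * parsing in this function relies on it throughout.
	 */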
3733 
3734 	/* Boolean feature flags */
3735 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3736 		smmu->features |= ARM_SMMU_FEAT_PRI;
3737 
3738 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3739 		smmu->features |= ARM_SMMU_FEAT_ATS;
3740 
3741 	if (reg & IDR0_SEV)
3742 		smmu->features |= ARM_SMMU_FEAT_SEV;
3743 
3744 	if (reg & IDR0_MSI)
3745 		smmu->features |= ARM_SMMU_FEAT_MSI;
3746 
3747 	if (reg & IDR0_HYP)
3748 		smmu->features |= ARM_SMMU_FEAT_HYP;
3749 
3750 	/*
3751 	 * The coherency feature as set by FW is used in preference to the ID
3752 	 * register, but warn on mismatch.
3753 	 */
3754 	if (!!(reg & IDR0_COHACC) != coherent)
3755 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3756 			 coherent ? "true" : "false");
3757 
3758 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3759 	case IDR0_STALL_MODEL_FORCE:
3760 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3761 		/* Fallthrough */
3762 	case IDR0_STALL_MODEL_STALL:
3763 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3764 	}
3765 
3766 	if (reg & IDR0_S1P)
3767 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3768 
3769 	if (reg & IDR0_S2P)
3770 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3771 
3772 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3773 		dev_err(smmu->dev, "no translation support!\n");
3774 		return -ENXIO;
3775 	}
3776 
3777 	/* We only support the AArch64 table format at present */
3778 	switch (FIELD_GET(IDR0_TTF, reg)) {
3779 	case IDR0_TTF_AARCH32_64:
3780 		smmu->ias = 40;
3781 		/* Fallthrough */
3782 	case IDR0_TTF_AARCH64:
3783 		break;
3784 	default:
3785 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3786 		return -ENXIO;
3787 	}
3788 
3789 	/* ASID/VMID sizes */
3790 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3791 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3792 
3793 	/* IDR1 */
3794 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3795 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3796 		dev_err(smmu->dev, "embedded implementation not supported\n");
3797 		return -ENXIO;
3798 	}
3799 
3800 	/* Queue sizes, capped to ensure natural alignment */
3801 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3802 					     FIELD_GET(IDR1_CMDQS, reg));
3803 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3804 		/*
3805 		 * We don't support splitting up batches, so one batch of
3806 		 * commands plus an extra sync needs to fit inside the command
3807 		 * queue. There's also no way we can handle the weird alignment
3808 		 * restrictions on the base pointer for a unit-length queue.
3809 		 */
3810 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3811 			CMDQ_BATCH_ENTRIES);
3812 		return -ENXIO;
3813 	}
3814 
3815 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3816 					     FIELD_GET(IDR1_EVTQS, reg));
3817 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3818 					     FIELD_GET(IDR1_PRIQS, reg));
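	/*
	 * For example, if the hardware reports a 2^8-entry command queue and
	 * CMDQ_MAX_SZ_SHIFT is at least 8, max_n_shift ends up as 8 and the
	 * queue holds 256 entries; whichever of the hardware size and the
	 * driver cap is smaller wins. The same capping applies to the event
	 * and PRI queue sizes just above.
	 */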
3819 
3820 	/* SID/SSID sizes */
3821 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3822 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3823 
3824 	/*
3825 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3826 	 * table, use a linear table instead.
3827 	 */
3828 	if (smmu->sid_bits <= STRTAB_SPLIT)
3829 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3830 
3831 	/* IDR3 */
3832 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3833 	if (FIELD_GET(IDR3_RIL, reg))
3834 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3835 
3836 	/* IDR5 */
3837 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3838 
3839 	/* Maximum number of outstanding stalls */
3840 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3841 
3842 	/* Page sizes */
3843 	if (reg & IDR5_GRAN64K)
3844 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3845 	if (reg & IDR5_GRAN16K)
3846 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3847 	if (reg & IDR5_GRAN4K)
3848 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
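	/*
	 * Each bit set in pgsize_bitmap is a supported page or block size;
	 * for instance a 4K-granule-only implementation ends up with
	 * SZ_4K | SZ_2M | SZ_1G == 0x40201000 (bits 12, 21 and 30).
	 */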
3849 
3850 	/* Input address size */
3851 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3852 		smmu->features |= ARM_SMMU_FEAT_VAX;
3853 
3854 	/* Output address size */
3855 	switch (FIELD_GET(IDR5_OAS, reg)) {
3856 	case IDR5_OAS_32_BIT:
3857 		smmu->oas = 32;
3858 		break;
3859 	case IDR5_OAS_36_BIT:
3860 		smmu->oas = 36;
3861 		break;
3862 	case IDR5_OAS_40_BIT:
3863 		smmu->oas = 40;
3864 		break;
3865 	case IDR5_OAS_42_BIT:
3866 		smmu->oas = 42;
3867 		break;
3868 	case IDR5_OAS_44_BIT:
3869 		smmu->oas = 44;
3870 		break;
3871 	case IDR5_OAS_52_BIT:
3872 		smmu->oas = 52;
3873 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3874 		break;
3875 	default:
3876 		dev_info(smmu->dev,
3877 			"unknown output address size. Truncating to 48-bit\n");
3878 		/* Fallthrough */
3879 	case IDR5_OAS_48_BIT:
3880 		smmu->oas = 48;
3881 	}
3882 
3883 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3884 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3885 	else
3886 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3887 
3888 	/* Set the DMA mask for our table walker */
3889 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3890 		dev_warn(smmu->dev,
3891 			 "failed to set DMA mask for table walker\n");
3892 
3893 	smmu->ias = max(smmu->ias, smmu->oas);
3894 
3895 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3896 		 smmu->ias, smmu->oas, smmu->features);
3897 	return 0;
3898 }
3899 
3900 #ifdef CONFIG_ACPI
3901 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3902 {
3903 	switch (model) {
3904 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3905 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3906 		break;
3907 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3908 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3909 		break;
3910 	}
3911 
3912 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3913 }
3914 
3915 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3916 				      struct arm_smmu_device *smmu)
3917 {
3918 	struct acpi_iort_smmu_v3 *iort_smmu;
3919 	struct device *dev = smmu->dev;
3920 	struct acpi_iort_node *node;
3921 
3922 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3923 
3924 	/* Retrieve SMMUv3 specific data */
3925 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3926 
3927 	acpi_smmu_get_options(iort_smmu->model, smmu);
3928 
3929 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3930 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3931 
3932 	return 0;
3933 }
3934 #else
3935 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3936 					     struct arm_smmu_device *smmu)
3937 {
3938 	return -ENODEV;
3939 }
3940 #endif
3941 
3942 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3943 				    struct arm_smmu_device *smmu)
3944 {
3945 	struct device *dev = &pdev->dev;
3946 	u32 cells;
3947 	int ret = -EINVAL;
3948 
3949 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3950 		dev_err(dev, "missing #iommu-cells property\n");
3951 	else if (cells != 1)
3952 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3953 	else
3954 		ret = 0;
3955 
3956 	parse_driver_options(smmu);
3957 
3958 	if (of_dma_is_coherent(dev->of_node))
3959 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3960 
3961 	return ret;
3962 }
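
/*
 * For reference, a hypothetical device-tree node matching this binding (the
 * addresses and interrupt numbers below are made up) might look like:
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "gerror";
 *		#iommu-cells = <1>;
 *	};
 *
 * The interrupt names correspond to those looked up by
 * platform_get_irq_byname_optional() in arm_smmu_device_probe() below.
 */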
3963 
3964 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3965 {
3966 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3967 		return SZ_64K;
3968 	else
3969 		return SZ_128K;
3970 }
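
/*
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY covers implementations (such as the Cavium
 * CN99xx handled in acpi_smmu_get_options() above) that expose all of their
 * registers within the first 64K page; on everything else the register map
 * spans two 64K pages and arm_smmu_page1_fixup()/smmu->page1 are used to
 * reach the second one.
 */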
3971 
3972 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3973 {
3974 	int err;
3975 
3976 #ifdef CONFIG_PCI
3977 	if (pci_bus_type.iommu_ops != ops) {
3978 		err = bus_set_iommu(&pci_bus_type, ops);
3979 		if (err)
3980 			return err;
3981 	}
3982 #endif
3983 #ifdef CONFIG_ARM_AMBA
3984 	if (amba_bustype.iommu_ops != ops) {
3985 		err = bus_set_iommu(&amba_bustype, ops);
3986 		if (err)
3987 			goto err_reset_pci_ops;
3988 	}
3989 #endif
3990 	if (platform_bus_type.iommu_ops != ops) {
3991 		err = bus_set_iommu(&platform_bus_type, ops);
3992 		if (err)
3993 			goto err_reset_amba_ops;
3994 	}
3995 
3996 	return 0;
3997 
3998 err_reset_amba_ops:
3999 #ifdef CONFIG_ARM_AMBA
4000 	bus_set_iommu(&amba_bustype, NULL);
4001 #endif
4002 err_reset_pci_ops: __maybe_unused;
4003 #ifdef CONFIG_PCI
4004 	bus_set_iommu(&pci_bus_type, NULL);
4005 #endif
4006 	return err;
4007 }
4008 
4009 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4010 				      resource_size_t size)
4011 {
4012 	struct resource res = {
4013 		.flags = IORESOURCE_MEM,
4014 		.start = start,
4015 		.end = start + size - 1,
4016 	};
4017 
4018 	return devm_ioremap_resource(dev, &res);
4019 }
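
/*
 * devm_ioremap_resource() both requests and maps the region and returns an
 * ERR_PTR() on failure, which is why the caller below checks IS_ERR() rather
 * than NULL on smmu->base and smmu->page1.
 */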
4020 
4021 static int arm_smmu_device_probe(struct platform_device *pdev)
4022 {
4023 	int irq, ret;
4024 	struct resource *res;
4025 	resource_size_t ioaddr;
4026 	struct arm_smmu_device *smmu;
4027 	struct device *dev = &pdev->dev;
4028 	bool bypass;
4029 
4030 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4031 	if (!smmu) {
4032 		dev_err(dev, "failed to allocate arm_smmu_device\n");
4033 		return -ENOMEM;
4034 	}
4035 	smmu->dev = dev;
4036 
4037 	if (dev->of_node) {
4038 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4039 	} else {
4040 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4041 		if (ret == -ENODEV)
4042 			return ret;
4043 	}
4044 
4045 	/* Set bypass mode according to firmware probing result */
4046 	bypass = !!ret;
4047 
4048 	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res) {
		dev_err(dev, "missing MMIO resource\n");
		return -EINVAL;
	}
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
4054 	ioaddr = res->start;
4055 
4056 	/*
4057 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4058 	 * the PMCG registers which are reserved by the PMU driver.
4059 	 */
4060 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4061 	if (IS_ERR(smmu->base))
4062 		return PTR_ERR(smmu->base);
4063 
4064 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4065 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4066 					       ARM_SMMU_REG_SZ);
4067 		if (IS_ERR(smmu->page1))
4068 			return PTR_ERR(smmu->page1);
4069 	} else {
4070 		smmu->page1 = smmu->base;
4071 	}
4072 
4073 	/* Interrupt lines */
4074 
4075 	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0) {
		smmu->combined_irq = irq;
	} else {
4079 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4080 		if (irq > 0)
4081 			smmu->evtq.q.irq = irq;
4082 
4083 		irq = platform_get_irq_byname_optional(pdev, "priq");
4084 		if (irq > 0)
4085 			smmu->priq.q.irq = irq;
4086 
4087 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4088 		if (irq > 0)
4089 			smmu->gerr_irq = irq;
4090 	}

	/* Probe the h/w */
4092 	ret = arm_smmu_device_hw_probe(smmu);
4093 	if (ret)
4094 		return ret;
4095 
4096 	/* Initialise in-memory data structures */
4097 	ret = arm_smmu_init_structures(smmu);
4098 	if (ret)
4099 		return ret;
4100 
4101 	/* Record our private device structure */
4102 	platform_set_drvdata(pdev, smmu);
4103 
4104 	/* Reset the device */
4105 	ret = arm_smmu_device_reset(smmu, bypass);
4106 	if (ret)
4107 		return ret;
4108 
4109 	/* And we're up. Go go go! */
4110 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4111 				     "smmu3.%pa", &ioaddr);
4112 	if (ret)
4113 		return ret;
4114 
4115 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
4116 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
4117 
4118 	ret = iommu_device_register(&smmu->iommu);
4119 	if (ret) {
4120 		dev_err(dev, "Failed to register iommu\n");
4121 		return ret;
4122 	}
4123 
4124 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
4125 }
4126 
4127 static int arm_smmu_device_remove(struct platform_device *pdev)
4128 {
4129 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4130 
4131 	arm_smmu_set_bus_ops(NULL);
4132 	iommu_device_unregister(&smmu->iommu);
4133 	iommu_device_sysfs_remove(&smmu->iommu);
4134 	arm_smmu_device_disable(smmu);
4135 
4136 	return 0;
4137 }
4138 
4139 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4140 {
4141 	arm_smmu_device_remove(pdev);
4142 }
4143 
4144 static const struct of_device_id arm_smmu_of_match[] = {
4145 	{ .compatible = "arm,smmu-v3", },
4146 	{ },
4147 };
4148 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4149 
4150 static struct platform_driver arm_smmu_driver = {
4151 	.driver	= {
4152 		.name			= "arm-smmu-v3",
4153 		.of_match_table		= arm_smmu_of_match,
4154 		.suppress_bind_attrs	= true,
4155 	},
4156 	.probe	= arm_smmu_device_probe,
4157 	.remove	= arm_smmu_device_remove,
4158 	.shutdown = arm_smmu_device_shutdown,
4159 };
4160 module_platform_driver(arm_smmu_driver);
4161 
4162 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4163 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4164 MODULE_ALIAS("platform:arm-smmu-v3");
4165 MODULE_LICENSE("GPL v2");
4166