1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
3  */
4 #ifndef _IOMMUFD_H
5 #define _IOMMUFD_H
6 
7 #include <linux/types.h>
8 #include <linux/ioctl.h>
9 
10 #define IOMMUFD_TYPE (';') /* ioctl type character passed to _IO() by the IOMMU_* macros below */
11 
12 /**
13  * DOC: General ioctl format
14  *
15  * The ioctl interface follows a general format to allow for extensibility. Each
16  * ioctl is passed in a structure pointer as the argument providing the size of
17  * the structure in the first u32. The kernel checks that any structure space
18  * beyond what it understands is 0. This allows userspace to use the backward
19  * compatible portion while consistently using the newer, larger, structures.
20  *
21  * ioctls use a standard meaning for common errnos:
22  *
23  *  - ENOTTY: The IOCTL number itself is not supported at all
24  *  - E2BIG: The IOCTL number is supported, but the provided structure has a
25  *    non-zero value in a part the kernel does not understand.
26  *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27  *    understood, however a known field has a value the kernel does not
28  *    understand or support.
29  *  - EINVAL: Everything about the IOCTL was understood, but a field is not
30  *    correct.
31  *  - ENOENT: An ID or IOVA provided does not exist.
32  *  - ENOMEM: Out of memory.
33  *  - EOVERFLOW: Mathematics overflowed.
34  *
35  * Specific ioctls may also return additional errnos.
36  */
37 enum {
38 	IOMMUFD_CMD_BASE = 0x80, /* first iommufd command number */
39 	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, /* entries below are numbered by position */
40 	IOMMUFD_CMD_IOAS_ALLOC,
41 	IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
42 	IOMMUFD_CMD_IOAS_COPY,
43 	IOMMUFD_CMD_IOAS_IOVA_RANGES,
44 	IOMMUFD_CMD_IOAS_MAP,
45 	IOMMUFD_CMD_IOAS_UNMAP,
46 	IOMMUFD_CMD_OPTION,
47 	IOMMUFD_CMD_VFIO_IOAS,
48 	IOMMUFD_CMD_HWPT_ALLOC,
49 	IOMMUFD_CMD_GET_HW_INFO,
50 	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
51 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
52 	IOMMUFD_CMD_HWPT_INVALIDATE,
53 };
54 
55 /**
56  * struct iommu_destroy - ioctl(IOMMU_DESTROY)
57  * @size: sizeof(struct iommu_destroy)
58  * @id: iommufd object ID to destroy. Can be any destroyable object type.
59  *
60  * Destroy any object held within iommufd, identified by @id.
61  */
62 struct iommu_destroy {
63 	__u32 size;
64 	__u32 id;
65 };
66 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
67 
68 /**
69  * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
70  * @size: sizeof(struct iommu_ioas_alloc)
71  * @flags: Must be 0
72  * @out_ioas_id: Output IOAS ID of the newly allocated IOAS
73  *
74  * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
75  * to memory mapping.
76  */
77 struct iommu_ioas_alloc {
78 	__u32 size;
79 	__u32 flags;
80 	__u32 out_ioas_id;
81 };
82 #define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
83 
84 /**
85  * struct iommu_iova_range - An inclusive IOVA interval (array element type)
86  * @start: First IOVA
87  * @last: Inclusive last IOVA
88  *
89  * An interval in IOVA space, used as the element of allowed_iovas arrays.
90  */
91 struct iommu_iova_range {
92 	__aligned_u64 start;
93 	__aligned_u64 last;
94 };
95 
96 /**
97  * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
98  * @size: sizeof(struct iommu_ioas_iova_ranges)
99  * @ioas_id: IOAS ID to read ranges from
100  * @num_iovas: Input/Output total number of ranges in the IOAS
101  * @__reserved: Must be 0
102  * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
103  * @out_iova_alignment: Minimum alignment required for mapping IOVA
104  *
105  * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
106  * is not allowed. num_iovas will be set to the total number of iovas and
107  * the allowed_iovas[] will be filled in as space permits.
108  *
109  * The allowed ranges are dependent on the HW path the DMA operation takes, and
110  * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
111  * full range, and each attached device will narrow the ranges based on that
112  * device's HW restrictions. Detaching a device can widen the ranges. Userspace
113  * should query ranges after every attach/detach to know what IOVAs are valid
114  * for mapping.
115  *
116  * On input num_iovas is the length of the allowed_iovas array. On output it is
117  * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
118  * num_iovas to the required value if num_iovas is too small. In this case the
119  * caller should allocate a larger output array and re-issue the ioctl.
120  *
121  * out_iova_alignment returns the minimum IOVA alignment that can be given
122  * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
123  *
124  *   starting_iova % out_iova_alignment == 0
125  *   (starting_iova + length) % out_iova_alignment == 0
126  *
127  * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
128  * be higher than the system PAGE_SIZE.
129  */
130 struct iommu_ioas_iova_ranges {
131 	__u32 size;
132 	__u32 ioas_id;
133 	__u32 num_iovas;
134 	__u32 __reserved;
135 	__aligned_u64 allowed_iovas;
136 	__aligned_u64 out_iova_alignment;
137 };
138 #define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
139 
140 /**
141  * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
142  * @size: sizeof(struct iommu_ioas_allow_iovas)
143  * @ioas_id: IOAS ID to allow IOVAs from
144  * @num_iovas: Number of ranges in the @allowed_iovas input array
145  * @__reserved: Must be 0
146  * @allowed_iovas: Pointer to array of struct iommu_iova_range
147  *
148  * Ensure a range of IOVAs are always available for allocation. If this call
149  * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
150  * that are narrower than the ranges provided here. This call will fail if
151  * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
152  *
153  * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
154  * devices are attached the IOVA will narrow based on the device restrictions.
155  * When an allowed range is specified any narrowing will be refused, i.e. device
156  * attachment can fail if the device requires limiting within the allowed range.
157  *
158  * Automatic IOVA allocation is also impacted by this call. MAP will only
159  * allocate within the allowed IOVAs if they are present.
160  *
161  * This call replaces the entire allowed list with the given list.
162  */
163 struct iommu_ioas_allow_iovas {
164 	__u32 size;
165 	__u32 ioas_id;
166 	__u32 num_iovas;
167 	__u32 __reserved;
168 	__aligned_u64 allowed_iovas;
169 };
170 #define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
171 
172 /**
173  * enum iommufd_ioas_map_flags - Flags for map and copy
174  * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear, the kernel will compute an appropriate
175  *                             IOVA to place the mapping at
176  * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
177  * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
178  */
179 enum iommufd_ioas_map_flags {
180 	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
181 	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
182 	IOMMU_IOAS_MAP_READABLE = 1 << 2,
183 };
184 
185 /**
186  * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
187  * @size: sizeof(struct iommu_ioas_map)
188  * @flags: Combination of enum iommufd_ioas_map_flags
189  * @ioas_id: IOAS ID to change the mapping of
190  * @__reserved: Must be 0
191  * @user_va: Userspace pointer to start mapping from
192  * @length: Number of bytes to map
193  * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
194  *        then this must be provided as input.
195  *
196  * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
197  * mapping will be established at iova, otherwise a suitable location based on
198  * the reserved and allowed lists will be automatically selected and returned in
199  * iova.
200  *
201  * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
202  * be unused; an existing IOVA mapping cannot be replaced.
203  */
204 struct iommu_ioas_map {
205 	__u32 size;
206 	__u32 flags;
207 	__u32 ioas_id;
208 	__u32 __reserved;
209 	__aligned_u64 user_va;
210 	__aligned_u64 length;
211 	__aligned_u64 iova;
212 };
213 #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
214 
215 /**
216  * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
217  * @size: sizeof(struct iommu_ioas_copy)
218  * @flags: Combination of enum iommufd_ioas_map_flags
219  * @dst_ioas_id: IOAS ID to change the mapping of
220  * @src_ioas_id: IOAS ID to copy from
221  * @length: Number of bytes to copy and map
222  * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
223  *            set then this must be provided as input.
224  * @src_iova: IOVA to start the copy from
225  *
226  * Copy an already existing mapping from src_ioas_id and establish it in
227  * dst_ioas_id. The src iova/length must exactly match a range used with
228  * IOMMU_IOAS_MAP.
229  *
230  * This may be used to efficiently clone a subset of an IOAS to another, or as a
231  * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
232  * establishing equivalent new mappings, as internal resources are shared, and
233  * the kernel will pin the user memory only once.
234  */
235 struct iommu_ioas_copy {
236 	__u32 size;
237 	__u32 flags;
238 	__u32 dst_ioas_id;
239 	__u32 src_ioas_id;
240 	__aligned_u64 length;
241 	__aligned_u64 dst_iova;
242 	__aligned_u64 src_iova;
243 };
244 #define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
245 
246 /**
247  * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
248  * @size: sizeof(struct iommu_ioas_unmap)
249  * @ioas_id: IOAS ID to change the mapping of
250  * @iova: IOVA to start the unmapping at
251  * @length: Input the number of bytes to unmap. Output the bytes unmapped
252  *
253  * Unmap an IOVA range. The iova/length must be a superset of a previously
254  * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
255  * truncating ranges is not allowed. An @iova of 0 with a @length of U64_MAX
256  * will unmap everything.
257  */
258 struct iommu_ioas_unmap {
259 	__u32 size;
260 	__u32 ioas_id;
261 	__aligned_u64 iova;
262 	__aligned_u64 length;
263 };
264 #define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
265 
266 /**
267  * enum iommufd_option - Values for @option_id of struct iommu_option,
268  *                       selecting the option ioctl(IOMMU_OPTION) acts on
269  * @IOMMU_OPTION_RLIMIT_MODE:
270  *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
271  *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
272  *    based accounting. Global option, object_id must be 0.
273  * @IOMMU_OPTION_HUGE_PAGES:
274  *    Value 1 (default) allows contiguous pages to be combined when generating
275  *    iommu mappings. Value 0 disables combining, everything is mapped to
276  *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
277  *    option, the object_id must be the IOAS ID.
278  */
279 enum iommufd_option {
280 	IOMMU_OPTION_RLIMIT_MODE = 0,
281 	IOMMU_OPTION_HUGE_PAGES = 1,
282 };
283 
284 /**
285  * enum iommufd_option_ops - Values for @op of struct iommu_option,
286  *                           selecting the ioctl(IOMMU_OPTION) operation
287  * @IOMMU_OPTION_OP_SET: Set the option's value
288  * @IOMMU_OPTION_OP_GET: Get the option's value
289  */
290 enum iommufd_option_ops {
291 	IOMMU_OPTION_OP_SET = 0,
292 	IOMMU_OPTION_OP_GET = 1,
293 };
294 
295 /**
296  * struct iommu_option - iommu option multiplexer
297  * @size: sizeof(struct iommu_option)
298  * @option_id: One of enum iommufd_option
299  * @op: One of enum iommufd_option_ops
300  * @__reserved: Must be 0
301  * @object_id: ID of the object if required
302  * @val64: Option value to set or value returned on get
303  *
304  * Change a simple option value. This multiplexer allows controlling options
305  * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
306  * will return the current value.
307  */
308 struct iommu_option {
309 	__u32 size;
310 	__u32 option_id;
311 	__u16 op;
312 	__u16 __reserved;
313 	__u32 object_id;
314 	__aligned_u64 val64;
315 };
316 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
317 
318 /**
319  * enum iommufd_vfio_ioas_op - Operations for ioctl(IOMMU_VFIO_IOAS)
320  * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
321  * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
322  * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
323  */
324 enum iommufd_vfio_ioas_op {
325 	IOMMU_VFIO_IOAS_GET = 0,
326 	IOMMU_VFIO_IOAS_SET = 1,
327 	IOMMU_VFIO_IOAS_CLEAR = 2,
328 };
329 
330 /**
331  * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
332  * @size: sizeof(struct iommu_vfio_ioas)
333  * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
334  *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
335  * @op: One of enum iommufd_vfio_ioas_op
336  * @__reserved: Must be 0
337  *
338  * The VFIO compatibility support uses a single ioas because VFIO APIs do not
339  * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
340  * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
341  * compatibility ioas, either by taking what is already set, or auto creating
342  * one. From then on VFIO will continue to use that ioas and is not affected by
343  * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
344  */
345 struct iommu_vfio_ioas {
346 	__u32 size;
347 	__u32 ioas_id;
348 	__u16 op;
349 	__u16 __reserved;
350 };
351 #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
352 
353 /**
354  * enum iommufd_hwpt_alloc_flags - Bit flags for HWPT allocation
355  * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
356  *                                the parent HWPT in a nesting configuration.
357  * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
358  *                                   enforced on device attachment
359  */
360 enum iommufd_hwpt_alloc_flags {
361 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
362 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
363 };
364 
365 /**
366  * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
367  *                                entry attributes
368  * @IOMMU_VTD_S1_SRE: Supervisor request enable
369  * @IOMMU_VTD_S1_EAFE: Extended accessed flag enable
370  * @IOMMU_VTD_S1_WPE: Write protect enable
371  */
372 enum iommu_hwpt_vtd_s1_flags {
373 	IOMMU_VTD_S1_SRE = 1 << 0,
374 	IOMMU_VTD_S1_EAFE = 1 << 1,
375 	IOMMU_VTD_S1_WPE = 1 << 2,
376 };
377 
378 /**
379  * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
380  *                            info (IOMMU_HWPT_DATA_VTD_S1)
381  * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
382  * @pgtbl_addr: The base address of the stage-1 page table
383  * @addr_width: The address width of the stage-1 page table
384  * @__reserved: Must be 0
385  */
386 struct iommu_hwpt_vtd_s1 {
387 	__aligned_u64 flags;
388 	__aligned_u64 pgtbl_addr;
389 	__u32 addr_width;
390 	__u32 __reserved;
391 };
392 
393 /**
394  * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
395  * @IOMMU_HWPT_DATA_NONE: no data
396  * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
397  */
398 enum iommu_hwpt_data_type {
399 	IOMMU_HWPT_DATA_NONE, /* implicitly 0 */
400 	IOMMU_HWPT_DATA_VTD_S1, /* implicitly 1 */
401 };
402 
403 /**
404  * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
405  * @size: sizeof(struct iommu_hwpt_alloc)
406  * @flags: Combination of enum iommufd_hwpt_alloc_flags
407  * @dev_id: The device to allocate this HWPT for
408  * @pt_id: The IOAS or HWPT to connect this HWPT to
409  * @out_hwpt_id: The ID of the new HWPT
410  * @__reserved: Must be 0
411  * @data_type: One of enum iommu_hwpt_data_type
412  * @data_len: Length of the type specific data
413  * @data_uptr: User pointer to the type specific data
414  *
415  * Explicitly allocate a hardware page table object. This is the same object
416  * type that is returned by iommufd_device_attach() and represents the
417  * underlying iommu driver's iommu_domain kernel object.
418  *
419  * A kernel-managed HWPT will be created with the mappings from the given
420  * IOAS via the @pt_id. The @data_type for this allocation must be set to
421  * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
422  * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
423  *
424  * A user-managed nested HWPT will be created from a given parent HWPT via
425  * @pt_id, where the parent HWPT must have been allocated previously via the
426  * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
427  * must be set to a pre-defined type corresponding to an I/O page table
428  * type supported by the underlying IOMMU hardware.
429  *
430  * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
431  * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
432  * must be given.
433  */
434 struct iommu_hwpt_alloc {
435 	__u32 size;
436 	__u32 flags;
437 	__u32 dev_id;
438 	__u32 pt_id;
439 	__u32 out_hwpt_id;
440 	__u32 __reserved;
441 	__u32 data_type;
442 	__u32 data_len;
443 	__aligned_u64 data_uptr;
444 };
445 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
446 
447 /**
448  * enum iommu_hw_info_vtd_flags - Flags for struct iommu_hw_info_vtd @flags
449  * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
450  *                                         on a nested_parent domain.
451  *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
452  */
453 enum iommu_hw_info_vtd_flags {
454 	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
455 };
456 
457 /**
458  * struct iommu_hw_info_vtd - Intel VT-d hardware information
459  *
460  * @flags: Combination of enum iommu_hw_info_vtd_flags
461  * @__reserved: Must be 0
462  *
463  * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
464  *           section 11.4.2 Capability Register.
465  * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d
466  *            spec section 11.4.3 Extended Capability Register.
467  *
468  * Users need to understand the Intel VT-d specification to decode the
469  * register values.
470  */
471 struct iommu_hw_info_vtd {
472 	__u32 flags;
473 	__u32 __reserved;
474 	__aligned_u64 cap_reg;
475 	__aligned_u64 ecap_reg;
476 };
477 
478 /**
479  * enum iommu_hw_info_type - IOMMU Hardware Info Types
480  * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
481  *                           info
482  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
483  */
484 enum iommu_hw_info_type {
485 	IOMMU_HW_INFO_TYPE_NONE, /* implicitly 0 */
486 	IOMMU_HW_INFO_TYPE_INTEL_VTD, /* implicitly 1 */
487 };
488 
489 /**
490  * enum iommufd_hw_capabilities - Generic IOMMU hardware capability flags
491  * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
492  *                               If available, it means the following APIs
493  *                               are supported:
494  *
495  *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
496  *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
497  *
498  */
499 enum iommufd_hw_capabilities {
500 	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
501 };
502 
503 /**
504  * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
505  * @size: sizeof(struct iommu_hw_info)
506  * @flags: Must be 0
507  * @dev_id: The device bound to the iommufd
508  * @data_len: Input the length of a user buffer in bytes. Output the length of
509  *            data that kernel supports
510  * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
511  *             the iommu type specific hardware information data
512  * @out_data_type: Output the iommu hardware info type as defined in the enum
513  *                 iommu_hw_info_type.
514  * @out_capabilities: Output the generic iommu capability info type as defined
515  *                    in the enum iommufd_hw_capabilities.
516  * @__reserved: Must be 0
517  *
518  * Query an iommu type specific hardware information data from an iommu behind
519  * a given device that has been bound to iommufd. This hardware info data will
520  * be used to sync capabilities between the virtual iommu and the physical
521  * iommu, e.g. a nested translation setup needs to check the hardware info, so
522  * a guest stage-1 page table can be compatible with the physical iommu.
523  *
524  * To capture an iommu type specific hardware information data, @data_uptr and
525  * its length @data_len must be provided. Trailing bytes will be zeroed if the
526  * user buffer is larger than the data that kernel has. Otherwise, kernel only
527  * fills the buffer using the given length in @data_len. If the ioctl succeeds,
528  * @data_len will be updated to the length that kernel actually supports,
529  * @out_data_type will be filled to decode the data filled in the buffer
530  * pointed by @data_uptr. Input @data_len == zero is allowed.
531  */
532 struct iommu_hw_info {
533 	__u32 size;
534 	__u32 flags;
535 	__u32 dev_id;
536 	__u32 data_len;
537 	__aligned_u64 data_uptr;
538 	__u32 out_data_type;
539 	__u32 __reserved;
540 	__aligned_u64 out_capabilities;
541 };
542 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
543 
544 /**
545  * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
546  *                                              tracking
547  * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
548  */
549 enum iommufd_hwpt_set_dirty_tracking_flags {
550 	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
551 };
552 
553 /**
554  * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
555  * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
556  * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
557  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
558  * @__reserved: Must be 0
559  *
560  * Toggle dirty tracking on an HW pagetable; see @flags for the enable bit.
561  */
562 struct iommu_hwpt_set_dirty_tracking {
563 	__u32 size;
564 	__u32 flags;
565 	__u32 hwpt_id;
566 	__u32 __reserved;
567 };
568 #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
569 					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
570 
571 /**
572  * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
573  * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
574  *                                        any dirty bits metadata. This flag
575  *                                        can be passed when the next operation
576  *                                        is expected to be an unmap of the
577  *                                        same IOVA range.
578  *
579  */
580 enum iommufd_hwpt_get_dirty_bitmap_flags {
581 	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
582 };
583 
584 /**
585  * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
586  * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
587  * @hwpt_id: HW pagetable ID that represents the IOMMU domain
588  * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
589  * @__reserved: Must be 0
590  * @iova: base IOVA of the bitmap first bit
591  * @length: IOVA range size
592  * @page_size: page size granularity of each bit in the bitmap
593  * @data: bitmap where to set the dirty bits. Each bit of the bitmap
594  *        represents one page_size unit, counted from the base @iova.
595  *
596  * Checking a given IOVA is dirty:
597  *
598  *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
599  *
600  * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
601  * with the dirty IOVAs. In doing so it will also by default clear any
602  * dirty bit metadata set in the IOPTE.
603  */
604 struct iommu_hwpt_get_dirty_bitmap {
605 	__u32 size;
606 	__u32 hwpt_id;
607 	__u32 flags;
608 	__u32 __reserved;
609 	__aligned_u64 iova;
610 	__aligned_u64 length;
611 	__aligned_u64 page_size;
612 	__aligned_u64 data;
613 };
614 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
615 					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
616 
617 /**
618  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
619  *                                        Data Type
620  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
621  */
622 enum iommu_hwpt_invalidate_data_type {
623 	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, /* implicitly 0 */
624 };
625 
626 /**
627  * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
628  *                                           stage-1 cache invalidation
629  * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
630  *                            to the all-levels page structure cache or
631  *                            just the leaf PTE cache.
632  */
633 enum iommu_hwpt_vtd_s1_invalidate_flags {
634 	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
635 };
636 
637 /**
638  * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
639  *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
640  * @addr: The start address of the range to be invalidated. It needs to
641  *        be 4KB aligned.
642  * @npages: Number of contiguous 4K pages to be invalidated.
643  * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
644  * @__reserved: Must be 0
645  *
646  * The Intel VT-d specific invalidation data for user-managed stage-1 cache
647  * invalidation in nested translation. Userspace uses this structure to
648  * tell the impacted cache scope after modifying the stage-1 page table.
649  *
650  * All the caches related to the page table can be invalidated by setting
651  * @addr to 0 and @npages to U64_MAX.
652  *
653  * The device TLB will be invalidated automatically if ATS is enabled.
654  */
655 struct iommu_hwpt_vtd_s1_invalidate {
656 	__aligned_u64 addr;
657 	__aligned_u64 npages;
658 	__u32 flags;
659 	__u32 __reserved;
660 };
661 
662 /**
663  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
664  * @size: sizeof(struct iommu_hwpt_invalidate)
665  * @hwpt_id: ID of a nested HWPT for cache invalidation
666  * @data_uptr: User pointer to an array of driver-specific cache invalidation
667  *             data.
668  * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
669  *             type of all the entries in the invalidation request array. It
670  *             should be a type supported by the hwpt pointed by @hwpt_id.
671  * @entry_len: Length (in bytes) of a request entry in the request array
672  * @entry_num: Input the number of cache invalidation requests in the array.
673  *             Output the number of requests successfully handled by kernel.
674  * @__reserved: Must be 0.
675  *
676  * Invalidate the iommu cache for a user-managed page table. Modifications to a
677  * user-managed page table should be followed by this operation to sync cache.
678  * Each ioctl can support one or more cache invalidation requests in the array
679  * that has a total size of @entry_len * @entry_num.
680  *
681  * An empty invalidation request array by setting @entry_num==0 is allowed, and
682  * @entry_len and @data_uptr would be ignored in this case. This can be used to
683  * check if the given @data_type is supported or not by kernel.
684  */
685 struct iommu_hwpt_invalidate {
686 	__u32 size;
687 	__u32 hwpt_id;
688 	__aligned_u64 data_uptr;
689 	__u32 data_type;
690 	__u32 entry_len;
691 	__u32 entry_num;
692 	__u32 __reserved;
693 };
694 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
695 #endif
696