iommufd.h source code [Linux/include/uapi/linux/iommufd.h]

1	/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2	/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
3	*/
4	#ifndef _UAPI_IOMMUFD_H
5	#define _UAPI_IOMMUFD_H
6
7	#include <linux/ioctl.h>
8	#include <linux/types.h>
9
10	#define IOMMUFD_TYPE (';')
11
12	/**
13	* DOC: General ioctl format
14	*
15	* The ioctl interface follows a general format to allow for extensibility. Each
16	* ioctl is passed in a structure pointer as the argument providing the size of
17	* the structure in the first u32. The kernel checks that any structure space
18	* beyond what it understands is 0. This allows userspace to use the backward
19	* compatible portion while consistently using the newer, larger, structures.
20	*
21	* ioctls use a standard meaning for common errnos:
22	*
23	* - ENOTTY: The IOCTL number itself is not supported at all
24	* - E2BIG: The IOCTL number is supported, but the provided structure has
25	* non-zero in a part the kernel does not understand.
26	* - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27	* understood, however a known field has a value the kernel does not
28	* understand or support.
29	* - EINVAL: Everything about the IOCTL was understood, but a field is not
30	* correct.
31	* - ENOENT: An ID or IOVA provided does not exist.
32	* - ENOMEM: Out of memory.
33	* - EOVERFLOW: Mathematics overflowed.
34	*
35	* As well as additional errnos, within specific ioctls.
36	*/
37	enum {	/* ioctl command numbers, combined with IOMMUFD_TYPE by the _IO() request macros */
38	IOMMUFD_CMD_BASE = `0x80`,
39	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,	/* first command; shares the base value */
40	IOMMUFD_CMD_IOAS_ALLOC = `0x81`,
41	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = `0x82`,
42	IOMMUFD_CMD_IOAS_COPY = `0x83`,
43	IOMMUFD_CMD_IOAS_IOVA_RANGES = `0x84`,
44	IOMMUFD_CMD_IOAS_MAP = `0x85`,
45	IOMMUFD_CMD_IOAS_UNMAP = `0x86`,
46	IOMMUFD_CMD_OPTION = `0x87`,
47	IOMMUFD_CMD_VFIO_IOAS = `0x88`,
48	IOMMUFD_CMD_HWPT_ALLOC = `0x89`,
49	IOMMUFD_CMD_GET_HW_INFO = `0x8a`,
50	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = `0x8b`,
51	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = `0x8c`,
52	IOMMUFD_CMD_HWPT_INVALIDATE = `0x8d`,
53	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = `0x8e`,
54	IOMMUFD_CMD_IOAS_MAP_FILE = `0x8f`,
55	IOMMUFD_CMD_VIOMMU_ALLOC = `0x90`,
56	IOMMUFD_CMD_VDEVICE_ALLOC = `0x91`,
57	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = `0x92`,
58	IOMMUFD_CMD_VEVENTQ_ALLOC = `0x93`,
59	IOMMUFD_CMD_HW_QUEUE_ALLOC = `0x94`,
60	};
61
62	/**
63	* struct iommu_destroy - ioctl(IOMMU_DESTROY)
64	* @size: sizeof(struct iommu_destroy)
65	* @id: iommufd object ID to destroy. Can be any destroyable object type.
66	*
67	* Destroy any object held within iommufd.
68	*/
69	struct iommu_destroy {
70	__u32 size;
71	__u32 id;
72	};
73	#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
74
75	/**
76	* struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
77	* @size: sizeof(struct iommu_ioas_alloc)
78	* @flags: Must be 0
79	* @out_ioas_id: Output IOAS ID for the allocated object
80	*
81	* Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
82	* to memory mapping.
83	*/
84	struct iommu_ioas_alloc {
85	__u32 size;
86	__u32 flags;
87	__u32 out_ioas_id;
88	};
89	#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
90
91	/**
92	* struct iommu_iova_range - An interval in IOVA space
93	* @start: First IOVA
94	* @last: Inclusive last IOVA
95	*
96	* Array element type for IOMMU_IOAS_IOVA_RANGES and IOMMU_IOAS_ALLOW_IOVAS.
97	*/
98	struct iommu_iova_range {
99	__aligned_u64 start;
100	__aligned_u64 last;
101	};
102
103	/**
104	* struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
105	* @size: sizeof(struct iommu_ioas_iova_ranges)
106	* @ioas_id: IOAS ID to read ranges from
107	* @num_iovas: Input/Output total number of ranges in the IOAS
108	* @__reserved: Must be 0
109	* @allowed_iovas: Pointer to the output array of struct iommu_iova_range
110	* @out_iova_alignment: Minimum alignment required for mapping IOVA
111	*
112	* Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
113	* is not allowed. num_iovas will be set to the total number of iovas and
114	* the allowed_iovas[] will be filled in as space permits.
115	*
116	* The allowed ranges are dependent on the HW path the DMA operation takes, and
117	* can change during the lifetime of the IOAS. A fresh empty IOAS will have a
118	* full range, and each attached device will narrow the ranges based on that
119	* device's HW restrictions. Detaching a device can widen the ranges. Userspace
120	* should query ranges after every attach/detach to know what IOVAs are valid
121	* for mapping.
122	*
123	* On input num_iovas is the length of the allowed_iovas array. On output it is
124	* the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
125	* num_iovas to the required value if num_iovas is too small. In this case the
126	* caller should allocate a larger output array and re-issue the ioctl.
127	*
128	* out_iova_alignment returns the minimum IOVA alignment that can be given
129	* to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
130	*
131	* starting_iova % out_iova_alignment == 0
132	* (starting_iova + length) % out_iova_alignment == 0
133	*
134	* out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
135	* be higher than the system PAGE_SIZE.
136	*/
137	struct iommu_ioas_iova_ranges {
138	__u32 size;
139	__u32 ioas_id;
140	__u32 num_iovas;
141	__u32 __reserved;
142	__aligned_u64 allowed_iovas;
143	__aligned_u64 out_iova_alignment;
144	};
145	#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
146
147	/**
148	* struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
149	* @size: sizeof(struct iommu_ioas_allow_iovas)
150	* @ioas_id: IOAS ID to allow IOVAs from
151	* @num_iovas: Input/Output total number of ranges in the IOAS
152	* @__reserved: Must be 0
153	* @allowed_iovas: Pointer to array of struct iommu_iova_range
154	*
155	* Ensure a range of IOVAs are always available for allocation. If this call
156	* succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
157	* that are narrower than the ranges provided here. This call will fail if
158	* IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
159	*
160	* When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
161	* devices are attached the IOVA will narrow based on the device restrictions.
162	* When an allowed range is specified any narrowing will be refused, i.e. device
163	* attachment can fail if the device requires limiting within the allowed range.
164	*
165	* Automatic IOVA allocation is also impacted by this call. MAP will only
166	* allocate within the allowed IOVAs if they are present.
167	*
168	* This call replaces the entire allowed list with the given list.
169	*/
170	struct iommu_ioas_allow_iovas {
171	__u32 size;
172	__u32 ioas_id;
173	__u32 num_iovas;
174	__u32 __reserved;
175	__aligned_u64 allowed_iovas;
176	};
177	#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
178
179	/**
180	* enum iommufd_ioas_map_flags - Flags for map and copy
181	* @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
182	* IOVA to place the mapping at
183	* @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
184	* @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
185	*/
186	enum iommufd_ioas_map_flags {
187	IOMMU_IOAS_MAP_FIXED_IOVA = `1` << `0`,
188	IOMMU_IOAS_MAP_WRITEABLE = `1` << `1`,
189	IOMMU_IOAS_MAP_READABLE = `1` << `2`,
190	};
191
192	/**
193	* struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
194	* @size: sizeof(struct iommu_ioas_map)
195	* @flags: Combination of enum iommufd_ioas_map_flags
196	* @ioas_id: IOAS ID to change the mapping of
197	* @__reserved: Must be 0
198	* @user_va: Userspace pointer to start mapping from
199	* @length: Number of bytes to map
200	* @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
201	* then this must be provided as input.
202	*
203	* Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
204	* mapping will be established at iova, otherwise a suitable location based on
205	* the reserved and allowed lists will be automatically selected and returned in
206	* iova.
207	*
208	* If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
209	* be unused, existing IOVA cannot be replaced.
210	*/
211	struct iommu_ioas_map {
212	__u32 size;
213	__u32 flags;
214	__u32 ioas_id;
215	__u32 __reserved;
216	__aligned_u64 user_va;
217	__aligned_u64 length;
218	__aligned_u64 iova;
219	};
220	#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
221
222	/**
223	* struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
224	* @size: sizeof(struct iommu_ioas_map_file)
225	* @flags: same as for iommu_ioas_map
226	* @ioas_id: same as for iommu_ioas_map
227	* @fd: the memfd to map
228	* @start: byte offset from start of file to map from
229	* @length: same as for iommu_ioas_map
230	* @iova: same as for iommu_ioas_map
231	*
232	* Set an IOVA mapping from a memfd file. All other arguments and semantics
233	* match those of IOMMU_IOAS_MAP.
234	*/
235	struct iommu_ioas_map_file {
236	__u32 size;
237	__u32 flags;
238	__u32 ioas_id;
239	__s32 fd;
240	__aligned_u64 start;
241	__aligned_u64 length;
242	__aligned_u64 iova;
243	};
244	#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
245
246	/**
247	* struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
248	* @size: sizeof(struct iommu_ioas_copy)
249	* @flags: Combination of enum iommufd_ioas_map_flags
250	* @dst_ioas_id: IOAS ID to change the mapping of
251	* @src_ioas_id: IOAS ID to copy from
252	* @length: Number of bytes to copy and map
253	* @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
254	* set then this must be provided as input.
255	* @src_iova: IOVA to start the copy
256	*
257	* Copy an already existing mapping from src_ioas_id and establish it in
258	* dst_ioas_id. The src iova/length must exactly match a range used with
259	* IOMMU_IOAS_MAP.
260	*
261	* This may be used to efficiently clone a subset of an IOAS to another, or as a
262	* kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
263	* establishing equivalent new mappings, as internal resources are shared, and
264	* the kernel will pin the user memory only once.
265	*/
266	struct iommu_ioas_copy {
267	__u32 size;
268	__u32 flags;
269	__u32 dst_ioas_id;
270	__u32 src_ioas_id;
271	__aligned_u64 length;
272	__aligned_u64 dst_iova;
273	__aligned_u64 src_iova;
274	};
275	#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
276
277	/**
278	* struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
279	* @size: sizeof(struct iommu_ioas_unmap)
280	* @ioas_id: IOAS ID to change the mapping of
281	* @iova: IOVA to start the unmapping at
282	* @length: Number of bytes to unmap, and return back the bytes unmapped
283	*
284	* Unmap an IOVA range. The iova/length must be a superset of a previously
285	* mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
286	* truncating ranges is not allowed. The values 0 to U64_MAX will unmap
287	* everything.
288	*/
289	struct iommu_ioas_unmap {
290	__u32 size;
291	__u32 ioas_id;
292	__aligned_u64 iova;
293	__aligned_u64 length;
294	};
295	#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
296
297	/**
298	* enum iommufd_option - Option IDs selected through the option_id field of
299	* ioctl(IOMMU_OPTION)
300	* @IOMMU_OPTION_RLIMIT_MODE:
301	* Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
302	* to invoke this. Value 0 (default) is user based accounting, 1 uses process
303	* based accounting. Global option, object_id must be 0
304	* @IOMMU_OPTION_HUGE_PAGES:
305	* Value 1 (default) allows contiguous pages to be combined when generating
306	* iommu mappings. Value 0 disables combining, everything is mapped to
307	* PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
308	* option, the object_id must be the IOAS ID.
309	*/
310	enum iommufd_option {
311	IOMMU_OPTION_RLIMIT_MODE = `0`,
312	IOMMU_OPTION_HUGE_PAGES = `1`,
313	};
314
315	/**
316	* enum iommufd_option_ops - Operations selected through the op field of
317	* ioctl(IOMMU_OPTION)
318	* @IOMMU_OPTION_OP_SET: Set the option's value
319	* @IOMMU_OPTION_OP_GET: Get the option's value
320	*/
321	enum iommufd_option_ops {
322	IOMMU_OPTION_OP_SET = `0`,
323	IOMMU_OPTION_OP_GET = `1`,
324	};
325
326	/**
327	* struct iommu_option - iommu option multiplexer
328	* @size: sizeof(struct iommu_option)
329	* @option_id: One of enum iommufd_option
330	* @op: One of enum iommufd_option_ops
331	* @__reserved: Must be 0
332	* @object_id: ID of the object if required
333	* @val64: Option value to set or value returned on get
334	*
335	* Change a simple option value. This multiplexer allows controlling options
336	* on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
337	* will return the current value.
338	*/
339	struct iommu_option {
340	__u32 size;
341	__u32 option_id;
342	__u16 op;
343	__u16 __reserved;
344	__u32 object_id;
345	__aligned_u64 val64;
346	};
347	#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
348
349	/**
350	* enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
351	* @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
352	* @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
353	* @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
354	*/
355	enum iommufd_vfio_ioas_op {
356	IOMMU_VFIO_IOAS_GET = `0`,
357	IOMMU_VFIO_IOAS_SET = `1`,
358	IOMMU_VFIO_IOAS_CLEAR = `2`,
359	};
360
361	/**
362	* struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
363	* @size: sizeof(struct iommu_vfio_ioas)
364	* @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
365	* For IOMMU_VFIO_IOAS_GET will output the IOAS ID
366	* @op: One of enum iommufd_vfio_ioas_op
367	* @__reserved: Must be 0
368	*
369	* The VFIO compatibility support uses a single ioas because VFIO APIs do not
370	* support the ID field. Set or Get the IOAS that VFIO compatibility will use.
371	* When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
372	* compatibility ioas, either by taking what is already set, or auto creating
373	* one. From then on VFIO will continue to use that ioas and is not affected by
374	* this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
375	*/
376	struct iommu_vfio_ioas {
377	__u32 size;
378	__u32 ioas_id;
379	__u16 op;
380	__u16 __reserved;
381	};
382	#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
383
384	/**
385	* enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
386	* @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
387	* the parent HWPT in a nesting configuration.
388	* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
389	* enforced on device attachment
390	* @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
391	* valid.
392	* @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
393	* domain can be attached to any PASID on the device.
394	* Any domain attached to the non-PASID part of the
395	* device must also be flagged, otherwise attaching a
396	* PASID will be blocked.
397	* For the user that wants to attach PASID, ioas is
398	* not recommended for both the non-PASID part
399	* and PASID part of the device.
400	* If IOMMU does not support PASID it will return
401	* error (-EOPNOTSUPP).
402	*/
403	enum iommufd_hwpt_alloc_flags {
404	IOMMU_HWPT_ALLOC_NEST_PARENT = `1` << `0`,
405	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = `1` << `1`,
406	IOMMU_HWPT_FAULT_ID_VALID = `1` << `2`,
407	IOMMU_HWPT_ALLOC_PASID = `1` << `3`,
408	};
409
410	/**
411	* enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
412	* entry attributes
413	* @IOMMU_VTD_S1_SRE: Supervisor request
414	* @IOMMU_VTD_S1_EAFE: Extended access enable
415	* @IOMMU_VTD_S1_WPE: Write protect enable
416	*/
417	enum iommu_hwpt_vtd_s1_flags {
418	IOMMU_VTD_S1_SRE = `1` << `0`,
419	IOMMU_VTD_S1_EAFE = `1` << `1`,
420	IOMMU_VTD_S1_WPE = `1` << `2`,
421	};
422
423	/**
424	* struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
425	* info (IOMMU_HWPT_DATA_VTD_S1)
426	* @flags: Combination of enum iommu_hwpt_vtd_s1_flags
427	* @pgtbl_addr: The base address of the stage-1 page table.
428	* @addr_width: The address width of the stage-1 page table
429	* @__reserved: Must be 0
430	*/
431	struct iommu_hwpt_vtd_s1 {
432	__aligned_u64 flags;
433	__aligned_u64 pgtbl_addr;
434	__u32 addr_width;
435	__u32 __reserved;
436	};
437
438	/**
439	* struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
440	* (IOMMU_HWPT_DATA_ARM_SMMUV3)
441	*
442	* @ste: The first two double words of the user space Stream Table Entry for
443	* the translation. Must be little-endian.
444	* Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
445	* - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
446	* - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
447	*
448	* -EIO will be returned if @ste is not legal or contains any non-allowed field.
449	* Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
450	* nested domain will translate the same as the nesting parent. The S1 will
451	* install a Context Descriptor Table pointing at userspace memory translated
452	* by the nesting parent.
453	*/
454	struct iommu_hwpt_arm_smmuv3 {
455	__aligned_le64 ste[`2`];
456	};
457
458	/**
459	* enum iommu_hwpt_data_type - IOMMU HWPT Data Type
460	* @IOMMU_HWPT_DATA_NONE: no data
461	* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
462	* @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
463	*/
464	enum iommu_hwpt_data_type {
465	IOMMU_HWPT_DATA_NONE = `0`,
466	IOMMU_HWPT_DATA_VTD_S1 = `1`,
467	IOMMU_HWPT_DATA_ARM_SMMUV3 = `2`,
468	};
469
470	/**
471	* struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
472	* @size: sizeof(struct iommu_hwpt_alloc)
473	* @flags: Combination of enum iommufd_hwpt_alloc_flags
474	* @dev_id: The device to allocate this HWPT for
475	* @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
476	* @out_hwpt_id: The ID of the new HWPT
477	* @__reserved: Must be 0
478	* @data_type: One of enum iommu_hwpt_data_type
479	* @data_len: Length of the type specific data
480	* @data_uptr: User pointer to the type specific data
481	* @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
482	* IOMMU_HWPT_FAULT_ID_VALID is set in the @flags field.
483	* @__reserved2: Padding to 64-bit alignment. Must be 0.
484	*
485	* Explicitly allocate a hardware page table object. This is the same object
486	* type that is returned by iommufd_device_attach() and represents the
487	* underlying iommu driver's iommu_domain kernel object.
488	*
489	* A kernel-managed HWPT will be created with the mappings from the given
490	* IOAS via the @pt_id. The @data_type for this allocation must be set to
491	* IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
492	* nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
493	*
494	* A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
495	* parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
496	* allocated previously via the same ioctl from a given IOAS (@pt_id). In this
497	* case, the @data_type must be set to a pre-defined type corresponding to an
498	* I/O page table type supported by the underlying IOMMU hardware. The device
499	* via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
500	* instance.
501	*
502	* If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
503	* @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
504	* must be given.
505	*/
506	struct iommu_hwpt_alloc {
507	__u32 size;
508	__u32 flags;
509	__u32 dev_id;
510	__u32 pt_id;
511	__u32 out_hwpt_id;
512	__u32 __reserved;
513	__u32 data_type;
514	__u32 data_len;
515	__aligned_u64 data_uptr;
516	__u32 fault_id;
517	__u32 __reserved2;
518	};
519	#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
520
521	/**
522	* enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
523	* @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
524	* on a nested_parent domain.
525	* https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
526	*/
527	enum iommu_hw_info_vtd_flags {
528	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = `1` << `0`,
529	};
530
531	/**
532	* struct iommu_hw_info_vtd - Intel VT-d hardware information
533	*
534	* @flags: Combination of enum iommu_hw_info_vtd_flags
535	* @__reserved: Must be 0
536	*
537	* @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
538	* section 11.4.2 Capability Register.
539	* @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d
540	* spec section 11.4.3 Extended Capability Register.
541	*
542	* User needs to understand the Intel VT-d specification to decode the
543	* register value.
544	*/
545	struct iommu_hw_info_vtd {
546	__u32 flags;
547	__u32 __reserved;
548	__aligned_u64 cap_reg;
549	__aligned_u64 ecap_reg;
550	};
551
552	/**
553	* struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
554	* (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
555	*
556	* @flags: Must be set to 0
557	* @__reserved: Must be 0
558	* @idr: Implemented features for ARM SMMU Non-secure programming interface
559	* @iidr: Information about the implementation and implementer of ARM SMMU,
560	* and architecture version supported
561	* @aidr: ARM SMMU architecture version
562	*
563	* For the details of @idr, @iidr and @aidr, please refer to the chapters
564	* from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
565	*
566	* This reports the raw HW capability, and not all bits are meaningful to be
567	* read by userspace. Only the following fields should be used:
568	*
569	* idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN, CD2L, ASID16, TTF
570	* idr[1]: SIDSIZE, SSIDSIZE
571	* idr[3]: BBML, RIL
572	* idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
573	*
574	* - S1P should be assumed to be true if a NESTED HWPT can be created
575	* - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
576	* true.
577	* - ATS is a per-device property. If the VMM describes any devices as ATS
578	* capable in ACPI/DT it should set the corresponding idr.
579	*
580	* This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is
581	* important that VMMs do not read bits outside the list to allow for
582	* compatibility with future kernels. Several features in the SMMUv3
583	* architecture are not currently supported by the kernel for nesting: HTTU,
584	* BTM, MPAM and others.
585	*/
586	struct iommu_hw_info_arm_smmuv3 {
587	__u32 flags;
588	__u32 __reserved;
589	__u32 idr[`6`];
590	__u32 iidr;
591	__u32 aidr;
592	};
593
594	/**
595	* struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware
596	* Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV)
597	*
598	* @flags: Must be 0
599	* @version: Version number for the CMDQ-V HW for PARAM bits[03:00]
600	* @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04]
601	* @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12]
602	* @__reserved: Must be 0
603	*
604	* VMM can use these fields directly in its emulated global PARAM register. Note
605	* that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM
606	* bits[11:08] should be set to 0 for log2 of the total number of VINTFs.
607	*/
608	struct iommu_hw_info_tegra241_cmdqv {
609	__u32 flags;
610	__u8 version;
611	__u8 log2vcmdqs;
612	__u8 log2vsids;
613	__u8 __reserved;
614	};
615
616	/**
617	* enum iommu_hw_info_type - IOMMU Hardware Info Types
618	* @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
619	* info
620	* @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type
621	* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
622	* @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
623	* @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
624	* SMMUv3) info type
625	*/
626	enum iommu_hw_info_type {
627	IOMMU_HW_INFO_TYPE_NONE = `0`,
628	IOMMU_HW_INFO_TYPE_DEFAULT = `0`,
629	IOMMU_HW_INFO_TYPE_INTEL_VTD = `1`,
630	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = `2`,
631	IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = `3`,
632	};
633
634	/**
635	* enum iommufd_hw_capabilities - Generic iommu capability bits
636	* @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
637	* If available, it means the following APIs
638	* are supported:
639	*
640	* IOMMU_HWPT_GET_DIRTY_BITMAP
641	* IOMMU_HWPT_SET_DIRTY_TRACKING
642	*
643	* @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
644	* when the struct
645	* iommu_hw_info::out_max_pasid_log2 is zero.
646	* @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
647	* when the struct
648	* iommu_hw_info::out_max_pasid_log2 is zero.
649	*/
650	enum iommufd_hw_capabilities {
651	IOMMU_HW_CAP_DIRTY_TRACKING = `1` << `0`,
652	IOMMU_HW_CAP_PCI_PASID_EXEC = `1` << `1`,
653	IOMMU_HW_CAP_PCI_PASID_PRIV = `1` << `2`,
654	};
655
656	/**
657	* enum iommufd_hw_info_flags - Flags for iommu_hw_info
658	* @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type
659	* for user space to request for a specific info
660	*/
661	enum iommufd_hw_info_flags {
662	IOMMU_HW_INFO_FLAG_INPUT_TYPE = `1` << `0`,
663	};
664
665	/**
666	* struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
667	* @size: sizeof(struct iommu_hw_info)
668	* @flags: Combination of enum iommufd_hw_info_flags
669	* @dev_id: The device bound to the iommufd
670	* @data_len: Input the length of a user buffer in bytes. Output the length of
671	* data that kernel supports
672	* @data_uptr: User pointer to a user-space buffer used by the kernel to fill
673	* the iommu type specific hardware information data
674	* @in_data_type: This shares the same field with @out_data_type, making it be
675	* a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is
676	* set, an input type carried via this @in_data_type field will
677	* be valid, requesting for the info data to the given type. If
678	* IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will
679	* be seen as IOMMU_HW_INFO_TYPE_DEFAULT
680	* @out_data_type: Output the iommu hardware info type as defined in the enum
681	* iommu_hw_info_type.
682	* @out_capabilities: Output the generic iommu capability info type as defined
683	* in the enum iommufd_hw_capabilities.
684	* @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
685	* PCI devices turn to out_capabilities to check if the
686	* specific capabilities are supported or not.
687	* @__reserved: Must be 0
688	*
689	* Query an iommu type specific hardware information data from an iommu behind
690	* a given device that has been bound to iommufd. This hardware info data will
691	* be used to sync capabilities between the virtual iommu and the physical
692	* iommu, e.g. a nested translation setup needs to check the hardware info, so
693	* a guest stage-1 page table can be compatible with the physical iommu.
694	*
695	* To capture an iommu type specific hardware information data, @data_uptr and
696	* its length @data_len must be provided. Trailing bytes will be zeroed if the
697	* user buffer is larger than the data that kernel has. Otherwise, kernel only
698	* fills the buffer using the given length in @data_len. If the ioctl succeeds,
699	* @data_len will be updated to the length that kernel actually supports,
700	* @out_data_type will be filled to decode the data filled in the buffer
701	* pointed by @data_uptr. Input @data_len == zero is allowed.
702	*/
703	struct iommu_hw_info {
704	__u32 size;
705	__u32 flags;
706	__u32 dev_id;
707	__u32 data_len;
708	__aligned_u64 data_uptr;
709	union {
710	__u32 in_data_type;
711	__u32 out_data_type;
712	};
713	__u8 out_max_pasid_log2;
714	__u8 __reserved[`3`];
715	__aligned_u64 out_capabilities;
716	};
717	#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
718
719	/**
720	* enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
721	* tracking
722	* @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
723	*/
724	enum iommufd_hwpt_set_dirty_tracking_flags {
725	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = `1`,
726	};
727
728	/**
729	* struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
730	* @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
731	* @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
732	* @hwpt_id: HW pagetable ID that represents the IOMMU domain
733	* @__reserved: Must be 0
734	*
735	* Toggle dirty tracking on an HW pagetable.
736	*/
737	struct iommu_hwpt_set_dirty_tracking {
738	__u32 size;
739	__u32 flags;
740	__u32 hwpt_id;
741	__u32 __reserved;
742	};
743	#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
744	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
745
746	/**
747	* enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
748	* @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
749	* any dirty bits metadata. This flag
750	* can be passed in the expectation
751	* where the next operation is an unmap
752	* of the same IOVA range.
753	*
754	*/
755	enum iommufd_hwpt_get_dirty_bitmap_flags {
756	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = `1`,
757	};
758
759	/**
760	* struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
761	* @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
762	* @hwpt_id: HW pagetable ID that represents the IOMMU domain
763	* @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
764	* @__reserved: Must be 0
765	* @iova: base IOVA of the bitmap first bit
766	* @length: IOVA range size
767	* @page_size: page size granularity of each bit in the bitmap
768	* @data: bitmap where to set the dirty bits. The bitmap bits each
769	* represent a page_size which you deviate from an arbitrary iova.
770	*
771	* Checking a given IOVA is dirty:
772	*
773	* data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
774	*
775	* Walk the IOMMU pagetables for a given IOVA range to return a bitmap
776	* with the dirty IOVAs. In doing so it will also by default clear any
777	* dirty bit metadata set in the IOPTE.
778	*/
779	struct iommu_hwpt_get_dirty_bitmap {
780	__u32 size;
781	__u32 hwpt_id;
782	__u32 flags;
783	__u32 __reserved;
784	__aligned_u64 iova;
785	__aligned_u64 length;
786	__aligned_u64 page_size;
787	__aligned_u64 data;
788	};
789	#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
790	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
791
792	/**
793	* enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
794	* Data Type
795	* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
796	* @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
797	*/
798	enum iommu_hwpt_invalidate_data_type {
799	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = `0`,
800	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = `1`,
801	};
802
803	/**
804	* enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
805	* stage-1 cache invalidation
806	* @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
807	* to all-levels page structure cache or just
808	* the leaf PTE cache.
809	*/
810	enum iommu_hwpt_vtd_s1_invalidate_flags {
811	IOMMU_VTD_INV_FLAGS_LEAF = `1` << `0`,
812	};
813
814	/**
815	* struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
816	* (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
817	* @addr: The start address of the range to be invalidated. It needs to
818	* be 4KB aligned.
819	* @npages: Number of contiguous 4K pages to be invalidated.
820	* @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
821	* @__reserved: Must be 0
822	*
823	* The Intel VT-d specific invalidation data for user-managed stage-1 cache
824	* invalidation in nested translation. Userspace uses this structure to
825	* tell the impacted cache scope after modifying the stage-1 page table.
826	*
827	* All the caches related to the page table can be invalidated by setting
828	* @addr to 0 and @npages to U64_MAX.
829	*
830	* The device TLB will be invalidated automatically if ATS is enabled.
831	*/
832	struct iommu_hwpt_vtd_s1_invalidate {
833	__aligned_u64 addr;
834	__aligned_u64 npages;
835	__u32 flags;
836	__u32 __reserved;
837	};
838
/**
 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
 *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
 *       Must be little-endian.
 *
 * Supported command list only when passing in a vIOMMU via @hwpt_id:
 *     CMDQ_OP_TLBI_NSNH_ALL
 *     CMDQ_OP_TLBI_NH_VA
 *     CMDQ_OP_TLBI_NH_VAA
 *     CMDQ_OP_TLBI_NH_ALL
 *     CMDQ_OP_TLBI_NH_ASID
 *     CMDQ_OP_ATC_INV
 *     CMDQ_OP_CFGI_CD
 *     CMDQ_OP_CFGI_CD_ALL
 *
 * -EIO will be returned if the command is not supported.
 */
struct iommu_viommu_arm_smmuv3_invalidate {
	__aligned_le64 cmd[2];
};
860
/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
 * on a user-managed page table should be followed by this operation, if a HWPT
 * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
 * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
 *
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array by setting @entry_num==0 is allowed, and
 * @entry_len and @data_uptr would be ignored in this case. This can be used to
 * check if the given @data_type is supported or not by kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
897
/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};
908
/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};
925
/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};
954
/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};
968
/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of response code in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};
978
/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
995
/**
 * enum iommu_viommu_type - Virtual IOMMU Type
 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
 * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                    SMMUv3) enabled ARM SMMUv3 type
 */
enum iommu_viommu_type {
	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
};
1008
/**
 * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface
 *                                      (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
 * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0
 * @out_vintf_mmap_length: mmap length argument for VINTF's page0
 *
 * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel
 * for user space to mmap the VINTF page0 from the host physical address space
 * to the guest physical address space so that a guest kernel can directly R/W
 * access to the VINTF page0 in order to control its virtual command queues.
 */
struct iommu_viommu_tegra241_cmdqv {
	__aligned_u64 out_vintf_mmap_offset;
	__aligned_u64 out_vintf_mmap_length;
};
1024
/**
 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
 * @size: sizeof(struct iommu_viommu_alloc)
 * @flags: Must be 0
 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
 * @hwpt_id: ID of a nesting parent HWPT to associate to
 * @out_viommu_id: Output virtual IOMMU ID for the allocated object
 * @data_len: Length of the type specific data
 * @__reserved: Must be 0
 * @data_uptr: User pointer to a driver-specific virtual IOMMU data
 *
 * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
 * virtualization support that is a security-isolated slice of the real IOMMU HW
 * that is unique to a specific VM. Operations global to the IOMMU are connected
 * to the vIOMMU, such as:
 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 * - Non-device-affiliated event reporting, e.g. invalidation queue errors
 * - Access to a sharable nesting parent pagetable across physical IOMMUs
 * - Virtualization of various platforms IDs, e.g. RIDs and others
 * - Delivery of paravirtualized invalidation
 * - Direct assigned invalidation queues
 * - Direct assigned interrupts
 */
struct iommu_viommu_alloc {
	__u32 size;
	__u32 flags;
	__u32 type;
	__u32 dev_id;
	__u32 hwpt_id;
	__u32 out_viommu_id;
	__u32 data_len;
	__u32 __reserved;
	__aligned_u64 data_uptr;
};
#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
1061
/**
 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
 * @size: sizeof(struct iommu_vdevice_alloc)
 * @viommu_id: vIOMMU ID to associate with the virtual device
 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
 *           of AMD IOMMU, and vRID of Intel VT-d
 *
 * Allocate a virtual device instance (for a physical device) against a vIOMMU.
 * This instance holds the device's information (related to its vIOMMU) in a VM.
 * User should use IOMMU_DESTROY to destroy the virtual device before
 * destroying the physical device (by closing vfio_cdev fd). Otherwise the
 * virtual device would be forcibly destroyed on physical device destruction,
 * and its vdevice_id would be permanently leaked (unremovable & unreusable)
 * until iommu fd closed.
 */
struct iommu_vdevice_alloc {
	__u32 size;
	__u32 viommu_id;
	__u32 dev_id;
	__u32 out_vdevice_id;
	__aligned_u64 virt_id;
};
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
1087
/**
 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
 * @size: sizeof(struct iommu_ioas_change_process)
 * @__reserved: Must be 0
 *
 * This transfers pinned memory counts for every memory map in every IOAS
 * in the context to the current process. This only supports maps created
 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
 * If the ioctl returns a failure status, then nothing is changed.
 *
 * This API is useful for transferring operation of a device from one process
 * to another, such as during userland live update.
 */
struct iommu_ioas_change_process {
	__u32 size;
	__u32 __reserved;
};

#define IOMMU_IOAS_CHANGE_PROCESS \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
1108
/**
 * enum iommu_veventq_flag - flag for struct iommufd_vevent_header
 * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
 */
enum iommu_veventq_flag {
	IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
};
1116
/**
 * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
 * @flags: Combination of enum iommu_veventq_flag
 * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
 *            [0, INT_MAX] where the following index of INT_MAX is 0
 *
 * Each iommufd_vevent_header reports a sequence index of the following vEVENT:
 *
 * +----------------------+-------+----------------------+-------+---+-------+
 * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
 * +----------------------+-------+----------------------+-------+---+-------+
 *
 * And this sequence index is expected to be monotonic to the sequence index of
 * the previous vEVENT. If two adjacent sequence indexes have a delta larger
 * than 1, it means that delta - 1 vEVENTs have been lost, e.g. two lost
 * vEVENTs:
 *
 * +-----+----------------------+-------+----------------------+-------+-----+
 * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
 * +-----+----------------------+-------+----------------------+-------+-----+
 *
 * If a vEVENT is lost at the tail of the vEVENTQ and there is no following
 * vEVENT providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
 * header would be added to the tail, and no data would follow this header:
 *
 * +--+----------------------+-------+-----------------------------------------+
 * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
 * +--+----------------------+-------+-----------------------------------------+
 */
struct iommufd_vevent_header {
	__u32 flags;
	__u32 sequence;
};
1149
/**
 * enum iommu_veventq_type - Virtual Event Queue Type
 * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
 * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ
 */
enum iommu_veventq_type {
	IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
	IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2,
};
1161
/**
 * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
 *                                  (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
 * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
 *       Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec)
 *       - 0x04 C_BAD_STE
 *       - 0x06 F_STREAM_DISABLED
 *       - 0x08 C_BAD_SUBSTREAMID
 *       - 0x0a C_BAD_CD
 *       - 0x10 F_TRANSLATION
 *       - 0x11 F_ADDR_SIZE
 *       - 0x12 F_ACCESS
 *       - 0x13 F_PERMISSION
 *
 * StreamID field reports a virtual device ID. To receive a virtual event for a
 * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
 */
struct iommu_vevent_arm_smmuv3 {
	__aligned_le64 evt[4];
};
1182
/**
 * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ
 *                                      (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV)
 * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian.
 *                  (Refer to register LVCMDQ_ERR_MAPs per VINTF)
 *
 * The 128-bit register value from HW exclusively reflects the error bits for a
 * Virtual Interface represented by a vIOMMU object. Read and report directly.
 */
struct iommu_vevent_tegra241_cmdqv {
	__aligned_le64 lvcmdq_err_map[2];
};
1195
/**
 * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
 * @size: sizeof(struct iommu_veventq_alloc)
 * @flags: Must be 0
 * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
 * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
 * @veventq_depth: Maximum number of events in the vEVENTQ
 * @out_veventq_id: The ID of the new vEVENTQ
 * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
 *                  successfully returned fd after using it
 * @__reserved: Must be 0
 *
 * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
 * can have multiple FDs for different types, but is confined to one per @type.
 * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
 * if there are vEVENTs available. A vEVENTQ will lose events due to overflow,
 * if the number of the vEVENTs hits @veventq_depth.
 *
 * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
 * a type-specific data structure, in a normal case:
 *
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 *
 * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
 * struct iommufd_vevent_header).
 */
struct iommu_veventq_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 veventq_depth;
	__u32 out_veventq_id;
	__u32 out_veventq_fd;
	__u32 __reserved;
};
#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
1235
/**
 * enum iommu_hw_queue_type - HW Queue Type
 * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                      SMMUv3) Virtual Command Queue (VCMDQ)
 */
enum iommu_hw_queue_type {
	IOMMU_HW_QUEUE_TYPE_DEFAULT = 0,
	/*
	 * TEGRA241_CMDQV requirements (otherwise, allocation will fail)
	 * - alloc starts from the lowest @index=0 in ascending order
	 * - destroy starts from the last allocated @index in descending order
	 * - @nesting_parent_iova must be aligned to @length in bytes and
	 *   mapped in IOAS
	 * - @length must be a power of 2, with a minimum 32 bytes and a maximum
	 *   2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1]
	 *   from struct iommu_hw_info_arm_smmuv3)
	 * - suggest to back the queue memory with contiguous physical pages or
	 *   a single huge page with alignment of the queue size, and limit the
	 *   emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes)
	 */
	IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1,
};
1258
/**
 * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_hw_queue_alloc)
 * @flags: Must be 0
 * @viommu_id: Virtual IOMMU ID to associate the HW queue with
 * @type: One of enum iommu_hw_queue_type
 * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue
 *         model
 * @out_hw_queue_id: The ID of the new HW queue
 * @nesting_parent_iova: Base address of the queue memory in the guest physical
 *                       address space
 * @length: Length of the queue memory
 *
 * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which
 * allows HW to access a guest queue memory described using @nesting_parent_iova
 * and @length.
 *
 * A vIOMMU can allocate multiple queues, but it must use a different @index per
 * type to separate each allocation, e.g::
 *
 *     Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ...
 */
struct iommu_hw_queue_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 index;
	__u32 out_hw_queue_id;
	__aligned_u64 nesting_parent_iova;
	__aligned_u64 length;
};
#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC)
1292	#endif
1293

Browse the source code of Linux/include/uapi/linux/iommufd.h