/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef _UAPI_IOMMUFD_H
#define _UAPI_IOMMUFD_H

#include <linux/ioctl.h>
#include <linux/types.h>

#define IOMMUFD_TYPE (';')

/**
 * DOC: General ioctl format
 *
 * The ioctl interface follows a general format to allow for extensibility. Each
 * ioctl is passed a pointer to a structure whose first u32 holds the size of
 * the structure. The kernel checks that any structure space beyond what it
 * understands is 0. This allows userspace to use the backward compatible
 * portion while consistently using the newer, larger, structures.
 *
 * ioctls use a standard meaning for common errnos:
 *
 *  - ENOTTY: The IOCTL number itself is not supported at all
 *  - E2BIG: The IOCTL number is supported, but the provided structure has
 *    non-zero content in a part the kernel does not understand.
 *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
 *    understood, however a known field has a value the kernel does not
 *    understand or support.
 *  - EINVAL: Everything about the IOCTL was understood, but a field is not
 *    correct.
 *  - ENOENT: An ID or IOVA provided does not exist.
 *  - ENOMEM: Out of memory.
 *  - EOVERFLOW: Mathematics overflowed.
 *
 * Individual ioctls may also return additional errnos of their own.
 */
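
/*
 * Illustrative sketch (not part of the UAPI): the calling convention
 * described above. The structure is zero-initialized so any trailing fields
 * this program was not compiled against stay 0, and the first u32 carries
 * the structure size. Assumes iommufd is opened from /dev/iommu;
 * "object_id" is a hypothetical object ID.
 *
 *	int fd = open("/dev/iommu", O_RDWR);
 *	struct iommu_destroy cmd = {};
 *
 *	cmd.size = sizeof(cmd);
 *	cmd.id = object_id;
 *	if (ioctl(fd, IOMMU_DESTROY, &cmd))
 *		perror("IOMMU_DESTROY");	// errno follows the table above
 */
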
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
};

/**
 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
 * @size: sizeof(struct iommu_destroy)
 * @id: iommufd object ID to destroy. Can be any destroyable object type.
 *
 * Destroy any object held within iommufd.
 */
struct iommu_destroy {
	__u32 size;
	__u32 id;
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)

/**
 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
 * @size: sizeof(struct iommu_ioas_alloc)
 * @flags: Must be 0
 * @out_ioas_id: Output IOAS ID for the allocated object
 *
 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
 * to memory mapping.
 */
struct iommu_ioas_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_ioas_id;
};
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
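
/*
 * Illustrative sketch (not part of the UAPI): allocating an IOAS. "fd" is
 * assumed to be an open iommufd file descriptor from the earlier example.
 *
 *	struct iommu_ioas_alloc alloc = {
 *		.size = sizeof(alloc),
 *	};
 *
 *	if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc))
 *		perror("IOMMU_IOAS_ALLOC");
 *	ioas_id = alloc.out_ioas_id;
 */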

/**
 * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
 * @start: First IOVA
 * @last: Inclusive last IOVA
 *
 * An interval in IOVA space.
 */
struct iommu_iova_range {
	__aligned_u64 start;
	__aligned_u64 last;
};

/**
 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
 * @size: sizeof(struct iommu_ioas_iova_ranges)
 * @ioas_id: IOAS ID to read ranges from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
 * @out_iova_alignment: Minimum alignment required for mapping IOVA
 *
 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
 * is not allowed. num_iovas will be set to the total number of iovas and
 * the allowed_iovas[] will be filled in as space permits.
 *
 * The allowed ranges are dependent on the HW path the DMA operation takes, and
 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
 * full range, and each attached device will narrow the ranges based on that
 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
 * should query ranges after every attach/detach to know what IOVAs are valid
 * for mapping.
 *
 * On input num_iovas is the length of the allowed_iovas array. On output it is
 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
 * num_iovas to the required value if num_iovas is too small. In this case the
 * caller should allocate a larger output array and re-issue the ioctl.
 *
 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
 *
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 *
 * out_iova_alignment can be 1, indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.
 */
struct iommu_ioas_iova_ranges {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
	__aligned_u64 out_iova_alignment;
};
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
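
/*
 * Illustrative sketch (not part of the UAPI): the two-call pattern for
 * reading the ranges. A first call with num_iovas == 0 fails with EMSGSIZE
 * and reports the required count, then the call is re-issued with a suitably
 * sized array. "fd" and "ioas_id" are assumed from the earlier examples.
 *
 *	struct iommu_ioas_iova_ranges ranges = {
 *		.size = sizeof(ranges),
 *		.ioas_id = ioas_id,
 *	};
 *	struct iommu_iova_range *iovas;
 *
 *	if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, &ranges) && errno != EMSGSIZE)
 *		perror("IOMMU_IOAS_IOVA_RANGES");
 *	iovas = calloc(ranges.num_iovas, sizeof(*iovas));
 *	ranges.allowed_iovas = (uintptr_t)iovas;
 *	if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, &ranges))
 *		perror("IOMMU_IOAS_IOVA_RANGES");
 */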

/**
 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
 * @size: sizeof(struct iommu_ioas_allow_iovas)
 * @ioas_id: IOAS ID to allow IOVAs from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to array of struct iommu_iova_range
 *
 * Ensure a range of IOVAs are always available for allocation. If this call
 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
 * that are narrower than the ranges provided here. This call will fail if
 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
 *
 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
 * devices are attached the ranges will narrow based on the device restrictions.
 * When an allowed range is specified any narrowing will be refused, i.e. device
 * attachment can fail if the device requires limiting within the allowed range.
 *
 * Automatic IOVA allocation is also impacted by this call. MAP will only
 * allocate within the allowed IOVAs if they are present.
 *
 * This call replaces the entire allowed list with the given list.
 */
struct iommu_ioas_allow_iovas {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
};
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
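
/*
 * Illustrative sketch (not part of the UAPI): guaranteeing that one 4GB
 * range stays mappable. If a later device attachment cannot honor this
 * range, the attachment, not the mapping, will fail. "fd" and "ioas_id"
 * are assumed from the earlier examples.
 *
 *	struct iommu_iova_range range = {
 *		.start = 0x100000000,
 *		.last = 0x1ffffffff,
 *	};
 *	struct iommu_ioas_allow_iovas allow = {
 *		.size = sizeof(allow),
 *		.ioas_id = ioas_id,
 *		.num_iovas = 1,
 *		.allowed_iovas = (uintptr_t)&range,
 *	};
 *
 *	if (ioctl(fd, IOMMU_IOAS_ALLOW_IOVAS, &allow))
 *		perror("IOMMU_IOAS_ALLOW_IOVAS");
 */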

/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};

/**
 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
 * @size: sizeof(struct iommu_ioas_map)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @ioas_id: IOAS ID to change the mapping of
 * @__reserved: Must be 0
 * @user_va: Userspace pointer to start mapping from
 * @length: Number of bytes to map
 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
 *        then this must be provided as input.
 *
 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
 * mapping will be established at iova, otherwise a suitable location based on
 * the reserved and allowed lists will be automatically selected and returned in
 * iova.
 *
 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused; an existing IOVA cannot be replaced.
 */
struct iommu_ioas_map {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__u32 __reserved;
	__aligned_u64 user_va;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
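
/*
 * Illustrative sketch (not part of the UAPI): mapping an anonymous buffer
 * and letting the kernel pick the IOVA. With FIXED_IOVA clear, the chosen
 * IOVA is returned in .iova. "fd", "ioas_id", and "len" are assumed.
 *
 *	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct iommu_ioas_map map = {
 *		.size = sizeof(map),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
 *		.ioas_id = ioas_id,
 *		.user_va = (uintptr_t)buf,
 *		.length = len,
 *	};
 *
 *	if (ioctl(fd, IOMMU_IOAS_MAP, &map))
 *		perror("IOMMU_IOAS_MAP");
 *	iova = map.iova;
 */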

/**
 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
 * @size: sizeof(struct iommu_ioas_copy)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @dst_ioas_id: IOAS ID to change the mapping of
 * @src_ioas_id: IOAS ID to copy from
 * @length: Number of bytes to copy and map
 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
 *            set then this must be provided as input.
 * @src_iova: IOVA to start the copy
 *
 * Copy an already existing mapping from src_ioas_id and establish it in
 * dst_ioas_id. The src iova/length must exactly match a range used with
 * IOMMU_IOAS_MAP.
 *
 * This may be used to efficiently clone a subset of an IOAS to another, or as a
 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
 * establishing equivalent new mappings, as internal resources are shared, and
 * the kernel will pin the user memory only once.
 */
struct iommu_ioas_copy {
	__u32 size;
	__u32 flags;
	__u32 dst_ioas_id;
	__u32 src_ioas_id;
	__aligned_u64 length;
	__aligned_u64 dst_iova;
	__aligned_u64 src_iova;
};
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
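
/*
 * Illustrative sketch (not part of the UAPI): cloning the mapping made by
 * the IOMMU_IOAS_MAP example above into a second IOAS without pinning the
 * memory again. "dst_ioas_id", "ioas_id", "iova", and "len" are assumed,
 * and the src iova/length exactly match the original map.
 *
 *	struct iommu_ioas_copy copy = {
 *		.size = sizeof(copy),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
 *		.dst_ioas_id = dst_ioas_id,
 *		.src_ioas_id = ioas_id,
 *		.length = len,
 *		.src_iova = iova,
 *	};
 *
 *	if (ioctl(fd, IOMMU_IOAS_COPY, &copy))
 *		perror("IOMMU_IOAS_COPY");
 */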

/**
 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
 * @size: sizeof(struct iommu_ioas_unmap)
 * @ioas_id: IOAS ID to change the mapping of
 * @iova: IOVA to start the unmapping at
 * @length: Number of bytes to unmap, and return back the bytes unmapped
 *
 * Unmap an IOVA range. The iova/length must be a superset of a previously
 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
 * truncating ranges is not allowed. Passing iova == 0 and length == U64_MAX
 * will unmap everything.
 */
struct iommu_ioas_unmap {
	__u32 size;
	__u32 ioas_id;
	__aligned_u64 iova;
	__aligned_u64 length;
};
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
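
/*
 * Illustrative sketch (not part of the UAPI): unmapping everything in an
 * IOAS using the special 0/U64_MAX range described above. "fd" and
 * "ioas_id" are assumed.
 *
 *	struct iommu_ioas_unmap unmap = {
 *		.size = sizeof(unmap),
 *		.ioas_id = ioas_id,
 *		.iova = 0,
 *		.length = UINT64_MAX,
 *	};
 *
 *	if (ioctl(fd, IOMMU_IOAS_UNMAP, &unmap))
 *		perror("IOMMU_IOAS_UNMAP");
 */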

/**
 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
 *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
 *    to invoke this. Value 0 (default) is user-based accounting, 1 uses
 *    process-based accounting. Global option, object_id must be 0.
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};

/**
 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
 *                           ioctl(IOMMU_OPTION_OP_GET)
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};

/**
 * struct iommu_option - iommu option multiplexer
 * @size: sizeof(struct iommu_option)
 * @option_id: One of enum iommufd_option
 * @op: One of enum iommufd_option_ops
 * @__reserved: Must be 0
 * @object_id: ID of the object if required
 * @val64: Option value to set or value returned on get
 *
 * Change a simple option value. This multiplexer allows controlling options
 * on objects. IOMMU_OPTION_OP_SET will set the option's value and
 * IOMMU_OPTION_OP_GET will return the current value.
 */
struct iommu_option {
	__u32 size;
	__u32 option_id;
	__u16 op;
	__u16 __reserved;
	__u32 object_id;
	__aligned_u64 val64;
};
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
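
/*
 * Illustrative sketch (not part of the UAPI): disabling huge page combining
 * on one IOAS, e.g. for benchmarking. "fd" and "ioas_id" are assumed.
 *
 *	struct iommu_option opt = {
 *		.size = sizeof(opt),
 *		.option_id = IOMMU_OPTION_HUGE_PAGES,
 *		.op = IOMMU_OPTION_OP_SET,
 *		.object_id = ioas_id,
 *		.val64 = 0,
 *	};
 *
 *	if (ioctl(fd, IOMMU_OPTION, &opt))
 *		perror("IOMMU_OPTION");
 */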

/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};

/**
 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
 * @size: sizeof(struct iommu_vfio_ioas)
 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
 *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
 * @op: One of enum iommufd_vfio_ioas_op
 * @__reserved: Must be 0
 *
 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
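
/*
 * Illustrative sketch (not part of the UAPI): pointing the VFIO
 * compatibility layer at an explicitly created IOAS before any
 * VFIO_GROUP_SET_CONTAINER happens. "fd" and "ioas_id" are assumed.
 *
 *	struct iommu_vfio_ioas vfio_ioas = {
 *		.size = sizeof(vfio_ioas),
 *		.ioas_id = ioas_id,
 *		.op = IOMMU_VFIO_IOAS_SET,
 *	};
 *
 *	if (ioctl(fd, IOMMU_VFIO_IOAS, &vfio_ioas))
 *		perror("IOMMU_VFIO_IOAS");
 */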

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};

/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
 *                                entry attributes
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};

/**
 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
 *                            info (IOMMU_HWPT_DATA_VTD_S1)
 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
 * @pgtbl_addr: The base address of the stage-1 page table.
 * @addr_width: The address width of the stage-1 page table
 * @__reserved: Must be 0
 */
struct iommu_hwpt_vtd_s1 {
	__aligned_u64 flags;
	__aligned_u64 pgtbl_addr;
	__u32 addr_width;
	__u32 __reserved;
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
};

/**
 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
 * @size: sizeof(struct iommu_hwpt_alloc)
 * @flags: Combination of enum iommufd_hwpt_alloc_flags
 * @dev_id: The device to allocate this HWPT for
 * @pt_id: The IOAS or HWPT to connect this HWPT to
 * @out_hwpt_id: The ID of the new HWPT
 * @__reserved: Must be 0
 * @data_type: One of enum iommu_hwpt_data_type
 * @data_len: Length of the type specific data
 * @data_uptr: User pointer to the type specific data
 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if the
 *            IOMMU_HWPT_FAULT_ID_VALID flag is set in @flags.
 * @__reserved2: Padding to 64-bit alignment. Must be 0.
 *
 * Explicitly allocate a hardware page table object. This is the same object
 * type that is returned by iommufd_device_attach() and represents the
 * underlying iommu driver's iommu_domain kernel object.
 *
 * A kernel-managed HWPT will be created with the mappings from the given
 * IOAS via the @pt_id. The @data_type for this allocation must be set to
 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
 *
 * A user-managed nested HWPT will be created from a given parent HWPT via
 * @pt_id, in which the parent HWPT must have been allocated previously via
 * the same ioctl from a given IOAS (@pt_id). In this case, the @data_type
 * must be set to a pre-defined type corresponding to an I/O page table
 * type supported by the underlying IOMMU hardware.
 *
 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
 * must be given.
 */
struct iommu_hwpt_alloc {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 pt_id;
	__u32 out_hwpt_id;
	__u32 __reserved;
	__u32 data_type;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 fault_id;
	__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
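
/*
 * Illustrative sketch (not part of the UAPI): allocating a kernel-managed
 * HWPT over an existing IOAS. "dev_id" is assumed to be the iommufd ID of a
 * device already bound through its driver (e.g. VFIO); "fd" and "ioas_id"
 * are assumed from the earlier examples.
 *
 *	struct iommu_hwpt_alloc hwpt = {
 *		.size = sizeof(hwpt),
 *		.dev_id = dev_id,
 *		.pt_id = ioas_id,
 *		.data_type = IOMMU_HWPT_DATA_NONE,
 *	};
 *
 *	if (ioctl(fd, IOMMU_HWPT_ALLOC, &hwpt))
 *		perror("IOMMU_HWPT_ALLOC");
 *	hwpt_id = hwpt.out_hwpt_id;
 */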

/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};

/**
 * struct iommu_hw_info_vtd - Intel VT-d hardware information
 *
 * @flags: Combination of enum iommu_hw_info_vtd_flags
 * @__reserved: Must be 0
 *
 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
 *           section 11.4.2 Capability Register.
 * @ecap_reg: Value of Intel VT-d extended capability register defined in
 *            VT-d spec section 11.4.3 Extended Capability Register.
 *
 * User needs to understand the Intel VT-d specification to decode the
 * register value.
 */
struct iommu_hw_info_vtd {
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 cap_reg;
	__aligned_u64 ecap_reg;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
};

/**
 * enum iommufd_hw_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
};

/**
 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
 * @size: sizeof(struct iommu_hw_info)
 * @flags: Must be 0
 * @dev_id: The device bound to the iommufd
 * @data_len: Input the length of a user buffer in bytes. Output the length of
 *            data that the kernel supports
 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
 *             the iommu type specific hardware information data
 * @out_data_type: Output the iommu hardware info type as defined in the enum
 *                 iommu_hw_info_type.
 * @out_capabilities: Output the generic iommu capability info as defined in
 *                    the enum iommufd_hw_capabilities.
 * @__reserved: Must be 0
 *
 * Query iommu type specific hardware information data from an iommu behind
 * a given device that has been bound to iommufd. This hardware info data will
 * be used to sync capabilities between the virtual iommu and the physical
 * iommu, e.g. a nested translation setup needs to check the hardware info, so
 * a guest stage-1 page table can be compatible with the physical iommu.
 *
 * To capture the iommu type specific hardware information data, @data_uptr and
 * its length @data_len must be provided. Trailing bytes will be zeroed if the
 * user buffer is larger than the data that the kernel has. Otherwise, the
 * kernel only fills the buffer using the given length in @data_len. If the
 * ioctl succeeds, @data_len will be updated to the length that the kernel
 * actually supports, and @out_data_type will be filled to decode the data
 * filled in the buffer pointed to by @data_uptr. Input @data_len == zero is
 * allowed.
 */
struct iommu_hw_info {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 out_data_type;
	__u32 __reserved;
	__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
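
/*
 * Illustrative sketch (not part of the UAPI): querying hardware info and
 * interpreting it as VT-d data when the kernel reports that type. "fd" and
 * "dev_id" are assumed; decode_vtd_caps() is a hypothetical helper.
 *
 *	struct iommu_hw_info_vtd vtd;
 *	struct iommu_hw_info info = {
 *		.size = sizeof(info),
 *		.dev_id = dev_id,
 *		.data_len = sizeof(vtd),
 *		.data_uptr = (uintptr_t)&vtd,
 *	};
 *
 *	if (ioctl(fd, IOMMU_GET_HW_INFO, &info))
 *		perror("IOMMU_GET_HW_INFO");
 *	if (info.out_data_type == IOMMU_HW_INFO_TYPE_INTEL_VTD)
 *		decode_vtd_caps(vtd.cap_reg, vtd.ecap_reg);
 *	if (info.out_capabilities & IOMMU_HW_CAP_DIRTY_TRACKING)
 *		dirty_tracking_supported = true;
 */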

/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};

/**
 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @__reserved: Must be 0
 *
 * Toggle dirty tracking on an HW pagetable.
 */
struct iommu_hwpt_set_dirty_tracking {
	__u32 size;
	__u32 flags;
	__u32 hwpt_id;
	__u32 __reserved;
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
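
/*
 * Illustrative sketch (not part of the UAPI): turning dirty tracking on for
 * a HWPT that was allocated with IOMMU_HWPT_ALLOC_DIRTY_TRACKING. "fd" and
 * "hwpt_id" are assumed.
 *
 *	struct iommu_hwpt_set_dirty_tracking set_dirty = {
 *		.size = sizeof(set_dirty),
 *		.flags = IOMMU_HWPT_DIRTY_TRACKING_ENABLE,
 *		.hwpt_id = hwpt_id,
 *	};
 *
 *	if (ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty))
 *		perror("IOMMU_HWPT_SET_DIRTY_TRACKING");
 */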

/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed when the next
 *                                        operation on the same IOVA range is
 *                                        expected to be an unmap.
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};

/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size
 * @page_size: page size granularity of each bit in the bitmap
 * @data: bitmap where to set the dirty bits. Each bit in the bitmap
 *        represents one page_size chunk of IOVA, offset from @iova.
 *
 * Checking whether a given IOVA is dirty::
 *
 *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
	__u32 size;
	__u32 hwpt_id;
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 iova;
	__aligned_u64 length;
	__aligned_u64 page_size;
	__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
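
/*
 * Illustrative sketch (not part of the UAPI): fetching the dirty bitmap for
 * a 128MB range at 4K granularity. The bitmap needs one bit per page, i.e.
 * length / page_size / 8 bytes. "fd", "hwpt_id", and "iova" are assumed.
 *
 *	__u64 len = 128 * 1024 * 1024, pgsz = 4096;
 *	__u64 *bitmap = calloc(len / pgsz / 64, sizeof(__u64));
 *	struct iommu_hwpt_get_dirty_bitmap get_dirty = {
 *		.size = sizeof(get_dirty),
 *		.hwpt_id = hwpt_id,
 *		.iova = iova,
 *		.length = len,
 *		.page_size = pgsz,
 *		.data = (uintptr_t)bitmap,
 *	};
 *
 *	if (ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty))
 *		perror("IOMMU_HWPT_GET_DIRTY_BITMAP");
 */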

/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
};

/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: If set, the invalidation applies only to the
 *                            leaf PTE cache; otherwise it applies to the
 *                            page structure caches at all levels.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};

/**
 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
 *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
 * @addr: The start address of the range to be invalidated. It needs to
 *        be 4KB aligned.
 * @npages: Number of contiguous 4K pages to be invalidated.
 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
 * @__reserved: Must be 0
 *
 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
 * invalidation in nested translation. Userspace uses this structure to
 * tell the impacted cache scope after modifying the stage-1 page table.
 *
 * To invalidate all the caches related to the page table, set @addr to 0
 * and @npages to U64_MAX.
 *
 * The device TLB will be invalidated automatically if ATS is enabled.
 */
struct iommu_hwpt_vtd_s1_invalidate {
	__aligned_u64 addr;
	__aligned_u64 npages;
	__u32 flags;
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed to by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate the IOMMU cache for a user-managed page table. Modifications to
 * a user-managed page table should be followed by this operation to sync the
 * cache. Each ioctl can support one or more cache invalidation requests in
 * the array that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array (@entry_num == 0) is allowed; @entry_len
 * and @data_uptr are ignored in this case. This can be used to check whether
 * the given @data_type is supported by the kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
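
/*
 * Illustrative sketch (not part of the UAPI): invalidating one 4K page of a
 * nested VT-d stage-1 HWPT after userspace modified its page table. "fd",
 * "gva", and "nested_hwpt_id" are assumed.
 *
 *	struct iommu_hwpt_vtd_s1_invalidate inv = {
 *		.addr = gva & ~0xfffULL,
 *		.npages = 1,
 *	};
 *	struct iommu_hwpt_invalidate cmd = {
 *		.size = sizeof(cmd),
 *		.hwpt_id = nested_hwpt_id,
 *		.data_uptr = (uintptr_t)&inv,
 *		.data_type = IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
 *		.entry_len = sizeof(inv),
 *		.entry_num = 1,
 *	};
 *
 *	if (ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd))
 *		perror("IOMMU_HWPT_INVALIDATE");
 */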

/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID		= (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE		= (1 << 1),
};

/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ			= (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE		= (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC			= (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV			= (1 << 3),
};

/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: ID of the originating device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @addr: Fault address
 * @length: A hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there is no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u64 addr;
	__u32 length;
	__u32 cookie;
};

/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};

/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of the response codes in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};

/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
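
/*
 * Illustrative sketch (not part of the UAPI): a minimal fault service loop.
 * Faults are read() from out_fault_fd as struct iommu_hwpt_pgfault, and each
 * fault group is answered by write()ing a struct iommu_hwpt_page_response
 * that echoes the cookie of the group's last message. "fault_fd" is assumed
 * to be out_fault_fd from above; handle_fault() is a hypothetical handler
 * returning an enum iommufd_page_response_code value.
 *
 *	struct iommu_hwpt_pgfault fault;
 *
 *	while (read(fault_fd, &fault, sizeof(fault)) == sizeof(fault)) {
 *		if (!(fault.flags & IOMMU_PGFAULT_FLAGS_LAST_PAGE))
 *			continue;
 *		struct iommu_hwpt_page_response resp = {
 *			.cookie = fault.cookie,
 *			.code = handle_fault(&fault),
 *		};
 *		if (write(fault_fd, &resp, sizeof(resp)) != sizeof(resp))
 *			perror("fault response");
 *	}
 */
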
#endif