/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"
#include "i915_vma_resource.h"

#include "gt/intel_gt_defines.h"

struct drm_i915_gem_object;
struct intel_frontbuffer;
struct intel_memory_region;

/*
 * struct i915_lut_handle tracks the fast lookups from handle to vma used
 * for execbuf. Although we use a radixtree for that mapping, in order to
 * remove them as the object or context is closed, we need a secondary list
 * and a translation entry (i915_lut_handle).
 */
struct i915_lut_handle {
	struct list_head obj_link;
	struct i915_gem_context *ctx;
	u32 handle;
};

struct drm_i915_gem_object_ops {
	unsigned int flags;
#define I915_GEM_OBJECT_IS_SHRINKABLE			BIT(1)
/* Skip the shrinker management in set_pages/unset_pages */
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST	BIT(2)
#define I915_GEM_OBJECT_IS_PROXY			BIT(3)
#define I915_GEM_OBJECT_NO_MMAP				BIT(4)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
	 * of pages, before binding them into the GTT, and put_pages() is
	 * called after we no longer need them. As we expect there to be
	 * associated cost with migrating pages between the backing storage
	 * and making them available for the GPU (e.g. clflush), we may hold
	 * onto the pages after they are no longer referenced by the GPU
	 * in case they may be used again shortly (for example migrating the
	 * pages to a different memory domain within the GTT). put_pages()
	 * will therefore most likely be called when the object itself is
	 * being released or under memory pressure (where we attempt to
	 * reap pages for the shrinker).
	 */
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);
	int (*truncate)(struct drm_i915_gem_object *obj);
	/**
	 * shrink - Perform further backend specific actions to facilitate
	 * shrinking.
	 * @obj: The gem object
	 * @flags: Extra flags to control shrinking behaviour in the backend
	 *
	 * Possible values for @flags:
	 *
	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
	 * backing pages, if supported.
	 *
	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
	 * idle. Active objects can be considered later. The TTM backend for
	 * example might have async migrations going on, which don't use any
	 * i915_vma to track the active GTT binding, and hence having an unbound
	 * object might not be enough.
	 */
#define I915_GEM_OBJECT_SHRINK_WRITEBACK   BIT(0)
#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1)
	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);

	int (*pread)(struct drm_i915_gem_object *obj,
		     const struct drm_i915_gem_pread *arg);
	int (*pwrite)(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *arg);
	u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
	void (*unmap_virtual)(struct drm_i915_gem_object *obj);

	int (*dmabuf_export)(struct drm_i915_gem_object *obj);

	/**
	 * adjust_lru - notify that the madvise value was updated
	 * @obj: The gem object
	 *
	 * The madvise value may have been updated, or the object was recently
	 * referenced, so act accordingly (perhaps changing an LRU list, etc).
	 */
	void (*adjust_lru)(struct drm_i915_gem_object *obj);

	/**
	 * delayed_free - Override the default delayed free implementation
	 */
	void (*delayed_free)(struct drm_i915_gem_object *obj);

	/**
	 * migrate - Migrate object to a different region either for
	 * pinning or for as long as the object lock is held.
	 */
	int (*migrate)(struct drm_i915_gem_object *obj,
		       struct intel_memory_region *mr,
		       unsigned int flags);

	void (*release)(struct drm_i915_gem_object *obj);

	const struct vm_operations_struct *mmap_ops;
	const char *name; /* friendly name for debug, e.g. lockdep classes */
};
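
/*
 * A minimal sketch of how a backend might populate this ops table. The
 * callback names (example_get_pages, example_put_pages) are hypothetical and
 * only illustrate the expected wiring; the real backends (shmem, stolen, TTM,
 * userptr) live under gem/ and set additional flags and hooks. The call to
 * __i915_gem_object_set_pages() is assumed to take (obj, pages), per
 * i915_gem_pages.c at the time of writing.
 *
 *	static int example_get_pages(struct drm_i915_gem_object *obj)
 *	{
 *		struct sg_table *pages;
 *
 *		pages = ...; // allocate and map the backing store
 *		if (IS_ERR(pages))
 *			return PTR_ERR(pages);
 *
 *		__i915_gem_object_set_pages(obj, pages);
 *		return 0;
 *	}
 *
 *	static void example_put_pages(struct drm_i915_gem_object *obj,
 *				      struct sg_table *pages)
 *	{
 *		// release whatever example_get_pages() acquired
 *	}
 *
 *	static const struct drm_i915_gem_object_ops example_ops = {
 *		.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 *		.name = "example-object",
 *		.get_pages = example_get_pages,
 *		.put_pages = example_put_pages,
 *	};
 */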

/**
 * enum i915_cache_level - The supported GTT caching values for system memory
 * pages.
 *
 * These translate to some special GTT PTE bits when binding pages into some
 * address space. It also determines whether an object, or rather its pages,
 * are coherent with the GPU when also reading or writing through the CPU
 * cache with those pages.
 *
 * Userspace can also control this through struct drm_i915_gem_caching.
 */
enum i915_cache_level {
	/**
	 * @I915_CACHE_NONE:
	 *
	 * GPU access is not coherent with the CPU cache. If the cache is dirty
	 * and we need the underlying pages to be coherent with some later GPU
	 * access then we need to manually flush the pages.
	 *
	 * On shared LLC platforms reads and writes through the CPU cache are
	 * still coherent even with this setting. See also
	 * &drm_i915_gem_object.cache_coherent for more details. Due to this we
	 * should only ever use uncached for scanout surfaces, otherwise we end
	 * up over-flushing in some places.
	 *
	 * This is the default on non-LLC platforms.
	 */
	I915_CACHE_NONE = 0,
	/**
	 * @I915_CACHE_LLC:
	 *
	 * GPU access is coherent with the CPU cache. If the cache is dirty,
	 * then the GPU will ensure that access remains coherent, when both
	 * reading and writing through the CPU cache. GPU writes can dirty the
	 * CPU cache.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Applies to both platforms with shared LLC (HAS_LLC), and snooping
	 * based platforms (HAS_SNOOP).
	 *
	 * This is the default on shared LLC platforms. The only exception is
	 * scanout objects, where the display engine is not coherent with the
	 * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
	 * automatically applied by the kernel in pin_for_display, if userspace
	 * has not done so already.
	 */
	I915_CACHE_LLC,
	/**
	 * @I915_CACHE_L3_LLC:
	 *
	 * Explicitly enable the Gfx L3 cache, with coherent LLC.
	 *
	 * The Gfx L3 sits between the domain specific caches, e.g.
	 * sampler/render caches, and the larger LLC. LLC is coherent with the
	 * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
	 * when the workload completes.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Only exposed on some gen7 + GGTT. More recent hardware has dropped
	 * this explicit setting, where it should now be enabled by default.
	 */
	I915_CACHE_L3_LLC,
	/**
	 * @I915_CACHE_WT:
	 *
	 * Write-through. Used for scanout surfaces.
	 *
	 * The GPU can utilise the caches, while still having the display engine
	 * be coherent with GPU writes, as a result we don't need to flush the
	 * CPU caches when moving out of the render domain. This is the default
	 * setting chosen by the kernel, if supported by the HW, otherwise we
	 * fall back to I915_CACHE_NONE. On the CPU side writes through the CPU
	 * cache still need to be flushed, to remain coherent with the display
	 * engine.
	 */
	I915_CACHE_WT,
	/**
	 * @I915_MAX_CACHE_LEVEL:
	 *
	 * Mark the last entry in the enum. Used for defining the
	 * cachelevel_to_pat array for cache_level to PAT translation.
	 */
	I915_MAX_CACHE_LEVEL,
};
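
/*
 * Within the driver, cache-level checks should go through the
 * i915_gem_object_has_cache_level() helper (referenced in the
 * &drm_i915_gem_object.cache_coherent documentation below) rather than
 * comparing @pat_index by hand, since the helper accounts for objects whose
 * PAT index was set directly by userspace. A minimal sketch, assuming the
 * backing pages are already pinned:
 *
 *	if (i915_gem_object_has_cache_level(obj, I915_CACHE_NONE))
 *		drm_clflush_sg(obj->mm.pages); // uncached: flush before GPU use
 */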

enum i915_map_type {
	I915_MAP_WB = 0,
	I915_MAP_WC,
#define I915_MAP_OVERRIDE BIT(31)
	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
};
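
/*
 * A short usage sketch for the mapping types above, assuming the
 * i915_gem_object_pin_map_unlocked() helper from i915_gem_pages.c. The
 * FORCE_ variants discard a previously cached mapping of the other type.
 *
 *	void *vaddr;
 *
 *	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *
 *	memset(vaddr, 0, obj->base.size);
 *	i915_gem_object_unpin_map(obj);
 */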

enum i915_mmap_type {
	I915_MMAP_TYPE_GTT = 0,
	I915_MMAP_TYPE_WC,
	I915_MMAP_TYPE_WB,
	I915_MMAP_TYPE_UC,
	I915_MMAP_TYPE_FIXED,
};

struct i915_mmap_offset {
	struct drm_vma_offset_node vma_node;
	struct drm_i915_gem_object *obj;
	enum i915_mmap_type mmap_type;

	struct rb_node offset;
};

struct i915_gem_object_page_iter {
	struct scatterlist *sg_pos;
	unsigned int sg_idx; /* in pages, but 32bit eek! */

	struct radix_tree_root radix;
	struct mutex lock; /* protects this cache */
};
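
/*
 * These iterators back the random-access page lookup helpers; callers
 * normally go through the wrappers rather than touching the cache directly.
 * A sketch, assuming the helpers from i915_gem_pages.c and that the pages
 * are pinned, for some page index n:
 *
 *	struct page *page = i915_gem_object_get_page(obj, n);
 *	dma_addr_t addr = i915_gem_object_get_dma_address(obj, n);
 */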

struct drm_i915_gem_object {
	/*
	 * We might have reason to revisit the below since it wastes
	 * a lot of space for non-ttm gem objects.
	 * In any case, always use the accessors for the ttm_buffer_object
	 * when accessing it.
	 */
	union {
		struct drm_gem_object base;
		struct ttm_buffer_object __do_not_access;
	};

	const struct drm_i915_gem_object_ops *ops;

	struct {
		/**
		 * @vma.lock: protect the list/tree of vmas
		 */
		spinlock_t lock;

		/**
		 * @vma.list: List of VMAs backed by this object
		 *
		 * The VMAs on this list are ordered by type, all GGTT vma are
		 * placed at the head and all ppGTT vma are placed at the tail.
		 * The different types of GGTT vma are unordered between
		 * themselves, use the @vma.tree (which has a defined order
		 * between all VMA) to quickly find an exact match.
		 */
		struct list_head list;

		/**
		 * @vma.tree: Ordered tree of VMAs backed by this object
		 *
		 * All VMA created for this object are placed in the @vma.tree
		 * for fast retrieval via a binary search in
		 * i915_vma_instance(). They are also added to @vma.list for
		 * easy iteration.
		 */
		struct rb_root tree;
	} vma;
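
	/*
	 * Lookup is normally done through i915_vma_instance(), which consults
	 * @vma.tree and creates the VMA on demand. A sketch (GGTT binding
	 * with a NULL view; error handling elided, signature per i915_vma.h):
	 *
	 *	struct i915_vma *vma;
	 *
	 *	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
	 *	if (IS_ERR(vma))
	 *		return PTR_ERR(vma);
	 */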

	/**
	 * @lut_list: List of vma lookup entries in use for this object.
	 *
	 * If this object is closed, we need to remove all of its VMA from
	 * the fast lookup index in associated contexts; @lut_list provides
	 * this translation from object to context->handles_vma.
	 */
	struct list_head lut_list;
	spinlock_t lut_lock; /* guards lut_list */

	/**
	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
	 *
	 * When we lock this object through i915_gem_object_lock() with a
	 * context, we add it to the list to ensure we can unlock everything
	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
	 */
	struct list_head obj_link;
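
	/*
	 * The usual locking pattern built on @obj_link, as used throughout
	 * the driver (sketch; the for_i915_gem_ww() macro wraps the same
	 * loop):
	 *
	 *	struct i915_gem_ww_ctx ww;
	 *	int err;
	 *
	 *	i915_gem_ww_ctx_init(&ww, true);
	 * retry:
	 *	err = i915_gem_object_lock(obj, &ww);
	 *	if (!err) {
	 *		// ... operate on obj ...
	 *	}
	 *	if (err == -EDEADLK) {
	 *		err = i915_gem_ww_ctx_backoff(&ww);
	 *		if (!err)
	 *			goto retry;
	 *	}
	 *	i915_gem_ww_ctx_fini(&ww);
	 */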
	/**
	 * @shares_resv_from: The object shares the resv from this vm.
	 */
	struct i915_address_space *shares_resv_from;

#ifdef CONFIG_PROC_FS
	/**
	 * @client: @i915_drm_client which created the object
	 */
	struct i915_drm_client *client;

	/**
	 * @client_link: Link into @i915_drm_client.objects_list
	 */
	struct list_head client_link;
#endif

	union {
		struct rcu_head rcu;
		struct llist_node freed;
	};

	/**
	 * Whether the object is currently in the GGTT or any other supported
	 * fake offset mmap backed by lmem.
	 */
	unsigned int userfault_count;
	struct list_head userfault_link;

	struct {
		spinlock_t lock; /* Protects access to mmo offsets */
		struct rb_root offsets;
	} mmo;

	I915_SELFTEST_DECLARE(struct list_head st_link);

	unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS  BIT(0)
#define I915_BO_ALLOC_VOLATILE    BIT(1)
#define I915_BO_ALLOC_CPU_CLEAR   BIT(2)
#define I915_BO_ALLOC_USER        BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY    BIT(5)
/*
 * Object is likely never accessed by the CPU. This will prioritise the BO to be
 * allocated in the non-mappable portion of lmem. This is merely a hint, and if
 * dealing with userspace objects the CPU fault handler is free to ignore this.
 */
#define I915_BO_ALLOC_GPU_ONLY	  BIT(6)
#define I915_BO_ALLOC_CCS_AUX	  BIT(7)
/*
 * Object is allowed to retain its initial data and will not be cleared on first
 * access if used along with I915_BO_ALLOC_USER. This is mainly to keep
 * preallocated framebuffer data intact while transitioning it to i915drmfb.
 */
#define I915_BO_PREALLOC	  BIT(8)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
			     I915_BO_ALLOC_VOLATILE | \
			     I915_BO_ALLOC_CPU_CLEAR | \
			     I915_BO_ALLOC_USER | \
			     I915_BO_ALLOC_PM_VOLATILE | \
			     I915_BO_ALLOC_PM_EARLY | \
			     I915_BO_ALLOC_GPU_ONLY | \
			     I915_BO_ALLOC_CCS_AUX | \
			     I915_BO_PREALLOC)
#define I915_BO_READONLY          BIT(9)
#define I915_TILING_QUIRK_BIT     10 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED         BIT(11)
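
	/*
	 * A sketch of how the allocation-time flags above are typically
	 * passed in, assuming the i915_gem_object_create_lmem() helper from
	 * gem/i915_gem_lmem.h (the exact constructor depends on the region):
	 *
	 *	obj = i915_gem_object_create_lmem(i915, size,
	 *					  I915_BO_ALLOC_CONTIGUOUS |
	 *					  I915_BO_ALLOC_CPU_CLEAR);
	 *	if (IS_ERR(obj))
	 *		return PTR_ERR(obj);
	 */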
	/**
	 * @mem_flags: Mutable placement-related flags
	 *
	 * These are flags that indicate specifics of the memory region
	 * the object is currently in. As such they are only stable
	 * either under the object lock or if the object is pinned.
	 */
	unsigned int mem_flags;
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
	/**
	 * @pat_index: The desired PAT index.
	 *
	 * See hardware specification for valid PAT indices for each platform.
	 * This field replaces the @cache_level that contains a value of enum
	 * i915_cache_level since PAT indices are being used by both userspace
	 * and kernel mode driver for caching policy control after GEN12.
	 * In the meantime platform specific tables are created to translate
	 * i915_cache_level into pat index, for more details check the macros
	 * defined in i915/i915_pci.c, e.g. TGL_CACHELEVEL.
	 * For backward compatibility, this field contains values that exactly
	 * match the entries of enum i915_cache_level for pre-GEN12 platforms
	 * (See LEGACY_CACHELEVEL), so that the PTE encode functions for these
	 * legacy platforms can stay the same.
	 */
	unsigned int pat_index:6;
	/**
	 * @pat_set_by_user: Indicate whether pat_index is set by user space
	 *
	 * This field is set to false by default, only set to true if the
	 * pat_index is set by user space. By design, user space is capable of
	 * managing caching behavior by setting pat_index, in which case the
	 * kernel mode driver should never touch the pat_index.
	 */
	unsigned int pat_set_by_user:1;
	/**
	 * @cache_coherent:
	 *
	 * Note: with the change above which replaced @cache_level with pat_index,
	 * the use of @cache_coherent is limited to the objects created by kernel
	 * or by userspace without pat index specified.
	 * Check for @pat_set_by_user to find out if an object has pat index set
	 * by userspace. The ioctl's to change cache settings have also been
	 * disabled for the objects with pat index set by userspace. Please don't
	 * assume @cache_coherent has the flags set as described here. A helper
	 * function i915_gem_object_has_cache_level() provides one way to bypass
	 * the use of this field.
	 *
	 * Track whether the pages are coherent with the GPU if reading or
	 * writing through the CPU caches. This largely depends on the
	 * @cache_level setting.
	 *
	 * On platforms which don't have the shared LLC (HAS_SNOOP), like on Atom
	 * platforms, coherency must be explicitly requested with some special
	 * GTT caching bits (see enum i915_cache_level). When enabling coherency
	 * it does come at a performance and power cost on such platforms. On
	 * the flip side the kernel does not need to manually flush any buffers
	 * which need to be coherent with the GPU, if the object is not coherent
	 * i.e. @cache_coherent is zero.
	 *
	 * On platforms that share the LLC with the CPU (HAS_LLC), all GT memory
	 * access will automatically snoop the CPU caches (even with CACHE_NONE).
	 * The one exception is when dealing with the display engine, like with
	 * scanout surfaces. To handle this the kernel will always flush the
	 * surface out of the CPU caches when preparing it for scanout. Also
	 * note that since scanout surfaces are only ever read by the display
	 * engine we only need to care about flushing any writes through the CPU
	 * cache, reads on the other hand will always be coherent.
	 *
	 * Something strange here is why @cache_coherent is not a simple
	 * boolean, i.e. coherent vs non-coherent. The reasoning for this is back
	 * to the display engine not being fully coherent. As a result scanout
	 * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT.
	 * In the case of seeing I915_CACHE_NONE the kernel makes the assumption
	 * that this is likely a scanout surface, and will set @cache_coherent
	 * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared
	 * LLC. The kernel uses this to always flush writes through the CPU
	 * cache as early as possible, where it can, in effect keeping
	 * @cache_dirty clean, so we can potentially avoid stalling when
	 * flushing the surface just before doing the scanout. This does mean
	 * we might unnecessarily flush non-scanout objects in some places, but
	 * the default assumption is that all normal objects should be using
	 * I915_CACHE_LLC, at least on platforms with the shared LLC.
	 *
	 * Supported values:
	 *
	 * I915_BO_CACHE_COHERENT_FOR_READ:
	 *
	 * On shared LLC platforms, we use this for special scanout surfaces,
	 * where the display engine is not coherent with the CPU cache. As such
	 * we need to ensure we flush any writes before doing the scanout. As an
	 * optimisation we try to flush any writes as early as possible to avoid
	 * stalling later.
	 *
	 * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
	 * platforms, we use:
	 *
	 *	cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
	 *
	 * While for normal objects that are fully coherent, including special
	 * scanout surfaces marked as I915_CACHE_WT, we use:
	 *
	 *	cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
	 *			 I915_BO_CACHE_COHERENT_FOR_WRITE
	 *
	 * And then for objects that are not coherent at all we use:
	 *
	 *	cache_coherent = 0
	 *
	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
	 *
	 * When writing through the CPU cache, the GPU is still coherent. Note
	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
	 */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
	unsigned int cache_coherent:2;

	/**
	 * @cache_dirty:
	 *
	 * Note: with the change above which replaced cache_level with pat_index,
	 * the use of @cache_dirty is limited to the objects created by kernel
	 * or by userspace without pat index specified.
	 * Check for @pat_set_by_user to find out if an object has pat index set
	 * by userspace. The ioctl's to change cache settings have also been
	 * disabled for the objects with pat_index set by userspace. Please don't
	 * assume @cache_dirty is set as described here. Also see helper function
	 * i915_gem_object_has_cache_level() for possible ways to bypass the use
	 * of this field.
	 *
	 * Track if we are dirty with writes through the CPU cache for this
	 * object. As a result reading directly from main memory might yield
	 * stale data.
	 *
	 * This also ties into whether the kernel is tracking the object as
	 * coherent with the GPU, as per @cache_coherent, as it determines if
	 * flushing might be needed at various points.
	 *
	 * Another part of @cache_dirty is managing flushing when first
	 * acquiring the pages for system memory, at this point the pages are
	 * considered foreign, so the default assumption is that the cache is
	 * dirty, for example the page zeroing done by the kernel might leave
	 * writes through the CPU cache, or swapping-in, while the actual data in
	 * main memory is potentially stale. Note that this is a potential
	 * security issue when dealing with userspace objects and zeroing. Now,
	 * whether we actually need to apply the big sledgehammer of flushing all
	 * the pages on acquire depends on if @cache_coherent is marked as
	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be coherent
	 * for both reads and writes through the CPU cache.
	 *
	 * Note that on shared LLC platforms we still apply the heavy flush for
	 * I915_CACHE_NONE objects, under the assumption that this is going to
	 * be used for scanout.
	 *
	 * Update: On some hardware there is now also the 'Bypass LLC' MOCS
	 * entry, which defeats our @cache_coherent tracking, since userspace
	 * can freely bypass the CPU cache when touching the pages with the GPU,
	 * where the kernel is completely unaware. On such platforms we need to
	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
	 *
	 * Special care is taken on non-LLC platforms, to prevent potential
	 * information leak. The driver currently ensures:
	 *
	 *   1. All userspace objects, by default, have @cache_level set as
	 *   I915_CACHE_NONE. The only exception is userptr objects, where we
	 *   instead force I915_CACHE_LLC, but we also don't allow userspace to
	 *   ever change the @cache_level for such objects. Another special case
	 *   is dma-buf, which doesn't rely on @cache_dirty, but there we
	 *   always do a forced flush when acquiring the pages, if there is a
	 *   chance that the pages can be read directly from main memory with
	 *   the GPU.
	 *
	 *   2. All I915_CACHE_NONE objects have @cache_dirty initially true.
	 *
	 *   3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
	 *   true.
	 *
	 *   4. The @cache_dirty is never freely reset before the initial
	 *   flush, even if userspace adjusts the @cache_level through the
	 *   i915_gem_set_caching_ioctl.
	 *
	 *   5. All @cache_dirty objects (including swapped-in) are initially
	 *   flushed with a synchronous call to drm_clflush_sg in
	 *   __i915_gem_object_set_pages. The @cache_dirty can be freely reset
	 *   at this point. All further asynchronous clflushes are never security
	 *   critical, i.e. userspace is free to race against itself.
	 */
	unsigned int cache_dirty:1;

	/* @is_dpt: Object houses a display page table (DPT) */
	unsigned int is_dpt:1;

	/**
	 * @read_domains: Read memory domains.
	 *
	 * These monitor which caches contain read/write data related to the
	 * object. When transitioning from one set of domains to another,
	 * the driver is called to ensure that caches are suitably flushed and
	 * invalidated.
	 */
	u16 read_domains;

	/**
	 * @write_domain: Corresponding unique write memory domain.
	 */
	u16 write_domain;

	struct intel_frontbuffer __rcu *frontbuffer;

	/** Current tiling stride for the object, if it's tiled. */
	unsigned int tiling_and_stride;
#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)

	struct {
		/*
		 * Protects the pages and their use. Do not use directly, but
		 * instead go through the pin/unpin interfaces.
		 */
		atomic_t pages_pin_count;
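
		/*
		 * Typical pin/unpin usage (sketch). Pinning guarantees the
		 * pages stay resident until the matching unpin:
		 *
		 *	err = i915_gem_object_pin_pages_unlocked(obj);
		 *	if (err)
		 *		return err;
		 *	// ... obj->mm.pages is now stable ...
		 *	i915_gem_object_unpin_pages(obj);
		 */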

		/**
		 * @shrink_pin: Prevents the pages from being made visible to
		 * the shrinker, while the shrink_pin is non-zero. Most users
		 * should pretty much never have to care about this, outside of
		 * some special use cases.
		 *
		 * By default most objects will start out as visible to the
		 * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
		 * backing pages are attached to the object, like in
		 * __i915_gem_object_set_pages(). They will then be removed from
		 * the shrinker list once the pages are released.
		 *
		 * The @shrink_pin is incremented by calling
		 * i915_gem_object_make_unshrinkable(), which will also remove
		 * the object from the shrinker list, if the pin count was zero.
		 *
		 * Callers will then typically call
		 * i915_gem_object_make_shrinkable() or
		 * i915_gem_object_make_purgeable() to decrement the pin count,
		 * and make the pages visible again.
		 */
		atomic_t shrink_pin;
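
		/*
		 * Sketch of the pairing described above, using the helpers
		 * from i915_gem_shrinker.c:
		 *
		 *	i915_gem_object_make_unshrinkable(obj);
		 *	// ... pages must not be reaped here ...
		 *	i915_gem_object_make_shrinkable(obj);
		 */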

		/**
		 * @ttm_shrinkable: True when the object is using shmem pages
		 * underneath. Protected by the object lock.
		 */
		bool ttm_shrinkable;

		/**
		 * @unknown_state: Indicate that the object is effectively
		 * borked. This is write-once and set if we somehow encounter a
		 * fatal error when moving/clearing the pages, and we are not
		 * able to fall back to memcpy/memset, like on small-BAR systems.
		 * The GPU should also be wedged (or in the process) at this
		 * point.
		 *
		 * Only valid to read this after acquiring the dma-resv lock and
		 * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled,
		 * or if we otherwise know that the moving fence has signalled,
		 * and we are certain the pages underneath are valid for
		 * immediate access (under normal operation), like just prior to
		 * binding the object or when setting up the CPU fault handler.
		 * See i915_gem_object_has_unknown_state().
		 */
		bool unknown_state;

		/**
		 * Priority list of potential placements for this object.
		 */
		struct intel_memory_region **placements;
		int n_placements;

		/**
		 * Memory region for this object.
		 */
		struct intel_memory_region *region;

		/**
		 * Memory manager resource allocated for this object. Only
		 * needed for the mock region.
		 */
		struct ttm_resource *res;

		/**
		 * Element within memory_region->objects or region->purgeable
		 * if the object is marked as DONTNEED. Access is protected by
		 * region->obj_lock.
		 */
		struct list_head region_link;

		struct i915_refct_sgt *rsgt;
		struct sg_table *pages;
		void *mapping;

		struct i915_page_sizes page_sizes;

		I915_SELFTEST_DECLARE(unsigned int page_mask);

		struct i915_gem_object_page_iter get_page;
		struct i915_gem_object_page_iter get_dma_page;

		/**
		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
		 * locked by i915->mm.obj_lock.
		 */
		struct list_head link;

		/**
		 * Advice: are the backing pages purgeable?
		 */
		unsigned int madv:2;

		/**
		 * This is set if the object has been written to since the
		 * pages were last acquired.
		 */
		bool dirty:1;

		u32 tlb[I915_MAX_GT];
	} mm;

	struct {
		struct i915_refct_sgt *cached_io_rsgt;
		struct i915_gem_object_page_iter get_io_page;
		struct drm_i915_gem_object *backup;
		bool created:1;
	} ttm;

	/*
	 * Record which PXP key instance this object was created against (if
	 * any), so we can use it to determine if the encryption is valid by
	 * comparing against the current key instance.
	 */
	u32 pxp_key_instance;

	/** Record of address bit 17 of each page at last unbind. */
	unsigned long *bit_17;

	union {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

		struct drm_mm_node *stolen;

		resource_size_t bo_offset;

		unsigned long scratch;
		u64 encode;

		void *gvt_info;
	};
};

#define intel_bo_to_drm_bo(bo) (&(bo)->base)
#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)

static inline struct drm_i915_gem_object *
to_intel_bo(struct drm_gem_object *gem)
{
	/* Assert that to_intel_bo(NULL) == NULL */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));

	return container_of(gem, struct drm_i915_gem_object, base);
}
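
/*
 * Typical use of the helpers above, e.g. from a &drm_gem_object_funcs
 * callback (sketch; example_free is hypothetical):
 *
 *	static void example_free(struct drm_gem_object *gem)
 *	{
 *		struct drm_i915_gem_object *obj = to_intel_bo(gem);
 *		struct drm_i915_private *i915 = intel_bo_to_i915(obj);
 *
 *		...
 *	}
 */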

#endif