1  // SPDX-License-Identifier: MIT
2  /*
3   * Copyright © 2021 Intel Corporation
4   */
5  
6  #include "xe_vm.h"
7  
8  #include <linux/dma-fence-array.h>
9  #include <linux/nospec.h>
10  
11  #include <drm/drm_exec.h>
12  #include <drm/drm_print.h>
13  #include <drm/ttm/ttm_execbuf_util.h>
14  #include <drm/ttm/ttm_tt.h>
15  #include <uapi/drm/xe_drm.h>
16  #include <linux/ascii85.h>
17  #include <linux/delay.h>
18  #include <linux/kthread.h>
19  #include <linux/mm.h>
20  #include <linux/swap.h>
21  
22  #include <generated/xe_wa_oob.h>
23  
24  #include "regs/xe_gtt_defs.h"
25  #include "xe_assert.h"
26  #include "xe_bo.h"
27  #include "xe_device.h"
28  #include "xe_drm_client.h"
29  #include "xe_exec_queue.h"
30  #include "xe_gt_pagefault.h"
31  #include "xe_gt_tlb_invalidation.h"
32  #include "xe_migrate.h"
33  #include "xe_pat.h"
34  #include "xe_pm.h"
35  #include "xe_preempt_fence.h"
36  #include "xe_pt.h"
37  #include "xe_res_cursor.h"
38  #include "xe_sync.h"
39  #include "xe_trace_bo.h"
40  #include "xe_wa.h"
41  #include "xe_hmm.h"
42  
43  static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
44  {
45  	return vm->gpuvm.r_obj;
46  }
47  
48  /**
49   * xe_vma_userptr_check_repin() - Advisory check for repin needed
50   * @uvma: The userptr vma
51   *
52   * Check if the userptr vma has been invalidated since last successful
53   * repin. The check is advisory only and the function can be called
54   * without the vm->userptr.notifier_lock held. There is no guarantee that the
55   * vma userptr will remain valid after a lockless check, so typically
56   * the call needs to be followed by a proper check under the notifier_lock.
57   *
58   * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
59   */
60  int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
61  {
62  	return mmu_interval_check_retry(&uvma->userptr.notifier,
63  					uvma->userptr.notifier_seq) ?
64  		-EAGAIN : 0;
65  }
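
/*
 * Illustrative sketch (not part of the driver) of how the advisory helper
 * above is typically paired with an authoritative check: do the lockless
 * xe_vma_userptr_check_repin() first, and only trust the result after
 * re-checking under vm->userptr.notifier_lock. Surrounding error handling
 * is elided and hypothetical.
 *
 *	if (xe_vma_userptr_check_repin(uvma))
 *		return -EAGAIN;		// advisory: repin likely needed
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);	// authoritative, VM-wide
 *	up_read(&vm->userptr.notifier_lock);
 */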
66  
67  int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
68  {
69  	struct xe_vma *vma = &uvma->vma;
70  	struct xe_vm *vm = xe_vma_vm(vma);
71  	struct xe_device *xe = vm->xe;
72  
73  	lockdep_assert_held(&vm->lock);
74  	xe_assert(xe, xe_vma_is_userptr(vma));
75  
76  	return xe_hmm_userptr_populate_range(uvma, false);
77  }
78  
79  static bool preempt_fences_waiting(struct xe_vm *vm)
80  {
81  	struct xe_exec_queue *q;
82  
83  	lockdep_assert_held(&vm->lock);
84  	xe_vm_assert_held(vm);
85  
86  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
87  		if (!q->lr.pfence ||
88  		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
89  			     &q->lr.pfence->flags)) {
90  			return true;
91  		}
92  	}
93  
94  	return false;
95  }
96  
97  static void free_preempt_fences(struct list_head *list)
98  {
99  	struct list_head *link, *next;
100  
101  	list_for_each_safe(link, next, list)
102  		xe_preempt_fence_free(to_preempt_fence_from_link(link));
103  }
104  
105  static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
106  				unsigned int *count)
107  {
108  	lockdep_assert_held(&vm->lock);
109  	xe_vm_assert_held(vm);
110  
111  	if (*count >= vm->preempt.num_exec_queues)
112  		return 0;
113  
114  	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
115  		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
116  
117  		if (IS_ERR(pfence))
118  			return PTR_ERR(pfence);
119  
120  		list_move_tail(xe_preempt_fence_link(pfence), list);
121  	}
122  
123  	return 0;
124  }
125  
126  static int wait_for_existing_preempt_fences(struct xe_vm *vm)
127  {
128  	struct xe_exec_queue *q;
129  
130  	xe_vm_assert_held(vm);
131  
132  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
133  		if (q->lr.pfence) {
134  			long timeout = dma_fence_wait(q->lr.pfence, false);
135  
136  			/* Only -ETIME on fence indicates VM needs to be killed */
137  			if (timeout < 0 || q->lr.pfence->error == -ETIME)
138  				return -ETIME;
139  
140  			dma_fence_put(q->lr.pfence);
141  			q->lr.pfence = NULL;
142  		}
143  	}
144  
145  	return 0;
146  }
147  
148  static bool xe_vm_is_idle(struct xe_vm *vm)
149  {
150  	struct xe_exec_queue *q;
151  
152  	xe_vm_assert_held(vm);
153  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
154  		if (!xe_exec_queue_is_idle(q))
155  			return false;
156  	}
157  
158  	return true;
159  }
160  
161  static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
162  {
163  	struct list_head *link;
164  	struct xe_exec_queue *q;
165  
166  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
167  		struct dma_fence *fence;
168  
169  		link = list->next;
170  		xe_assert(vm->xe, link != list);
171  
172  		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
173  					     q, q->lr.context,
174  					     ++q->lr.seqno);
175  		dma_fence_put(q->lr.pfence);
176  		q->lr.pfence = fence;
177  	}
178  }
179  
180  static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
181  {
182  	struct xe_exec_queue *q;
183  	int err;
184  
185  	xe_bo_assert_held(bo);
186  
187  	if (!vm->preempt.num_exec_queues)
188  		return 0;
189  
190  	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
191  	if (err)
192  		return err;
193  
194  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
195  		if (q->lr.pfence) {
196  			dma_resv_add_fence(bo->ttm.base.resv,
197  					   q->lr.pfence,
198  					   DMA_RESV_USAGE_BOOKKEEP);
199  		}
200  
201  	return 0;
202  }
203  
204  static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
205  						struct drm_exec *exec)
206  {
207  	struct xe_exec_queue *q;
208  
209  	lockdep_assert_held(&vm->lock);
210  	xe_vm_assert_held(vm);
211  
212  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
213  		q->ops->resume(q);
214  
215  		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
216  					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
217  	}
218  }
219  
220  int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
221  {
222  	struct drm_gpuvm_exec vm_exec = {
223  		.vm = &vm->gpuvm,
224  		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
225  		.num_fences = 1,
226  	};
227  	struct drm_exec *exec = &vm_exec.exec;
228  	struct dma_fence *pfence;
229  	int err;
230  	bool wait;
231  
232  	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
233  
234  	down_write(&vm->lock);
235  	err = drm_gpuvm_exec_lock(&vm_exec);
236  	if (err)
237  		goto out_up_write;
238  
239  	pfence = xe_preempt_fence_create(q, q->lr.context,
240  					 ++q->lr.seqno);
241  	if (!pfence) {
242  		err = -ENOMEM;
243  		goto out_fini;
244  	}
245  
246  	list_add(&q->lr.link, &vm->preempt.exec_queues);
247  	++vm->preempt.num_exec_queues;
248  	q->lr.pfence = pfence;
249  
250  	down_read(&vm->userptr.notifier_lock);
251  
252  	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
253  				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
254  
255  	/*
256  	 * Check to see if a preemption on the VM or a userptr invalidation is
257  	 * in flight; if so, trigger this preempt fence to sync state with the
258  	 * other preempt fences on the VM.
259  	 */
260  	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
261  	if (wait)
262  		dma_fence_enable_sw_signaling(pfence);
263  
264  	up_read(&vm->userptr.notifier_lock);
265  
266  out_fini:
267  	drm_exec_fini(exec);
268  out_up_write:
269  	up_write(&vm->lock);
270  
271  	return err;
272  }
273  
274  /**
275   * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
276   * @vm: The VM.
277   * @q: The exec_queue
278   *
279   * Note that this function might be called multiple times on the same queue.
280   */
281  void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
282  {
283  	if (!xe_vm_in_preempt_fence_mode(vm))
284  		return;
285  
286  	down_write(&vm->lock);
287  	if (!list_empty(&q->lr.link)) {
288  		list_del_init(&q->lr.link);
289  		--vm->preempt.num_exec_queues;
290  	}
291  	if (q->lr.pfence) {
292  		dma_fence_enable_sw_signaling(q->lr.pfence);
293  		dma_fence_put(q->lr.pfence);
294  		q->lr.pfence = NULL;
295  	}
296  	up_write(&vm->lock);
297  }
298  
299  /**
300   * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
301   * that need repinning.
302   * @vm: The VM.
303   *
304   * This function checks whether the VM has userptrs that need repinning,
305   * and provides a release-type barrier on the userptr.notifier_lock after
306   * checking.
307   *
308   * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
309   */
310  int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
311  {
312  	lockdep_assert_held_read(&vm->userptr.notifier_lock);
313  
314  	return (list_empty(&vm->userptr.repin_list) &&
315  		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
316  }
317  
318  #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
319  
320  /**
321   * xe_vm_kill() - VM Kill
322   * @vm: The VM.
323   * @unlocked: Flag indicating the VM's dma-resv is not held
324   *
325   * Kill the VM by setting the banned flag, indicating the VM is no longer available
326   * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
327   */
328  void xe_vm_kill(struct xe_vm *vm, bool unlocked)
329  {
330  	struct xe_exec_queue *q;
331  
332  	lockdep_assert_held(&vm->lock);
333  
334  	if (unlocked)
335  		xe_vm_lock(vm, false);
336  
337  	vm->flags |= XE_VM_FLAG_BANNED;
338  	trace_xe_vm_kill(vm);
339  
340  	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
341  		q->ops->kill(q);
342  
343  	if (unlocked)
344  		xe_vm_unlock(vm);
345  
346  	/* TODO: Inform user the VM is banned */
347  }
348  
349  /**
350   * xe_vm_validate_should_retry() - Whether to retry after a validate error.
351   * @exec: The drm_exec object used for locking before validation.
352   * @err: The error returned from ttm_bo_validate().
353   * @end: A ktime_t cookie that should be set to 0 before first use and
354   * that should be reused on subsequent calls.
355   *
356   * With multiple active VMs, under memory pressure, it is possible that
357   * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
358   * Until ttm properly handles locking in such scenarios, the best thing the
359   * driver can do is retry with a timeout. Check if that is necessary, and
360   * if so unlock the drm_exec's objects while keeping the ticket to prepare
361   * for a rerun.
362   *
363   * Return: true if a retry after drm_exec_init() is recommended;
364   * false otherwise.
365   */
366  bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
367  {
368  	ktime_t cur;
369  
370  	if (err != -ENOMEM)
371  		return false;
372  
373  	cur = ktime_get();
374  	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
375  	if (!ktime_before(cur, *end))
376  		return false;
377  
378  	msleep(20);
379  	return true;
380  }
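
/*
 * Illustrative caller pattern (a sketch, not verbatim from this file): keep
 * a ktime_t cookie initialized to zero across attempts and rerun the
 * drm_exec transaction for as long as a retry is recommended.
 * lock_and_validate() is a hypothetical helper standing in for the driver's
 * locking + ttm_bo_validate() sequence.
 *
 *	ktime_t end = 0;
 *	int err;
 *
 *	do {
 *		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *		err = lock_and_validate(&exec);
 *		drm_exec_fini(&exec);
 *	} while (err && xe_vm_validate_should_retry(&exec, err, &end));
 */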
381  
382  static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
383  {
384  	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
385  	struct drm_gpuva *gpuva;
386  	int ret;
387  
388  	lockdep_assert_held(&vm->lock);
389  	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
390  		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
391  			       &vm->rebind_list);
392  
393  	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
394  	if (ret)
395  		return ret;
396  
397  	vm_bo->evicted = false;
398  	return 0;
399  }
400  
401  /**
402   * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
403   * @vm: The vm for which we are rebinding.
404   * @exec: The struct drm_exec with the locked GEM objects.
405   * @num_fences: The number of fences to reserve for the operation, not
406   * including rebinds and validations.
407   *
408   * Validates all evicted gem objects and rebinds their vmas. Note that
409   * rebindings may cause evictions and hence the validation-rebind
410   * sequence is rerun until there are no more objects to validate.
411   *
412   * Return: 0 on success, negative error code on error. In particular,
413   * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
414   * the drm_exec transaction needs to be restarted.
415   */
416  int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
417  			  unsigned int num_fences)
418  {
419  	struct drm_gem_object *obj;
420  	unsigned long index;
421  	int ret;
422  
423  	do {
424  		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
425  		if (ret)
426  			return ret;
427  
428  		ret = xe_vm_rebind(vm, false);
429  		if (ret)
430  			return ret;
431  	} while (!list_empty(&vm->gpuvm.evict.list));
432  
433  	drm_exec_for_each_locked_object(exec, index, obj) {
434  		ret = dma_resv_reserve_fences(obj->resv, num_fences);
435  		if (ret)
436  			return ret;
437  	}
438  
439  	return 0;
440  }
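
/*
 * Note: the typical caller runs xe_vm_validate_rebind() from inside a
 * drm_exec_until_all_locked() loop, as xe_preempt_work_begin() does below;
 * an -EDEADLK return is then handled by drm_exec_retry_on_contention()
 * restarting the whole transaction.
 */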
441  
442  static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
443  				 bool *done)
444  {
445  	int err;
446  
447  	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
448  	if (err)
449  		return err;
450  
451  	if (xe_vm_is_idle(vm)) {
452  		vm->preempt.rebind_deactivated = true;
453  		*done = true;
454  		return 0;
455  	}
456  
457  	if (!preempt_fences_waiting(vm)) {
458  		*done = true;
459  		return 0;
460  	}
461  
462  	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
463  	if (err)
464  		return err;
465  
466  	err = wait_for_existing_preempt_fences(vm);
467  	if (err)
468  		return err;
469  
470  	/*
471  	 * Add validation and rebinding to the locking loop since both can
472  	 * cause evictions which may require blocking dma_resv locks.
473  	 * The fence reservation here is intended for the new preempt fences
474  	 * we attach at the end of the rebind work.
475  	 */
476  	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
477  }
478  
479  static void preempt_rebind_work_func(struct work_struct *w)
480  {
481  	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
482  	struct drm_exec exec;
483  	unsigned int fence_count = 0;
484  	LIST_HEAD(preempt_fences);
485  	ktime_t end = 0;
486  	int err = 0;
487  	long wait;
488  	int __maybe_unused tries = 0;
489  
490  	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
491  	trace_xe_vm_rebind_worker_enter(vm);
492  
493  	down_write(&vm->lock);
494  
495  	if (xe_vm_is_closed_or_banned(vm)) {
496  		up_write(&vm->lock);
497  		trace_xe_vm_rebind_worker_exit(vm);
498  		return;
499  	}
500  
501  retry:
502  	if (xe_vm_userptr_check_repin(vm)) {
503  		err = xe_vm_userptr_pin(vm);
504  		if (err)
505  			goto out_unlock_outer;
506  	}
507  
508  	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
509  
510  	drm_exec_until_all_locked(&exec) {
511  		bool done = false;
512  
513  		err = xe_preempt_work_begin(&exec, vm, &done);
514  		drm_exec_retry_on_contention(&exec);
515  		if (err || done) {
516  			drm_exec_fini(&exec);
517  			if (err && xe_vm_validate_should_retry(&exec, err, &end))
518  				err = -EAGAIN;
519  
520  			goto out_unlock_outer;
521  		}
522  	}
523  
524  	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
525  	if (err)
526  		goto out_unlock;
527  
528  	err = xe_vm_rebind(vm, true);
529  	if (err)
530  		goto out_unlock;
531  
532  	/* Wait on rebinds and munmap style VM unbinds */
533  	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
534  				     DMA_RESV_USAGE_KERNEL,
535  				     false, MAX_SCHEDULE_TIMEOUT);
536  	if (wait <= 0) {
537  		err = -ETIME;
538  		goto out_unlock;
539  	}
540  
541  #define retry_required(__tries, __vm) \
542  	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
543  	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
544  	__xe_vm_userptr_needs_repin(__vm))
545  
546  	down_read(&vm->userptr.notifier_lock);
547  	if (retry_required(tries, vm)) {
548  		up_read(&vm->userptr.notifier_lock);
549  		err = -EAGAIN;
550  		goto out_unlock;
551  	}
552  
553  #undef retry_required
554  
555  	spin_lock(&vm->xe->ttm.lru_lock);
556  	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
557  	spin_unlock(&vm->xe->ttm.lru_lock);
558  
559  	/* Point of no return. */
560  	arm_preempt_fences(vm, &preempt_fences);
561  	resume_and_reinstall_preempt_fences(vm, &exec);
562  	up_read(&vm->userptr.notifier_lock);
563  
564  out_unlock:
565  	drm_exec_fini(&exec);
566  out_unlock_outer:
567  	if (err == -EAGAIN) {
568  		trace_xe_vm_rebind_worker_retry(vm);
569  		goto retry;
570  	}
571  
572  	if (err) {
573  		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
574  		xe_vm_kill(vm, true);
575  	}
576  	up_write(&vm->lock);
577  
578  	free_preempt_fences(&preempt_fences);
579  
580  	trace_xe_vm_rebind_worker_exit(vm);
581  }
582  
583  static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
584  				   const struct mmu_notifier_range *range,
585  				   unsigned long cur_seq)
586  {
587  	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
588  	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
589  	struct xe_vma *vma = &uvma->vma;
590  	struct xe_vm *vm = xe_vma_vm(vma);
591  	struct dma_resv_iter cursor;
592  	struct dma_fence *fence;
593  	long err;
594  
595  	xe_assert(vm->xe, xe_vma_is_userptr(vma));
596  	trace_xe_vma_userptr_invalidate(vma);
597  
598  	if (!mmu_notifier_range_blockable(range))
599  		return false;
600  
601  	vm_dbg(&xe_vma_vm(vma)->xe->drm,
602  	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
603  		xe_vma_start(vma), xe_vma_size(vma));
604  
605  	down_write(&vm->userptr.notifier_lock);
606  	mmu_interval_set_seq(mni, cur_seq);
607  
608  	/* No need to stop gpu access if the userptr is not yet bound. */
609  	if (!userptr->initial_bind) {
610  		up_write(&vm->userptr.notifier_lock);
611  		return true;
612  	}
613  
614  	/*
615  	 * Tell exec and rebind worker they need to repin and rebind this
616  	 * userptr.
617  	 */
618  	if (!xe_vm_in_fault_mode(vm) &&
619  	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
620  		spin_lock(&vm->userptr.invalidated_lock);
621  		list_move_tail(&userptr->invalidate_link,
622  			       &vm->userptr.invalidated);
623  		spin_unlock(&vm->userptr.invalidated_lock);
624  	}
625  
626  	up_write(&vm->userptr.notifier_lock);
627  
628  	/*
629  	 * Preempt fences turn into schedule disables, pipeline these.
630  	 * Note that even in fault mode, we need to wait for binds and
631  	 * unbinds to complete, and those are attached as BOOKKEEP fences
632  	 * to the vm.
633  	 */
634  	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
635  			    DMA_RESV_USAGE_BOOKKEEP);
636  	dma_resv_for_each_fence_unlocked(&cursor, fence)
637  		dma_fence_enable_sw_signaling(fence);
638  	dma_resv_iter_end(&cursor);
639  
640  	err = dma_resv_wait_timeout(xe_vm_resv(vm),
641  				    DMA_RESV_USAGE_BOOKKEEP,
642  				    false, MAX_SCHEDULE_TIMEOUT);
643  	XE_WARN_ON(err <= 0);
644  
645  	if (xe_vm_in_fault_mode(vm)) {
646  		err = xe_vm_invalidate_vma(vma);
647  		XE_WARN_ON(err);
648  	}
649  
650  	trace_xe_vma_userptr_invalidate_complete(vma);
651  
652  	return true;
653  }
654  
655  static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
656  	.invalidate = vma_userptr_invalidate,
657  };
658  
659  int xe_vm_userptr_pin(struct xe_vm *vm)
660  {
661  	struct xe_userptr_vma *uvma, *next;
662  	int err = 0;
663  	LIST_HEAD(tmp_evict);
664  
665  	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
666  	lockdep_assert_held_write(&vm->lock);
667  
668  	/* Collect invalidated userptrs */
669  	spin_lock(&vm->userptr.invalidated_lock);
670  	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
671  				 userptr.invalidate_link) {
672  		list_del_init(&uvma->userptr.invalidate_link);
673  		list_move_tail(&uvma->userptr.repin_link,
674  			       &vm->userptr.repin_list);
675  	}
676  	spin_unlock(&vm->userptr.invalidated_lock);
677  
678  	/* Pin and move to temporary list */
679  	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
680  				 userptr.repin_link) {
681  		err = xe_vma_userptr_pin_pages(uvma);
682  		if (err == -EFAULT) {
683  			list_del_init(&uvma->userptr.repin_link);
684  
685  			/* Wait for pending binds */
686  			xe_vm_lock(vm, false);
687  			dma_resv_wait_timeout(xe_vm_resv(vm),
688  					      DMA_RESV_USAGE_BOOKKEEP,
689  					      false, MAX_SCHEDULE_TIMEOUT);
690  
691  			err = xe_vm_invalidate_vma(&uvma->vma);
692  			xe_vm_unlock(vm);
693  			if (err)
694  				return err;
695  		} else {
696  			if (err < 0)
697  				return err;
698  
699  			list_del_init(&uvma->userptr.repin_link);
700  			list_move_tail(&uvma->vma.combined_links.rebind,
701  				       &vm->rebind_list);
702  		}
703  	}
704  
705  	return 0;
706  }
707  
708  /**
709   * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
710   * that need repinning.
711   * @vm: The VM.
712   *
713   * This function does an advisory check for whether the VM has userptrs that
714   * need repinning.
715   *
716   * Return: 0 if there are no indications of userptrs needing repinning,
717   * -EAGAIN if there are.
718   */
719  int xe_vm_userptr_check_repin(struct xe_vm *vm)
720  {
721  	return (list_empty_careful(&vm->userptr.repin_list) &&
722  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
723  }
724  
725  static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
726  {
727  	int i;
728  
729  	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
730  		if (!vops->pt_update_ops[i].num_ops)
731  			continue;
732  
733  		vops->pt_update_ops[i].ops =
734  			kmalloc_array(vops->pt_update_ops[i].num_ops,
735  				      sizeof(*vops->pt_update_ops[i].ops),
736  				      GFP_KERNEL);
737  		if (!vops->pt_update_ops[i].ops)
738  			return array_of_binds ? -ENOBUFS : -ENOMEM;
739  	}
740  
741  	return 0;
742  }
743  
744  static void xe_vma_ops_fini(struct xe_vma_ops *vops)
745  {
746  	int i;
747  
748  	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
749  		kfree(vops->pt_update_ops[i].ops);
750  }
751  
752  static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
753  {
754  	int i;
755  
756  	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
757  		if (BIT(i) & tile_mask)
758  			++vops->pt_update_ops[i].num_ops;
759  }
760  
761  static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
762  				  u8 tile_mask)
763  {
764  	INIT_LIST_HEAD(&op->link);
765  	op->tile_mask = tile_mask;
766  	op->base.op = DRM_GPUVA_OP_MAP;
767  	op->base.map.va.addr = vma->gpuva.va.addr;
768  	op->base.map.va.range = vma->gpuva.va.range;
769  	op->base.map.gem.obj = vma->gpuva.gem.obj;
770  	op->base.map.gem.offset = vma->gpuva.gem.offset;
771  	op->map.vma = vma;
772  	op->map.immediate = true;
773  	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
774  	op->map.is_null = xe_vma_is_null(vma);
775  }
776  
777  static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
778  				u8 tile_mask)
779  {
780  	struct xe_vma_op *op;
781  
782  	op = kzalloc(sizeof(*op), GFP_KERNEL);
783  	if (!op)
784  		return -ENOMEM;
785  
786  	xe_vm_populate_rebind(op, vma, tile_mask);
787  	list_add_tail(&op->link, &vops->list);
788  	xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
789  
790  	return 0;
791  }
792  
793  static struct dma_fence *ops_execute(struct xe_vm *vm,
794  				     struct xe_vma_ops *vops);
795  static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
796  			    struct xe_exec_queue *q,
797  			    struct xe_sync_entry *syncs, u32 num_syncs);
798  
799  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
800  {
801  	struct dma_fence *fence;
802  	struct xe_vma *vma, *next;
803  	struct xe_vma_ops vops;
804  	struct xe_vma_op *op, *next_op;
805  	int err, i;
806  
807  	lockdep_assert_held(&vm->lock);
808  	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
809  	    list_empty(&vm->rebind_list))
810  		return 0;
811  
812  	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
813  	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
814  		vops.pt_update_ops[i].wait_vm_bookkeep = true;
815  
816  	xe_vm_assert_held(vm);
817  	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
818  		xe_assert(vm->xe, vma->tile_present);
819  
820  		if (rebind_worker)
821  			trace_xe_vma_rebind_worker(vma);
822  		else
823  			trace_xe_vma_rebind_exec(vma);
824  
825  		err = xe_vm_ops_add_rebind(&vops, vma,
826  					   vma->tile_present);
827  		if (err)
828  			goto free_ops;
829  	}
830  
831  	err = xe_vma_ops_alloc(&vops, false);
832  	if (err)
833  		goto free_ops;
834  
835  	fence = ops_execute(vm, &vops);
836  	if (IS_ERR(fence)) {
837  		err = PTR_ERR(fence);
838  	} else {
839  		dma_fence_put(fence);
840  		list_for_each_entry_safe(vma, next, &vm->rebind_list,
841  					 combined_links.rebind)
842  			list_del_init(&vma->combined_links.rebind);
843  	}
844  free_ops:
845  	list_for_each_entry_safe(op, next_op, &vops.list, link) {
846  		list_del(&op->link);
847  		kfree(op);
848  	}
849  	xe_vma_ops_fini(&vops);
850  
851  	return err;
852  }
853  
854  struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
855  {
856  	struct dma_fence *fence = NULL;
857  	struct xe_vma_ops vops;
858  	struct xe_vma_op *op, *next_op;
859  	struct xe_tile *tile;
860  	u8 id;
861  	int err;
862  
863  	lockdep_assert_held(&vm->lock);
864  	xe_vm_assert_held(vm);
865  	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
866  
867  	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
868  	for_each_tile(tile, vm->xe, id) {
869  		vops.pt_update_ops[id].wait_vm_bookkeep = true;
870  		vops.pt_update_ops[tile->id].q =
871  			xe_tile_migrate_exec_queue(tile);
872  	}
873  
874  	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
875  	if (err)
876  		return ERR_PTR(err);
877  
878  	err = xe_vma_ops_alloc(&vops, false);
879  	if (err) {
880  		fence = ERR_PTR(err);
881  		goto free_ops;
882  	}
883  
884  	fence = ops_execute(vm, &vops);
885  
886  free_ops:
887  	list_for_each_entry_safe(op, next_op, &vops.list, link) {
888  		list_del(&op->link);
889  		kfree(op);
890  	}
891  	xe_vma_ops_fini(&vops);
892  
893  	return fence;
894  }
895  
896  static void xe_vma_free(struct xe_vma *vma)
897  {
898  	if (xe_vma_is_userptr(vma))
899  		kfree(to_userptr_vma(vma));
900  	else
901  		kfree(vma);
902  }
903  
904  #define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
905  #define VMA_CREATE_FLAG_IS_NULL		BIT(1)
906  #define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
907  
908  static struct xe_vma *xe_vma_create(struct xe_vm *vm,
909  				    struct xe_bo *bo,
910  				    u64 bo_offset_or_userptr,
911  				    u64 start, u64 end,
912  				    u16 pat_index, unsigned int flags)
913  {
914  	struct xe_vma *vma;
915  	struct xe_tile *tile;
916  	u8 id;
917  	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
918  	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
919  	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
920  
921  	xe_assert(vm->xe, start < end);
922  	xe_assert(vm->xe, end < vm->size);
923  
924  	/*
925  	 * Allocate and ensure that the xe_vma_is_userptr() return
926  	 * matches what was allocated.
927  	 */
928  	if (!bo && !is_null) {
929  		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
930  
931  		if (!uvma)
932  			return ERR_PTR(-ENOMEM);
933  
934  		vma = &uvma->vma;
935  	} else {
936  		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
937  		if (!vma)
938  			return ERR_PTR(-ENOMEM);
939  
940  		if (is_null)
941  			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
942  		if (bo)
943  			vma->gpuva.gem.obj = &bo->ttm.base;
944  	}
945  
946  	INIT_LIST_HEAD(&vma->combined_links.rebind);
947  
948  	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
949  	vma->gpuva.vm = &vm->gpuvm;
950  	vma->gpuva.va.addr = start;
951  	vma->gpuva.va.range = end - start + 1;
952  	if (read_only)
953  		vma->gpuva.flags |= XE_VMA_READ_ONLY;
954  	if (dumpable)
955  		vma->gpuva.flags |= XE_VMA_DUMPABLE;
956  
957  	for_each_tile(tile, vm->xe, id)
958  		vma->tile_mask |= 0x1 << id;
959  
960  	if (vm->xe->info.has_atomic_enable_pte_bit)
961  		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
962  
963  	vma->pat_index = pat_index;
964  
965  	if (bo) {
966  		struct drm_gpuvm_bo *vm_bo;
967  
968  		xe_bo_assert_held(bo);
969  
970  		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
971  		if (IS_ERR(vm_bo)) {
972  			xe_vma_free(vma);
973  			return ERR_CAST(vm_bo);
974  		}
975  
976  		drm_gpuvm_bo_extobj_add(vm_bo);
977  		drm_gem_object_get(&bo->ttm.base);
978  		vma->gpuva.gem.offset = bo_offset_or_userptr;
979  		drm_gpuva_link(&vma->gpuva, vm_bo);
980  		drm_gpuvm_bo_put(vm_bo);
981  	} else /* userptr or null */ {
982  		if (!is_null) {
983  			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
984  			u64 size = end - start + 1;
985  			int err;
986  
987  			INIT_LIST_HEAD(&userptr->invalidate_link);
988  			INIT_LIST_HEAD(&userptr->repin_link);
989  			vma->gpuva.gem.offset = bo_offset_or_userptr;
990  
991  			err = mmu_interval_notifier_insert(&userptr->notifier,
992  							   current->mm,
993  							   xe_vma_userptr(vma), size,
994  							   &vma_userptr_notifier_ops);
995  			if (err) {
996  				xe_vma_free(vma);
997  				return ERR_PTR(err);
998  			}
999  
1000  			userptr->notifier_seq = LONG_MAX;
1001  		}
1002  
1003  		xe_vm_get(vm);
1004  	}
1005  
1006  	return vma;
1007  }
1008  
1009  static void xe_vma_destroy_late(struct xe_vma *vma)
1010  {
1011  	struct xe_vm *vm = xe_vma_vm(vma);
1012  
1013  	if (vma->ufence) {
1014  		xe_sync_ufence_put(vma->ufence);
1015  		vma->ufence = NULL;
1016  	}
1017  
1018  	if (xe_vma_is_userptr(vma)) {
1019  		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1020  		struct xe_userptr *userptr = &uvma->userptr;
1021  
1022  		if (userptr->sg)
1023  			xe_hmm_userptr_free_sg(uvma);
1024  
1025  		/*
1026  		 * Since userptr pages are not pinned, we can't remove
1027  		 * the notifier until we're sure the GPU is not accessing
1028  		 * them anymore
1029  		 */
1030  		mmu_interval_notifier_remove(&userptr->notifier);
1031  		xe_vm_put(vm);
1032  	} else if (xe_vma_is_null(vma)) {
1033  		xe_vm_put(vm);
1034  	} else {
1035  		xe_bo_put(xe_vma_bo(vma));
1036  	}
1037  
1038  	xe_vma_free(vma);
1039  }
1040  
1041  static void vma_destroy_work_func(struct work_struct *w)
1042  {
1043  	struct xe_vma *vma =
1044  		container_of(w, struct xe_vma, destroy_work);
1045  
1046  	xe_vma_destroy_late(vma);
1047  }
1048  
1049  static void vma_destroy_cb(struct dma_fence *fence,
1050  			   struct dma_fence_cb *cb)
1051  {
1052  	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1053  
1054  	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1055  	queue_work(system_unbound_wq, &vma->destroy_work);
1056  }
1057  
1058  static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1059  {
1060  	struct xe_vm *vm = xe_vma_vm(vma);
1061  
1062  	lockdep_assert_held_write(&vm->lock);
1063  	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1064  
1065  	if (xe_vma_is_userptr(vma)) {
1066  		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1067  
1068  		spin_lock(&vm->userptr.invalidated_lock);
1069  		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1070  		spin_unlock(&vm->userptr.invalidated_lock);
1071  	} else if (!xe_vma_is_null(vma)) {
1072  		xe_bo_assert_held(xe_vma_bo(vma));
1073  
1074  		drm_gpuva_unlink(&vma->gpuva);
1075  	}
1076  
1077  	xe_vm_assert_held(vm);
1078  	if (fence) {
1079  		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1080  						 vma_destroy_cb);
1081  
1082  		if (ret) {
1083  			XE_WARN_ON(ret != -ENOENT);
1084  			xe_vma_destroy_late(vma);
1085  		}
1086  	} else {
1087  		xe_vma_destroy_late(vma);
1088  	}
1089  }
1090  
1091  /**
1092   * xe_vm_lock_vma() - drm_exec utility to lock a vma
1093   * @exec: The drm_exec object we're currently locking for.
1094   * @vma: The vma for which we want to lock the vm resv and any attached
1095   * object's resv.
1096   *
1097   * Return: 0 on success, negative error code on error. In particular
1098   * may return -EDEADLK on WW transaction contention and -EINTR if
1099   * an interruptible wait is terminated by a signal.
1100   */
1101  int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1102  {
1103  	struct xe_vm *vm = xe_vma_vm(vma);
1104  	struct xe_bo *bo = xe_vma_bo(vma);
1105  	int err;
1106  
1107  	XE_WARN_ON(!vm);
1108  
1109  	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1110  	if (!err && bo && !bo->vm)
1111  		err = drm_exec_lock_obj(exec, &bo->ttm.base);
1112  
1113  	return err;
1114  }
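
/*
 * Usage note: xe_vm_lock_vma() is meant to be called from within a
 * drm_exec_until_all_locked() loop with drm_exec_retry_on_contention()
 * handling -EDEADLK, as xe_vma_destroy_unlocked() below demonstrates.
 */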
1115  
1116  static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1117  {
1118  	struct drm_exec exec;
1119  	int err;
1120  
1121  	drm_exec_init(&exec, 0, 0);
1122  	drm_exec_until_all_locked(&exec) {
1123  		err = xe_vm_lock_vma(&exec, vma);
1124  		drm_exec_retry_on_contention(&exec);
1125  		if (XE_WARN_ON(err))
1126  			break;
1127  	}
1128  
1129  	xe_vma_destroy(vma, NULL);
1130  
1131  	drm_exec_fini(&exec);
1132  }
1133  
1134  struct xe_vma *
1135  xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1136  {
1137  	struct drm_gpuva *gpuva;
1138  
1139  	lockdep_assert_held(&vm->lock);
1140  
1141  	if (xe_vm_is_closed_or_banned(vm))
1142  		return NULL;
1143  
1144  	xe_assert(vm->xe, start + range <= vm->size);
1145  
1146  	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1147  
1148  	return gpuva ? gpuva_to_vma(gpuva) : NULL;
1149  }
1150  
1151  static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1152  {
1153  	int err;
1154  
1155  	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1156  	lockdep_assert_held(&vm->lock);
1157  
1158  	mutex_lock(&vm->snap_mutex);
1159  	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1160  	mutex_unlock(&vm->snap_mutex);
1161  	XE_WARN_ON(err);	/* Shouldn't be possible */
1162  
1163  	return err;
1164  }
1165  
1166  static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1167  {
1168  	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1169  	lockdep_assert_held(&vm->lock);
1170  
1171  	mutex_lock(&vm->snap_mutex);
1172  	drm_gpuva_remove(&vma->gpuva);
1173  	mutex_unlock(&vm->snap_mutex);
1174  	if (vm->usm.last_fault_vma == vma)
1175  		vm->usm.last_fault_vma = NULL;
1176  }
1177  
1178  static struct drm_gpuva_op *xe_vm_op_alloc(void)
1179  {
1180  	struct xe_vma_op *op;
1181  
1182  	op = kzalloc(sizeof(*op), GFP_KERNEL);
1183  
1184  	if (unlikely(!op))
1185  		return NULL;
1186  
1187  	return &op->base;
1188  }
1189  
1190  static void xe_vm_free(struct drm_gpuvm *gpuvm);
1191  
1192  static const struct drm_gpuvm_ops gpuvm_ops = {
1193  	.op_alloc = xe_vm_op_alloc,
1194  	.vm_bo_validate = xe_gpuvm_validate,
1195  	.vm_free = xe_vm_free,
1196  };
1197  
1198  static u64 pde_encode_pat_index(u16 pat_index)
1199  {
1200  	u64 pte = 0;
1201  
1202  	if (pat_index & BIT(0))
1203  		pte |= XE_PPGTT_PTE_PAT0;
1204  
1205  	if (pat_index & BIT(1))
1206  		pte |= XE_PPGTT_PTE_PAT1;
1207  
1208  	return pte;
1209  }
1210  
1211  static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1212  {
1213  	u64 pte = 0;
1214  
1215  	if (pat_index & BIT(0))
1216  		pte |= XE_PPGTT_PTE_PAT0;
1217  
1218  	if (pat_index & BIT(1))
1219  		pte |= XE_PPGTT_PTE_PAT1;
1220  
1221  	if (pat_index & BIT(2)) {
1222  		if (pt_level)
1223  			pte |= XE_PPGTT_PDE_PDPE_PAT2;
1224  		else
1225  			pte |= XE_PPGTT_PTE_PAT2;
1226  	}
1227  
1228  	if (pat_index & BIT(3))
1229  		pte |= XELPG_PPGTT_PTE_PAT3;
1230  
1231  	if (pat_index & (BIT(4)))
1232  		pte |= XE2_PPGTT_PTE_PAT4;
1233  
1234  	return pte;
1235  }
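
/*
 * Worked example (illustrative): pat_index 0b110 (6) sets PAT1 and PAT2,
 * where the PAT2 bit is encoded as XE_PPGTT_PDE_PDPE_PAT2 for non-leaf
 * levels (pt_level > 0) and as XE_PPGTT_PTE_PAT2 for leaf PTEs.
 */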
1236  
1237  static u64 pte_encode_ps(u32 pt_level)
1238  {
1239  	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1240  
1241  	if (pt_level == 1)
1242  		return XE_PDE_PS_2M;
1243  	else if (pt_level == 2)
1244  		return XE_PDPE_PS_1G;
1245  
1246  	return 0;
1247  }
1248  
1249  static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1250  			      const u16 pat_index)
1251  {
1252  	u64 pde;
1253  
1254  	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1255  	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1256  	pde |= pde_encode_pat_index(pat_index);
1257  
1258  	return pde;
1259  }
1260  
1261  static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1262  			      u16 pat_index, u32 pt_level)
1263  {
1264  	u64 pte;
1265  
1266  	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1267  	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1268  	pte |= pte_encode_pat_index(pat_index, pt_level);
1269  	pte |= pte_encode_ps(pt_level);
1270  
1271  	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1272  		pte |= XE_PPGTT_PTE_DM;
1273  
1274  	return pte;
1275  }
1276  
1277  static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1278  			       u16 pat_index, u32 pt_level)
1279  {
1280  	pte |= XE_PAGE_PRESENT;
1281  
1282  	if (likely(!xe_vma_read_only(vma)))
1283  		pte |= XE_PAGE_RW;
1284  
1285  	pte |= pte_encode_pat_index(pat_index, pt_level);
1286  	pte |= pte_encode_ps(pt_level);
1287  
1288  	if (unlikely(xe_vma_is_null(vma)))
1289  		pte |= XE_PTE_NULL;
1290  
1291  	return pte;
1292  }
1293  
1294  static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1295  				u16 pat_index,
1296  				u32 pt_level, bool devmem, u64 flags)
1297  {
1298  	u64 pte;
1299  
1300  	/* Avoid passing random bits directly as flags */
1301  	xe_assert(xe, !(flags & ~XE_PTE_PS64));
1302  
1303  	pte = addr;
1304  	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1305  	pte |= pte_encode_pat_index(pat_index, pt_level);
1306  	pte |= pte_encode_ps(pt_level);
1307  
1308  	if (devmem)
1309  		pte |= XE_PPGTT_PTE_DM;
1310  
1311  	pte |= flags;
1312  
1313  	return pte;
1314  }
1315  
1316  static const struct xe_pt_ops xelp_pt_ops = {
1317  	.pte_encode_bo = xelp_pte_encode_bo,
1318  	.pte_encode_vma = xelp_pte_encode_vma,
1319  	.pte_encode_addr = xelp_pte_encode_addr,
1320  	.pde_encode_bo = xelp_pde_encode_bo,
1321  };
1322  
1323  static void vm_destroy_work_func(struct work_struct *w);
1324  
1325  /**
1326   * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1327   * given tile and vm.
1328   * @xe: xe device.
1329   * @tile: tile to set up for.
1330   * @vm: vm to set up for.
1331   *
1332   * Sets up a pagetable tree with one page-table per level and a single
1333   * leaf PTE. All pagetable entries point to the single page-table or,
1334   * for MAX_HUGEPTE_LEVEL, a NULL huge PTE, so reads return 0 and
1335   * writes become NOPs.
1336   *
1337   * Return: 0 on success, negative error code on error.
1338   */
1339  static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1340  				struct xe_vm *vm)
1341  {
1342  	u8 id = tile->id;
1343  	int i;
1344  
1345  	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1346  		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1347  		if (IS_ERR(vm->scratch_pt[id][i]))
1348  			return PTR_ERR(vm->scratch_pt[id][i]);
1349  
1350  		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1351  	}
1352  
1353  	return 0;
1354  }
1355  
1356  static void xe_vm_free_scratch(struct xe_vm *vm)
1357  {
1358  	struct xe_tile *tile;
1359  	u8 id;
1360  
1361  	if (!xe_vm_has_scratch(vm))
1362  		return;
1363  
1364  	for_each_tile(tile, vm->xe, id) {
1365  		u32 i;
1366  
1367  		if (!vm->pt_root[id])
1368  			continue;
1369  
1370  		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1371  			if (vm->scratch_pt[id][i])
1372  				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1373  	}
1374  }
1375  
1376  struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1377  {
1378  	struct drm_gem_object *vm_resv_obj;
1379  	struct xe_vm *vm;
1380  	int err, number_tiles = 0;
1381  	struct xe_tile *tile;
1382  	u8 id;
1383  
1384  	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1385  	if (!vm)
1386  		return ERR_PTR(-ENOMEM);
1387  
1388  	vm->xe = xe;
1389  
1390  	vm->size = 1ull << xe->info.va_bits;
1391  
1392  	vm->flags = flags;
1393  
1394  	init_rwsem(&vm->lock);
1395  	mutex_init(&vm->snap_mutex);
1396  
1397  	INIT_LIST_HEAD(&vm->rebind_list);
1398  
1399  	INIT_LIST_HEAD(&vm->userptr.repin_list);
1400  	INIT_LIST_HEAD(&vm->userptr.invalidated);
1401  	init_rwsem(&vm->userptr.notifier_lock);
1402  	spin_lock_init(&vm->userptr.invalidated_lock);
1403  
1404  	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1405  
1406  	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1407  
1408  	INIT_LIST_HEAD(&vm->preempt.exec_queues);
1409  	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
1410  
1411  	for_each_tile(tile, xe, id)
1412  		xe_range_fence_tree_init(&vm->rftree[id]);
1413  
1414  	vm->pt_ops = &xelp_pt_ops;
1415  
1416  	/*
1417  	 * Long-running workloads are not protected by the scheduler references.
1418  	 * By design, run_job for long-running workloads returns NULL and the
1419  	 * scheduler drops all the references to it, hence protecting the VM
1420  	 * for this case is necessary.
1421  	 */
1422  	if (flags & XE_VM_FLAG_LR_MODE)
1423  		xe_pm_runtime_get_noresume(xe);
1424  
1425  	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1426  	if (!vm_resv_obj) {
1427  		err = -ENOMEM;
1428  		goto err_no_resv;
1429  	}
1430  
1431  	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1432  		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1433  
1434  	drm_gem_object_put(vm_resv_obj);
1435  
1436  	err = xe_vm_lock(vm, true);
1437  	if (err)
1438  		goto err_close;
1439  
1440  	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1441  		vm->flags |= XE_VM_FLAG_64K;
1442  
1443  	for_each_tile(tile, xe, id) {
1444  		if (flags & XE_VM_FLAG_MIGRATION &&
1445  		    tile->id != XE_VM_FLAG_TILE_ID(flags))
1446  			continue;
1447  
1448  		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1449  		if (IS_ERR(vm->pt_root[id])) {
1450  			err = PTR_ERR(vm->pt_root[id]);
1451  			vm->pt_root[id] = NULL;
1452  			goto err_unlock_close;
1453  		}
1454  	}
1455  
1456  	if (xe_vm_has_scratch(vm)) {
1457  		for_each_tile(tile, xe, id) {
1458  			if (!vm->pt_root[id])
1459  				continue;
1460  
1461  			err = xe_vm_create_scratch(xe, tile, vm);
1462  			if (err)
1463  				goto err_unlock_close;
1464  		}
1465  		vm->batch_invalidate_tlb = true;
1466  	}
1467  
1468  	if (vm->flags & XE_VM_FLAG_LR_MODE) {
1469  		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1470  		vm->batch_invalidate_tlb = false;
1471  	}
1472  
1473  	/* Fill pt_root after allocating scratch tables */
1474  	for_each_tile(tile, xe, id) {
1475  		if (!vm->pt_root[id])
1476  			continue;
1477  
1478  		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1479  	}
1480  	xe_vm_unlock(vm);
1481  
1482  	/* Kernel migration VM shouldn't have a circular loop. */
1483  	if (!(flags & XE_VM_FLAG_MIGRATION)) {
1484  		for_each_tile(tile, xe, id) {
1485  			struct xe_exec_queue *q;
1486  			u32 create_flags = EXEC_QUEUE_FLAG_VM;
1487  
1488  			if (!vm->pt_root[id])
1489  				continue;
1490  
1491  			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1492  			if (IS_ERR(q)) {
1493  				err = PTR_ERR(q);
1494  				goto err_close;
1495  			}
1496  			vm->q[id] = q;
1497  			number_tiles++;
1498  		}
1499  	}
1500  
1501  	if (number_tiles > 1)
1502  		vm->composite_fence_ctx = dma_fence_context_alloc(1);
1503  
1504  	trace_xe_vm_create(vm);
1505  
1506  	return vm;
1507  
1508  err_unlock_close:
1509  	xe_vm_unlock(vm);
1510  err_close:
1511  	xe_vm_close_and_put(vm);
1512  	return ERR_PTR(err);
1513  
1514  err_no_resv:
1515  	mutex_destroy(&vm->snap_mutex);
1516  	for_each_tile(tile, xe, id)
1517  		xe_range_fence_tree_fini(&vm->rftree[id]);
1518  	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1519  	kfree(vm);
1520  	if (flags & XE_VM_FLAG_LR_MODE)
1521  		xe_pm_runtime_put(xe);
1522  	return ERR_PTR(err);
1523  }
1524  
1525  static void xe_vm_close(struct xe_vm *vm)
1526  {
1527  	down_write(&vm->lock);
1528  	vm->size = 0;
1529  	up_write(&vm->lock);
1530  }
1531  
1532  void xe_vm_close_and_put(struct xe_vm *vm)
1533  {
1534  	LIST_HEAD(contested);
1535  	struct xe_device *xe = vm->xe;
1536  	struct xe_tile *tile;
1537  	struct xe_vma *vma, *next_vma;
1538  	struct drm_gpuva *gpuva, *next;
1539  	u8 id;
1540  
1541  	xe_assert(xe, !vm->preempt.num_exec_queues);
1542  
1543  	xe_vm_close(vm);
1544  	if (xe_vm_in_preempt_fence_mode(vm))
1545  		flush_work(&vm->preempt.rebind_work);
1546  
1547  	down_write(&vm->lock);
1548  	for_each_tile(tile, xe, id) {
1549  		if (vm->q[id])
1550  			xe_exec_queue_last_fence_put(vm->q[id], vm);
1551  	}
1552  	up_write(&vm->lock);
1553  
1554  	for_each_tile(tile, xe, id) {
1555  		if (vm->q[id]) {
1556  			xe_exec_queue_kill(vm->q[id]);
1557  			xe_exec_queue_put(vm->q[id]);
1558  			vm->q[id] = NULL;
1559  		}
1560  	}
1561  
1562  	down_write(&vm->lock);
1563  	xe_vm_lock(vm, false);
1564  	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1565  		vma = gpuva_to_vma(gpuva);
1566  
1567  		if (xe_vma_has_no_bo(vma)) {
1568  			down_read(&vm->userptr.notifier_lock);
1569  			vma->gpuva.flags |= XE_VMA_DESTROYED;
1570  			up_read(&vm->userptr.notifier_lock);
1571  		}
1572  
1573  		xe_vm_remove_vma(vm, vma);
1574  
1575  		/* easy case, remove from VMA? */
1576  		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1577  			list_del_init(&vma->combined_links.rebind);
1578  			xe_vma_destroy(vma, NULL);
1579  			continue;
1580  		}
1581  
1582  		list_move_tail(&vma->combined_links.destroy, &contested);
1583  		vma->gpuva.flags |= XE_VMA_DESTROYED;
1584  	}
1585  
1586  	/*
1587  	 * All vm operations will add shared fences to resv.
1588  	 * The only exception is eviction for a shared object,
1589  	 * but even so, the unbind when evicted would still
1590  	 * install a fence to resv. Hence it's safe to
1591  	 * destroy the pagetables immediately.
1592  	 */
1593  	xe_vm_free_scratch(vm);
1594  
1595  	for_each_tile(tile, xe, id) {
1596  		if (vm->pt_root[id]) {
1597  			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1598  			vm->pt_root[id] = NULL;
1599  		}
1600  	}
1601  	xe_vm_unlock(vm);
1602  
1603  	/*
1604  	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1605  	 * Since we hold a refcount to the bo, we can remove and free
1606  	 * the members safely without locking.
1607  	 */
1608  	list_for_each_entry_safe(vma, next_vma, &contested,
1609  				 combined_links.destroy) {
1610  		list_del_init(&vma->combined_links.destroy);
1611  		xe_vma_destroy_unlocked(vma);
1612  	}
1613  
1614  	up_write(&vm->lock);
1615  
1616  	down_write(&xe->usm.lock);
1617  	if (vm->usm.asid) {
1618  		void *lookup;
1619  
1620  		xe_assert(xe, xe->info.has_asid);
1621  		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1622  
1623  		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1624  		xe_assert(xe, lookup == vm);
1625  	}
1626  	up_write(&xe->usm.lock);
1627  
1628  	for_each_tile(tile, xe, id)
1629  		xe_range_fence_tree_fini(&vm->rftree[id]);
1630  
1631  	xe_vm_put(vm);
1632  }
1633  
1634  static void vm_destroy_work_func(struct work_struct *w)
1635  {
1636  	struct xe_vm *vm =
1637  		container_of(w, struct xe_vm, destroy_work);
1638  	struct xe_device *xe = vm->xe;
1639  	struct xe_tile *tile;
1640  	u8 id;
1641  
1642  	/* xe_vm_close_and_put was not called? */
1643  	xe_assert(xe, !vm->size);
1644  
1645  	if (xe_vm_in_preempt_fence_mode(vm))
1646  		flush_work(&vm->preempt.rebind_work);
1647  
1648  	mutex_destroy(&vm->snap_mutex);
1649  
1650  	if (vm->flags & XE_VM_FLAG_LR_MODE)
1651  		xe_pm_runtime_put(xe);
1652  
1653  	for_each_tile(tile, xe, id)
1654  		XE_WARN_ON(vm->pt_root[id]);
1655  
1656  	trace_xe_vm_free(vm);
1657  
1658  	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1659  
1660  	if (vm->xef)
1661  		xe_file_put(vm->xef);
1662  
1663  	kfree(vm);
1664  }
1665  
1666  static void xe_vm_free(struct drm_gpuvm *gpuvm)
1667  {
1668  	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1669  
1670  	/* To destroy the VM we need to be able to sleep */
1671  	queue_work(system_unbound_wq, &vm->destroy_work);
1672  }
1673  
1674  struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1675  {
1676  	struct xe_vm *vm;
1677  
1678  	mutex_lock(&xef->vm.lock);
1679  	vm = xa_load(&xef->vm.xa, id);
1680  	if (vm)
1681  		xe_vm_get(vm);
1682  	mutex_unlock(&xef->vm.lock);
1683  
1684  	return vm;
1685  }
1686  
1687  u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1688  {
1689  	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1690  					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1691  }
1692  
1693  static struct xe_exec_queue *
1694  to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1695  {
1696  	return q ? q : vm->q[0];
1697  }
1698  
1699  static struct xe_user_fence *
1700  find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1701  {
1702  	unsigned int i;
1703  
1704  	for (i = 0; i < num_syncs; i++) {
1705  		struct xe_sync_entry *e = &syncs[i];
1706  
1707  		if (xe_sync_is_ufence(e))
1708  			return xe_sync_ufence_get(e);
1709  	}
1710  
1711  	return NULL;
1712  }
1713  
1714  #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1715  				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1716  				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1717  
1718  int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1719  		       struct drm_file *file)
1720  {
1721  	struct xe_device *xe = to_xe_device(dev);
1722  	struct xe_file *xef = to_xe_file(file);
1723  	struct drm_xe_vm_create *args = data;
1724  	struct xe_tile *tile;
1725  	struct xe_vm *vm;
1726  	u32 id, asid;
1727  	int err;
1728  	u32 flags = 0;
1729  
1730  	if (XE_IOCTL_DBG(xe, args->extensions))
1731  		return -EINVAL;
1732  
1733  	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1734  		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1735  
1736  	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1737  			 !xe->info.has_usm))
1738  		return -EINVAL;
1739  
1740  	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1741  		return -EINVAL;
1742  
1743  	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1744  		return -EINVAL;
1745  
1746  	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1747  			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1748  		return -EINVAL;
1749  
1750  	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1751  			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1752  		return -EINVAL;
1753  
1754  	if (XE_IOCTL_DBG(xe, args->extensions))
1755  		return -EINVAL;
1756  
1757  	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1758  		flags |= XE_VM_FLAG_SCRATCH_PAGE;
1759  	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1760  		flags |= XE_VM_FLAG_LR_MODE;
1761  	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1762  		flags |= XE_VM_FLAG_FAULT_MODE;
1763  
1764  	vm = xe_vm_create(xe, flags);
1765  	if (IS_ERR(vm))
1766  		return PTR_ERR(vm);
1767  
1768  	if (xe->info.has_asid) {
1769  		down_write(&xe->usm.lock);
1770  		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1771  				      XA_LIMIT(1, XE_MAX_ASID - 1),
1772  				      &xe->usm.next_asid, GFP_KERNEL);
1773  		up_write(&xe->usm.lock);
1774  		if (err < 0)
1775  			goto err_close_and_put;
1776  
1777  		vm->usm.asid = asid;
1778  	}
1779  
1780  	vm->xef = xe_file_get(xef);
1781  
1782  	/* Record BO memory for VM pagetable created against client */
1783  	for_each_tile(tile, xe, id)
1784  		if (vm->pt_root[id])
1785  			xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1786  
1787  #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1788  	/* Warning: Security issue - never enable by default */
1789  	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1790  #endif
1791  
1792  	/* user id alloc must always be last in ioctl to prevent UAF */
1793  	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1794  	if (err)
1795  		goto err_close_and_put;
1796  
1797  	args->vm_id = id;
1798  
1799  	return 0;
1800  
1801  err_close_and_put:
1802  	xe_vm_close_and_put(vm);
1803  
1804  	return err;
1805  }
1806  
1807  int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1808  			struct drm_file *file)
1809  {
1810  	struct xe_device *xe = to_xe_device(dev);
1811  	struct xe_file *xef = to_xe_file(file);
1812  	struct drm_xe_vm_destroy *args = data;
1813  	struct xe_vm *vm;
1814  	int err = 0;
1815  
1816  	if (XE_IOCTL_DBG(xe, args->pad) ||
1817  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1818  		return -EINVAL;
1819  
1820  	mutex_lock(&xef->vm.lock);
1821  	vm = xa_load(&xef->vm.xa, args->vm_id);
1822  	if (XE_IOCTL_DBG(xe, !vm))
1823  		err = -ENOENT;
1824  	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1825  		err = -EBUSY;
1826  	else
1827  		xa_erase(&xef->vm.xa, args->vm_id);
1828  	mutex_unlock(&xef->vm.lock);
1829  
1830  	if (!err)
1831  		xe_vm_close_and_put(vm);
1832  
1833  	return err;
1834  }
1835  
1836  static const u32 region_to_mem_type[] = {
1837  	XE_PL_TT,
1838  	XE_PL_VRAM0,
1839  	XE_PL_VRAM1,
1840  };
1841  
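/*
 * Mark the VMA as destroyed under the userptr notifier lock and, if it was
 * already committed, remove it from the VM's GPUVA tree.
 */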
1842  static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1843  			     bool post_commit)
1844  {
1845  	down_read(&vm->userptr.notifier_lock);
1846  	vma->gpuva.flags |= XE_VMA_DESTROYED;
1847  	up_read(&vm->userptr.notifier_lock);
1848  	if (post_commit)
1849  		xe_vm_remove_vma(vm, vma);
1850  }
1851  
1852  #undef ULL
1853  #define ULL	unsigned long long
1854  
1855  #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1856  static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1857  {
1858  	struct xe_vma *vma;
1859  
1860  	switch (op->op) {
1861  	case DRM_GPUVA_OP_MAP:
1862  		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1863  		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
1864  		break;
1865  	case DRM_GPUVA_OP_REMAP:
1866  		vma = gpuva_to_vma(op->remap.unmap->va);
1867  		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1868  		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1869  		       op->remap.unmap->keep ? 1 : 0);
1870  		if (op->remap.prev)
1871  			vm_dbg(&xe->drm,
1872  			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1873  			       (ULL)op->remap.prev->va.addr,
1874  			       (ULL)op->remap.prev->va.range);
1875  		if (op->remap.next)
1876  			vm_dbg(&xe->drm,
1877  			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
1878  			       (ULL)op->remap.next->va.addr,
1879  			       (ULL)op->remap.next->va.range);
1880  		break;
1881  	case DRM_GPUVA_OP_UNMAP:
1882  		vma = gpuva_to_vma(op->unmap.va);
1883  		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1884  		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1885  		       op->unmap.keep ? 1 : 0);
1886  		break;
1887  	case DRM_GPUVA_OP_PREFETCH:
1888  		vma = gpuva_to_vma(op->prefetch.va);
1889  		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
1890  		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
1891  		break;
1892  	default:
1893  		drm_warn(&xe->drm, "NOT POSSIBLE");
1894  	}
1895  }
1896  #else
1897  static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1898  {
1899  }
1900  #endif
1901  
1902  /*
1903   * Create the operations list from the IOCTL arguments and set up operation
1904   * fields so the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
1905   */
1906  static struct drm_gpuva_ops *
1907  vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
1908  			 u64 bo_offset_or_userptr, u64 addr, u64 range,
1909  			 u32 operation, u32 flags,
1910  			 u32 prefetch_region, u16 pat_index)
1911  {
1912  	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
1913  	struct drm_gpuva_ops *ops;
1914  	struct drm_gpuva_op *__op;
1915  	struct drm_gpuvm_bo *vm_bo;
1916  	int err;
1917  
1918  	lockdep_assert_held_write(&vm->lock);
1919  
1920  	vm_dbg(&vm->xe->drm,
1921  	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
1922  	       operation, (ULL)addr, (ULL)range,
1923  	       (ULL)bo_offset_or_userptr);
1924  
1925  	switch (operation) {
1926  	case DRM_XE_VM_BIND_OP_MAP:
1927  	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
1928  		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
1929  						  obj, bo_offset_or_userptr);
1930  		break;
1931  	case DRM_XE_VM_BIND_OP_UNMAP:
1932  		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
1933  		break;
1934  	case DRM_XE_VM_BIND_OP_PREFETCH:
1935  		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
1936  		break;
1937  	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
1938  		xe_assert(vm->xe, bo);
1939  
1940  		err = xe_bo_lock(bo, true);
1941  		if (err)
1942  			return ERR_PTR(err);
1943  
1944  		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
1945  		if (IS_ERR(vm_bo)) {
1946  			xe_bo_unlock(bo);
1947  			return ERR_CAST(vm_bo);
1948  		}
1949  
1950  		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
1951  		drm_gpuvm_bo_put(vm_bo);
1952  		xe_bo_unlock(bo);
1953  		break;
1954  	default:
1955  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
1956  		ops = ERR_PTR(-EINVAL);
1957  	}
1958  	if (IS_ERR(ops))
1959  		return ops;
1960  
1961  	drm_gpuva_for_each_op(__op, ops) {
1962  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
1963  
1964  		if (__op->op == DRM_GPUVA_OP_MAP) {
1965  			op->map.immediate =
1966  				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
1967  			op->map.read_only =
1968  				flags & DRM_XE_VM_BIND_FLAG_READONLY;
1969  			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
1970  			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
1971  			op->map.pat_index = pat_index;
1972  		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
1973  			op->prefetch.region = prefetch_region;
1974  		}
1975  
1976  		print_op(vm->xe, __op);
1977  	}
1978  
1979  	return ops;
1980  }
1981  
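/*
 * Allocate a VMA for a MAP op: lock the BO (and the VM's dma_resv for an
 * external BO) via drm_exec, create the VMA, then pin pages for userptr VMAs
 * or add preempt fences for external BOs. On failure the partially
 * constructed VMA is destroyed and an ERR_PTR is returned.
 */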
1982  static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
1983  			      u16 pat_index, unsigned int flags)
1984  {
1985  	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
1986  	struct drm_exec exec;
1987  	struct xe_vma *vma;
1988  	int err = 0;
1989  
1990  	lockdep_assert_held_write(&vm->lock);
1991  
1992  	if (bo) {
1993  		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
1994  		drm_exec_until_all_locked(&exec) {
1995  			err = 0;
1996  			if (!bo->vm) {
1997  				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
1998  				drm_exec_retry_on_contention(&exec);
1999  			}
2000  			if (!err) {
2001  				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2002  				drm_exec_retry_on_contention(&exec);
2003  			}
2004  			if (err) {
2005  				drm_exec_fini(&exec);
2006  				return ERR_PTR(err);
2007  			}
2008  		}
2009  	}
2010  	vma = xe_vma_create(vm, bo, op->gem.offset,
2011  			    op->va.addr, op->va.addr +
2012  			    op->va.range - 1, pat_index, flags);
2013  	if (IS_ERR(vma))
2014  		goto err_unlock;
2015  
2016  	if (xe_vma_is_userptr(vma))
2017  		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2018  	else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2019  		err = add_preempt_fences(vm, bo);
2020  
2021  err_unlock:
2022  	if (bo)
2023  		drm_exec_fini(&exec);
2024  
2025  	if (err) {
2026  		prep_vma_destroy(vm, vma, false);
2027  		xe_vma_destroy_unlocked(vma);
2028  		vma = ERR_PTR(err);
2029  	}
2030  
2031  	return vma;
2032  }
2033  
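/* Largest page size the VMA may be mapped with, per its XE_VMA_PTE_* flags. */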
2034  static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2035  {
2036  	if (vma->gpuva.flags & XE_VMA_PTE_1G)
2037  		return SZ_1G;
2038  	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2039  		return SZ_2M;
2040  	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2041  		return SZ_64K;
2042  	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2043  		return SZ_4K;
2044  
2045  	return SZ_1G;	/* Uninitialized, use max size */
2046  }
2047  
2048  static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2049  {
2050  	switch (size) {
2051  	case SZ_1G:
2052  		vma->gpuva.flags |= XE_VMA_PTE_1G;
2053  		break;
2054  	case SZ_2M:
2055  		vma->gpuva.flags |= XE_VMA_PTE_2M;
2056  		break;
2057  	case SZ_64K:
2058  		vma->gpuva.flags |= XE_VMA_PTE_64K;
2059  		break;
2060  	case SZ_4K:
2061  		vma->gpuva.flags |= XE_VMA_PTE_4K;
2062  		break;
2063  	}
2064  }
2065  
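/*
 * Commit a parsed op to the VM's GPUVA tree: insert new VMAs for MAP and
 * REMAP, mark unmapped VMAs as destroyed, and record the committed state in
 * op->flags so xe_vma_op_unwind() can undo exactly what was done.
 */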
2066  static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2067  {
2068  	int err = 0;
2069  
2070  	lockdep_assert_held_write(&vm->lock);
2071  
2072  	switch (op->base.op) {
2073  	case DRM_GPUVA_OP_MAP:
2074  		err |= xe_vm_insert_vma(vm, op->map.vma);
2075  		if (!err)
2076  			op->flags |= XE_VMA_OP_COMMITTED;
2077  		break;
2078  	case DRM_GPUVA_OP_REMAP:
2079  	{
2080  		u8 tile_present =
2081  			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2082  
2083  		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2084  				 true);
2085  		op->flags |= XE_VMA_OP_COMMITTED;
2086  
2087  		if (op->remap.prev) {
2088  			err |= xe_vm_insert_vma(vm, op->remap.prev);
2089  			if (!err)
2090  				op->flags |= XE_VMA_OP_PREV_COMMITTED;
2091  			if (!err && op->remap.skip_prev) {
2092  				op->remap.prev->tile_present =
2093  					tile_present;
2094  				op->remap.prev = NULL;
2095  			}
2096  		}
2097  		if (op->remap.next) {
2098  			err |= xe_vm_insert_vma(vm, op->remap.next);
2099  			if (!err)
2100  				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2101  			if (!err && op->remap.skip_next) {
2102  				op->remap.next->tile_present =
2103  					tile_present;
2104  				op->remap.next = NULL;
2105  			}
2106  		}
2107  
2108  		/* Adjust for partial unbind after removing VMA from VM */
2109  		if (!err) {
2110  			op->base.remap.unmap->va->va.addr = op->remap.start;
2111  			op->base.remap.unmap->va->va.range = op->remap.range;
2112  		}
2113  		break;
2114  	}
2115  	case DRM_GPUVA_OP_UNMAP:
2116  		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2117  		op->flags |= XE_VMA_OP_COMMITTED;
2118  		break;
2119  	case DRM_GPUVA_OP_PREFETCH:
2120  		op->flags |= XE_VMA_OP_COMMITTED;
2121  		break;
2122  	default:
2123  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2124  	}
2125  
2126  	return err;
2127  }
2128  
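/*
 * Parse the GPUVA ops into xe_vma_ops state: create the VMAs each op needs,
 * account the page-table update work per tile and commit every op to the VM.
 * On error, already committed ops are undone by vm_bind_ioctl_ops_unwind().
 */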
2129  static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2130  				   struct xe_vma_ops *vops)
2131  {
2132  	struct xe_device *xe = vm->xe;
2133  	struct drm_gpuva_op *__op;
2134  	struct xe_tile *tile;
2135  	u8 id, tile_mask = 0;
2136  	int err = 0;
2137  
2138  	lockdep_assert_held_write(&vm->lock);
2139  
2140  	for_each_tile(tile, vm->xe, id)
2141  		tile_mask |= 0x1 << id;
2142  
2143  	drm_gpuva_for_each_op(__op, ops) {
2144  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2145  		struct xe_vma *vma;
2146  		unsigned int flags = 0;
2147  
2148  		INIT_LIST_HEAD(&op->link);
2149  		list_add_tail(&op->link, &vops->list);
2150  		op->tile_mask = tile_mask;
2151  
2152  		switch (op->base.op) {
2153  		case DRM_GPUVA_OP_MAP:
2154  		{
2155  			flags |= op->map.read_only ?
2156  				VMA_CREATE_FLAG_READ_ONLY : 0;
2157  			flags |= op->map.is_null ?
2158  				VMA_CREATE_FLAG_IS_NULL : 0;
2159  			flags |= op->map.dumpable ?
2160  				VMA_CREATE_FLAG_DUMPABLE : 0;
2161  
2162  			vma = new_vma(vm, &op->base.map, op->map.pat_index,
2163  				      flags);
2164  			if (IS_ERR(vma))
2165  				return PTR_ERR(vma);
2166  
2167  			op->map.vma = vma;
2168  			if (op->map.immediate || !xe_vm_in_fault_mode(vm))
2169  				xe_vma_ops_incr_pt_update_ops(vops,
2170  							      op->tile_mask);
2171  			break;
2172  		}
2173  		case DRM_GPUVA_OP_REMAP:
2174  		{
2175  			struct xe_vma *old =
2176  				gpuva_to_vma(op->base.remap.unmap->va);
2177  
2178  			op->remap.start = xe_vma_start(old);
2179  			op->remap.range = xe_vma_size(old);
2180  
2181  			if (op->base.remap.prev) {
2182  				flags |= op->base.remap.unmap->va->flags &
2183  					XE_VMA_READ_ONLY ?
2184  					VMA_CREATE_FLAG_READ_ONLY : 0;
2185  				flags |= op->base.remap.unmap->va->flags &
2186  					DRM_GPUVA_SPARSE ?
2187  					VMA_CREATE_FLAG_IS_NULL : 0;
2188  				flags |= op->base.remap.unmap->va->flags &
2189  					XE_VMA_DUMPABLE ?
2190  					VMA_CREATE_FLAG_DUMPABLE : 0;
2191  
2192  				vma = new_vma(vm, op->base.remap.prev,
2193  					      old->pat_index, flags);
2194  				if (IS_ERR(vma))
2195  					return PTR_ERR(vma);
2196  
2197  				op->remap.prev = vma;
2198  
2199  				/*
2200  				 * Userptr creates a new SG mapping so
2201  				 * we must also rebind.
2202  				 */
2203  				op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2204  					IS_ALIGNED(xe_vma_end(vma),
2205  						   xe_vma_max_pte_size(old));
2206  				if (op->remap.skip_prev) {
2207  					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2208  					op->remap.range -=
2209  						xe_vma_end(vma) -
2210  						xe_vma_start(old);
2211  					op->remap.start = xe_vma_end(vma);
2212  					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2213  					       (ULL)op->remap.start,
2214  					       (ULL)op->remap.range);
2215  				} else {
2216  					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2217  				}
2218  			}
2219  
2220  			if (op->base.remap.next) {
2221  				flags |= op->base.remap.unmap->va->flags &
2222  					XE_VMA_READ_ONLY ?
2223  					VMA_CREATE_FLAG_READ_ONLY : 0;
2224  				flags |= op->base.remap.unmap->va->flags &
2225  					DRM_GPUVA_SPARSE ?
2226  					VMA_CREATE_FLAG_IS_NULL : 0;
2227  				flags |= op->base.remap.unmap->va->flags &
2228  					XE_VMA_DUMPABLE ?
2229  					VMA_CREATE_FLAG_DUMPABLE : 0;
2230  
2231  				vma = new_vma(vm, op->base.remap.next,
2232  					      old->pat_index, flags);
2233  				if (IS_ERR(vma))
2234  					return PTR_ERR(vma);
2235  
2236  				op->remap.next = vma;
2237  
2238  				/*
2239  				 * Userptr creates a new SG mapping so
2240  				 * we must also rebind.
2241  				 */
2242  				op->remap.skip_next = !xe_vma_is_userptr(old) &&
2243  					IS_ALIGNED(xe_vma_start(vma),
2244  						   xe_vma_max_pte_size(old));
2245  				if (op->remap.skip_next) {
2246  					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2247  					op->remap.range -=
2248  						xe_vma_end(old) -
2249  						xe_vma_start(vma);
2250  					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2251  					       (ULL)op->remap.start,
2252  					       (ULL)op->remap.range);
2253  				} else {
2254  					xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2255  				}
2256  			}
2257  			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2258  			break;
2259  		}
2260  		case DRM_GPUVA_OP_UNMAP:
2261  		case DRM_GPUVA_OP_PREFETCH:
2262  			/* FIXME: Need to skip some prefetch ops */
2263  			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2264  			break;
2265  		default:
2266  			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2267  		}
2268  
2269  		err = xe_vma_op_commit(vm, op);
2270  		if (err)
2271  			return err;
2272  	}
2273  
2274  	return 0;
2275  }
2276  
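/*
 * Undo a single (possibly partially) committed op: destroy VMAs created for
 * MAP/REMAP and re-insert VMAs that were removed for UNMAP/REMAP.
 */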
2277  static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2278  			     bool post_commit, bool prev_post_commit,
2279  			     bool next_post_commit)
2280  {
2281  	lockdep_assert_held_write(&vm->lock);
2282  
2283  	switch (op->base.op) {
2284  	case DRM_GPUVA_OP_MAP:
2285  		if (op->map.vma) {
2286  			prep_vma_destroy(vm, op->map.vma, post_commit);
2287  			xe_vma_destroy_unlocked(op->map.vma);
2288  		}
2289  		break;
2290  	case DRM_GPUVA_OP_UNMAP:
2291  	{
2292  		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2293  
2294  		if (vma) {
2295  			down_read(&vm->userptr.notifier_lock);
2296  			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2297  			up_read(&vm->userptr.notifier_lock);
2298  			if (post_commit)
2299  				xe_vm_insert_vma(vm, vma);
2300  		}
2301  		break;
2302  	}
2303  	case DRM_GPUVA_OP_REMAP:
2304  	{
2305  		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2306  
2307  		if (op->remap.prev) {
2308  			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2309  			xe_vma_destroy_unlocked(op->remap.prev);
2310  		}
2311  		if (op->remap.next) {
2312  			prep_vma_destroy(vm, op->remap.next, next_post_commit);
2313  			xe_vma_destroy_unlocked(op->remap.next);
2314  		}
2315  		if (vma) {
2316  			down_read(&vm->userptr.notifier_lock);
2317  			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2318  			up_read(&vm->userptr.notifier_lock);
2319  			if (post_commit)
2320  				xe_vm_insert_vma(vm, vma);
2321  		}
2322  		break;
2323  	}
2324  	case DRM_GPUVA_OP_PREFETCH:
2325  		/* Nothing to do */
2326  		break;
2327  	default:
2328  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2329  	}
2330  }
2331  
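/* Unwind all op lists of a failed bind ioctl in reverse order. */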
2332  static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2333  				     struct drm_gpuva_ops **ops,
2334  				     int num_ops_list)
2335  {
2336  	int i;
2337  
2338  	for (i = num_ops_list - 1; i >= 0; --i) {
2339  		struct drm_gpuva_ops *__ops = ops[i];
2340  		struct drm_gpuva_op *__op;
2341  
2342  		if (!__ops)
2343  			continue;
2344  
2345  		drm_gpuva_for_each_op_reverse(__op, __ops) {
2346  			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2347  
2348  			xe_vma_op_unwind(vm, op,
2349  					 op->flags & XE_VMA_OP_COMMITTED,
2350  					 op->flags & XE_VMA_OP_PREV_COMMITTED,
2351  					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2352  		}
2353  	}
2354  }
2355  
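/*
 * Lock the VMA's backing BO if it is not VM-private (a private BO shares the
 * VM's dma_resv, which the caller already holds) and optionally validate it.
 */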
2356  static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2357  				 bool validate)
2358  {
2359  	struct xe_bo *bo = xe_vma_bo(vma);
2360  	int err = 0;
2361  
2362  	if (bo) {
2363  		if (!bo->vm)
2364  			err = drm_exec_lock_obj(exec, &bo->ttm.base);
2365  		if (!err && validate)
2366  			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
2367  	}
2368  
2369  	return err;
2370  }
2371  
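/*
 * A VMA whose user fence from a previous bind has not signalled yet cannot be
 * modified; return -EBUSY in that case, otherwise drop the fence reference.
 */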
2372  static int check_ufence(struct xe_vma *vma)
2373  {
2374  	if (vma->ufence) {
2375  		struct xe_user_fence * const f = vma->ufence;
2376  
2377  		if (!xe_sync_ufence_get_status(f))
2378  			return -EBUSY;
2379  
2380  		vma->ufence = NULL;
2381  		xe_sync_ufence_put(f);
2382  	}
2383  
2384  	return 0;
2385  }
2386  
2387  static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2388  			    struct xe_vma_op *op)
2389  {
2390  	int err = 0;
2391  
2392  	switch (op->base.op) {
2393  	case DRM_GPUVA_OP_MAP:
2394  		err = vma_lock_and_validate(exec, op->map.vma,
2395  					    !xe_vm_in_fault_mode(vm) ||
2396  					    op->map.immediate);
2397  		break;
2398  	case DRM_GPUVA_OP_REMAP:
2399  		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2400  		if (err)
2401  			break;
2402  
2403  		err = vma_lock_and_validate(exec,
2404  					    gpuva_to_vma(op->base.remap.unmap->va),
2405  					    false);
2406  		if (!err && op->remap.prev)
2407  			err = vma_lock_and_validate(exec, op->remap.prev, true);
2408  		if (!err && op->remap.next)
2409  			err = vma_lock_and_validate(exec, op->remap.next, true);
2410  		break;
2411  	case DRM_GPUVA_OP_UNMAP:
2412  		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2413  		if (err)
2414  			break;
2415  
2416  		err = vma_lock_and_validate(exec,
2417  					    gpuva_to_vma(op->base.unmap.va),
2418  					    false);
2419  		break;
2420  	case DRM_GPUVA_OP_PREFETCH:
2421  	{
2422  		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2423  		u32 region = op->prefetch.region;
2424  
2425  		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2426  
2427  		err = vma_lock_and_validate(exec,
2428  					    gpuva_to_vma(op->base.prefetch.va),
2429  					    false);
2430  		if (!err && !xe_vma_has_no_bo(vma))
2431  			err = xe_bo_migrate(xe_vma_bo(vma),
2432  					    region_to_mem_type[region]);
2433  		break;
2434  	}
2435  	default:
2436  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2437  	}
2438  
2439  	return err;
2440  }
2441  
2442  static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2443  					   struct xe_vm *vm,
2444  					   struct xe_vma_ops *vops)
2445  {
2446  	struct xe_vma_op *op;
2447  	int err;
2448  
2449  	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2450  	if (err)
2451  		return err;
2452  
2453  	list_for_each_entry(op, &vops->list, link) {
2454  		err = op_lock_and_prep(exec, vm, op);
2455  		if (err)
2456  			return err;
2457  	}
2458  
2459  #ifdef TEST_VM_OPS_ERROR
2460  	if (vops->inject_error &&
2461  	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2462  		return -ENOSPC;
2463  #endif
2464  
2465  	return 0;
2466  }
2467  
2468  static void op_trace(struct xe_vma_op *op)
2469  {
2470  	switch (op->base.op) {
2471  	case DRM_GPUVA_OP_MAP:
2472  		trace_xe_vma_bind(op->map.vma);
2473  		break;
2474  	case DRM_GPUVA_OP_REMAP:
2475  		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2476  		if (op->remap.prev)
2477  			trace_xe_vma_bind(op->remap.prev);
2478  		if (op->remap.next)
2479  			trace_xe_vma_bind(op->remap.next);
2480  		break;
2481  	case DRM_GPUVA_OP_UNMAP:
2482  		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2483  		break;
2484  	case DRM_GPUVA_OP_PREFETCH:
2485  		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2486  		break;
2487  	default:
2488  		XE_WARN_ON("NOT POSSIBLE");
2489  	}
2490  }
2491  
2492  static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2493  {
2494  	struct xe_vma_op *op;
2495  
2496  	list_for_each_entry(op, &vops->list, link)
2497  		op_trace(op);
2498  }
2499  
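/*
 * For each tile with pending page-table updates, pick the exec queue that
 * will run them: the user-supplied queue (walking its multi-GT list across
 * tiles) or the VM's default queue for that tile. Returns the number of
 * tiles with work.
 */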
2500  static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2501  {
2502  	struct xe_exec_queue *q = vops->q;
2503  	struct xe_tile *tile;
2504  	int number_tiles = 0;
2505  	u8 id;
2506  
2507  	for_each_tile(tile, vm->xe, id) {
2508  		if (vops->pt_update_ops[id].num_ops)
2509  			++number_tiles;
2510  
2511  		if (vops->pt_update_ops[id].q)
2512  			continue;
2513  
2514  		if (q) {
2515  			vops->pt_update_ops[id].q = q;
2516  			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2517  				q = list_next_entry(q, multi_gt_list);
2518  		} else {
2519  			vops->pt_update_ops[id].q = vm->q[id];
2520  		}
2521  	}
2522  
2523  	return number_tiles;
2524  }
2525  
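/*
 * Execute the page-table updates: prepare every tile with work, run the
 * updates per tile and, when several tiles are involved, combine the
 * per-tile fences into a dma_fence_array. On error all prepared tiles are
 * aborted.
 */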
2526  static struct dma_fence *ops_execute(struct xe_vm *vm,
2527  				     struct xe_vma_ops *vops)
2528  {
2529  	struct xe_tile *tile;
2530  	struct dma_fence *fence = NULL;
2531  	struct dma_fence **fences = NULL;
2532  	struct dma_fence_array *cf = NULL;
2533  	int number_tiles = 0, current_fence = 0, err;
2534  	u8 id;
2535  
2536  	number_tiles = vm_ops_setup_tile_args(vm, vops);
2537  	if (number_tiles == 0)
2538  		return ERR_PTR(-ENODATA);
2539  
2540  	if (number_tiles > 1) {
2541  		fences = kmalloc_array(number_tiles, sizeof(*fences),
2542  				       GFP_KERNEL);
2543  		if (!fences) {
2544  			fence = ERR_PTR(-ENOMEM);
2545  			goto err_trace;
2546  		}
2547  	}
2548  
2549  	for_each_tile(tile, vm->xe, id) {
2550  		if (!vops->pt_update_ops[id].num_ops)
2551  			continue;
2552  
2553  		err = xe_pt_update_ops_prepare(tile, vops);
2554  		if (err) {
2555  			fence = ERR_PTR(err);
2556  			goto err_out;
2557  		}
2558  	}
2559  
2560  	trace_xe_vm_ops_execute(vops);
2561  
2562  	for_each_tile(tile, vm->xe, id) {
2563  		if (!vops->pt_update_ops[id].num_ops)
2564  			continue;
2565  
2566  		fence = xe_pt_update_ops_run(tile, vops);
2567  		if (IS_ERR(fence))
2568  			goto err_out;
2569  
2570  		if (fences)
2571  			fences[current_fence++] = fence;
2572  	}
2573  
2574  	if (fences) {
2575  		cf = dma_fence_array_create(number_tiles, fences,
2576  					    vm->composite_fence_ctx,
2577  					    vm->composite_fence_seqno++,
2578  					    false);
2579  		if (!cf) {
2580  			--vm->composite_fence_seqno;
2581  			fence = ERR_PTR(-ENOMEM);
2582  			goto err_out;
2583  		}
2584  		fence = &cf->base;
2585  	}
2586  
2587  	for_each_tile(tile, vm->xe, id) {
2588  		if (!vops->pt_update_ops[id].num_ops)
2589  			continue;
2590  
2591  		xe_pt_update_ops_fini(tile, vops);
2592  	}
2593  
2594  	return fence;
2595  
2596  err_out:
2597  	for_each_tile(tile, vm->xe, id) {
2598  		if (!vops->pt_update_ops[id].num_ops)
2599  			continue;
2600  
2601  		xe_pt_update_ops_abort(tile, vops);
2602  	}
2603  	while (current_fence)
2604  		dma_fence_put(fences[--current_fence]);
2605  	kfree(fences);
2606  	kfree(cf);
2607  
2608  err_trace:
2609  	trace_xe_vm_ops_fail(vm);
2610  	return fence;
2611  }
2612  
2613  static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2614  {
2615  	if (vma->ufence)
2616  		xe_sync_ufence_put(vma->ufence);
2617  	vma->ufence = __xe_sync_ufence_get(ufence);
2618  }
2619  
2620  static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2621  			  struct xe_user_fence *ufence)
2622  {
2623  	switch (op->base.op) {
2624  	case DRM_GPUVA_OP_MAP:
2625  		vma_add_ufence(op->map.vma, ufence);
2626  		break;
2627  	case DRM_GPUVA_OP_REMAP:
2628  		if (op->remap.prev)
2629  			vma_add_ufence(op->remap.prev, ufence);
2630  		if (op->remap.next)
2631  			vma_add_ufence(op->remap.next, ufence);
2632  		break;
2633  	case DRM_GPUVA_OP_UNMAP:
2634  		break;
2635  	case DRM_GPUVA_OP_PREFETCH:
2636  		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2637  		break;
2638  	default:
2639  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2640  	}
2641  }
2642  
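/*
 * Post-execution bookkeeping: attach the user fence to bound VMAs, schedule
 * destruction of unmapped VMAs against @fence, signal the syncs, record
 * @fence as the queue's last fence and drop the local fence reference.
 */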
2643  static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
2644  				   struct dma_fence *fence)
2645  {
2646  	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
2647  	struct xe_user_fence *ufence;
2648  	struct xe_vma_op *op;
2649  	int i;
2650  
2651  	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
2652  	list_for_each_entry(op, &vops->list, link) {
2653  		if (ufence)
2654  			op_add_ufence(vm, op, ufence);
2655  
2656  		if (op->base.op == DRM_GPUVA_OP_UNMAP)
2657  			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
2658  		else if (op->base.op == DRM_GPUVA_OP_REMAP)
2659  			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
2660  				       fence);
2661  	}
2662  	if (ufence)
2663  		xe_sync_ufence_put(ufence);
2664  	for (i = 0; i < vops->num_syncs; i++)
2665  		xe_sync_entry_signal(vops->syncs + i, fence);
2666  	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
2667  	dma_fence_put(fence);
2668  }
2669  
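/*
 * Lock and prepare everything the ops touch inside a single drm_exec
 * transaction, then execute the ops and finalize; contention restarts the
 * whole transaction.
 */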
2670  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2671  				     struct xe_vma_ops *vops)
2672  {
2673  	struct drm_exec exec;
2674  	struct dma_fence *fence;
2675  	int err;
2676  
2677  	lockdep_assert_held_write(&vm->lock);
2678  
2679  	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
2680  		      DRM_EXEC_IGNORE_DUPLICATES, 0);
2681  	drm_exec_until_all_locked(&exec) {
2682  		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
2683  		drm_exec_retry_on_contention(&exec);
2684  		if (err)
2685  			goto unlock;
2686  
2687  		fence = ops_execute(vm, vops);
2688  		if (IS_ERR(fence)) {
2689  			err = PTR_ERR(fence);
2690  			goto unlock;
2691  		}
2692  
2693  		vm_bind_ioctl_ops_fini(vm, vops, fence);
2694  	}
2695  
2696  unlock:
2697  	drm_exec_fini(&exec);
2698  	return err;
2699  }
2700  
2701  #define SUPPORTED_FLAGS_STUB  \
2702  	(DRM_XE_VM_BIND_FLAG_READONLY | \
2703  	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2704  	 DRM_XE_VM_BIND_FLAG_NULL | \
2705  	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2706  
2707  #ifdef TEST_VM_OPS_ERROR
2708  #define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
2709  #else
2710  #define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
2711  #endif
2712  
2713  #define XE_64K_PAGE_MASK 0xffffull
2714  #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2715  
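/*
 * Validate the bind ioctl arguments. For more than one bind the op array is
 * copied from userspace; a single bind uses the op embedded in @args, so
 * *bind_ops only needs to be freed in the multi-bind case.
 */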
2716  static int vm_bind_ioctl_check_args(struct xe_device *xe,
2717  				    struct drm_xe_vm_bind *args,
2718  				    struct drm_xe_vm_bind_op **bind_ops)
2719  {
2720  	int err;
2721  	int i;
2722  
2723  	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2724  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2725  		return -EINVAL;
2726  
2727  	if (XE_IOCTL_DBG(xe, args->extensions))
2728  		return -EINVAL;
2729  
2730  	if (args->num_binds > 1) {
2731  		u64 __user *bind_user =
2732  			u64_to_user_ptr(args->vector_of_binds);
2733  
2734  		*bind_ops = kvmalloc_array(args->num_binds,
2735  					   sizeof(struct drm_xe_vm_bind_op),
2736  					   GFP_KERNEL | __GFP_ACCOUNT);
2737  		if (!*bind_ops)
2738  			return -ENOBUFS;
2739  
2740  		err = __copy_from_user(*bind_ops, bind_user,
2741  				       sizeof(struct drm_xe_vm_bind_op) *
2742  				       args->num_binds);
2743  		if (XE_IOCTL_DBG(xe, err)) {
2744  			err = -EFAULT;
2745  			goto free_bind_ops;
2746  		}
2747  	} else {
2748  		*bind_ops = &args->bind;
2749  	}
2750  
2751  	for (i = 0; i < args->num_binds; ++i) {
2752  		u64 range = (*bind_ops)[i].range;
2753  		u64 addr = (*bind_ops)[i].addr;
2754  		u32 op = (*bind_ops)[i].op;
2755  		u32 flags = (*bind_ops)[i].flags;
2756  		u32 obj = (*bind_ops)[i].obj;
2757  		u64 obj_offset = (*bind_ops)[i].obj_offset;
2758  		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2759  		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2760  		u16 pat_index = (*bind_ops)[i].pat_index;
2761  		u16 coh_mode;
2762  
2763  		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2764  			err = -EINVAL;
2765  			goto free_bind_ops;
2766  		}
2767  
2768  		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2769  		(*bind_ops)[i].pat_index = pat_index;
2770  		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2771  		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2772  			err = -EINVAL;
2773  			goto free_bind_ops;
2774  		}
2775  
2776  		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2777  			err = -EINVAL;
2778  			goto free_bind_ops;
2779  		}
2780  
2781  		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2782  		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2783  		    XE_IOCTL_DBG(xe, obj && is_null) ||
2784  		    XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2785  		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2786  				 is_null) ||
2787  		    XE_IOCTL_DBG(xe, !obj &&
2788  				 op == DRM_XE_VM_BIND_OP_MAP &&
2789  				 !is_null) ||
2790  		    XE_IOCTL_DBG(xe, !obj &&
2791  				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2792  		    XE_IOCTL_DBG(xe, addr &&
2793  				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2794  		    XE_IOCTL_DBG(xe, range &&
2795  				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2796  		    XE_IOCTL_DBG(xe, obj &&
2797  				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2798  		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2799  				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2800  		    XE_IOCTL_DBG(xe, obj &&
2801  				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2802  		    XE_IOCTL_DBG(xe, prefetch_region &&
2803  				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2804  		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2805  				       xe->info.mem_region_mask)) ||
2806  		    XE_IOCTL_DBG(xe, obj &&
2807  				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2808  			err = -EINVAL;
2809  			goto free_bind_ops;
2810  		}
2811  
2812  		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2813  		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2814  		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2815  		    XE_IOCTL_DBG(xe, !range &&
2816  				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2817  			err = -EINVAL;
2818  			goto free_bind_ops;
2819  		}
2820  	}
2821  
2822  	return 0;
2823  
2824  free_bind_ops:
2825  	if (args->num_binds > 1)
2826  		kvfree(*bind_ops);
2827  	return err;
2828  }
2829  
2830  static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2831  				       struct xe_exec_queue *q,
2832  				       struct xe_sync_entry *syncs,
2833  				       int num_syncs)
2834  {
2835  	struct dma_fence *fence;
2836  	int i, err = 0;
2837  
2838  	fence = xe_sync_in_fence_get(syncs, num_syncs,
2839  				     to_wait_exec_queue(vm, q), vm);
2840  	if (IS_ERR(fence))
2841  		return PTR_ERR(fence);
2842  
2843  	for (i = 0; i < num_syncs; i++)
2844  		xe_sync_entry_signal(&syncs[i], fence);
2845  
2846  	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2847  				     fence);
2848  	dma_fence_put(fence);
2849  
2850  	return err;
2851  }
2852  
2853  static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
2854  			    struct xe_exec_queue *q,
2855  			    struct xe_sync_entry *syncs, u32 num_syncs)
2856  {
2857  	memset(vops, 0, sizeof(*vops));
2858  	INIT_LIST_HEAD(&vops->list);
2859  	vops->vm = vm;
2860  	vops->q = q;
2861  	vops->syncs = syncs;
2862  	vops->num_syncs = num_syncs;
2863  }
2864  
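/*
 * Per-BO checks for a bind: range and offset within the BO, 64k alignment on
 * platforms that require it, and a PAT coherency mode compatible with the
 * BO's CPU caching.
 */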
2865  static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2866  					u64 addr, u64 range, u64 obj_offset,
2867  					u16 pat_index)
2868  {
2869  	u16 coh_mode;
2870  
2871  	if (XE_IOCTL_DBG(xe, range > bo->size) ||
2872  	    XE_IOCTL_DBG(xe, obj_offset >
2873  			 bo->size - range)) {
2874  		return -EINVAL;
2875  	}
2876  
2877  	/*
2878  	 * Some platforms require 64k VM_BIND alignment,
2879  	 * specifically those with XE_VRAM_FLAGS_NEED64K.
2880  	 *
2881  	 * Other platforms may have BO's set to 64k physical placement,
2882  	 * but can be mapped at 4k offsets anyway. This check is only
2883  	 * there for the former case.
2884  	 */
2885  	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2886  	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2887  		if (XE_IOCTL_DBG(xe, obj_offset &
2888  				 XE_64K_PAGE_MASK) ||
2889  		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2890  		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2891  			return  -EINVAL;
2892  		}
2893  	}
2894  
2895  	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2896  	if (bo->cpu_caching) {
2897  		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2898  				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2899  			return  -EINVAL;
2900  		}
2901  	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2902  		/*
2903  		 * Imported dma-buf from a different device should
2904  		 * require 1way or 2way coherency since we don't know
2905  		 * how it was mapped on the CPU. Just assume it is
2906  		 * potentially cached on the CPU side.
2907  		 */
2908  		return  -EINVAL;
2909  	}
2910  
2911  	return 0;
2912  }
2913  
2914  int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2915  {
2916  	struct xe_device *xe = to_xe_device(dev);
2917  	struct xe_file *xef = to_xe_file(file);
2918  	struct drm_xe_vm_bind *args = data;
2919  	struct drm_xe_sync __user *syncs_user;
2920  	struct xe_bo **bos = NULL;
2921  	struct drm_gpuva_ops **ops = NULL;
2922  	struct xe_vm *vm;
2923  	struct xe_exec_queue *q = NULL;
2924  	u32 num_syncs, num_ufence = 0;
2925  	struct xe_sync_entry *syncs = NULL;
2926  	struct drm_xe_vm_bind_op *bind_ops;
2927  	struct xe_vma_ops vops;
2928  	int err;
2929  	int i;
2930  
2931  	err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2932  	if (err)
2933  		return err;
2934  
2935  	if (args->exec_queue_id) {
2936  		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2937  		if (XE_IOCTL_DBG(xe, !q)) {
2938  			err = -ENOENT;
2939  			goto free_objs;
2940  		}
2941  
2942  		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2943  			err = -EINVAL;
2944  			goto put_exec_queue;
2945  		}
2946  	}
2947  
2948  	vm = xe_vm_lookup(xef, args->vm_id);
2949  	if (XE_IOCTL_DBG(xe, !vm)) {
2950  		err = -EINVAL;
2951  		goto put_exec_queue;
2952  	}
2953  
2954  	err = down_write_killable(&vm->lock);
2955  	if (err)
2956  		goto put_vm;
2957  
2958  	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2959  		err = -ENOENT;
2960  		goto release_vm_lock;
2961  	}
2962  
2963  	for (i = 0; i < args->num_binds; ++i) {
2964  		u64 range = bind_ops[i].range;
2965  		u64 addr = bind_ops[i].addr;
2966  
2967  		if (XE_IOCTL_DBG(xe, range > vm->size) ||
2968  		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2969  			err = -EINVAL;
2970  			goto release_vm_lock;
2971  		}
2972  	}
2973  
2974  	if (args->num_binds) {
2975  		bos = kvcalloc(args->num_binds, sizeof(*bos),
2976  			       GFP_KERNEL | __GFP_ACCOUNT);
2977  		if (!bos) {
2978  			err = -ENOMEM;
2979  			goto release_vm_lock;
2980  		}
2981  
2982  		ops = kvcalloc(args->num_binds, sizeof(*ops),
2983  			       GFP_KERNEL | __GFP_ACCOUNT);
2984  		if (!ops) {
2985  			err = -ENOMEM;
2986  			goto release_vm_lock;
2987  		}
2988  	}
2989  
2990  	for (i = 0; i < args->num_binds; ++i) {
2991  		struct drm_gem_object *gem_obj;
2992  		u64 range = bind_ops[i].range;
2993  		u64 addr = bind_ops[i].addr;
2994  		u32 obj = bind_ops[i].obj;
2995  		u64 obj_offset = bind_ops[i].obj_offset;
2996  		u16 pat_index = bind_ops[i].pat_index;
2997  
2998  		if (!obj)
2999  			continue;
3000  
3001  		gem_obj = drm_gem_object_lookup(file, obj);
3002  		if (XE_IOCTL_DBG(xe, !gem_obj)) {
3003  			err = -ENOENT;
3004  			goto put_obj;
3005  		}
3006  		bos[i] = gem_to_xe_bo(gem_obj);
3007  
3008  		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3009  						   obj_offset, pat_index);
3010  		if (err)
3011  			goto put_obj;
3012  	}
3013  
3014  	if (args->num_syncs) {
3015  		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3016  		if (!syncs) {
3017  			err = -ENOMEM;
3018  			goto put_obj;
3019  		}
3020  	}
3021  
3022  	syncs_user = u64_to_user_ptr(args->syncs);
3023  	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3024  		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3025  					  &syncs_user[num_syncs],
3026  					  (xe_vm_in_lr_mode(vm) ?
3027  					   SYNC_PARSE_FLAG_LR_MODE : 0) |
3028  					  (!args->num_binds ?
3029  					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3030  		if (err)
3031  			goto free_syncs;
3032  
3033  		if (xe_sync_is_ufence(&syncs[num_syncs]))
3034  			num_ufence++;
3035  	}
3036  
3037  	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3038  		err = -EINVAL;
3039  		goto free_syncs;
3040  	}
3041  
3042  	if (!args->num_binds) {
3043  		err = -ENODATA;
3044  		goto free_syncs;
3045  	}
3046  
3047  	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3048  	for (i = 0; i < args->num_binds; ++i) {
3049  		u64 range = bind_ops[i].range;
3050  		u64 addr = bind_ops[i].addr;
3051  		u32 op = bind_ops[i].op;
3052  		u32 flags = bind_ops[i].flags;
3053  		u64 obj_offset = bind_ops[i].obj_offset;
3054  		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3055  		u16 pat_index = bind_ops[i].pat_index;
3056  
3057  		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3058  						  addr, range, op, flags,
3059  						  prefetch_region, pat_index);
3060  		if (IS_ERR(ops[i])) {
3061  			err = PTR_ERR(ops[i]);
3062  			ops[i] = NULL;
3063  			goto unwind_ops;
3064  		}
3065  
3066  		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3067  		if (err)
3068  			goto unwind_ops;
3069  
3070  #ifdef TEST_VM_OPS_ERROR
3071  		if (flags & FORCE_OP_ERROR) {
3072  			vops.inject_error = true;
3073  			vm->xe->vm_inject_error_position =
3074  				(vm->xe->vm_inject_error_position + 1) %
3075  				FORCE_OP_ERROR_COUNT;
3076  		}
3077  #endif
3078  	}
3079  
3080  	/* Nothing to do */
3081  	if (list_empty(&vops.list)) {
3082  		err = -ENODATA;
3083  		goto unwind_ops;
3084  	}
3085  
3086  	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3087  	if (err)
3088  		goto unwind_ops;
3089  
3090  	err = vm_bind_ioctl_ops_execute(vm, &vops);
3091  
3092  unwind_ops:
3093  	if (err && err != -ENODATA)
3094  		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3095  	xe_vma_ops_fini(&vops);
3096  	for (i = args->num_binds - 1; i >= 0; --i)
3097  		if (ops[i])
3098  			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3099  free_syncs:
3100  	if (err == -ENODATA)
3101  		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3102  	while (num_syncs--)
3103  		xe_sync_entry_cleanup(&syncs[num_syncs]);
3104  
3105  	kfree(syncs);
3106  put_obj:
3107  	for (i = 0; i < args->num_binds; ++i)
3108  		xe_bo_put(bos[i]);
3109  release_vm_lock:
3110  	up_write(&vm->lock);
3111  put_vm:
3112  	xe_vm_put(vm);
3113  put_exec_queue:
3114  	if (q)
3115  		xe_exec_queue_put(q);
3116  free_objs:
3117  	kvfree(bos);
3118  	kvfree(ops);
3119  	if (args->num_binds > 1)
3120  		kvfree(bind_ops);
3121  	return err;
3122  }
3123  
3124  /**
3125   * xe_vm_lock() - Lock the vm's dma_resv object
3126   * @vm: The struct xe_vm whose lock is to be locked
3127   * @intr: Whether to perform any waits interruptibly
3128   *
3129   * Return: 0 on success, -EINTR if @intr is true and the wait for a
3130   * contended lock was interrupted. If @intr is false, the function
3131   * always returns 0.
3132   */
3133  int xe_vm_lock(struct xe_vm *vm, bool intr)
3134  {
3135  	if (intr)
3136  		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3137  
3138  	return dma_resv_lock(xe_vm_resv(vm), NULL);
3139  }
3140  
3141  /**
3142   * xe_vm_unlock() - Unlock the vm's dma_resv object
3143   * @vm: The struct xe_vm whose lock is to be released.
3144   *
3145   * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3146   */
3147  void xe_vm_unlock(struct xe_vm *vm)
3148  {
3149  	dma_resv_unlock(xe_vm_resv(vm));
3150  }
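
/*
 * Illustrative pairing of the two helpers above (a sketch, not taken from a
 * real caller):
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	// ... access state protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */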
3151  
3152  /**
3153   * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3154   * @vma: VMA to invalidate
3155   *
3156   * Walks the page-table leaves, zeroing the entries owned by this VMA,
3157   * invalidates the TLBs, and blocks until the TLB invalidation is
3158   * complete.
3159   *
3160   * Returns 0 for success, negative error code otherwise.
3161   */
3162  int xe_vm_invalidate_vma(struct xe_vma *vma)
3163  {
3164  	struct xe_device *xe = xe_vma_vm(vma)->xe;
3165  	struct xe_tile *tile;
3166  	struct xe_gt_tlb_invalidation_fence
3167  		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3168  	u8 id;
3169  	u32 fence_id = 0;
3170  	int ret = 0;
3171  
3172  	xe_assert(xe, !xe_vma_is_null(vma));
3173  	trace_xe_vma_invalidate(vma);
3174  
3175  	vm_dbg(&xe_vma_vm(vma)->xe->drm,
3176  	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3177  		xe_vma_start(vma), xe_vma_size(vma));
3178  
3179  	/* Check that we don't race with page-table updates */
3180  	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3181  		if (xe_vma_is_userptr(vma)) {
3182  			WARN_ON_ONCE(!mmu_interval_check_retry
3183  				     (&to_userptr_vma(vma)->userptr.notifier,
3184  				      to_userptr_vma(vma)->userptr.notifier_seq));
3185  			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3186  							     DMA_RESV_USAGE_BOOKKEEP));
3187  
3188  		} else {
3189  			xe_bo_assert_held(xe_vma_bo(vma));
3190  		}
3191  	}
3192  
3193  	for_each_tile(tile, xe, id) {
3194  		if (xe_pt_zap_ptes(tile, vma)) {
3195  			xe_device_wmb(xe);
3196  			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3197  							  &fence[fence_id],
3198  							  true);
3199  
3200  			ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3201  							 &fence[fence_id], vma);
3202  			if (ret)
3203  				goto wait;
3204  			++fence_id;
3205  
3206  			if (!tile->media_gt)
3207  				continue;
3208  
3209  			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3210  							  &fence[fence_id],
3211  							  true);
3212  
3213  			ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3214  							 &fence[fence_id], vma);
3215  			if (ret)
3216  				goto wait;
3217  			++fence_id;
3218  		}
3219  	}
3220  
3221  wait:
3222  	for (id = 0; id < fence_id; ++id)
3223  		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3224  
3225  	vma->tile_invalidated = vma->tile_mask;
3226  
3227  	return ret;
3228  }
3229  
3230  struct xe_vm_snapshot {
3231  	unsigned long num_snaps;
3232  	struct {
3233  		u64 ofs, bo_ofs;
3234  		unsigned long len;
3235  		struct xe_bo *bo;
3236  		void *data;
3237  		struct mm_struct *mm;
3238  	} snap[];
3239  };
3240  
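/*
 * Capture the layout of all dumpable VMAs under snap_mutex without sleeping
 * (GFP_NOWAIT); only metadata plus BO/mm references are taken here, the
 * actual contents are copied later by xe_vm_snapshot_capture_delayed().
 */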
3241  struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3242  {
3243  	unsigned long num_snaps = 0, i;
3244  	struct xe_vm_snapshot *snap = NULL;
3245  	struct drm_gpuva *gpuva;
3246  
3247  	if (!vm)
3248  		return NULL;
3249  
3250  	mutex_lock(&vm->snap_mutex);
3251  	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3252  		if (gpuva->flags & XE_VMA_DUMPABLE)
3253  			num_snaps++;
3254  	}
3255  
3256  	if (num_snaps)
3257  		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3258  	if (!snap) {
3259  		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3260  		goto out_unlock;
3261  	}
3262  
3263  	snap->num_snaps = num_snaps;
3264  	i = 0;
3265  	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3266  		struct xe_vma *vma = gpuva_to_vma(gpuva);
3267  		struct xe_bo *bo = vma->gpuva.gem.obj ?
3268  			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3269  
3270  		if (!(gpuva->flags & XE_VMA_DUMPABLE))
3271  			continue;
3272  
3273  		snap->snap[i].ofs = xe_vma_start(vma);
3274  		snap->snap[i].len = xe_vma_size(vma);
3275  		if (bo) {
3276  			snap->snap[i].bo = xe_bo_get(bo);
3277  			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3278  		} else if (xe_vma_is_userptr(vma)) {
3279  			struct mm_struct *mm =
3280  				to_userptr_vma(vma)->userptr.notifier.mm;
3281  
3282  			if (mmget_not_zero(mm))
3283  				snap->snap[i].mm = mm;
3284  			else
3285  				snap->snap[i].data = ERR_PTR(-EFAULT);
3286  
3287  			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3288  		} else {
3289  			snap->snap[i].data = ERR_PTR(-ENOENT);
3290  		}
3291  		i++;
3292  	}
3293  
3294  out_unlock:
3295  	mutex_unlock(&vm->snap_mutex);
3296  	return snap;
3297  }
3298  
3299  void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3300  {
3301  	if (IS_ERR_OR_NULL(snap))
3302  		return;
3303  
3304  	for (int i = 0; i < snap->num_snaps; i++) {
3305  		struct xe_bo *bo = snap->snap[i].bo;
3306  		struct iosys_map src;
3307  		int err;
3308  
3309  		if (IS_ERR(snap->snap[i].data))
3310  			continue;
3311  
3312  		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3313  		if (!snap->snap[i].data) {
3314  			snap->snap[i].data = ERR_PTR(-ENOMEM);
3315  			goto cleanup_bo;
3316  		}
3317  
3318  		if (bo) {
3319  			xe_bo_lock(bo, false);
3320  			err = ttm_bo_vmap(&bo->ttm, &src);
3321  			if (!err) {
3322  				xe_map_memcpy_from(xe_bo_device(bo),
3323  						   snap->snap[i].data,
3324  						   &src, snap->snap[i].bo_ofs,
3325  						   snap->snap[i].len);
3326  				ttm_bo_vunmap(&bo->ttm, &src);
3327  			}
3328  			xe_bo_unlock(bo);
3329  		} else {
3330  			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3331  
3332  			kthread_use_mm(snap->snap[i].mm);
3333  			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3334  				err = 0;
3335  			else
3336  				err = -EFAULT;
3337  			kthread_unuse_mm(snap->snap[i].mm);
3338  
3339  			mmput(snap->snap[i].mm);
3340  			snap->snap[i].mm = NULL;
3341  		}
3342  
3343  		if (err) {
3344  			kvfree(snap->snap[i].data);
3345  			snap->snap[i].data = ERR_PTR(err);
3346  		}
3347  
3348  cleanup_bo:
3349  		xe_bo_put(bo);
3350  		snap->snap[i].bo = NULL;
3351  	}
3352  }
3353  
3354  void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3355  {
3356  	unsigned long i, j;
3357  
3358  	if (IS_ERR_OR_NULL(snap)) {
3359  		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3360  		return;
3361  	}
3362  
3363  	for (i = 0; i < snap->num_snaps; i++) {
3364  		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3365  
3366  		if (IS_ERR(snap->snap[i].data)) {
3367  			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3368  				   PTR_ERR(snap->snap[i].data));
3369  			continue;
3370  		}
3371  
3372  		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3373  
3374  		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3375  			u32 *val = snap->snap[i].data + j;
3376  			char dumped[ASCII85_BUFSZ];
3377  
3378  			drm_puts(p, ascii85_encode(*val, dumped));
3379  		}
3380  
3381  		drm_puts(p, "\n");
3382  	}
3383  }
3384  
3385  void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3386  {
3387  	unsigned long i;
3388  
3389  	if (IS_ERR_OR_NULL(snap))
3390  		return;
3391  
3392  	for (i = 0; i < snap->num_snaps; i++) {
3393  		if (!IS_ERR(snap->snap[i].data))
3394  			kvfree(snap->snap[i].data);
3395  		xe_bo_put(snap->snap[i].bo);
3396  		if (snap->snap[i].mm)
3397  			mmput(snap->snap[i].mm);
3398  	}
3399  	kvfree(snap);
3400  }
3401