// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

static struct kmem_cache *slab_ce;

static struct intel_context *intel_context_alloc(void)
{
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
}

static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
	if (intel_context_has_own_state(ce))
		fput(ce->default_state);
	kmem_cache_free(slab_ce, ce);
}

void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

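/*
 * Lazily allocate the backend state (e.g. the logical ring context) on
 * first pin. Serialised by ce->pin_mutex so that concurrent pinners
 * allocate only once; completion is recorded with CONTEXT_ALLOC_BIT.
 */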
int intel_context_alloc_state(struct intel_context *ce)
{
	struct i915_gem_context *ctx;
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);

		rcu_read_lock();
		ctx = rcu_dereference(ce->gem_context);
		if (ctx && !kref_get_unless_zero(&ctx->ref))
			ctx = NULL;
		rcu_read_unlock();
		if (ctx) {
			if (ctx->client)
				i915_drm_client_add_context_objects(ctx->client,
								    ce);
			i915_gem_context_put(ctx);
		}
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

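/*
 * Take the first reference on ce->active. Except for barrier contexts,
 * GuC submission and parallel contexts, also preallocate the barrier
 * tracking nodes used by intel_context_active_release() so that step
 * cannot fail for lack of memory.
 */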
static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

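/*
 * Pin the context state object high in the GGTT, above the backend's
 * minimum bias, and mark it unshrinkable (and dirty) for as long as it
 * is pinned. Undone by __context_unpin_state().
 */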
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

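/* Pin the ringbuffer and track our use of it on its vma->active. */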
static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

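/*
 * Pin the ring, timeline and (if already allocated) context state up
 * front, before ce->pin_mutex is taken, so that __intel_context_active()
 * never needs to acquire dma-resv locks under the mutex.
 */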
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

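/*
 * Pin the context for use on the HW under the caller's ww acquire
 * context: allocate the backend state on first use, lock and pin the
 * backing objects, then hand over to ce->ops->pin(). On success the pin
 * is published via ce->pin_count; the temporary pins and references
 * taken along the way are always dropped before returning.
 */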
int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

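/* As __intel_context_do_pin_ww(), but with a local ww context and -EDEADLK backoff. */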
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

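/*
 * Drop @sub pin references; the final unpin releases the backend state
 * and lets the active tracker retire the context asynchronously.
 */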
void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra now so that it is not freed before we finish
	 * dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

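/* Called via ce->active once the last request using the context has been retired. */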
static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

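/*
 * Called via ce->active when the context becomes busy: take extra pins
 * on the ring, timeline and state that intel_context_pre_pin() already
 * acquired, keeping them alive until __intel_context_retire().
 */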
static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

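/*
 * One-time initialisation of a freshly allocated intel_context for
 * @engine: reference count, locks, lists, GuC state and the active
 * tracker. The ring, timeline and HW state are created later, on
 * first pin.
 */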
void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	spin_lock_init(&ce->signal_lock);
	INIT_LIST_HEAD(&ce->signals);

	mutex_init(&ce->pin_mutex);

	spin_lock_init(&ce->guc_state.lock);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize fence to be complete as this is expected to be complete
	 * unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

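/*
 * Counterpart to intel_context_init(): drop the references it took and,
 * for a parent context, release the creation references on its children.
 */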
void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

void i915_context_module_exit(void)
{
	kmem_cache_destroy(slab_ce);
}

int __init i915_context_module_init(void)
{
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;

	return 0;
}

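/*
 * Engine enter/exit hooks: hold an engine-pm wakeref and keep the
 * timeline active for as long as the context has requests in flight.
 */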
void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

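/*
 * Prepare @rq, built on another context, to remotely modify @ce: order
 * it after ce's current activity when the timelines differ, and add it
 * to ce->active so the context image stays pinned until @rq is retired.
 */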
int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee that the context image and the timeline remain pinned
	 * until the modifying request is retired, by adding it to the ce
	 * activity tracker.
	 *
	 * We only need to take one pin on its account; in other words,
	 * transfer the pinned ce object to the tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

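/*
 * Pin @ce (with ww locking and -EDEADLK backoff) just long enough to
 * construct a new request on it. The request is returned with
 * ce->timeline->mutex held, as per i915_request_create().
 */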
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as the outer
	 * lock here. Hack around this to shut up lockdep in selftests.
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}

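/*
 * Walk the parent's GuC request list backwards to find the oldest
 * request on @ce that has not yet completed, and return a reference to
 * it (or NULL if there is none).
 */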
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the submitted
	 * context. The parent list contains the requests for all the contexts
	 * in the relationship, so we have to compare each request's context.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * It is the caller's responsibility to validate that this function is
	 * used correctly, but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	parent->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

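/*
 * Total runtime of the context in ns, including the period it has been
 * active so far. Backends that report raw cycles (COPS_RUNTIME_CYCLES)
 * are scaled by the GT clock period.
 */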
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}

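/*
 * Mark the context as banned and, if the backend supports it, ask it to
 * revoke the context from the HW using the shorter "banned" preemption
 * timeout. The return value of intel_context_set_banned() is passed on
 * to the caller.
 */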
bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	bool ret = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, rq,
				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);

	return ret;
}

bool intel_context_revoke(struct intel_context *ce)
{
	bool ret = intel_context_set_exiting(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif