// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * Before the UAPI can be used, a user client must initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
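 *
 * As an illustration, a minimal userspace sketch of this initialization,
 * assuming libdrm's drmIoctl() helper, the DRM_IOCTL_NOUVEAU_VM_INIT wrapper
 * and the structure layout from the nouveau uapi header, with an arbitrary
 * example split of the VA space, could look like this::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0x0,
 *		.kernel_managed_size = 1ull << 30,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *
 * In this sketch the range [0, 1 GiB) would be managed by the kernel, while
 * the remainder of the VA space would be left to the UMD.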
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed mappings
 * will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
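 *
 * For example, a single VM_BIND ioctl carrying two operations, first creating
 * a sparse mapping and then mapping a GEM object into a part of its range,
 * might be set up as sketched below. The structure and flag names are assumed
 * to match the nouveau uapi header; the addresses, sizes and the GEM handle
 * are arbitrary example values::
 *
 *	struct drm_nouveau_vm_bind_op ops[2] = {
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = 0x100000000ull,
 *			.range = 0x10000000ull,
 *		},
 *		{
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.addr = 0x100000000ull,
 *			.bo_offset = 0,
 *			.range = 0x10000ull,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (__u64)(uintptr_t)ops,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
 *
 * With no flags and no syncobjs set, such a request would be executed
 * synchronously (see below).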
 *
 * While the kernel generally permits memory backed mappings to be mapped and
 * unmapped in arbitrary sequences and over arbitrary ranges, either within a
 * single or multiple VM_BIND ioctl calls, there are some restrictions for
 * sparse mappings.
 *
 * The kernel does not permit userspace to:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finishes execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
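 *
 * As an illustration, an asynchronously executed VM_BIND job that signals a
 * (binary) syncobj on completion might be described as sketched below, where
 * op is a struct drm_nouveau_vm_bind_op as in the previous example and the
 * structure and flag names are assumed to match the nouveau uapi header::
 *
 *	struct drm_nouveau_sync sig = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj_handle,
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *		.op_count = 1,
 *		.op_ptr = (__u64)(uintptr_t)&op,
 *		.sig_count = 1,
 *		.sig_ptr = (__u64)(uintptr_t)&sig,
 *	};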
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, just like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs are scheduled on a channel specified by userspace.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs always
 * have an up-to-date view of the VA space. However, the actual mappings might
 * still be pending. Hence, EXEC jobs require the fences of the corresponding
 * VM_BIND jobs they depend on to be attached to them.
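 *
 * A sketch of such a submission, pushing a single push buffer segment on a
 * channel and signaling a syncobj once it completes, could look like this,
 * assuming the structure layout and ioctl wrapper from the nouveau uapi
 * header; the VA, length and handles are example values::
 *
 *	struct drm_nouveau_exec_push push = {
 *		.va = pushbuf_va,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_sync sig = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj_handle,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (__u64)(uintptr_t)&push,
 *		.sig_count = 1,
 *		.sig_ptr = (__u64)(uintptr_t)&sig,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);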
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
			struct drm_gpuvm_exec *vme)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

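	/* Lock the GEM objects mapped in this VM. The uvmm lock is held across
	 * the locking step so that a concurrent VM_BIND job cannot change the
	 * set of mapped objects while their reservations are acquired.
	 */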
	nouveau_uvmm_lock(uvmm);
	ret = drm_gpuvm_exec_lock(vme);
	if (ret) {
		nouveau_uvmm_unlock(uvmm);
		return ret;
	}
	nouveau_uvmm_unlock(uvmm);

	ret = drm_gpuvm_exec_validate(vme);
	if (ret) {
		drm_gpuvm_exec_unlock(vme);
		return ret;
	}

	return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
			      struct drm_gpuvm_exec *vme)
{
	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
				      job->resv_usage, job->resv_usage);
	drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

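	/* Make sure there is enough space on the ring: one IB slot per push
	 * buffer segment plus one for emitting the job's fence.
	 */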
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

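	/* Queue each userspace-provided push buffer segment on the channel. */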
	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_done(job);
	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

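	/* Kill the channel so no further work is accepted, unless it has
	 * already been killed elsewhere.
	 */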
	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static const struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	args.file_priv = __args->file_priv;
	job->chan = __args->chan;

	args.sched = __args->sched;
	/* Plus one to account for the HW fence. */
	args.credits = job->push.count + 1;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
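	/* The job's done fence is added to the mapped BOs' reservations with
	 * write usage, since an EXEC job may write to any of them.
	 */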
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

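/* Copy the userspace arrays of push buffer segments and in-/out-syncs into
 * kernel memory.
 */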
static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_ins:
	u_free(args->in_sync.s);
err_free_pushs:
	u_free(args->push.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched = chan16->sched;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}