Lines Matching +full:resource +full:- +full:attachments

31 #include <linux/dma-buf.h>
49 struct amdgpu_fpriv *fpriv = filp->driver_priv; in amdgpu_cs_parser_init()
51 if (cs->in.num_chunks == 0) in amdgpu_cs_parser_init()
52 return -EINVAL; in amdgpu_cs_parser_init()
55 p->adev = adev; in amdgpu_cs_parser_init()
56 p->filp = filp; in amdgpu_cs_parser_init()
58 p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); in amdgpu_cs_parser_init()
59 if (!p->ctx) in amdgpu_cs_parser_init()
60 return -EINVAL; in amdgpu_cs_parser_init()
62 if (atomic_read(&p->ctx->guilty)) { in amdgpu_cs_parser_init()
63 amdgpu_ctx_put(p->ctx); in amdgpu_cs_parser_init()
64 return -ECANCELED; in amdgpu_cs_parser_init()
67 amdgpu_sync_create(&p->sync); in amdgpu_cs_parser_init()
68 drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | in amdgpu_cs_parser_init()
80 r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, in amdgpu_cs_job_idx()
81 chunk_ib->ip_instance, in amdgpu_cs_job_idx()
82 chunk_ib->ring, &entity); in amdgpu_cs_job_idx()
90 if (entity->rq == NULL) in amdgpu_cs_job_idx()
91 return -EINVAL; in amdgpu_cs_job_idx()
94 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_job_idx()
95 if (p->entities[i] == entity) in amdgpu_cs_job_idx()
100 return -EINVAL; in amdgpu_cs_job_idx()
102 p->entities[i] = entity; in amdgpu_cs_job_idx()
103 p->gang_size = i + 1; in amdgpu_cs_job_idx()
117 if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type)) in amdgpu_cs_p1_ib()
118 return -EINVAL; in amdgpu_cs_p1_ib()
121 p->gang_leader_idx = r; in amdgpu_cs_p1_ib()
132 gobj = drm_gem_object_lookup(p->filp, data->handle); in amdgpu_cs_p1_user_fence()
134 return -EINVAL; in amdgpu_cs_p1_user_fence()
136 p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); in amdgpu_cs_p1_user_fence()
139 size = amdgpu_bo_size(p->uf_bo); in amdgpu_cs_p1_user_fence()
140 if (size != PAGE_SIZE || data->offset > (size - 8)) in amdgpu_cs_p1_user_fence()
141 return -EINVAL; in amdgpu_cs_p1_user_fence()
143 if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) in amdgpu_cs_p1_user_fence()
144 return -EINVAL; in amdgpu_cs_p1_user_fence()
146 *offset = data->offset; in amdgpu_cs_p1_user_fence()
160 r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, in amdgpu_cs_p1_bo_handles()
161 &p->bo_list); in amdgpu_cs_p1_bo_handles()
178 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_pass1()
180 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_pass1()
188 chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), in amdgpu_cs_pass1()
191 return -ENOMEM; in amdgpu_cs_pass1()
194 chunk_array_user = u64_to_user_ptr(cs->in.chunks); in amdgpu_cs_pass1()
196 sizeof(uint64_t)*cs->in.num_chunks)) { in amdgpu_cs_pass1()
197 ret = -EFAULT; in amdgpu_cs_pass1()
201 p->nchunks = cs->in.num_chunks; in amdgpu_cs_pass1()
202 p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), in amdgpu_cs_pass1()
204 if (!p->chunks) { in amdgpu_cs_pass1()
205 ret = -ENOMEM; in amdgpu_cs_pass1()
209 for (i = 0; i < p->nchunks; i++) { in amdgpu_cs_pass1()
217 ret = -EFAULT; in amdgpu_cs_pass1()
218 i--; in amdgpu_cs_pass1()
221 p->chunks[i].chunk_id = user_chunk.chunk_id; in amdgpu_cs_pass1()
222 p->chunks[i].length_dw = user_chunk.length_dw; in amdgpu_cs_pass1()
224 size = p->chunks[i].length_dw; in amdgpu_cs_pass1()
227 p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), in amdgpu_cs_pass1()
229 if (p->chunks[i].kdata == NULL) { in amdgpu_cs_pass1()
230 ret = -ENOMEM; in amdgpu_cs_pass1()
231 i--; in amdgpu_cs_pass1()
235 if (copy_from_user(p->chunks[i].kdata, cdata, size)) { in amdgpu_cs_pass1()
236 ret = -EFAULT; in amdgpu_cs_pass1()
241 ret = -EINVAL; in amdgpu_cs_pass1()
242 switch (p->chunks[i].chunk_id) { in amdgpu_cs_pass1()
247 ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); in amdgpu_cs_pass1()
256 ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, in amdgpu_cs_pass1()
267 if (p->bo_list) in amdgpu_cs_pass1()
270 ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); in amdgpu_cs_pass1()
289 if (!p->gang_size) { in amdgpu_cs_pass1()
290 ret = -EINVAL; in amdgpu_cs_pass1()
294 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_pass1()
295 ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, in amdgpu_cs_pass1()
296 num_ibs[i], &p->jobs[i]); in amdgpu_cs_pass1()
299 p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; in amdgpu_cs_pass1()
301 p->gang_leader = p->jobs[p->gang_leader_idx]; in amdgpu_cs_pass1()
303 if (p->ctx->generation != p->gang_leader->generation) { in amdgpu_cs_pass1()
304 ret = -ECANCELED; in amdgpu_cs_pass1()
308 if (p->uf_bo) in amdgpu_cs_pass1()
309 p->gang_leader->uf_addr = uf_offset; in amdgpu_cs_pass1()
318 i = p->nchunks - 1; in amdgpu_cs_pass1()
320 for (; i >= 0; i--) in amdgpu_cs_pass1()
321 kvfree(p->chunks[i].kdata); in amdgpu_cs_pass1()
322 kvfree(p->chunks); in amdgpu_cs_pass1()
323 p->chunks = NULL; in amdgpu_cs_pass1()
324 p->nchunks = 0; in amdgpu_cs_pass1()
336 struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata; in amdgpu_cs_p2_ib()
337 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_ib()
338 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_p2_ib()
348 job = p->jobs[r]; in amdgpu_cs_p2_ib()
350 ib = &job->ibs[job->num_ibs++]; in amdgpu_cs_p2_ib()
353 if (p->uf_bo && ring->funcs->no_user_fence) in amdgpu_cs_p2_ib()
354 return -EINVAL; in amdgpu_cs_p2_ib()
356 if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && in amdgpu_cs_p2_ib()
357 chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { in amdgpu_cs_p2_ib()
358 if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) in amdgpu_cs_p2_ib()
366 return -EINVAL; in amdgpu_cs_p2_ib()
369 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) in amdgpu_cs_p2_ib()
370 job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; in amdgpu_cs_p2_ib()
372 r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ? in amdgpu_cs_p2_ib()
373 chunk_ib->ib_bytes : 0, in amdgpu_cs_p2_ib()
380 ib->gpu_addr = chunk_ib->va_start; in amdgpu_cs_p2_ib()
381 ib->length_dw = chunk_ib->ib_bytes / 4; in amdgpu_cs_p2_ib()
382 ib->flags = chunk_ib->flags; in amdgpu_cs_p2_ib()
389 struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; in amdgpu_cs_p2_dependencies()
390 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_dependencies()
394 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_dependencies()
404 return -EINVAL; in amdgpu_cs_p2_dependencies()
422 if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { in amdgpu_cs_p2_dependencies()
427 fence = dma_fence_get(&s_fence->scheduled); in amdgpu_cs_p2_dependencies()
431 r = amdgpu_sync_fence(&p->sync, fence); in amdgpu_cs_p2_dependencies()
446 r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); in amdgpu_syncobj_lookup_and_add()
453 r = amdgpu_sync_fence(&p->sync, fence); in amdgpu_syncobj_lookup_and_add()
461 struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; in amdgpu_cs_p2_syncobj_in()
465 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_in()
479 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; in amdgpu_cs_p2_syncobj_timeline_wait()
483 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_timeline_wait()
499 struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; in amdgpu_cs_p2_syncobj_out()
503 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_out()
506 if (p->post_deps) in amdgpu_cs_p2_syncobj_out()
507 return -EINVAL; in amdgpu_cs_p2_syncobj_out()
509 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_out()
511 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_out()
513 if (!p->post_deps) in amdgpu_cs_p2_syncobj_out()
514 return -ENOMEM; in amdgpu_cs_p2_syncobj_out()
518 p->post_deps[i].syncobj = in amdgpu_cs_p2_syncobj_out()
519 drm_syncobj_find(p->filp, deps[i].handle); in amdgpu_cs_p2_syncobj_out()
520 if (!p->post_deps[i].syncobj) in amdgpu_cs_p2_syncobj_out()
521 return -EINVAL; in amdgpu_cs_p2_syncobj_out()
522 p->post_deps[i].chain = NULL; in amdgpu_cs_p2_syncobj_out()
523 p->post_deps[i].point = 0; in amdgpu_cs_p2_syncobj_out()
524 p->num_post_deps++; in amdgpu_cs_p2_syncobj_out()
533 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; in amdgpu_cs_p2_syncobj_timeline_signal()
537 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_timeline_signal()
540 if (p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
541 return -EINVAL; in amdgpu_cs_p2_syncobj_timeline_signal()
543 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_timeline_signal()
545 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_timeline_signal()
547 if (!p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
548 return -ENOMEM; in amdgpu_cs_p2_syncobj_timeline_signal()
551 struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; in amdgpu_cs_p2_syncobj_timeline_signal()
553 dep->chain = NULL; in amdgpu_cs_p2_syncobj_timeline_signal()
555 dep->chain = dma_fence_chain_alloc(); in amdgpu_cs_p2_syncobj_timeline_signal()
556 if (!dep->chain) in amdgpu_cs_p2_syncobj_timeline_signal()
557 return -ENOMEM; in amdgpu_cs_p2_syncobj_timeline_signal()
560 dep->syncobj = drm_syncobj_find(p->filp, in amdgpu_cs_p2_syncobj_timeline_signal()
562 if (!dep->syncobj) { in amdgpu_cs_p2_syncobj_timeline_signal()
563 dma_fence_chain_free(dep->chain); in amdgpu_cs_p2_syncobj_timeline_signal()
564 return -EINVAL; in amdgpu_cs_p2_syncobj_timeline_signal()
566 dep->point = syncobj_deps[i].point; in amdgpu_cs_p2_syncobj_timeline_signal()
567 p->num_post_deps++; in amdgpu_cs_p2_syncobj_timeline_signal()
576 struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata; in amdgpu_cs_p2_shadow()
579 if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW) in amdgpu_cs_p2_shadow()
580 return -EINVAL; in amdgpu_cs_p2_shadow()
582 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_p2_shadow()
583 p->jobs[i]->shadow_va = shadow->shadow_va; in amdgpu_cs_p2_shadow()
584 p->jobs[i]->csa_va = shadow->csa_va; in amdgpu_cs_p2_shadow()
585 p->jobs[i]->gds_va = shadow->gds_va; in amdgpu_cs_p2_shadow()
586 p->jobs[i]->init_shadow = in amdgpu_cs_p2_shadow()
587 shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; in amdgpu_cs_p2_shadow()
598 for (i = 0; i < p->nchunks; ++i) { in amdgpu_cs_pass2()
601 chunk = &p->chunks[i]; in amdgpu_cs_pass2()
603 switch (chunk->chunk_id) { in amdgpu_cs_pass2()
649 if (us <= 0 || !adev->mm_stats.log2_max_MBps) in us_to_bytes()
655 return us << adev->mm_stats.log2_max_MBps; in us_to_bytes()
660 if (!adev->mm_stats.log2_max_MBps) in bytes_to_us()
663 return bytes >> adev->mm_stats.log2_max_MBps; in bytes_to_us()
667 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
685 /* Allow a maximum of 200 accumulated ms. This is basically per-IB in amdgpu_cs_get_threshold_for_moves()
694 if (!adev->mm_stats.log2_max_MBps) { in amdgpu_cs_get_threshold_for_moves()
700 total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); in amdgpu_cs_get_threshold_for_moves()
701 used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); in amdgpu_cs_get_threshold_for_moves()
702 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; in amdgpu_cs_get_threshold_for_moves()
704 spin_lock(&adev->mm_stats.lock); in amdgpu_cs_get_threshold_for_moves()
708 increment_us = time_us - adev->mm_stats.last_update_us; in amdgpu_cs_get_threshold_for_moves()
709 adev->mm_stats.last_update_us = time_us; in amdgpu_cs_get_threshold_for_moves()
710 adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, in amdgpu_cs_get_threshold_for_moves()
718 * - a lot of VRAM is freed by userspace in amdgpu_cs_get_threshold_for_moves()
719 * - the presence of a big buffer causes a lot of evictions in amdgpu_cs_get_threshold_for_moves()
731 if (!(adev->flags & AMD_IS_APU)) in amdgpu_cs_get_threshold_for_moves()
736 adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); in amdgpu_cs_get_threshold_for_moves()
742 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); in amdgpu_cs_get_threshold_for_moves()
745 if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { in amdgpu_cs_get_threshold_for_moves()
746 u64 total_vis_vram = adev->gmc.visible_vram_size; in amdgpu_cs_get_threshold_for_moves()
748 amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); in amdgpu_cs_get_threshold_for_moves()
751 u64 free_vis_vram = total_vis_vram - used_vis_vram; in amdgpu_cs_get_threshold_for_moves()
753 adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + in amdgpu_cs_get_threshold_for_moves()
757 adev->mm_stats.accum_us_vis = in amdgpu_cs_get_threshold_for_moves()
759 adev->mm_stats.accum_us_vis); in amdgpu_cs_get_threshold_for_moves()
762 *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); in amdgpu_cs_get_threshold_for_moves()
767 spin_unlock(&adev->mm_stats.lock); in amdgpu_cs_get_threshold_for_moves()
777 spin_lock(&adev->mm_stats.lock); in amdgpu_cs_report_moved_bytes()
778 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); in amdgpu_cs_report_moved_bytes()
779 adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); in amdgpu_cs_report_moved_bytes()
780 spin_unlock(&adev->mm_stats.lock); in amdgpu_cs_report_moved_bytes()
785 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); in amdgpu_cs_bo_validate()
790 .resv = bo->tbo.base.resv in amdgpu_cs_bo_validate()
795 if (bo->tbo.pin_count) in amdgpu_cs_bo_validate()
801 if (p->bytes_moved < p->bytes_moved_threshold && in amdgpu_cs_bo_validate()
802 (!bo->tbo.base.dma_buf || in amdgpu_cs_bo_validate()
803 list_empty(&bo->tbo.base.dma_buf->attachments))) { in amdgpu_cs_bo_validate()
804 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && in amdgpu_cs_bo_validate()
805 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { in amdgpu_cs_bo_validate()
810 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) in amdgpu_cs_bo_validate()
811 domain = bo->preferred_domains; in amdgpu_cs_bo_validate()
813 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
815 domain = bo->preferred_domains; in amdgpu_cs_bo_validate()
818 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
823 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); in amdgpu_cs_bo_validate()
825 p->bytes_moved += ctx.bytes_moved; in amdgpu_cs_bo_validate()
826 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && in amdgpu_cs_bo_validate()
827 amdgpu_res_cpu_visible(adev, bo->tbo.resource)) in amdgpu_cs_bo_validate()
828 p->bytes_moved_vis += ctx.bytes_moved; in amdgpu_cs_bo_validate()
830 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { in amdgpu_cs_bo_validate()
831 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
841 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_parser_bos()
843 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_parser_bos()
850 /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ in amdgpu_cs_parser_bos()
851 if (cs->in.bo_list_handle) { in amdgpu_cs_parser_bos()
852 if (p->bo_list) in amdgpu_cs_parser_bos()
853 return -EINVAL; in amdgpu_cs_parser_bos()
855 r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle, in amdgpu_cs_parser_bos()
856 &p->bo_list); in amdgpu_cs_parser_bos()
859 } else if (!p->bo_list) { in amdgpu_cs_parser_bos()
861 r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0, in amdgpu_cs_parser_bos()
862 &p->bo_list); in amdgpu_cs_parser_bos()
867 mutex_lock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
873 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
875 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_parser_bos()
878 e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, in amdgpu_cs_parser_bos()
881 if (!e->user_pages) { in amdgpu_cs_parser_bos()
883 r = -ENOMEM; in amdgpu_cs_parser_bos()
887 r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); in amdgpu_cs_parser_bos()
889 kvfree(e->user_pages); in amdgpu_cs_parser_bos()
890 e->user_pages = NULL; in amdgpu_cs_parser_bos()
894 for (i = 0; i < bo->tbo.ttm->num_pages; i++) { in amdgpu_cs_parser_bos()
895 if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { in amdgpu_cs_parser_bos()
900 e->user_invalidated = userpage_invalidated; in amdgpu_cs_parser_bos()
903 drm_exec_until_all_locked(&p->exec) { in amdgpu_cs_parser_bos()
904 r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size); in amdgpu_cs_parser_bos()
905 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
909 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
911 r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, in amdgpu_cs_parser_bos()
912 1 + p->gang_size); in amdgpu_cs_parser_bos()
913 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
917 e->bo_va = amdgpu_vm_bo_find(vm, e->bo); in amdgpu_cs_parser_bos()
920 if (p->uf_bo) { in amdgpu_cs_parser_bos()
921 r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base, in amdgpu_cs_parser_bos()
922 1 + p->gang_size); in amdgpu_cs_parser_bos()
923 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
929 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
932 usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm); in amdgpu_cs_parser_bos()
933 if (usermm && usermm != current->mm) { in amdgpu_cs_parser_bos()
934 r = -EPERM; in amdgpu_cs_parser_bos()
938 if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && in amdgpu_cs_parser_bos()
939 e->user_invalidated && e->user_pages) { in amdgpu_cs_parser_bos()
940 amdgpu_bo_placement_from_domain(e->bo, in amdgpu_cs_parser_bos()
942 r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, in amdgpu_cs_parser_bos()
947 amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, in amdgpu_cs_parser_bos()
948 e->user_pages); in amdgpu_cs_parser_bos()
951 kvfree(e->user_pages); in amdgpu_cs_parser_bos()
952 e->user_pages = NULL; in amdgpu_cs_parser_bos()
955 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, in amdgpu_cs_parser_bos()
956 &p->bytes_moved_vis_threshold); in amdgpu_cs_parser_bos()
957 p->bytes_moved = 0; in amdgpu_cs_parser_bos()
958 p->bytes_moved_vis = 0; in amdgpu_cs_parser_bos()
960 r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, in amdgpu_cs_parser_bos()
967 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_parser_bos()
973 if (p->uf_bo) { in amdgpu_cs_parser_bos()
974 r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo); in amdgpu_cs_parser_bos()
978 p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo); in amdgpu_cs_parser_bos()
981 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, in amdgpu_cs_parser_bos()
982 p->bytes_moved_vis); in amdgpu_cs_parser_bos()
984 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_parser_bos()
985 amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj, in amdgpu_cs_parser_bos()
986 p->bo_list->gws_obj, in amdgpu_cs_parser_bos()
987 p->bo_list->oa_obj); in amdgpu_cs_parser_bos()
991 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
992 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_parser_bos()
994 if (!e->user_pages) in amdgpu_cs_parser_bos()
996 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); in amdgpu_cs_parser_bos()
997 kvfree(e->user_pages); in amdgpu_cs_parser_bos()
998 e->user_pages = NULL; in amdgpu_cs_parser_bos()
999 e->range = NULL; in amdgpu_cs_parser_bos()
1001 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
1012 for (i = 0; i < p->gang_size; ++i) { in trace_amdgpu_cs_ibs()
1013 struct amdgpu_job *job = p->jobs[i]; in trace_amdgpu_cs_ibs()
1015 for (j = 0; j < job->num_ibs; ++j) in trace_amdgpu_cs_ibs()
1016 trace_amdgpu_cs(p, job, &job->ibs[j]); in trace_amdgpu_cs_ibs()
1028 if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place) in amdgpu_cs_patch_ibs()
1031 for (i = 0; i < job->num_ibs; ++i) { in amdgpu_cs_patch_ibs()
1032 struct amdgpu_ib *ib = &job->ibs[i]; in amdgpu_cs_patch_ibs()
1038 va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; in amdgpu_cs_patch_ibs()
1045 if ((va_start + ib->length_dw * 4) > in amdgpu_cs_patch_ibs()
1046 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { in amdgpu_cs_patch_ibs()
1048 return -EINVAL; in amdgpu_cs_patch_ibs()
1056 kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); in amdgpu_cs_patch_ibs()
1058 if (ring->funcs->parse_cs) { in amdgpu_cs_patch_ibs()
1059 memcpy(ib->ptr, kptr, ib->length_dw * 4); in amdgpu_cs_patch_ibs()
1066 if (ib->sa_bo) in amdgpu_cs_patch_ibs()
1067 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); in amdgpu_cs_patch_ibs()
1069 ib->ptr = (uint32_t *)kptr; in amdgpu_cs_patch_ibs()
1085 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_patch_jobs()
1086 r = amdgpu_cs_patch_ibs(p, p->jobs[i]); in amdgpu_cs_patch_jobs()
1095 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_vm_handling()
1096 struct amdgpu_job *job = p->gang_leader; in amdgpu_cs_vm_handling()
1097 struct amdgpu_device *adev = p->adev; in amdgpu_cs_vm_handling()
1098 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_vm_handling()
1108 if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { in amdgpu_cs_vm_handling()
1109 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1110 struct drm_sched_entity *entity = p->entities[i]; in amdgpu_cs_vm_handling()
1111 struct drm_gpu_scheduler *sched = entity->rq->sched; in amdgpu_cs_vm_handling()
1114 if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) in amdgpu_cs_vm_handling()
1115 return -EINVAL; in amdgpu_cs_vm_handling()
1123 r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); in amdgpu_cs_vm_handling()
1127 r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); in amdgpu_cs_vm_handling()
1131 if (fpriv->csa_va) { in amdgpu_cs_vm_handling()
1132 bo_va = fpriv->csa_va; in amdgpu_cs_vm_handling()
1138 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); in amdgpu_cs_vm_handling()
1145 * with p->ticket. But removing it caused test regressions, so I'm in amdgpu_cs_vm_handling()
1148 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1149 bo_va = e->bo_va; in amdgpu_cs_vm_handling()
1157 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); in amdgpu_cs_vm_handling()
1162 r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); in amdgpu_cs_vm_handling()
1170 r = amdgpu_sync_fence(&p->sync, vm->last_update); in amdgpu_cs_vm_handling()
1174 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1175 job = p->jobs[i]; in amdgpu_cs_vm_handling()
1177 if (!job->vm) in amdgpu_cs_vm_handling()
1180 job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); in amdgpu_cs_vm_handling()
1183 if (adev->debug_vm) { in amdgpu_cs_vm_handling()
1185 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1186 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_vm_handling()
1201 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_sync_rings()
1209 r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); in amdgpu_cs_sync_rings()
1211 if (r != -ERESTARTSYS) in amdgpu_cs_sync_rings()
1216 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_sync_rings()
1219 struct dma_resv *resv = bo->tbo.base.resv; in amdgpu_cs_sync_rings()
1224 r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode, in amdgpu_cs_sync_rings()
1225 &fpriv->vm); in amdgpu_cs_sync_rings()
1230 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_sync_rings()
1231 r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); in amdgpu_cs_sync_rings()
1236 sched = p->gang_leader->base.entity->rq->sched; in amdgpu_cs_sync_rings()
1237 while ((fence = amdgpu_sync_get_fence(&p->sync))) { in amdgpu_cs_sync_rings()
1246 if (!s_fence || s_fence->sched != sched) { in amdgpu_cs_sync_rings()
1251 r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); in amdgpu_cs_sync_rings()
1263 for (i = 0; i < p->num_post_deps; ++i) { in amdgpu_cs_post_dependencies()
1264 if (p->post_deps[i].chain && p->post_deps[i].point) { in amdgpu_cs_post_dependencies()
1265 drm_syncobj_add_point(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1266 p->post_deps[i].chain, in amdgpu_cs_post_dependencies()
1267 p->fence, p->post_deps[i].point); in amdgpu_cs_post_dependencies()
1268 p->post_deps[i].chain = NULL; in amdgpu_cs_post_dependencies()
1270 drm_syncobj_replace_fence(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1271 p->fence); in amdgpu_cs_post_dependencies()
1279 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_submit()
1280 struct amdgpu_job *leader = p->gang_leader; in amdgpu_cs_submit()
1288 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1289 drm_sched_job_arm(&p->jobs[i]->base); in amdgpu_cs_submit()
1291 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1294 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1297 fence = &p->jobs[i]->base.s_fence->scheduled; in amdgpu_cs_submit()
1299 r = drm_sched_job_add_dependency(&leader->base, fence); in amdgpu_cs_submit()
1306 if (p->gang_size > 1) { in amdgpu_cs_submit()
1307 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1308 amdgpu_job_set_gang_leader(p->jobs[i], leader); in amdgpu_cs_submit()
1315 mutex_lock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1318 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. in amdgpu_cs_submit()
1321 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_submit()
1322 r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm, in amdgpu_cs_submit()
1323 e->range); in amdgpu_cs_submit()
1324 e->range = NULL; in amdgpu_cs_submit()
1327 r = -EAGAIN; in amdgpu_cs_submit()
1328 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1332 p->fence = dma_fence_get(&leader->base.s_fence->finished); in amdgpu_cs_submit()
1333 drm_exec_for_each_locked_object(&p->exec, index, gobj) { in amdgpu_cs_submit()
1335 ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo); in amdgpu_cs_submit()
1338 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1339 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1342 dma_resv_add_fence(gobj->resv, in amdgpu_cs_submit()
1343 &p->jobs[i]->base.s_fence->finished, in amdgpu_cs_submit()
1348 dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE); in amdgpu_cs_submit()
1351 seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], in amdgpu_cs_submit()
1352 p->fence); in amdgpu_cs_submit()
1355 if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && in amdgpu_cs_submit()
1356 !p->ctx->preamble_presented) { in amdgpu_cs_submit()
1357 leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; in amdgpu_cs_submit()
1358 p->ctx->preamble_presented = true; in amdgpu_cs_submit()
1361 cs->out.handle = seq; in amdgpu_cs_submit()
1362 leader->uf_sequence = seq; in amdgpu_cs_submit()
1364 amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket); in amdgpu_cs_submit()
1365 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1366 amdgpu_job_free_resources(p->jobs[i]); in amdgpu_cs_submit()
1367 trace_amdgpu_cs_ioctl(p->jobs[i]); in amdgpu_cs_submit()
1368 drm_sched_entity_push_job(&p->jobs[i]->base); in amdgpu_cs_submit()
1369 p->jobs[i] = NULL; in amdgpu_cs_submit()
1372 amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); in amdgpu_cs_submit()
1374 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1375 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_submit()
1384 amdgpu_sync_free(&parser->sync); in amdgpu_cs_parser_fini()
1385 drm_exec_fini(&parser->exec); in amdgpu_cs_parser_fini()
1387 for (i = 0; i < parser->num_post_deps; i++) { in amdgpu_cs_parser_fini()
1388 drm_syncobj_put(parser->post_deps[i].syncobj); in amdgpu_cs_parser_fini()
1389 kfree(parser->post_deps[i].chain); in amdgpu_cs_parser_fini()
1391 kfree(parser->post_deps); in amdgpu_cs_parser_fini()
1393 dma_fence_put(parser->fence); in amdgpu_cs_parser_fini()
1395 if (parser->ctx) in amdgpu_cs_parser_fini()
1396 amdgpu_ctx_put(parser->ctx); in amdgpu_cs_parser_fini()
1397 if (parser->bo_list) in amdgpu_cs_parser_fini()
1398 amdgpu_bo_list_put(parser->bo_list); in amdgpu_cs_parser_fini()
1400 for (i = 0; i < parser->nchunks; i++) in amdgpu_cs_parser_fini()
1401 kvfree(parser->chunks[i].kdata); in amdgpu_cs_parser_fini()
1402 kvfree(parser->chunks); in amdgpu_cs_parser_fini()
1403 for (i = 0; i < parser->gang_size; ++i) { in amdgpu_cs_parser_fini()
1404 if (parser->jobs[i]) in amdgpu_cs_parser_fini()
1405 amdgpu_job_free(parser->jobs[i]); in amdgpu_cs_parser_fini()
1407 amdgpu_bo_unref(&parser->uf_bo); in amdgpu_cs_parser_fini()
1417 return -EHWPOISON; in amdgpu_cs_ioctl()
1419 if (!adev->accel_working) in amdgpu_cs_ioctl()
1420 return -EBUSY; in amdgpu_cs_ioctl()
1438 if (r == -ENOMEM) in amdgpu_cs_ioctl()
1440 else if (r != -ERESTARTSYS && r != -EAGAIN) in amdgpu_cs_ioctl()
1467 mutex_unlock(&parser.bo_list->bo_list_mutex); in amdgpu_cs_ioctl()
1475 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1487 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); in amdgpu_cs_wait_ioctl()
1493 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); in amdgpu_cs_wait_ioctl()
1495 return -EINVAL; in amdgpu_cs_wait_ioctl()
1497 r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance, in amdgpu_cs_wait_ioctl()
1498 wait->in.ring, &entity); in amdgpu_cs_wait_ioctl()
1504 fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle); in amdgpu_cs_wait_ioctl()
1509 if (r > 0 && fence->error) in amdgpu_cs_wait_ioctl()
1510 r = fence->error; in amdgpu_cs_wait_ioctl()
1520 wait->out.status = (r == 0); in amdgpu_cs_wait_ioctl()
1526 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1541 ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); in amdgpu_cs_get_fence()
1543 return ERR_PTR(-EINVAL); in amdgpu_cs_get_fence()
1545 r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance, in amdgpu_cs_get_fence()
1546 user->ring, &entity); in amdgpu_cs_get_fence()
1552 fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no); in amdgpu_cs_get_fence()
1568 fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); in amdgpu_cs_fence_to_handle_ioctl()
1575 switch (info->in.what) { in amdgpu_cs_fence_to_handle_ioctl()
1581 r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle); in amdgpu_cs_fence_to_handle_ioctl()
1590 r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle); in amdgpu_cs_fence_to_handle_ioctl()
1605 return -ENOMEM; in amdgpu_cs_fence_to_handle_ioctl()
1608 fd_install(fd, sync_file->file); in amdgpu_cs_fence_to_handle_ioctl()
1609 info->out.handle = fd; in amdgpu_cs_fence_to_handle_ioctl()
1614 return -EINVAL; in amdgpu_cs_fence_to_handle_ioctl()
1619 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1631 uint32_t fence_count = wait->in.fence_count; in amdgpu_cs_wait_all_fences()
1637 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); in amdgpu_cs_wait_all_fences()
1646 if (r > 0 && fence->error) in amdgpu_cs_wait_all_fences()
1647 r = fence->error; in amdgpu_cs_wait_all_fences()
1658 wait->out.status = (r > 0); in amdgpu_cs_wait_all_fences()
1664 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1676 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); in amdgpu_cs_wait_any_fence()
1677 uint32_t fence_count = wait->in.fence_count; in amdgpu_cs_wait_any_fence()
1687 return -ENOMEM; in amdgpu_cs_wait_any_fence()
1712 wait->out.status = (r > 0); in amdgpu_cs_wait_any_fence()
1713 wait->out.first_signaled = first; in amdgpu_cs_wait_any_fence()
1716 r = array[first]->error; in amdgpu_cs_wait_any_fence()
1729 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1740 uint32_t fence_count = wait->in.fence_count; in amdgpu_cs_wait_fences_ioctl()
1749 return -ENOMEM; in amdgpu_cs_wait_fences_ioctl()
1751 fences_user = u64_to_user_ptr(wait->in.fences); in amdgpu_cs_wait_fences_ioctl()
1754 r = -EFAULT; in amdgpu_cs_wait_fences_ioctl()
1758 if (wait->in.wait_all) in amdgpu_cs_wait_fences_ioctl()
1770 * amdgpu_cs_find_mapping - find bo_va for VM address
1785 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; in amdgpu_cs_find_mapping()
1787 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_find_mapping()
1794 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) in amdgpu_cs_find_mapping()
1795 return -EINVAL; in amdgpu_cs_find_mapping()
1797 *bo = mapping->bo_va->base.bo; in amdgpu_cs_find_mapping()
1801 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) in amdgpu_cs_find_mapping()
1802 return -EINVAL; in amdgpu_cs_find_mapping()
1804 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; in amdgpu_cs_find_mapping()
1805 amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); in amdgpu_cs_find_mapping()
1806 for (i = 0; i < (*bo)->placement.num_placement; i++) in amdgpu_cs_find_mapping()
1807 (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; in amdgpu_cs_find_mapping()
1808 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); in amdgpu_cs_find_mapping()
1812 return amdgpu_ttm_alloc_gart(&(*bo)->tbo); in amdgpu_cs_find_mapping()