Lines matching "activity", "signal", and "sources" in drivers/gpu/drm/amd/amdkfd/kfd_process.c (Linux kernel)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
59 /* Ordered, single-threaded workqueue for restoring evicted
62 * their BOs and result in a live-lock situation where processes
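/*
 * [Added sketch, not part of the matched source] The live-lock described
 * above is avoided by giving restores their own ordered workqueue, so only
 * one process restores at a time. A minimal creation pattern, assuming the
 * kfd_process_wq/kfd_restore_wq names seen in kfd_process_create_wq() below
 * (the exact flags in the real file may differ):
 */
static struct workqueue_struct *kfd_process_wq;	/* regular, concurrent work */
static struct workqueue_struct *kfd_restore_wq;	/* ordered: one restore at a time */

static int kfd_process_create_wq_sketch(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
	if (!kfd_restore_wq)
		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
							 WQ_FREEZABLE);
	if (!kfd_process_wq || !kfd_restore_wq)
		return -ENOMEM;	/* caller tears down whichever half was created */
	return 0;
}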
84 * Structure for SDMA activity tracking
115 pdd = workarea->pdd; in kfd_sdma_activity_worker()
118 dqm = pdd->dev->dqm; in kfd_sdma_activity_worker()
119 qpd = &pdd->qpd; in kfd_sdma_activity_worker()
123 * Total SDMA activity is current SDMA activity + past SDMA activity in kfd_sdma_activity_worker()
125 * To get the current activity counters for all active SDMA queues, in kfd_sdma_activity_worker()
126 * we loop over all SDMA queues and get their counts from user-space. in kfd_sdma_activity_worker()
132 * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list, in kfd_sdma_activity_worker()
138 * from the qpd->queues_list. in kfd_sdma_activity_worker()
139 * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted. in kfd_sdma_activity_worker()
141 * past activity counter. So subtract the SDMA counter stored in step 2 in kfd_sdma_activity_worker()
151 list_for_each_entry(q, &qpd->queues_list, list) { in kfd_sdma_activity_worker()
152 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && in kfd_sdma_activity_worker()
153 (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) in kfd_sdma_activity_worker()
162 INIT_LIST_HEAD(&sdma_q->list); in kfd_sdma_activity_worker()
163 sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr; in kfd_sdma_activity_worker()
164 sdma_q->queue_id = q->properties.queue_id; in kfd_sdma_activity_worker()
165 list_add_tail(&sdma_q->list, &sdma_q_list.list); in kfd_sdma_activity_worker()
170 * qpd->queues_list. Return the past activity count as the total SDMA in kfd_sdma_activity_worker()
174 workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; in kfd_sdma_activity_worker()
184 mm = get_task_mm(pdd->process->lead_thread); in kfd_sdma_activity_worker()
192 ret = read_sdma_queue_counter(sdma_q->rptr, &val); in kfd_sdma_activity_worker()
195 sdma_q->queue_id); in kfd_sdma_activity_worker()
197 sdma_q->sdma_val = val; in kfd_sdma_activity_worker()
198 workarea->sdma_activity_counter += val; in kfd_sdma_activity_worker()
211 workarea->sdma_activity_counter += pdd->sdma_past_activity_counter; in kfd_sdma_activity_worker()
213 list_for_each_entry(q, &qpd->queues_list, list) { in kfd_sdma_activity_worker()
217 if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && in kfd_sdma_activity_worker()
218 (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) in kfd_sdma_activity_worker()
222 if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) && in kfd_sdma_activity_worker()
223 (sdma_q->queue_id == q->properties.queue_id)) { in kfd_sdma_activity_worker()
224 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
235 * from qpd->queues_list during SDMA usage read. Subtract the SDMA in kfd_sdma_activity_worker()
239 workarea->sdma_activity_counter -= sdma_q->sdma_val; in kfd_sdma_activity_worker()
240 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
248 list_del(&sdma_q->list); in kfd_sdma_activity_worker()
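/*
 * [Added sketch] Condensed form of the two-pass scheme the comment block
 * above describes. snapshot_sdma_queues(), read_snapshot_counters() and
 * reconcile_deleted_queues() are hypothetical helpers standing in for the
 * inline loops shown in the fragments; the types and dqm_lock()/dqm_unlock()
 * helpers are the ones this file already uses.
 */
static uint64_t sdma_activity_two_pass_sketch(struct kfd_process_device *pdd,
					      struct device_queue_manager *dqm,
					      struct qcm_process_device *qpd,
					      struct mm_struct *mm)
{
	LIST_HEAD(snap);
	uint64_t total = pdd->sdma_past_activity_counter;

	dqm_lock(dqm);
	snapshot_sdma_queues(qpd, &snap);	/* pass 1: copy rptr/queue_id */
	dqm_unlock(dqm);

	kthread_use_mm(mm);			/* rptr is a user-space pointer */
	total += read_snapshot_counters(&snap);
	kthread_unuse_mm(mm);

	dqm_lock(dqm);
	/* pass 2: queues deleted meanwhile were already folded into the past
	 * counter, so their sampled values must be subtracted from total.
	 */
	reconcile_deleted_queues(qpd, &snap, &total);
	dqm_unlock(dqm);

	return total;
}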
254 * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
280 dev = pdd->dev; in kfd_get_cu_occupancy()
281 if (dev->kfd2kgd->get_cu_occupancy == NULL) in kfd_get_cu_occupancy()
282 return -EINVAL; in kfd_get_cu_occupancy()
285 proc = pdd->process; in kfd_get_cu_occupancy()
286 if (pdd->qpd.queue_count == 0) { in kfd_get_cu_occupancy()
287 pr_debug("Gpu-Id: %d has no active queues for process %d\n", in kfd_get_cu_occupancy()
288 dev->id, proc->pasid); in kfd_get_cu_occupancy()
303 dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, in kfd_get_cu_occupancy()
304 &max_waves_per_cu, ffs(dev->xcc_mask) - 1); in kfd_get_cu_occupancy()
308 kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, in kfd_get_cu_occupancy()
314 wave_cnt += (NUM_XCC(dev->xcc_mask) * in kfd_get_cu_occupancy()
320 cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; in kfd_get_cu_occupancy()
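/* [Added note] The expression above is plain ceiling division; in kernel
 * style it is equivalent to DIV_ROUND_UP(wave_cnt, max_waves_per_cu).
 */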
327 if (strcmp(attr->name, "pasid") == 0) { in kfd_procfs_show()
331 return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid); in kfd_procfs_show()
332 } else if (strncmp(attr->name, "vram_", 5) == 0) { in kfd_procfs_show()
335 return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); in kfd_procfs_show()
336 } else if (strncmp(attr->name, "sdma_", 5) == 0) { in kfd_procfs_show()
356 return -EINVAL; in kfd_procfs_show()
385 &kfd_device->kobj, "proc"); in kfd_procfs_init()
407 if (!strcmp(attr->name, "size")) in kfd_procfs_queue_show()
409 q->properties.queue_size); in kfd_procfs_queue_show()
410 else if (!strcmp(attr->name, "type")) in kfd_procfs_queue_show()
411 return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type); in kfd_procfs_queue_show()
412 else if (!strcmp(attr->name, "gpuid")) in kfd_procfs_queue_show()
413 return snprintf(buffer, PAGE_SIZE, "%u", q->device->id); in kfd_procfs_queue_show()
423 if (strcmp(attr->name, "evicted_ms") == 0) { in kfd_procfs_stats_show()
429 evict_jiffies = atomic64_read(&pdd->evict_duration_counter); in kfd_procfs_stats_show()
437 } else if (strcmp(attr->name, "cu_occupancy") == 0) { in kfd_procfs_stats_show()
451 if (!strcmp(attr->name, "faults")) { in kfd_sysfs_counters_show()
454 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults)); in kfd_sysfs_counters_show()
456 if (!strcmp(attr->name, "page_in")) { in kfd_sysfs_counters_show()
459 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in)); in kfd_sysfs_counters_show()
461 if (!strcmp(attr->name, "page_out")) { in kfd_sysfs_counters_show()
464 return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out)); in kfd_sysfs_counters_show()
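/* [Added note] sysfs_emit() is the preferred modern replacement for
 * snprintf(buffer, PAGE_SIZE, ...) in sysfs show() callbacks; the older
 * snprintf() calls in kfd_procfs_show()/kfd_procfs_queue_show() above
 * predate that helper.
 */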
524 if (!q || !q->process) in kfd_procfs_add_queue()
525 return -EINVAL; in kfd_procfs_add_queue()
526 proc = q->process; in kfd_procfs_add_queue()
529 if (!proc->kobj_queues) in kfd_procfs_add_queue()
530 return -EFAULT; in kfd_procfs_add_queue()
531 ret = kobject_init_and_add(&q->kobj, &procfs_queue_type, in kfd_procfs_add_queue()
532 proc->kobj_queues, "%u", q->properties.queue_id); in kfd_procfs_add_queue()
535 q->properties.queue_id); in kfd_procfs_add_queue()
536 kobject_put(&q->kobj); in kfd_procfs_add_queue()
551 attr->name = name; in kfd_sysfs_create_file()
552 attr->mode = KFD_SYSFS_FILE_MODE; in kfd_sysfs_create_file()
557 pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret); in kfd_sysfs_create_file()
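/*
 * [Added sketch] The surviving lines suggest this helper wraps
 * sysfs_create_file(); a plausible reconstruction of the elided middle
 * (the sysfs_attr_init() call is an assumption):
 */
static void kfd_sysfs_create_file_sketch(struct kobject *kobj,
					 struct attribute *attr, char *name)
{
	int ret;

	attr->name = name;
	attr->mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(attr);
	ret = sysfs_create_file(kobj, attr);
	if (ret)
		pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
}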
566 if (!p || !p->kobj) in kfd_procfs_add_sysfs_stats()
571 * - proc/<pid>/stats_<gpuid>/ in kfd_procfs_add_sysfs_stats()
572 * - proc/<pid>/stats_<gpuid>/evicted_ms in kfd_procfs_add_sysfs_stats()
573 * - proc/<pid>/stats_<gpuid>/cu_occupancy in kfd_procfs_add_sysfs_stats()
575 for (i = 0; i < p->n_pdds; i++) { in kfd_procfs_add_sysfs_stats()
576 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_stats()
579 "stats_%u", pdd->dev->id); in kfd_procfs_add_sysfs_stats()
580 pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats); in kfd_procfs_add_sysfs_stats()
581 if (!pdd->kobj_stats) in kfd_procfs_add_sysfs_stats()
584 ret = kobject_init_and_add(pdd->kobj_stats, in kfd_procfs_add_sysfs_stats()
586 p->kobj, in kfd_procfs_add_sysfs_stats()
592 kobject_put(pdd->kobj_stats); in kfd_procfs_add_sysfs_stats()
593 pdd->kobj_stats = NULL; in kfd_procfs_add_sysfs_stats()
597 kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict, in kfd_procfs_add_sysfs_stats()
600 if (pdd->dev->kfd2kgd->get_cu_occupancy) in kfd_procfs_add_sysfs_stats()
601 kfd_sysfs_create_file(pdd->kobj_stats, in kfd_procfs_add_sysfs_stats()
602 &pdd->attr_cu_occupancy, in kfd_procfs_add_sysfs_stats()
613 if (!p || !p->kobj) in kfd_procfs_add_sysfs_counters()
618 * - proc/<pid>/counters_<gpuid>/ in kfd_procfs_add_sysfs_counters()
619 * - proc/<pid>/counters_<gpuid>/faults in kfd_procfs_add_sysfs_counters()
620 * - proc/<pid>/counters_<gpuid>/page_in in kfd_procfs_add_sysfs_counters()
621 * - proc/<pid>/counters_<gpuid>/page_out in kfd_procfs_add_sysfs_counters()
623 for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { in kfd_procfs_add_sysfs_counters()
624 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_counters()
628 "counters_%u", pdd->dev->id); in kfd_procfs_add_sysfs_counters()
634 p->kobj, counters_dir_filename); in kfd_procfs_add_sysfs_counters()
642 pdd->kobj_counters = kobj_counters; in kfd_procfs_add_sysfs_counters()
643 kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults, in kfd_procfs_add_sysfs_counters()
645 kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in, in kfd_procfs_add_sysfs_counters()
647 kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out, in kfd_procfs_add_sysfs_counters()
656 if (!p || !p->kobj) in kfd_procfs_add_sysfs_files()
661 * - proc/<pid>/vram_<gpuid> in kfd_procfs_add_sysfs_files()
662 * - proc/<pid>/sdma_<gpuid> in kfd_procfs_add_sysfs_files()
664 for (i = 0; i < p->n_pdds; i++) { in kfd_procfs_add_sysfs_files()
665 struct kfd_process_device *pdd = p->pdds[i]; in kfd_procfs_add_sysfs_files()
667 snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", in kfd_procfs_add_sysfs_files()
668 pdd->dev->id); in kfd_procfs_add_sysfs_files()
669 kfd_sysfs_create_file(p->kobj, &pdd->attr_vram, in kfd_procfs_add_sysfs_files()
670 pdd->vram_filename); in kfd_procfs_add_sysfs_files()
672 snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u", in kfd_procfs_add_sysfs_files()
673 pdd->dev->id); in kfd_procfs_add_sysfs_files()
674 kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma, in kfd_procfs_add_sysfs_files()
675 pdd->sdma_filename); in kfd_procfs_add_sysfs_files()
684 kobject_del(&q->kobj); in kfd_procfs_del_queue()
685 kobject_put(&q->kobj); in kfd_procfs_del_queue()
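/* [Added note] kobject_del() removes the entry from sysfs; kobject_put()
 * then drops the reference taken by kobject_init_and_add(). The same
 * del-then-put pair recurs for the queues, stats, counters and per-process
 * kobjects in kfd_process_remove_sysfs() below.
 */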
698 return -ENOMEM; in kfd_process_create_wq()
719 struct kfd_node *dev = pdd->dev; in kfd_process_free_gpuvm()
726 amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); in kfd_process_free_gpuvm()
727 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv, in kfd_process_free_gpuvm()
731 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
735 * not need to take p->mutex.
741 struct kfd_node *kdev = pdd->dev; in kfd_process_alloc_gpuvm()
744 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, in kfd_process_alloc_gpuvm()
745 pdd->drm_priv, mem, NULL, in kfd_process_alloc_gpuvm()
750 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, in kfd_process_alloc_gpuvm()
751 pdd->drm_priv); in kfd_process_alloc_gpuvm()
755 err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true); in kfd_process_alloc_gpuvm()
757 pr_debug("Sync memory failed, wait interrupted by user signal\n"); in kfd_process_alloc_gpuvm()
773 amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv); in kfd_process_alloc_gpuvm()
776 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv, in kfd_process_alloc_gpuvm()
784 /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
792 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_reserve_ib_mem()
801 if (qpd->ib_kaddr || !qpd->ib_base) in kfd_process_device_reserve_ib_mem()
805 ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags, in kfd_process_device_reserve_ib_mem()
810 qpd->ib_mem = mem; in kfd_process_device_reserve_ib_mem()
811 qpd->ib_kaddr = kaddr; in kfd_process_device_reserve_ib_mem()
818 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_destroy_ib_mem()
820 if (!qpd->ib_kaddr || !qpd->ib_base) in kfd_process_device_destroy_ib_mem()
823 kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); in kfd_process_device_destroy_ib_mem()
831 if (!(thread->mm && mmget_not_zero(thread->mm))) in kfd_create_process()
832 return ERR_PTR(-EINVAL); in kfd_create_process()
835 if (thread->group_leader->mm != thread->mm) { in kfd_create_process()
836 mmput(thread->mm); in kfd_create_process()
837 return ERR_PTR(-EINVAL); in kfd_create_process()
849 process = ERR_PTR(-EINVAL); in kfd_create_process()
873 process->kobj = kfd_alloc_struct(process->kobj); in kfd_create_process()
874 if (!process->kobj) { in kfd_create_process()
878 ret = kobject_init_and_add(process->kobj, &procfs_type, in kfd_create_process()
880 (int)process->lead_thread->pid); in kfd_create_process()
883 kobject_put(process->kobj); in kfd_create_process()
887 kfd_sysfs_create_file(process->kobj, &process->attr_pasid, in kfd_create_process()
890 process->kobj_queues = kobject_create_and_add("queues", in kfd_create_process()
891 process->kobj); in kfd_create_process()
892 if (!process->kobj_queues) in kfd_create_process()
899 init_waitqueue_head(&process->wait_irq_drain); in kfd_create_process()
903 kref_get(&process->ref); in kfd_create_process()
905 mmput(thread->mm); in kfd_create_process()
914 if (!thread->mm) in kfd_get_process()
915 return ERR_PTR(-EINVAL); in kfd_get_process()
918 if (thread->group_leader->mm != thread->mm) in kfd_get_process()
919 return ERR_PTR(-EINVAL); in kfd_get_process()
923 return ERR_PTR(-EINVAL); in kfd_get_process()
934 if (process->mm == mm) in find_process_by_mm()
947 p = find_process_by_mm(thread->mm); in find_process()
949 kref_get(&p->ref); in find_process()
957 kref_put(&p->ref, kfd_process_ref_release); in kfd_unref_process()
960 /* This increments the process->ref counter. */
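/* [Added sketch] The lookup/unref contract these comments describe: the
 * caller owns one reference and must drop it when done, e.g.
 *
 *	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 *	if (p) {
 *		... use p ...
 *		kfd_unref_process(p);	pairs with the kref_get() inside
 *	}
 */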
983 struct kfd_process *p = pdd->process; in kfd_process_device_free_bos()
992 idr_for_each_entry(&pdd->alloc_idr, mem, id) { in kfd_process_device_free_bos()
994 for (i = 0; i < p->n_pdds; i++) { in kfd_process_device_free_bos()
995 struct kfd_process_device *peer_pdd = p->pdds[i]; in kfd_process_device_free_bos()
997 if (!peer_pdd->drm_priv) in kfd_process_device_free_bos()
1000 peer_pdd->dev->adev, mem, peer_pdd->drm_priv); in kfd_process_device_free_bos()
1003 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, in kfd_process_device_free_bos()
1004 pdd->drm_priv, NULL); in kfd_process_device_free_bos()
1010 * Just kunmap and unpin the signal BO here. It will be freed in
1019 kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle)); in kfd_process_kunmap_signal_bo()
1023 mutex_lock(&p->mutex); in kfd_process_kunmap_signal_bo()
1030 pdd, GET_IDR_HANDLE(p->signal_handle)); in kfd_process_kunmap_signal_bo()
1037 mutex_unlock(&p->mutex); in kfd_process_kunmap_signal_bo()
1044 for (i = 0; i < p->n_pdds; i++) in kfd_process_free_outstanding_kfd_bos()
1045 kfd_process_device_free_bos(p->pdds[i]); in kfd_process_free_outstanding_kfd_bos()
1052 for (i = 0; i < p->n_pdds; i++) { in kfd_process_destroy_pdds()
1053 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_destroy_pdds()
1056 pdd->dev->id, p->pasid); in kfd_process_destroy_pdds()
1061 if (pdd->drm_file) { in kfd_process_destroy_pdds()
1063 pdd->dev->adev, pdd->drm_priv); in kfd_process_destroy_pdds()
1064 fput(pdd->drm_file); in kfd_process_destroy_pdds()
1067 if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) in kfd_process_destroy_pdds()
1068 free_pages((unsigned long)pdd->qpd.cwsr_kaddr, in kfd_process_destroy_pdds()
1071 idr_destroy(&pdd->alloc_idr); in kfd_process_destroy_pdds()
1073 kfd_free_process_doorbells(pdd->dev->kfd, pdd); in kfd_process_destroy_pdds()
1075 if (pdd->dev->kfd->shared_resources.enable_mes) in kfd_process_destroy_pdds()
1076 amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, in kfd_process_destroy_pdds()
1077 &pdd->proc_ctx_bo); in kfd_process_destroy_pdds()
1082 if (pdd->runtime_inuse) { in kfd_process_destroy_pdds()
1083 pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev); in kfd_process_destroy_pdds()
1084 pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev); in kfd_process_destroy_pdds()
1085 pdd->runtime_inuse = false; in kfd_process_destroy_pdds()
1089 p->pdds[i] = NULL; in kfd_process_destroy_pdds()
1091 p->n_pdds = 0; in kfd_process_destroy_pdds()
1099 if (!p->kobj) in kfd_process_remove_sysfs()
1102 sysfs_remove_file(p->kobj, &p->attr_pasid); in kfd_process_remove_sysfs()
1103 kobject_del(p->kobj_queues); in kfd_process_remove_sysfs()
1104 kobject_put(p->kobj_queues); in kfd_process_remove_sysfs()
1105 p->kobj_queues = NULL; in kfd_process_remove_sysfs()
1107 for (i = 0; i < p->n_pdds; i++) { in kfd_process_remove_sysfs()
1108 pdd = p->pdds[i]; in kfd_process_remove_sysfs()
1110 sysfs_remove_file(p->kobj, &pdd->attr_vram); in kfd_process_remove_sysfs()
1111 sysfs_remove_file(p->kobj, &pdd->attr_sdma); in kfd_process_remove_sysfs()
1113 sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); in kfd_process_remove_sysfs()
1114 if (pdd->dev->kfd2kgd->get_cu_occupancy) in kfd_process_remove_sysfs()
1115 sysfs_remove_file(pdd->kobj_stats, in kfd_process_remove_sysfs()
1116 &pdd->attr_cu_occupancy); in kfd_process_remove_sysfs()
1117 kobject_del(pdd->kobj_stats); in kfd_process_remove_sysfs()
1118 kobject_put(pdd->kobj_stats); in kfd_process_remove_sysfs()
1119 pdd->kobj_stats = NULL; in kfd_process_remove_sysfs()
1122 for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { in kfd_process_remove_sysfs()
1123 pdd = p->pdds[i]; in kfd_process_remove_sysfs()
1125 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); in kfd_process_remove_sysfs()
1126 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); in kfd_process_remove_sysfs()
1127 sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out); in kfd_process_remove_sysfs()
1128 kobject_del(pdd->kobj_counters); in kfd_process_remove_sysfs()
1129 kobject_put(pdd->kobj_counters); in kfd_process_remove_sysfs()
1130 pdd->kobj_counters = NULL; in kfd_process_remove_sysfs()
1133 kobject_del(p->kobj); in kfd_process_remove_sysfs()
1134 kobject_put(p->kobj); in kfd_process_remove_sysfs()
1135 p->kobj = NULL; in kfd_process_remove_sysfs()
1150 pqm_uninit(&p->pqm); in kfd_process_wq_release()
1152 /* Signal the eviction fence after user mode queues are in kfd_process_wq_release()
1157 ef = rcu_access_pointer(p->ef); in kfd_process_wq_release()
1171 kfd_pasid_free(p->pasid); in kfd_process_wq_release()
1172 mutex_destroy(&p->mutex); in kfd_process_wq_release()
1174 put_task_struct(p->lead_thread); in kfd_process_wq_release()
1183 INIT_WORK(&p->release_work, kfd_process_wq_release); in kfd_process_ref_release()
1184 queue_work(kfd_process_wq, &p->release_work); in kfd_process_ref_release()
1194 return p ? &p->mmu_notifier : ERR_PTR(-ESRCH); in kfd_process_alloc_notifier()
1206 cancel_delayed_work_sync(&p->eviction_work); in kfd_process_notifier_release_internal()
1207 cancel_delayed_work_sync(&p->restore_work); in kfd_process_notifier_release_internal()
1209 for (i = 0; i < p->n_pdds; i++) { in kfd_process_notifier_release_internal()
1210 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_notifier_release_internal()
1212 /* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */ in kfd_process_notifier_release_internal()
1213 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup) in kfd_process_notifier_release_internal()
1214 amdgpu_gfx_off_ctrl(pdd->dev->adev, true); in kfd_process_notifier_release_internal()
1218 p->mm = NULL; in kfd_process_notifier_release_internal()
1221 if (atomic_read(&p->debugged_process_count) > 0) { in kfd_process_notifier_release_internal()
1227 if (target->debugger_process && target->debugger_process == p) { in kfd_process_notifier_release_internal()
1228 mutex_lock_nested(&target->mutex, 1); in kfd_process_notifier_release_internal()
1230 mutex_unlock(&target->mutex); in kfd_process_notifier_release_internal()
1231 if (atomic_read(&p->debugged_process_count) == 0) in kfd_process_notifier_release_internal()
1239 mmu_notifier_put(&p->mmu_notifier); in kfd_process_notifier_release_internal()
1252 if (WARN_ON(p->mm != mm)) in kfd_process_notifier_release()
1267 hash_del_rcu(&p->kfd_processes); in kfd_process_notifier_release()
1301 hash_del_rcu(&p->kfd_processes); in kfd_cleanup_processes()
1303 hlist_add_head(&p->kfd_processes, &cleanup_list); in kfd_cleanup_processes()
1322 if (p->has_cwsr) in kfd_process_init_cwsr_apu()
1325 for (i = 0; i < p->n_pdds; i++) { in kfd_process_init_cwsr_apu()
1326 struct kfd_node *dev = p->pdds[i]->dev; in kfd_process_init_cwsr_apu()
1327 struct qcm_process_device *qpd = &p->pdds[i]->qpd; in kfd_process_init_cwsr_apu()
1329 if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) in kfd_process_init_cwsr_apu()
1332 offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id); in kfd_process_init_cwsr_apu()
1333 qpd->tba_addr = (int64_t)vm_mmap(filep, 0, in kfd_process_init_cwsr_apu()
1337 if (IS_ERR_VALUE(qpd->tba_addr)) { in kfd_process_init_cwsr_apu()
1338 int err = qpd->tba_addr; in kfd_process_init_cwsr_apu()
1340 dev_err(dev->adev->dev, in kfd_process_init_cwsr_apu()
1342 qpd->tba_addr = 0; in kfd_process_init_cwsr_apu()
1343 qpd->cwsr_kaddr = NULL; in kfd_process_init_cwsr_apu()
1347 memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); in kfd_process_init_cwsr_apu()
1349 kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled); in kfd_process_init_cwsr_apu()
1351 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; in kfd_process_init_cwsr_apu()
1353 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); in kfd_process_init_cwsr_apu()
1356 p->has_cwsr = true; in kfd_process_init_cwsr_apu()
1363 struct kfd_node *dev = pdd->dev; in kfd_process_device_init_cwsr_dgpu()
1364 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_init_cwsr_dgpu()
1372 if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) in kfd_process_device_init_cwsr_dgpu()
1376 ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base, in kfd_process_device_init_cwsr_dgpu()
1381 qpd->cwsr_mem = mem; in kfd_process_device_init_cwsr_dgpu()
1382 qpd->cwsr_kaddr = kaddr; in kfd_process_device_init_cwsr_dgpu()
1383 qpd->tba_addr = qpd->cwsr_base; in kfd_process_device_init_cwsr_dgpu()
1385 memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); in kfd_process_device_init_cwsr_dgpu()
1387 kfd_process_set_trap_debug_flag(&pdd->qpd, in kfd_process_device_init_cwsr_dgpu()
1388 pdd->process->debug_trap_enabled); in kfd_process_device_init_cwsr_dgpu()
1390 qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; in kfd_process_device_init_cwsr_dgpu()
1392 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); in kfd_process_device_init_cwsr_dgpu()
1399 struct kfd_node *dev = pdd->dev; in kfd_process_device_destroy_cwsr_dgpu()
1400 struct qcm_process_device *qpd = &pdd->qpd; in kfd_process_device_destroy_cwsr_dgpu()
1402 if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) in kfd_process_device_destroy_cwsr_dgpu()
1405 kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); in kfd_process_device_destroy_cwsr_dgpu()
1412 if (qpd->cwsr_kaddr) { in kfd_process_set_trap_handler()
1413 /* KFD trap handler is bound, record as second-level TBA/TMA in kfd_process_set_trap_handler()
1414 * in first-level TMA. First-level trap will jump to second. in kfd_process_set_trap_handler()
1417 (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); in kfd_process_set_trap_handler()
1421 /* No trap handler bound, bind as first-level TBA/TMA. */ in kfd_process_set_trap_handler()
1422 qpd->tba_addr = tba_addr; in kfd_process_set_trap_handler()
1423 qpd->tma_addr = tma_addr; in kfd_process_set_trap_handler()
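/* [Added sketch] The store elided from the first branch above, assuming the
 * second-level TBA/TMA are recorded as the first two u64 slots of the TMA
 * page (an assumption based on how kfd_process_set_trap_debug_flag() below
 * also dereferences the TMA page):
 *
 *	tma[0] = tba_addr;
 *	tma[1] = tma_addr;
 */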
1439 * built for XNACK-off. On GFXv9 it may perform more slowly. in kfd_process_xnack_mode()
1441 * Therefore applications built for XNACK-off can always be in kfd_process_xnack_mode()
1445 for (i = 0; i < p->n_pdds; i++) { in kfd_process_xnack_mode()
1446 struct kfd_node *dev = p->pdds[i]->dev; in kfd_process_xnack_mode()
1455 * per-process XNACK mode selection. But let the dev->noretry in kfd_process_xnack_mode()
1459 if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { in kfd_process_xnack_mode()
1468 * management and memory-manager-related preemptions or in kfd_process_xnack_mode()
1474 if (dev->kfd->noretry) in kfd_process_xnack_mode()
1484 if (qpd->cwsr_kaddr) { in kfd_process_set_trap_debug_flag()
1486 (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); in kfd_process_set_trap_debug_flag()
1499 int err = -ENOMEM; in create_process()
1505 kref_init(&process->ref); in create_process()
1506 mutex_init(&process->mutex); in create_process()
1507 process->mm = thread->mm; in create_process()
1508 process->lead_thread = thread->group_leader; in create_process()
1509 process->n_pdds = 0; in create_process()
1510 process->queues_paused = false; in create_process()
1511 INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); in create_process()
1512 INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); in create_process()
1513 process->last_restore_timestamp = get_jiffies_64(); in create_process()
1517 process->is_32bit_user_mode = in_compat_syscall(); in create_process()
1518 process->debug_trap_enabled = false; in create_process()
1519 process->debugger_process = NULL; in create_process()
1520 process->exception_enable_mask = 0; in create_process()
1521 atomic_set(&process->debugged_process_count, 0); in create_process()
1522 sema_init(&process->runtime_enable_sema, 0); in create_process()
1524 process->pasid = kfd_pasid_alloc(); in create_process()
1525 if (process->pasid == 0) { in create_process()
1526 err = -ENOSPC; in create_process()
1530 err = pqm_init(&process->pqm, process); in create_process()
1540 process->xnack_enabled = kfd_process_xnack_mode(process, false); in create_process()
1547 hash_add_rcu(kfd_processes_table, &process->kfd_processes, in create_process()
1548 (uintptr_t)process->mm); in create_process()
1551 * mmu_notifier_get failed because of pending signal. in create_process()
1553 kref_get(&process->ref); in create_process()
1560 mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm); in create_process()
1565 BUG_ON(mn != &process->mmu_notifier); in create_process()
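/* [Added note] mmu_notifier_get() either registers a new notifier for this
 * mm, allocating it via the ops' alloc_notifier callback
 * (kfd_process_alloc_notifier() above), or returns one that is already
 * registered; the BUG_ON() asserts it resolved to the notifier embedded in
 * this very process.
 */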
1568 get_task_struct(process->lead_thread); in create_process()
1570 INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler); in create_process()
1575 hash_del_rcu(&process->kfd_processes); in create_process()
1581 pqm_uninit(&process->pqm); in create_process()
1583 kfd_pasid_free(process->pasid); in create_process()
1587 mutex_destroy(&process->mutex); in create_process()
1598 for (i = 0; i < p->n_pdds; i++) in kfd_get_process_device_data()
1599 if (p->pdds[i]->dev == dev) in kfd_get_process_device_data()
1600 return p->pdds[i]; in kfd_get_process_device_data()
1611 if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) in kfd_create_process_device_data()
1617 pdd->dev = dev; in kfd_create_process_device_data()
1618 INIT_LIST_HEAD(&pdd->qpd.queues_list); in kfd_create_process_device_data()
1619 INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); in kfd_create_process_device_data()
1620 pdd->qpd.dqm = dev->dqm; in kfd_create_process_device_data()
1621 pdd->qpd.pqm = &p->pqm; in kfd_create_process_device_data()
1622 pdd->qpd.evicted = 0; in kfd_create_process_device_data()
1623 pdd->qpd.mapped_gws_queue = false; in kfd_create_process_device_data()
1624 pdd->process = p; in kfd_create_process_device_data()
1625 pdd->bound = PDD_UNBOUND; in kfd_create_process_device_data()
1626 pdd->already_dequeued = false; in kfd_create_process_device_data()
1627 pdd->runtime_inuse = false; in kfd_create_process_device_data()
1628 atomic64_set(&pdd->vram_usage, 0); in kfd_create_process_device_data()
1629 pdd->sdma_past_activity_counter = 0; in kfd_create_process_device_data()
1630 pdd->user_gpu_id = dev->id; in kfd_create_process_device_data()
1631 atomic64_set(&pdd->evict_duration_counter, 0); in kfd_create_process_device_data()
1633 if (dev->kfd->shared_resources.enable_mes) { in kfd_create_process_device_data()
1634 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, in kfd_create_process_device_data()
1636 &pdd->proc_ctx_bo, in kfd_create_process_device_data()
1637 &pdd->proc_ctx_gpu_addr, in kfd_create_process_device_data()
1638 &pdd->proc_ctx_cpu_ptr, in kfd_create_process_device_data()
1641 dev_err(dev->adev->dev, in kfd_create_process_device_data()
1645 memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); in kfd_create_process_device_data()
1648 p->pdds[p->n_pdds++] = pdd; in kfd_create_process_device_data()
1649 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) in kfd_create_process_device_data()
1650 pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( in kfd_create_process_device_data()
1651 pdd->dev->adev, in kfd_create_process_device_data()
1656 idr_init(&pdd->alloc_idr); in kfd_create_process_device_data()
1666 * kfd_process_device_init_vm - Initialize a VM for a process-device
1668 * @pdd: The process-device
1677 * Returns 0 on success, -errno on failure.
1690 return -EINVAL; in kfd_process_device_init_vm()
1692 if (pdd->drm_priv) in kfd_process_device_init_vm()
1693 return -EBUSY; in kfd_process_device_init_vm()
1698 avm = &drv_priv->vm; in kfd_process_device_init_vm()
1700 p = pdd->process; in kfd_process_device_init_vm()
1701 dev = pdd->dev; in kfd_process_device_init_vm()
1703 ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, in kfd_process_device_init_vm()
1704 &p->kgd_process_info, in kfd_process_device_init_vm()
1705 p->ef ? NULL : &ef); in kfd_process_device_init_vm()
1707 dev_err(dev->adev->dev, "Failed to create process VM object\n"); in kfd_process_device_init_vm()
1711 if (!p->ef) in kfd_process_device_init_vm()
1712 RCU_INIT_POINTER(p->ef, ef); in kfd_process_device_init_vm()
1714 pdd->drm_priv = drm_file->private_data; in kfd_process_device_init_vm()
1723 ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid); in kfd_process_device_init_vm()
1727 pdd->drm_file = drm_file; in kfd_process_device_init_vm()
1736 pdd->drm_priv = NULL; in kfd_process_device_init_vm()
1737 amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm); in kfd_process_device_init_vm()
1743 * Direct the IOMMU to bind the process (specifically the pasid->mm)
1757 dev_err(dev->adev->dev, "Process device data doesn't exist\n"); in kfd_bind_process_to_device()
1758 return ERR_PTR(-ENOMEM); in kfd_bind_process_to_device()
1761 if (!pdd->drm_priv) in kfd_bind_process_to_device()
1762 return ERR_PTR(-ENODEV); in kfd_bind_process_to_device()
1765 * signal the runtime-pm system to auto-resume and prevent in kfd_bind_process_to_device()
1769 if (!pdd->runtime_inuse) { in kfd_bind_process_to_device()
1770 err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev); in kfd_bind_process_to_device()
1772 pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev); in kfd_bind_process_to_device()
1781 pdd->runtime_inuse = true; in kfd_bind_process_to_device()
1792 return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL); in kfd_process_device_create_obj_handle()
1804 return idr_find(&pdd->alloc_idr, handle); in kfd_process_device_translate_handle()
1814 idr_remove(&pdd->alloc_idr, handle); in kfd_process_device_remove_obj_handle()
1817 /* This increments the process->ref counter. */
1826 if (p->pasid == pasid) { in kfd_lookup_process_by_pasid()
1827 kref_get(&p->ref); in kfd_lookup_process_by_pasid()
1838 /* This increments the process->ref counter. */
1847 kref_get(&p->ref); in kfd_lookup_process_by_mm()
1854 /* kfd_process_evict_queues - Evict all user queues of a process
1856 * Eviction is reference-counted per process-device. This means multiple
1857 * evictions from different sources can be nested safely.
1865 for (i = 0; i < p->n_pdds; i++) { in kfd_process_evict_queues()
1866 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_evict_queues()
1867 struct device *dev = pdd->dev->adev->dev; in kfd_process_evict_queues()
1869 kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, in kfd_process_evict_queues()
1872 r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, in kfd_process_evict_queues()
1873 &pdd->qpd); in kfd_process_evict_queues()
1874 /* eviction returns -EIO if HWS hangs or the ASIC is resetting; in this case in kfd_process_evict_queues()
1878 if (r && r != -EIO) { in kfd_process_evict_queues()
1884 pdd->dev->dqm->is_hws_hang = false; in kfd_process_evict_queues()
1893 for (i = 0; i < p->n_pdds; i++) { in kfd_process_evict_queues()
1894 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_evict_queues()
1899 kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); in kfd_process_evict_queues()
1901 if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, in kfd_process_evict_queues()
1902 &pdd->qpd)) in kfd_process_evict_queues()
1903 dev_err(pdd->dev->adev->dev, in kfd_process_evict_queues()
1906 n_evicted--; in kfd_process_evict_queues()
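/* [Added sketch] Illustration of the nesting that the
 * kfd_process_evict_queues() kdoc above promises: qpd->evicted acts as a
 * counter, not a flag, so only the 0->1 and 1->0 transitions touch the
 * hardware (illustrative logic; the actual counting lives in the device
 * queue manager):
 *
 *	if (qpd->evicted++ == 0)
 *		stop the queues on the hardware;
 *	...
 *	if (--qpd->evicted == 0)
 *		restart the queues on the hardware;
 */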
1912 /* kfd_process_restore_queues - Restore all user queues of a process */
1918 for (i = 0; i < p->n_pdds; i++) { in kfd_process_restore_queues()
1919 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_restore_queues()
1920 struct device *dev = pdd->dev->adev->dev; in kfd_process_restore_queues()
1922 kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); in kfd_process_restore_queues()
1924 r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, in kfd_process_restore_queues()
1925 &pdd->qpd); in kfd_process_restore_queues()
1940 for (i = 0; i < p->n_pdds; i++) in kfd_process_gpuidx_from_gpuid()
1941 if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id) in kfd_process_gpuidx_from_gpuid()
1943 return -EINVAL; in kfd_process_gpuidx_from_gpuid()
1952 for (i = 0; i < p->n_pdds; i++) in kfd_process_gpuid_from_node()
1953 if (p->pdds[i] && p->pdds[i]->dev == node) { in kfd_process_gpuid_from_node()
1954 *gpuid = p->pdds[i]->user_gpu_id; in kfd_process_gpuid_from_node()
1958 return -EINVAL; in kfd_process_gpuid_from_node()
1967 ef = dma_fence_get_rcu_safe(&p->ef); in signal_eviction_fence()
1970 return -EINVAL; in signal_eviction_fence()
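/* [Added note] dma_fence_get_rcu_safe() retries until it holds a reference
 * on the fence that is still the current p->ef, which is why p->ef is
 * published with RCU_INIT_POINTER() and peeked with rcu_access_pointer()
 * elsewhere in this file.
 */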
1991 pr_debug("Started evicting pasid 0x%x\n", p->pasid); in evict_process_worker()
1999 mod_delayed_work(kfd_restore_wq, &p->restore_work, in evict_process_worker()
2003 pr_debug("Finished evicting pasid 0x%x\n", p->pasid); in evict_process_worker()
2005 pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); in evict_process_worker()
2013 if (p->kgd_process_info) { in restore_process_helper()
2015 p->kgd_process_info, &p->ef); in restore_process_helper()
2022 pr_debug("Finished restoring pasid 0x%x\n", p->pasid); in restore_process_helper()
2024 pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); in restore_process_helper()
2041 pr_debug("Started restoring pasid 0x%x\n", p->pasid); in restore_process_worker()
2049 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two in restore_process_worker()
2053 p->last_restore_timestamp = get_jiffies_64(); in restore_process_worker()
2058 p->pasid, PROCESS_BACK_OFF_TIME_MS); in restore_process_worker()
2059 if (mod_delayed_work(kfd_restore_wq, &p->restore_work, in restore_process_worker()
2074 pr_err("Failed to suspend process 0x%x\n", p->pasid); in kfd_suspend_all_processes()
2089 p->pasid); in kfd_resume_all_processes()
2090 ret = -EFAULT; in kfd_resume_all_processes()
2103 if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { in kfd_reserved_mem_mmap()
2104 dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n"); in kfd_reserved_mem_mmap()
2105 return -EINVAL; in kfd_reserved_mem_mmap()
2110 return -EINVAL; in kfd_reserved_mem_mmap()
2111 qpd = &pdd->qpd; in kfd_reserved_mem_mmap()
2113 qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, in kfd_reserved_mem_mmap()
2115 if (!qpd->cwsr_kaddr) { in kfd_reserved_mem_mmap()
2116 dev_err(dev->adev->dev, in kfd_reserved_mem_mmap()
2118 return -ENOMEM; in kfd_reserved_mem_mmap()
2124 return remap_pfn_range(vma, vma->vm_start, in kfd_reserved_mem_mmap()
2125 PFN_DOWN(__pa(qpd->cwsr_kaddr)), in kfd_reserved_mem_mmap()
2126 KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); in kfd_reserved_mem_mmap()
2136 if (!KFD_IS_SOC15(pdd->dev)) in kfd_process_drain_interrupts()
2139 pdd->process->irq_drain_is_open = true; in kfd_process_drain_interrupts()
2144 irq_drain_fence[3] = pdd->process->pasid; in kfd_process_drain_interrupts()
2149 if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) || in kfd_process_drain_interrupts()
2150 KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) { in kfd_process_drain_interrupts()
2151 node_id = ffs(pdd->dev->interrupt_bitmap) - 1; in kfd_process_drain_interrupts()
2156 if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev, in kfd_process_drain_interrupts()
2158 pdd->process->irq_drain_is_open = false; in kfd_process_drain_interrupts()
2162 r = wait_event_interruptible(pdd->process->wait_irq_drain, in kfd_process_drain_interrupts()
2163 !READ_ONCE(pdd->process->irq_drain_is_open)); in kfd_process_drain_interrupts()
2165 pdd->process->irq_drain_is_open = false; in kfd_process_drain_interrupts()
2179 WRITE_ONCE(p->irq_drain_is_open, false); in kfd_process_close_interrupt_drain()
2180 wake_up_all(&p->wait_irq_drain); in kfd_process_close_interrupt_drain()
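/* [Added sketch] The drain handshake assembled from the fragments above
 * (condensed, not a verbatim excerpt): the opener publishes the flag, sends
 * a close-event fence through the interrupt ring, then sleeps; the
 * interrupt path runs the two lines above once the fence drains out:
 *
 *	pdd->process->irq_drain_is_open = true;
 *	... amdgpu_amdkfd_send_close_event_drain_irq(adev, irq_drain_fence) ...
 *	wait_event_interruptible(pdd->process->wait_irq_drain,
 *				 !READ_ONCE(pdd->process->irq_drain_is_open));
 */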
2205 p = workarea->p; in send_exception_work_handler()
2207 mm = get_task_mm(p->lead_thread); in send_exception_work_handler()
2214 q = pqm_get_user_queue(&p->pqm, workarea->queue_id); in send_exception_work_handler()
2219 csa_header = (void __user *)q->properties.ctx_save_restore_area_address; in send_exception_work_handler()
2221 get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr); in send_exception_work_handler()
2223 cur_err |= workarea->error_reason; in send_exception_work_handler()
2225 get_user(ev_id, &csa_header->err_event_id); in send_exception_work_handler()
2258 for (i = 0; i < p->n_pdds; i++) { in kfd_process_device_data_by_id()
2259 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_device_data_by_id()
2261 if (pdd->user_gpu_id == gpu_id) in kfd_process_device_data_by_id()
2275 for (i = 0; i < p->n_pdds; i++) { in kfd_process_get_user_gpu_id()
2276 struct kfd_process_device *pdd = p->pdds[i]; in kfd_process_get_user_gpu_id()
2278 if (pdd->dev->id == actual_gpu_id) in kfd_process_get_user_gpu_id()
2279 return pdd->user_gpu_id; in kfd_process_get_user_gpu_id()
2281 return -EINVAL; in kfd_process_get_user_gpu_id()
2296 p->lead_thread->tgid, p->pasid); in kfd_debugfs_mqds_by_process()
2298 mutex_lock(&p->mutex); in kfd_debugfs_mqds_by_process()
2299 r = pqm_debugfs_mqds(m, &p->pqm); in kfd_debugfs_mqds_by_process()
2300 mutex_unlock(&p->mutex); in kfd_debugfs_mqds_by_process()