Lines Matching +full:no +full:- +full:reset +full:- +full:during +full:- +full:suspend

1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
52 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
56 "Scrub device memory in various states (0 = no, 1 = yes, default no)");
60 …"Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Defau…
114 * get_asic_type - translate device id to asic type
119 * In case of unidentified device, return -1
123 struct pci_dev *pdev = hdev->pdev; in get_asic_type()
126 switch (pdev->device) { in get_asic_type()
137 switch (pdev->revision) { in get_asic_type()
172 * hl_device_open() - open function for habanalabs device.
187 return -ENOMEM; in hl_device_open()
189 hpriv->hdev = hdev; in hl_device_open()
190 mutex_init(&hpriv->notifier_event.lock); in hl_device_open()
191 mutex_init(&hpriv->restore_phase_mutex); in hl_device_open()
192 mutex_init(&hpriv->ctx_lock); in hl_device_open()
193 kref_init(&hpriv->refcount); in hl_device_open()
195 hl_ctx_mgr_init(&hpriv->ctx_mgr); in hl_device_open()
196 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); in hl_device_open()
198 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); in hl_device_open()
200 mutex_lock(&hdev->fpriv_list_lock); in hl_device_open()
203 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
205 dev_name(hdev->dev), hdev->status[status]); in hl_device_open()
209 rc = -EAGAIN; in hl_device_open()
211 rc = -EPERM; in hl_device_open()
216 if (hdev->is_in_dram_scrub) { in hl_device_open()
217 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
218 "Can't open %s during dram scrub\n", in hl_device_open()
219 dev_name(hdev->dev)); in hl_device_open()
220 rc = -EAGAIN; in hl_device_open()
224 if (hdev->compute_ctx_in_release) { in hl_device_open()
225 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
227 dev_name(hdev->dev)); in hl_device_open()
228 rc = -EAGAIN; in hl_device_open()
232 if (hdev->is_compute_ctx_active) { in hl_device_open()
233 dev_dbg_ratelimited(hdev->dev, in hl_device_open()
235 dev_name(hdev->dev)); in hl_device_open()
236 rc = -EBUSY; in hl_device_open()
242 dev_err(hdev->dev, "Failed to create context %d\n", rc); in hl_device_open()
246 list_add(&hpriv->dev_node, &hdev->fpriv_list); in hl_device_open()
247 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_open()
249 hdev->asic_funcs->send_device_activity(hdev, true); in hl_device_open()
253 hl_enable_err_info_capture(&hdev->captured_err_info); in hl_device_open()
255 hdev->open_counter++; in hl_device_open()
256 hdev->last_successful_open_jif = jiffies; in hl_device_open()
257 hdev->last_successful_open_ktime = ktime_get(); in hl_device_open()
259 file_priv->driver_priv = hpriv; in hl_device_open()
260 hpriv->file_priv = file_priv; in hl_device_open()
265 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_open()
266 hl_mem_mgr_fini(&hpriv->mem_mgr, NULL); in hl_device_open()
267 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); in hl_device_open()
268 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); in hl_device_open()
269 mutex_destroy(&hpriv->ctx_lock); in hl_device_open()
270 mutex_destroy(&hpriv->restore_phase_mutex); in hl_device_open()
271 mutex_destroy(&hpriv->notifier_event.lock); in hl_device_open()
272 put_pid(hpriv->taskpid); in hl_device_open()
292 return -ENXIO; in hl_device_open_ctrl()
297 return -ENOMEM; in hl_device_open_ctrl()
302 hpriv->hdev = hdev; in hl_device_open_ctrl()
303 filp->private_data = hpriv; in hl_device_open_ctrl()
307 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); in hl_device_open_ctrl()
309 mutex_lock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
312 dev_dbg_ratelimited(hdev->dev_ctrl, in hl_device_open_ctrl()
314 dev_name(hdev->dev_ctrl)); in hl_device_open_ctrl()
315 rc = -EPERM; in hl_device_open_ctrl()
319 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); in hl_device_open_ctrl()
320 mutex_unlock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
325 mutex_unlock(&hdev->fpriv_ctrl_list_lock); in hl_device_open_ctrl()
326 filp->private_data = NULL; in hl_device_open_ctrl()
327 put_pid(hpriv->taskpid); in hl_device_open_ctrl()
336 hdev->nic_ports_mask = 0; in set_driver_behavior_per_device()
337 hdev->fw_components = FW_TYPE_ALL_TYPES; in set_driver_behavior_per_device()
338 hdev->cpu_queues_enable = 1; in set_driver_behavior_per_device()
339 hdev->pldm = 0; in set_driver_behavior_per_device()
340 hdev->hard_reset_on_fw_events = 1; in set_driver_behavior_per_device()
341 hdev->bmc_enable = 1; in set_driver_behavior_per_device()
342 hdev->reset_on_preboot_fail = 1; in set_driver_behavior_per_device()
343 hdev->heartbeat = 1; in set_driver_behavior_per_device()
348 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); in copy_kernel_module_params_to_device()
350 hdev->major = hl_major; in copy_kernel_module_params_to_device()
351 hdev->memory_scrub = memory_scrub; in copy_kernel_module_params_to_device()
352 hdev->reset_on_lockup = reset_on_lockup; in copy_kernel_module_params_to_device()
353 hdev->boot_error_status_mask = boot_error_status_mask; in copy_kernel_module_params_to_device()
358 switch (hdev->asic_type) { in fixup_device_params_per_asic()
365 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * in fixup_device_params_per_asic()
368 hdev->reset_upon_device_release = 0; in fixup_device_params_per_asic()
372 hdev->reset_upon_device_release = 0; in fixup_device_params_per_asic()
376 hdev->reset_upon_device_release = 1; in fixup_device_params_per_asic()
387 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; in fixup_device_params()
388 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; in fixup_device_params()
391 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); in fixup_device_params()
393 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; in fixup_device_params()
395 hdev->stop_on_err = true; in fixup_device_params()
396 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; in fixup_device_params()
397 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; in fixup_device_params()
400 hdev->disabled = true; in fixup_device_params()
402 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && in fixup_device_params()
403 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { in fixup_device_params()
405 return -EINVAL; in fixup_device_params()
408 /* If CPU queues not enabled, no way to do heartbeat */ in fixup_device_params()
409 if (!hdev->cpu_queues_enable) in fixup_device_params()
410 hdev->heartbeat = 0; in fixup_device_params()
425 if (id == -ENOSPC) in allocate_device_id()
427 return -EBUSY; in allocate_device_id()
430 hdev->id = id; in allocate_device_id()
436 hdev->cdev_idx = hdev->id; in allocate_device_id()
442 * create_hdev - create habanalabs device instance
458 hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); in create_hdev()
462 hdev->dev = hdev->drm.dev; in create_hdev()
465 hdev->pdev = pdev; in create_hdev()
468 strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); in create_hdev()
469 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); in create_hdev()
470 strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); in create_hdev()
471 strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); in create_hdev()
472 strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], in create_hdev()
474 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], in create_hdev()
475 "in reset after device release", HL_STR_MAX); in create_hdev()
481 hdev->asic_type = get_asic_type(hdev); in create_hdev()
482 if (hdev->asic_type == ASIC_INVALID) { in create_hdev()
483 dev_err(&pdev->dev, "Unsupported ASIC\n"); in create_hdev()
484 rc = -ENODEV; in create_hdev()
507 * destroy_hdev - destroy habanalabs device instance
516 idr_remove(&hl_devs_idr, hdev->id); in destroy_hdev()
525 pr_debug("Going to suspend PCI device\n"); in hl_pmops_suspend()
528 pr_err("device pointer is NULL in suspend\n"); in hl_pmops_suspend()
550 * hl_pci_probe - probe PCI habanalabs devices
564 dev_info(&pdev->dev, HL_NAME in hl_pci_probe()
566 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); in hl_pci_probe()
576 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); in hl_pci_probe()
577 rc = -ENODEV; in hl_pci_probe()
591 * hl_pci_remove - remove PCI habanalabs devices
611 * hl_pci_err_detected - a PCI bus error detected on this device
616 * Called by the PCI subsystem whenever a non-correctable
627 dev_warn(hdev->dev, "PCI normal state error detected\n"); in hl_pci_err_detected()
631 dev_warn(hdev->dev, "PCI frozen state error detected\n"); in hl_pci_err_detected()
636 dev_warn(hdev->dev, "PCI failure state error detected\n"); in hl_pci_err_detected()
644 hdev->asic_funcs->halt_engines(hdev, true, false); in hl_pci_err_detected()
650 * hl_pci_err_resume - resume after a PCI slot reset
659 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); in hl_pci_err_resume()
664 * hl_pci_err_slot_reset - a PCI slot reset has just happened
668 * Determine if the driver can recover from the PCI slot reset
674 dev_warn(hdev->dev, "PCI slot reset detected\n"); in hl_pci_err_slot_reset()
687 hdev->disabled = true; in hl_pci_reset_prepare()
700 * Schedule a thread to trigger hard reset. in hl_pci_reset_done()
702 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR in hl_pci_reset_done()
704 * hard reset in order to load FW fit again. in hl_pci_reset_done()
712 .suspend = hl_pmops_suspend,
739 * hl_init - Initialize the habanalabs kernel driver
772 * hl_exit - Release all resources of the habanalabs kernel driver