Lines matching refs: hdev (habanalabs driver, common/device.c)
16 enum hl_device_status hl_device_status(struct hl_device *hdev) in hl_device_status() argument
20 if (atomic_read(&hdev->in_reset)) in hl_device_status()
22 else if (hdev->needs_reset) in hl_device_status()
24 else if (hdev->disabled) in hl_device_status()
26 else if (!hdev->init_done) in hl_device_status()
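The checks above form a strict priority ladder: an in-flight reset outranks a queued reset request, which outranks a disabled device, which outranks a device still in creation. A minimal sketch of the full function, assuming the HL_DEVICE_STATUS_* values from the habanalabs uapi header:

    enum hl_device_status hl_device_status(struct hl_device *hdev)
    {
            enum hl_device_status status;

            if (atomic_read(&hdev->in_reset))
                    status = HL_DEVICE_STATUS_IN_RESET;
            else if (hdev->needs_reset)
                    status = HL_DEVICE_STATUS_NEEDS_RESET;
            else if (hdev->disabled)
                    status = HL_DEVICE_STATUS_MALFUNCTION;
            else if (!hdev->init_done)
                    status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
            else
                    status = HL_DEVICE_STATUS_OPERATIONAL;

            return status;
    }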
34 bool hl_device_operational(struct hl_device *hdev, in hl_device_operational() argument
39 current_status = hl_device_status(hdev); in hl_device_operational()
60 struct hl_device *hdev; in hpriv_release() local
64 hdev = hpriv->hdev; in hpriv_release()
72 if ((!hdev->pldm) && (hdev->pdev) && in hpriv_release()
73 (!hdev->asic_funcs->is_device_idle(hdev, in hpriv_release()
76 dev_err(hdev->dev, in hpriv_release()
92 mutex_lock(&hdev->fpriv_list_lock); in hpriv_release()
94 mutex_unlock(&hdev->fpriv_list_lock); in hpriv_release()
96 if ((hdev->reset_if_device_not_idle && !device_is_idle) in hpriv_release()
97 || hdev->reset_upon_device_release) in hpriv_release()
98 hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE); in hpriv_release()
104 mutex_lock(&hdev->fpriv_list_lock); in hpriv_release()
105 hdev->compute_ctx = NULL; in hpriv_release()
106 mutex_unlock(&hdev->fpriv_list_lock); in hpriv_release()
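hpriv_release() decides at file-release time whether the device needs a reset: idleness is only checked on real silicon (not on PLDM simulation, and only when a PCI device is present), and a reset fires either when a not-idle device is configured to reset on that condition or when reset-upon-release is set. A sketch of that decision; the is_device_idle() argument list (idle mask plus HL_BUSY_ENGINES_MASK_EXT_SIZE) is an assumption from the driver headers:

    u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
    bool device_is_idle = true;

    /* skip the idle check on simulation (pldm) or without a PCI device */
    if (!hdev->pldm && hdev->pdev &&
        !hdev->asic_funcs->is_device_idle(hdev, idle_mask,
                            HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
            dev_err(hdev->dev,
                    "device not idle after user context is closed\n");
            device_is_idle = false;
    }

    if ((hdev->reset_if_device_not_idle && !device_is_idle) ||
                    hdev->reset_upon_device_release)
            hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);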
132 struct hl_device *hdev = hpriv->hdev; in hl_device_release() local
136 if (!hdev) { in hl_device_release()
145 hl_release_pending_user_interrupts(hpriv->hdev); in hl_device_release()
147 hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); in hl_device_release()
148 hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); in hl_device_release()
151 dev_notice(hdev->dev, in hl_device_release()
154 hdev->last_open_session_duration_jif = in hl_device_release()
155 jiffies - hdev->last_successful_open_jif; in hl_device_release()
163 struct hl_device *hdev = hpriv->hdev; in hl_device_release_ctrl() local
167 if (!hdev) { in hl_device_release_ctrl()
172 mutex_lock(&hdev->fpriv_list_lock); in hl_device_release_ctrl()
174 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_release_ctrl()
195 struct hl_device *hdev = hpriv->hdev; in hl_mmap() local
198 if (!hdev) { in hl_mmap()
252 static int device_init_cdev(struct hl_device *hdev, struct class *hclass, in device_init_cdev() argument
265 (*dev)->devt = MKDEV(hdev->major, minor); in device_init_cdev()
268 dev_set_drvdata(*dev, hdev); in device_init_cdev()
274 static int device_cdev_sysfs_add(struct hl_device *hdev) in device_cdev_sysfs_add() argument
278 rc = cdev_device_add(&hdev->cdev, hdev->dev); in device_cdev_sysfs_add()
280 dev_err(hdev->dev, in device_cdev_sysfs_add()
285 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); in device_cdev_sysfs_add()
287 dev_err(hdev->dev, in device_cdev_sysfs_add()
293 rc = hl_sysfs_init(hdev); in device_cdev_sysfs_add()
295 dev_err(hdev->dev, "failed to initialize sysfs\n"); in device_cdev_sysfs_add()
299 hdev->cdev_sysfs_created = true; in device_cdev_sysfs_add()
304 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); in device_cdev_sysfs_add()
306 cdev_device_del(&hdev->cdev, hdev->dev); in device_cdev_sysfs_add()
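device_cdev_sysfs_add() is a textbook add-in-order, unwind-in-reverse sequence: every cdev_device_add() that succeeded gets a matching cdev_device_del() when a later step fails. A reconstruction consistent with the lines above (error messages abbreviated):

    static int device_cdev_sysfs_add(struct hl_device *hdev)
    {
            int rc;

            rc = cdev_device_add(&hdev->cdev, hdev->dev);
            if (rc) {
                    dev_err(hdev->dev, "failed to add char device\n");
                    goto out;
            }

            rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
            if (rc) {
                    dev_err(hdev->dev, "failed to add control char device\n");
                    goto delete_cdev_device;
            }

            rc = hl_sysfs_init(hdev);
            if (rc) {
                    dev_err(hdev->dev, "failed to initialize sysfs\n");
                    goto delete_ctrl_cdev_device;
            }

            hdev->cdev_sysfs_created = true;
            return 0;

    delete_ctrl_cdev_device:
            cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
    delete_cdev_device:
            cdev_device_del(&hdev->cdev, hdev->dev);
    out:
            return rc;
    }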
310 static void device_cdev_sysfs_del(struct hl_device *hdev) in device_cdev_sysfs_del() argument
312 if (!hdev->cdev_sysfs_created) in device_cdev_sysfs_del()
315 hl_sysfs_fini(hdev); in device_cdev_sysfs_del()
316 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); in device_cdev_sysfs_del()
317 cdev_device_del(&hdev->cdev, hdev->dev); in device_cdev_sysfs_del()
320 put_device(hdev->dev); in device_cdev_sysfs_del()
321 put_device(hdev->dev_ctrl); in device_cdev_sysfs_del()
329 struct hl_device *hdev = device_reset_work->hdev; in device_hard_reset_pending() local
338 rc = hl_device_reset(hdev, flags); in device_hard_reset_pending()
339 if ((rc == -EBUSY) && !hdev->device_fini_pending) { in device_hard_reset_pending()
340 dev_info(hdev->dev, in device_hard_reset_pending()
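When hl_device_reset() returns -EBUSY (open processes are still being killed) and driver removal has not started, the hard-reset worker requeues itself instead of giving up. A sketch of that retry path; the HL_PENDING_RESET_PER_SEC interval and the wq/reset_work fields of the reset-work structure are assumptions from the driver:

    /* fragment of device_hard_reset_pending(); device_reset_work is the
     * container_of() of the delayed work being executed
     */
    rc = hl_device_reset(hdev, flags);
    if ((rc == -EBUSY) && !hdev->device_fini_pending) {
            dev_info(hdev->dev,
                    "Could not reset device; will try again in %u seconds",
                    HL_PENDING_RESET_PER_SEC);
            queue_delayed_work(device_reset_work->wq,
                    &device_reset_work->reset_work,
                    msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
    }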
358 static int device_early_init(struct hl_device *hdev) in device_early_init() argument
363 switch (hdev->asic_type) { in device_early_init()
365 goya_set_asic_funcs(hdev); in device_early_init()
366 strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); in device_early_init()
369 gaudi_set_asic_funcs(hdev); in device_early_init()
370 strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); in device_early_init()
373 gaudi_set_asic_funcs(hdev); in device_early_init()
374 strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name)); in device_early_init()
377 dev_err(hdev->dev, "Unrecognized ASIC type %d\n", in device_early_init()
378 hdev->asic_type); in device_early_init()
382 rc = hdev->asic_funcs->early_init(hdev); in device_early_init()
386 rc = hl_asid_init(hdev); in device_early_init()
390 if (hdev->asic_prop.completion_queues_count) { in device_early_init()
391 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, in device_early_init()
392 sizeof(*hdev->cq_wq), in device_early_init()
394 if (!hdev->cq_wq) { in device_early_init()
400 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { in device_early_init()
402 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); in device_early_init()
403 if (hdev->cq_wq[i] == NULL) { in device_early_init()
404 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); in device_early_init()
410 hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); in device_early_init()
411 if (hdev->eq_wq == NULL) { in device_early_init()
412 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); in device_early_init()
417 hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0); in device_early_init()
418 if (!hdev->sob_reset_wq) { in device_early_init()
419 dev_err(hdev->dev, in device_early_init()
425 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), in device_early_init()
427 if (!hdev->hl_chip_info) { in device_early_init()
432 rc = hl_mmu_if_set_funcs(hdev); in device_early_init()
436 hl_cb_mgr_init(&hdev->kernel_cb_mgr); in device_early_init()
438 hdev->device_reset_work.wq = in device_early_init()
440 if (!hdev->device_reset_work.wq) { in device_early_init()
442 dev_err(hdev->dev, "Failed to create device reset WQ\n"); in device_early_init()
446 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, in device_early_init()
448 hdev->device_reset_work.hdev = hdev; in device_early_init()
449 hdev->device_fini_pending = 0; in device_early_init()
451 mutex_init(&hdev->send_cpu_message_lock); in device_early_init()
452 mutex_init(&hdev->debug_lock); in device_early_init()
453 INIT_LIST_HEAD(&hdev->cs_mirror_list); in device_early_init()
454 spin_lock_init(&hdev->cs_mirror_lock); in device_early_init()
455 INIT_LIST_HEAD(&hdev->fpriv_list); in device_early_init()
456 mutex_init(&hdev->fpriv_list_lock); in device_early_init()
457 atomic_set(&hdev->in_reset, 0); in device_early_init()
462 hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); in device_early_init()
464 kfree(hdev->hl_chip_info); in device_early_init()
466 destroy_workqueue(hdev->sob_reset_wq); in device_early_init()
468 destroy_workqueue(hdev->eq_wq); in device_early_init()
470 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in device_early_init()
471 if (hdev->cq_wq[i]) in device_early_init()
472 destroy_workqueue(hdev->cq_wq[i]); in device_early_init()
473 kfree(hdev->cq_wq); in device_early_init()
475 hl_asid_fini(hdev); in device_early_init()
477 if (hdev->asic_funcs->early_fini) in device_early_init()
478 hdev->asic_funcs->early_fini(hdev); in device_early_init()
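device_early_init() allocates one single-threaded workqueue per completion queue, so completion handling for different CQs never serializes against the same worker; on partial failure the unwind ladder shown above tears everything down in reverse allocation order. A sketch of the per-CQ loop and its unwind label; the workqueue name format is an assumption:

    /* fragment of device_early_init(); rc and i are declared by the function */
    char workq_name[32];

    for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
            snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
            hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
            if (hdev->cq_wq[i] == NULL) {
                    dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
                    rc = -ENOMEM;
                    goto free_cq_wq;
            }
    }
    /* ... later init steps elided ... */

    free_cq_wq:
            /* NULL entries mark queues that were never created */
            for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                    if (hdev->cq_wq[i])
                            destroy_workqueue(hdev->cq_wq[i]);
            kfree(hdev->cq_wq);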
489 static void device_early_fini(struct hl_device *hdev) in device_early_fini() argument
493 mutex_destroy(&hdev->debug_lock); in device_early_fini()
494 mutex_destroy(&hdev->send_cpu_message_lock); in device_early_fini()
496 mutex_destroy(&hdev->fpriv_list_lock); in device_early_fini()
498 hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); in device_early_fini()
500 kfree(hdev->hl_chip_info); in device_early_fini()
502 destroy_workqueue(hdev->sob_reset_wq); in device_early_fini()
503 destroy_workqueue(hdev->eq_wq); in device_early_fini()
504 destroy_workqueue(hdev->device_reset_work.wq); in device_early_fini()
506 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in device_early_fini()
507 destroy_workqueue(hdev->cq_wq[i]); in device_early_fini()
508 kfree(hdev->cq_wq); in device_early_fini()
510 hl_asid_fini(hdev); in device_early_fini()
512 if (hdev->asic_funcs->early_fini) in device_early_fini()
513 hdev->asic_funcs->early_fini(hdev); in device_early_fini()
518 struct hl_device *hdev = container_of(work, struct hl_device, in set_freq_to_low_job() local
521 mutex_lock(&hdev->fpriv_list_lock); in set_freq_to_low_job()
523 if (!hdev->compute_ctx) in set_freq_to_low_job()
524 hl_device_set_frequency(hdev, PLL_LOW); in set_freq_to_low_job()
526 mutex_unlock(&hdev->fpriv_list_lock); in set_freq_to_low_job()
528 schedule_delayed_work(&hdev->work_freq, in set_freq_to_low_job()
534 struct hl_device *hdev = container_of(work, struct hl_device, in hl_device_heartbeat() local
537 if (!hl_device_operational(hdev, NULL)) in hl_device_heartbeat()
540 if (!hdev->asic_funcs->send_heartbeat(hdev)) in hl_device_heartbeat()
543 dev_err(hdev->dev, "Device heartbeat failed!\n"); in hl_device_heartbeat()
544 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT); in hl_device_heartbeat()
559 if (!(atomic_read(&hdev->in_reset))) in hl_device_heartbeat()
560 hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; in hl_device_heartbeat()
562 schedule_delayed_work(&hdev->work_heartbeat, in hl_device_heartbeat()
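Both set_freq_to_low_job() and hl_device_heartbeat() follow the self-rescheduling delayed-work pattern: perform the periodic check, then re-arm the same delayed work. The heartbeat additionally escalates to a hard reset when the firmware stops answering. A reconstruction of the heartbeat worker, assuming the HL_HEARTBEAT_PER_USEC interval constant from the driver:

    static void hl_device_heartbeat(struct work_struct *work)
    {
            struct hl_device *hdev = container_of(work, struct hl_device,
                            work_heartbeat.work);

            if (!hl_device_operational(hdev, NULL))
                    goto reschedule;

            if (!hdev->asic_funcs->send_heartbeat(hdev))
                    goto reschedule;

            dev_err(hdev->dev, "Device heartbeat failed!\n");
            hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);

            return;

    reschedule:
            /* clear repeated-trigger tracking once no reset is in flight */
            if (!(atomic_read(&hdev->in_reset)))
                    hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

            schedule_delayed_work(&hdev->work_heartbeat,
                            usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
    }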
574 static int device_late_init(struct hl_device *hdev) in device_late_init() argument
578 if (hdev->asic_funcs->late_init) { in device_late_init()
579 rc = hdev->asic_funcs->late_init(hdev); in device_late_init()
581 dev_err(hdev->dev, in device_late_init()
587 hdev->high_pll = hdev->asic_prop.high_pll; in device_late_init()
590 hdev->curr_pll_profile = PLL_LOW; in device_late_init()
592 if (hdev->pm_mng_profile == PM_AUTO) in device_late_init()
593 hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); in device_late_init()
595 hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); in device_late_init()
597 INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); in device_late_init()
598 schedule_delayed_work(&hdev->work_freq, in device_late_init()
601 if (hdev->heartbeat) { in device_late_init()
602 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); in device_late_init()
603 schedule_delayed_work(&hdev->work_heartbeat, in device_late_init()
607 hdev->late_init_done = true; in device_late_init()
618 static void device_late_fini(struct hl_device *hdev) in device_late_fini() argument
620 if (!hdev->late_init_done) in device_late_fini()
623 cancel_delayed_work_sync(&hdev->work_freq); in device_late_fini()
624 if (hdev->heartbeat) in device_late_fini()
625 cancel_delayed_work_sync(&hdev->work_heartbeat); in device_late_fini()
627 if (hdev->asic_funcs->late_fini) in device_late_fini()
628 hdev->asic_funcs->late_fini(hdev); in device_late_fini()
630 hdev->late_init_done = false; in device_late_fini()
633 int hl_device_utilization(struct hl_device *hdev, u32 *utilization) in hl_device_utilization() argument
638 max_power = hdev->asic_prop.max_power_default; in hl_device_utilization()
639 dc_power = hdev->asic_prop.dc_power_default; in hl_device_utilization()
640 rc = hl_fw_cpucp_power_get(hdev, &curr_power); in hl_device_utilization()
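hl_device_utilization() reports the current power draw as a percentage of the span between idle (DC) power and the default maximum, i.e. utilization = (curr - dc) * 100 / (max - dc). A sketch of the computation; the clamp and div_u64() steps are assumptions:

    /* fragment of hl_device_utilization() */
    u64 max_power, curr_power, dc_power, dividend;
    int rc;

    max_power = hdev->asic_prop.max_power_default;
    dc_power = hdev->asic_prop.dc_power_default;
    rc = hl_fw_cpucp_power_get(hdev, &curr_power);
    if (rc)
            return rc;

    /* keep the reading inside [dc_power, max_power] before scaling */
    curr_power = clamp(curr_power, dc_power, max_power);

    dividend = (curr_power - dc_power) * 100;
    *utilization = (u32) div_u64(dividend, (max_power - dc_power));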
666 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) in hl_device_set_frequency() argument
668 if ((hdev->pm_mng_profile == PM_MANUAL) || in hl_device_set_frequency()
669 (hdev->curr_pll_profile == freq)) in hl_device_set_frequency()
672 dev_dbg(hdev->dev, "Changing device frequency to %s\n", in hl_device_set_frequency()
675 hdev->asic_funcs->set_pll_profile(hdev, freq); in hl_device_set_frequency()
677 hdev->curr_pll_profile = freq; in hl_device_set_frequency()
682 int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) in hl_device_set_debug_mode() argument
686 mutex_lock(&hdev->debug_lock); in hl_device_set_debug_mode()
689 if (!hdev->in_debug) { in hl_device_set_debug_mode()
690 dev_err(hdev->dev, in hl_device_set_debug_mode()
696 if (!hdev->hard_reset_pending) in hl_device_set_debug_mode()
697 hdev->asic_funcs->halt_coresight(hdev); in hl_device_set_debug_mode()
699 hdev->in_debug = 0; in hl_device_set_debug_mode()
701 if (!hdev->hard_reset_pending) in hl_device_set_debug_mode()
702 hdev->asic_funcs->set_clock_gating(hdev); in hl_device_set_debug_mode()
707 if (hdev->in_debug) { in hl_device_set_debug_mode()
708 dev_err(hdev->dev, in hl_device_set_debug_mode()
714 hdev->asic_funcs->disable_clock_gating(hdev); in hl_device_set_debug_mode()
715 hdev->in_debug = 1; in hl_device_set_debug_mode()
718 mutex_unlock(&hdev->debug_lock); in hl_device_set_debug_mode()
723 static void take_release_locks(struct hl_device *hdev) in take_release_locks() argument
728 hdev->asic_funcs->hw_queues_lock(hdev); in take_release_locks()
729 hdev->asic_funcs->hw_queues_unlock(hdev); in take_release_locks()
732 mutex_lock(&hdev->send_cpu_message_lock); in take_release_locks()
733 mutex_unlock(&hdev->send_cpu_message_lock); in take_release_locks()
736 mutex_lock(&hdev->fpriv_list_lock); in take_release_locks()
737 mutex_unlock(&hdev->fpriv_list_lock); in take_release_locks()
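take_release_locks() is a flush barrier, not a critical section: by acquiring and immediately releasing each lock, it guarantees that every thread already inside one of those critical sections has drained, while holding nothing afterwards. Callers first mark the device disabled so new entrants bail out, then bounce through the locks:

    /* usage pattern on the reset/suspend/fini paths */
    hdev->disabled = true;      /* new callers see a disabled device  */
    take_release_locks(hdev);   /* wait out callers already inside    */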
740 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) in cleanup_resources() argument
743 device_late_fini(hdev); in cleanup_resources()
750 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); in cleanup_resources()
753 hl_cs_rollback_all(hdev); in cleanup_resources()
758 hl_release_pending_user_interrupts(hdev); in cleanup_resources()
770 int hl_device_suspend(struct hl_device *hdev) in hl_device_suspend() argument
774 pci_save_state(hdev->pdev); in hl_device_suspend()
777 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); in hl_device_suspend()
779 dev_err(hdev->dev, "Can't suspend while in reset\n"); in hl_device_suspend()
784 hdev->disabled = true; in hl_device_suspend()
786 take_release_locks(hdev); in hl_device_suspend()
788 rc = hdev->asic_funcs->suspend(hdev); in hl_device_suspend()
790 dev_err(hdev->dev, in hl_device_suspend()
794 pci_disable_device(hdev->pdev); in hl_device_suspend()
795 pci_set_power_state(hdev->pdev, PCI_D3hot); in hl_device_suspend()
809 int hl_device_resume(struct hl_device *hdev) in hl_device_resume() argument
813 pci_set_power_state(hdev->pdev, PCI_D0); in hl_device_resume()
814 pci_restore_state(hdev->pdev); in hl_device_resume()
815 rc = pci_enable_device_mem(hdev->pdev); in hl_device_resume()
817 dev_err(hdev->dev, in hl_device_resume()
822 pci_set_master(hdev->pdev); in hl_device_resume()
824 rc = hdev->asic_funcs->resume(hdev); in hl_device_resume()
826 dev_err(hdev->dev, "Failed to resume device after suspend\n"); in hl_device_resume()
831 hdev->disabled = false; in hl_device_resume()
832 atomic_set(&hdev->in_reset, 0); in hl_device_resume()
834 rc = hl_device_reset(hdev, HL_RESET_HARD); in hl_device_resume()
836 dev_err(hdev->dev, "Failed to reset device during resume\n"); in hl_device_resume()
843 pci_clear_master(hdev->pdev); in hl_device_resume()
844 pci_disable_device(hdev->pdev); in hl_device_resume()
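Suspend saves PCI config space and drops the device to D3hot; resume mirrors the sequence and then forces a hard reset, because the ASIC loses its state in D3hot. The ordered PCI calls, collected from the two functions above:

    /* suspend */
    pci_save_state(hdev->pdev);
    /* ... ASIC suspend callback ... */
    pci_disable_device(hdev->pdev);
    pci_set_power_state(hdev->pdev, PCI_D3hot);

    /* resume */
    pci_set_power_state(hdev->pdev, PCI_D0);
    pci_restore_state(hdev->pdev);
    rc = pci_enable_device_mem(hdev->pdev);
    if (rc == 0) {
            pci_set_master(hdev->pdev);
            /* ... ASIC resume callback, then hl_device_reset(hdev, HL_RESET_HARD) ... */
    }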
849 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) in device_kill_open_processes() argument
859 if (!list_empty(&hdev->fpriv_list)) in device_kill_open_processes()
865 if (hdev->process_kill_trial_cnt) { in device_kill_open_processes()
875 mutex_lock(&hdev->fpriv_list_lock); in device_kill_open_processes()
880 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { in device_kill_open_processes()
883 dev_info(hdev->dev, "Killing user process pid=%d\n", in device_kill_open_processes()
890 dev_warn(hdev->dev, in device_kill_open_processes()
892 mutex_unlock(&hdev->fpriv_list_lock); in device_kill_open_processes()
897 mutex_unlock(&hdev->fpriv_list_lock); in device_kill_open_processes()
909 while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { in device_kill_open_processes()
910 dev_dbg(hdev->dev, in device_kill_open_processes()
919 if (list_empty(&hdev->fpriv_list)) in device_kill_open_processes()
923 if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS) in device_kill_open_processes()
926 hdev->process_kill_trial_cnt++; in device_kill_open_processes()
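device_kill_open_processes() sends SIGKILL to every process still holding the device open, then polls fpriv_list until it drains, giving up after HL_PENDING_RESET_MAX_TRIALS attempts. A sketch of the kill loop, assuming a taskpid field on hl_fpriv and the standard pid helpers:

    /* fragment; called with fpriv_list_lock held, as in the listing */
    struct hl_fpriv *hpriv;
    struct task_struct *task;

    list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
            task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
            if (task) {
                    dev_info(hdev->dev, "Killing user process pid=%d\n",
                            task_pid_nr(task));
                    send_sig(SIGKILL, task, 1);
                    usleep_range(1000, 10000);
                    put_task_struct(task);
            }
    }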
931 static void device_disable_open_processes(struct hl_device *hdev) in device_disable_open_processes() argument
935 mutex_lock(&hdev->fpriv_list_lock); in device_disable_open_processes()
936 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) in device_disable_open_processes()
937 hpriv->hdev = NULL; in device_disable_open_processes()
938 mutex_unlock(&hdev->fpriv_list_lock); in device_disable_open_processes()
941 static void handle_reset_trigger(struct hl_device *hdev, u32 flags) in handle_reset_trigger() argument
952 hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; in handle_reset_trigger()
955 hdev->curr_reset_cause = HL_RESET_CAUSE_TDR; in handle_reset_trigger()
958 hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; in handle_reset_trigger()
961 hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; in handle_reset_trigger()
969 if (hdev->prev_reset_trigger != cur_reset_trigger) { in handle_reset_trigger()
970 hdev->prev_reset_trigger = cur_reset_trigger; in handle_reset_trigger()
971 hdev->reset_trigger_repeated = 0; in handle_reset_trigger()
973 hdev->reset_trigger_repeated = 1; in handle_reset_trigger()
993 if (hl_fw_send_pci_access_msg(hdev, in handle_reset_trigger()
995 dev_warn(hdev->dev, in handle_reset_trigger()
1016 int hl_device_reset(struct hl_device *hdev, u32 flags) in hl_device_reset() argument
1022 if (!hdev->init_done) { in hl_device_reset()
1023 dev_err(hdev->dev, in hl_device_reset()
1032 if (!hard_reset && !hdev->supports_soft_reset) { in hl_device_reset()
1037 if (hdev->reset_upon_device_release && in hl_device_reset()
1039 dev_dbg(hdev->dev, in hl_device_reset()
1045 if (!hard_reset && !hdev->allow_inference_soft_reset) { in hl_device_reset()
1051 dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); in hl_device_reset()
1055 if (from_hard_reset_thread && hdev->process_kill_trial_cnt) in hl_device_reset()
1065 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); in hl_device_reset()
1069 handle_reset_trigger(hdev, flags); in hl_device_reset()
1072 hdev->disabled = true; in hl_device_reset()
1074 take_release_locks(hdev); in hl_device_reset()
1077 dev_info(hdev->dev, "Going to reset device\n"); in hl_device_reset()
1079 dev_info(hdev->dev, in hl_device_reset()
1082 dev_info(hdev->dev, in hl_device_reset()
1088 hdev->hard_reset_pending = true; in hl_device_reset()
1090 hdev->process_kill_trial_cnt = 0; in hl_device_reset()
1092 hdev->device_reset_work.fw_reset = fw_reset; in hl_device_reset()
1098 queue_delayed_work(hdev->device_reset_work.wq, in hl_device_reset()
1099 &hdev->device_reset_work.reset_work, 0); in hl_device_reset()
1104 cleanup_resources(hdev, hard_reset, fw_reset); in hl_device_reset()
1112 rc = device_kill_open_processes(hdev, 0); in hl_device_reset()
1115 if (hdev->device_fini_pending) { in hl_device_reset()
1116 dev_crit(hdev->dev, in hl_device_reset()
1126 dev_crit(hdev->dev, in hl_device_reset()
1134 flush_workqueue(hdev->eq_wq); in hl_device_reset()
1138 hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); in hl_device_reset()
1141 hdev->fw_loader.linux_loaded = false; in hl_device_reset()
1144 if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) in hl_device_reset()
1145 hdev->kernel_ctx = NULL; in hl_device_reset()
1147 hl_vm_fini(hdev); in hl_device_reset()
1148 hl_mmu_fini(hdev); in hl_device_reset()
1149 hl_eq_reset(hdev, &hdev->event_queue); in hl_device_reset()
1153 hl_hw_queue_reset(hdev, hard_reset); in hl_device_reset()
1154 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_device_reset()
1155 hl_cq_reset(hdev, &hdev->completion_queue[i]); in hl_device_reset()
1157 mutex_lock(&hdev->fpriv_list_lock); in hl_device_reset()
1160 if (hdev->compute_ctx) { in hl_device_reset()
1161 atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); in hl_device_reset()
1162 hdev->compute_ctx->thread_ctx_switch_wait_token = 0; in hl_device_reset()
1165 mutex_unlock(&hdev->fpriv_list_lock); in hl_device_reset()
1170 hdev->device_cpu_disabled = false; in hl_device_reset()
1171 hdev->hard_reset_pending = false; in hl_device_reset()
1173 if (hdev->reset_trigger_repeated && in hl_device_reset()
1174 (hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) { in hl_device_reset()
1178 dev_crit(hdev->dev, in hl_device_reset()
1184 if (hdev->kernel_ctx) { in hl_device_reset()
1185 dev_crit(hdev->dev, in hl_device_reset()
1191 rc = hl_mmu_init(hdev); in hl_device_reset()
1193 dev_err(hdev->dev, in hl_device_reset()
1199 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), in hl_device_reset()
1201 if (!hdev->kernel_ctx) { in hl_device_reset()
1203 hl_mmu_fini(hdev); in hl_device_reset()
1207 hdev->compute_ctx = NULL; in hl_device_reset()
1209 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); in hl_device_reset()
1211 dev_err(hdev->dev, in hl_device_reset()
1213 kfree(hdev->kernel_ctx); in hl_device_reset()
1214 hdev->kernel_ctx = NULL; in hl_device_reset()
1215 hl_mmu_fini(hdev); in hl_device_reset()
1224 hdev->disabled = false; in hl_device_reset()
1226 rc = hdev->asic_funcs->hw_init(hdev); in hl_device_reset()
1228 dev_err(hdev->dev, in hl_device_reset()
1234 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, in hl_device_reset()
1236 dev_err(hdev->dev, in hl_device_reset()
1244 rc = hdev->asic_funcs->test_queues(hdev); in hl_device_reset()
1246 dev_err(hdev->dev, in hl_device_reset()
1252 rc = device_late_init(hdev); in hl_device_reset()
1254 dev_err(hdev->dev, in hl_device_reset()
1259 rc = hl_vm_init(hdev); in hl_device_reset()
1261 dev_err(hdev->dev, in hl_device_reset()
1266 hl_set_max_power(hdev); in hl_device_reset()
1268 rc = hdev->asic_funcs->soft_reset_late_init(hdev); in hl_device_reset()
1270 dev_err(hdev->dev, in hl_device_reset()
1276 atomic_set(&hdev->in_reset, 0); in hl_device_reset()
1277 hdev->needs_reset = false; in hl_device_reset()
1279 dev_notice(hdev->dev, "Successfully finished resetting the device\n"); in hl_device_reset()
1282 hdev->hard_reset_cnt++; in hl_device_reset()
1289 hdev->asic_funcs->enable_events_from_fw(hdev); in hl_device_reset()
1291 hdev->soft_reset_cnt++; in hl_device_reset()
1297 hdev->disabled = true; in hl_device_reset()
1300 dev_err(hdev->dev, in hl_device_reset()
1302 hdev->hard_reset_cnt++; in hl_device_reset()
1304 dev_err(hdev->dev, in hl_device_reset()
1306 hdev->soft_reset_cnt++; in hl_device_reset()
1311 atomic_set(&hdev->in_reset, 0); in hl_device_reset()
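The entire reset path is serialized by one atomic flag: only the caller that flips in_reset from 0 to 1 via atomic_cmpxchg() proceeds, and every exit path (success or failure) clears it back to 0. hl_device_suspend() and hl_device_fini() reuse the same guard. The shape of the guard:

    /* entry: win the 0 -> 1 transition or back off */
    rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
    if (rc) {
            /* another reset is already in flight; let it finish */
            return 0;
    }

    /* ... perform the reset ... */

    /* exit: allow the next reset */
    atomic_set(&hdev->in_reset, 0);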
1325 int hl_device_init(struct hl_device *hdev, struct class *hclass) in hl_device_init() argument
1331 name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); in hl_device_init()
1338 rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, in hl_device_init()
1339 &hdev->cdev, &hdev->dev); in hl_device_init()
1346 name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); in hl_device_init()
1353 rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, in hl_device_init()
1354 name, &hdev->cdev_ctrl, &hdev->dev_ctrl); in hl_device_init()
1362 rc = device_early_init(hdev); in hl_device_init()
1366 user_interrupt_cnt = hdev->asic_prop.user_interrupt_count; in hl_device_init()
1369 hdev->user_interrupt = kcalloc(user_interrupt_cnt, in hl_device_init()
1370 sizeof(*hdev->user_interrupt), in hl_device_init()
1373 if (!hdev->user_interrupt) { in hl_device_init()
1383 rc = hdev->asic_funcs->sw_init(hdev); in hl_device_init()
1389 hl_multi_cs_completion_init(hdev); in hl_device_init()
1396 rc = hl_hw_queues_create(hdev); in hl_device_init()
1398 dev_err(hdev->dev, "failed to initialize kernel queues\n"); in hl_device_init()
1402 cq_cnt = hdev->asic_prop.completion_queues_count; in hl_device_init()
1410 hdev->completion_queue = kcalloc(cq_cnt, in hl_device_init()
1411 sizeof(*hdev->completion_queue), in hl_device_init()
1414 if (!hdev->completion_queue) { in hl_device_init()
1415 dev_err(hdev->dev, in hl_device_init()
1423 rc = hl_cq_init(hdev, &hdev->completion_queue[i], in hl_device_init()
1424 hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); in hl_device_init()
1426 dev_err(hdev->dev, in hl_device_init()
1430 hdev->completion_queue[i].cq_idx = i; in hl_device_init()
1438 rc = hl_eq_init(hdev, &hdev->event_queue); in hl_device_init()
1440 dev_err(hdev->dev, "failed to initialize event queue\n"); in hl_device_init()
1445 rc = hl_mmu_init(hdev); in hl_device_init()
1447 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); in hl_device_init()
1452 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); in hl_device_init()
1453 if (!hdev->kernel_ctx) { in hl_device_init()
1458 hdev->compute_ctx = NULL; in hl_device_init()
1460 hdev->asic_funcs->state_dump_init(hdev); in hl_device_init()
1462 hl_debugfs_add_device(hdev); in hl_device_init()
1467 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); in hl_device_init()
1469 dev_err(hdev->dev, "failed to initialize kernel context\n"); in hl_device_init()
1470 kfree(hdev->kernel_ctx); in hl_device_init()
1474 rc = hl_cb_pool_init(hdev); in hl_device_init()
1476 dev_err(hdev->dev, "failed to initialize CB pool\n"); in hl_device_init()
1491 hdev->disabled = false; in hl_device_init()
1493 rc = hdev->asic_funcs->hw_init(hdev); in hl_device_init()
1495 dev_err(hdev->dev, "failed to initialize the H/W\n"); in hl_device_init()
1501 rc = hdev->asic_funcs->test_queues(hdev); in hl_device_init()
1503 dev_err(hdev->dev, "Failed to detect if device is alive\n"); in hl_device_init()
1508 rc = device_late_init(hdev); in hl_device_init()
1510 dev_err(hdev->dev, "Failed late initialization\n"); in hl_device_init()
1515 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", in hl_device_init()
1516 hdev->asic_name, in hl_device_init()
1517 hdev->asic_prop.dram_size / SZ_1G); in hl_device_init()
1519 rc = hl_vm_init(hdev); in hl_device_init()
1521 dev_err(hdev->dev, "Failed to initialize memory module\n"); in hl_device_init()
1532 rc = device_cdev_sysfs_add(hdev); in hl_device_init()
1534 dev_err(hdev->dev, in hl_device_init()
1543 hl_set_max_power(hdev); in hl_device_init()
1551 rc = hl_hwmon_init(hdev); in hl_device_init()
1553 dev_err(hdev->dev, "Failed to initialize hwmon\n"); in hl_device_init()
1558 dev_notice(hdev->dev, in hl_device_init()
1561 hdev->init_done = true; in hl_device_init()
1568 hdev->asic_funcs->enable_events_from_fw(hdev); in hl_device_init()
1573 if (hl_ctx_put(hdev->kernel_ctx) != 1) in hl_device_init()
1574 dev_err(hdev->dev, in hl_device_init()
1577 hl_debugfs_remove_device(hdev); in hl_device_init()
1579 hl_mmu_fini(hdev); in hl_device_init()
1581 hl_eq_fini(hdev, &hdev->event_queue); in hl_device_init()
1584 hl_cq_fini(hdev, &hdev->completion_queue[i]); in hl_device_init()
1585 kfree(hdev->completion_queue); in hl_device_init()
1587 hl_hw_queues_destroy(hdev); in hl_device_init()
1589 hdev->asic_funcs->sw_fini(hdev); in hl_device_init()
1591 kfree(hdev->user_interrupt); in hl_device_init()
1593 device_early_fini(hdev); in hl_device_init()
1595 put_device(hdev->dev_ctrl); in hl_device_init()
1597 put_device(hdev->dev); in hl_device_init()
1599 hdev->disabled = true; in hl_device_init()
1601 device_cdev_sysfs_add(hdev); in hl_device_init()
1602 if (hdev->pdev) in hl_device_init()
1603 dev_err(&hdev->pdev->dev, in hl_device_init()
1605 hdev->id / 2); in hl_device_init()
1608 hdev->id / 2); in hl_device_init()
1620 void hl_device_fini(struct hl_device *hdev) in hl_device_fini() argument
1626 dev_info(hdev->dev, "Removing device\n"); in hl_device_fini()
1628 hdev->device_fini_pending = 1; in hl_device_fini()
1629 flush_delayed_work(&hdev->device_reset_work.reset_work); in hl_device_fini()
1631 if (hdev->pldm) in hl_device_fini()
1645 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); in hl_device_fini()
1648 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); in hl_device_fini()
1650 dev_crit(hdev->dev, in hl_device_fini()
1663 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); in hl_device_fini()
1666 hdev->disabled = true; in hl_device_fini()
1668 take_release_locks(hdev); in hl_device_fini()
1670 hdev->hard_reset_pending = true; in hl_device_fini()
1672 hl_hwmon_fini(hdev); in hl_device_fini()
1674 cleanup_resources(hdev, true, false); in hl_device_fini()
1680 dev_info(hdev->dev, in hl_device_fini()
1684 rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); in hl_device_fini()
1686 dev_crit(hdev->dev, "Failed to kill all open processes\n"); in hl_device_fini()
1687 device_disable_open_processes(hdev); in hl_device_fini()
1690 hl_cb_pool_fini(hdev); in hl_device_fini()
1693 hdev->asic_funcs->hw_fini(hdev, true, false); in hl_device_fini()
1695 hdev->fw_loader.linux_loaded = false; in hl_device_fini()
1698 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) in hl_device_fini()
1699 dev_err(hdev->dev, "kernel ctx is still alive\n"); in hl_device_fini()
1701 hl_debugfs_remove_device(hdev); in hl_device_fini()
1703 hl_vm_fini(hdev); in hl_device_fini()
1705 hl_mmu_fini(hdev); in hl_device_fini()
1707 hl_eq_fini(hdev, &hdev->event_queue); in hl_device_fini()
1709 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_device_fini()
1710 hl_cq_fini(hdev, &hdev->completion_queue[i]); in hl_device_fini()
1711 kfree(hdev->completion_queue); in hl_device_fini()
1712 kfree(hdev->user_interrupt); in hl_device_fini()
1714 hl_hw_queues_destroy(hdev); in hl_device_fini()
1717 hdev->asic_funcs->sw_fini(hdev); in hl_device_fini()
1719 device_early_fini(hdev); in hl_device_fini()
1722 device_cdev_sysfs_del(hdev); in hl_device_fini()
1740 inline u32 hl_rreg(struct hl_device *hdev, u32 reg) in hl_rreg() argument
1742 return readl(hdev->rmmio + reg); in hl_rreg()
1755 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) in hl_wreg() argument
1757 writel(val, hdev->rmmio + reg); in hl_wreg()
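hl_rreg() and hl_wreg() are the driver's touch points with the mapped register BAR (hdev->rmmio); register access goes through these readl()/writel() wrappers. A usage sketch; EXAMPLE_REG_OFFSET is a hypothetical register offset, not a real one:

    u32 val;

    val = hl_rreg(hdev, EXAMPLE_REG_OFFSET);   /* hypothetical offset */
    val |= BIT(0);                             /* set an example bit  */
    hl_wreg(hdev, EXAMPLE_REG_OFFSET, val);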