git.dujemihanovic.xyz Git - linux.git/commitdiff
drm/amdgpu: skip coredump after job timeout in SRIOV
author: ZhenGuo Yin <zhenguo.yin@amd.com>
Thu, 19 Sep 2024 03:38:04 +0000 (11:38 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 25 Sep 2024 16:55:52 +0000 (12:55 -0400)
VF FLR will be triggered by the host driver before the job timeout,
hence the error status of the GPU gets cleared. Performing a
coredump here is unnecessary.

Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ad6bf5d4e0a9d03a805f6378bb9fd25b9e98880d..16f2605ac50b9964c39d378f01963e8bc5fba9e5 100644 (file)
@@ -107,8 +107,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        /*
         * Do the coredump immediately after a job timeout to get a very
         * close dump/snapshot/representation of GPU's current error status
+        * Skip it for SRIOV, since VF FLR will be triggered by host driver
+        * before job timeout
         */
-       amdgpu_job_core_dump(adev, job);
+       if (!amdgpu_sriov_vf(adev))
+               amdgpu_job_core_dump(adev, job);
 
        if (amdgpu_gpu_recovery &&
            amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {