From bf13da6ae1a0097cf2ff4fba1e3236aaa3fa3a7a Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 24 Oct 2023 14:00:39 +0800 Subject: [PATCH] drm/amdgpu: correct smu v13.0.6 umc ras error check correct smu v13.0.0 umc ras error check Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 3 +++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 16 +++++++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 770b4b4e3138..e9c2ff74f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -88,7 +88,7 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) { return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || @@ -96,7 +96,7 @@ static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) { return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 4885b9fff272..b34b1e358f8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,6 +117,9 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); + extern const uint32_t umc_v12_0_channel_idx_tbl[] [UMC_V12_0_UMC_INSTANCE_NUM] diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 83e1228e6eee..a6b57a5d0c77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -48,6 +48,7 @@ #include "smu_cmn.h" #include "mp/mp_13_0_6_offset.h" #include "mp/mp_13_0_6_sh_mask.h" +#include "umc_v12_0.h" #undef MP1_Public #undef smnMP1_FIRMWARE_FLAGS @@ -2481,7 +2482,7 @@ static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error return 0; } -static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, +static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, uint32_t *count) { uint64_t status0; @@ -2491,10 +2492,15 @@ static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, stru if (ret) return ret; - if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) - *count = 1; - else + if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; + return 0; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + *count = 1; return 0; } @@ -2608,7 +2614,7 @@ static const struct mca_ras_info mca_ras_table[] = { { .blkid = AMDGPU_RAS_BLOCK__UMC, .ip = AMDGPU_MCA_IP_UMC, - .get_err_count = mca_normal_mca_get_err_count, + .get_err_count = mca_umc_mca_get_err_count, }, { .blkid = AMDGPU_RAS_BLOCK__GFX, .ip = AMDGPU_MCA_IP_MP5, -- 2.39.5