drm/amdgpu: add UTCL2 RAS poison query for Aldebaran (v2)
Add helper functions to query and reset the RAS UTCL2 poison status. v2: implement it on the amdgpu side; kfd only calls it. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
9d8a8d78d9
commit
6475ae2b74
4 changed files with 24 additions and 0 deletions
|
@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
|
||||||
else if (reset)
|
else if (reset)
|
||||||
amdgpu_amdkfd_gpu_reset(adev);
|
amdgpu_amdkfd_gpu_reset(adev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
if (adev->gfx.ras->query_utcl2_poison_status)
|
||||||
|
return adev->gfx.ras->query_utcl2_poison_status(adev);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
|
@ -301,6 +301,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
|
||||||
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
|
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
|
||||||
void amdgpu_amdkfd_block_mmu_notifications(void *p);
|
void amdgpu_amdkfd_block_mmu_notifications(void *p);
|
||||||
int amdgpu_amdkfd_criu_resume(void *p);
|
int amdgpu_amdkfd_criu_resume(void *p);
|
||||||
|
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_HSA_AMD)
|
#if IS_ENABLED(CONFIG_HSA_AMD)
|
||||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||||
|
|
|
@ -202,6 +202,7 @@ struct amdgpu_cu_info {
|
||||||
struct amdgpu_gfx_ras {
|
struct amdgpu_gfx_ras {
|
||||||
struct amdgpu_ras_block_object ras_block;
|
struct amdgpu_ras_block_object ras_block;
|
||||||
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
|
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
|
||||||
|
bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_gfx_funcs {
|
struct amdgpu_gfx_funcs {
|
||||||
|
|
|
@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
|
||||||
mutex_unlock(&adev->grbm_idx_mutex);
|
mutex_unlock(&adev->grbm_idx_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
u32 status = 0;
|
||||||
|
struct amdgpu_vmhub *hub;
|
||||||
|
|
||||||
|
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
|
||||||
|
status = RREG32(hub->vm_l2_pro_fault_status);
|
||||||
|
/* reset page fault status */
|
||||||
|
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
|
||||||
|
|
||||||
|
return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
|
||||||
|
}
|
||||||
|
|
||||||
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
|
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
|
||||||
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
|
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
|
||||||
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
|
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
|
||||||
|
@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
|
||||||
.hw_ops = &gfx_v9_4_2_ras_ops,
|
.hw_ops = &gfx_v9_4_2_ras_ops,
|
||||||
},
|
},
|
||||||
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
|
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
|
||||||
|
.query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue