drm/amdgpu: add umc_fill_error_record to make code more simple
Create a common amdgpu_umc_fill_error_record function for all versions of UMC and clean up the related code. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
9a17696049
commit
400013b268
6 changed files with 46 additions and 93 deletions
|
@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
||||||
|
|
||||||
err_rec.address = address;
|
|
||||||
err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec.ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
|
|
||||||
err_data.err_addr = &err_rec;
|
err_data.err_addr = &err_rec;
|
||||||
err_data.err_addr_cnt = 1;
|
amdgpu_umc_fill_error_record(&err_data, address,
|
||||||
|
(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
|
||||||
|
|
||||||
if (amdgpu_bad_page_threshold != 0) {
|
if (amdgpu_bad_page_threshold != 0) {
|
||||||
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
||||||
|
@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
|
||||||
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
|
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
|
||||||
umc_inst, ch_inst);
|
umc_inst, ch_inst);
|
||||||
|
|
||||||
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Translate UMC channel address to Physical address
|
* Translate UMC channel address to Physical address
|
||||||
*/
|
*/
|
||||||
|
@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
|
||||||
ADDR_OF_256B_BLOCK(channel_index) |
|
ADDR_OF_256B_BLOCK(channel_index) |
|
||||||
OFFSET_IN_256B_BLOCK(m->addr);
|
OFFSET_IN_256B_BLOCK(m->addr);
|
||||||
|
|
||||||
err_rec.address = m->addr;
|
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
||||||
err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec.ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec.cu = 0;
|
|
||||||
err_rec.mem_channel = channel_index;
|
|
||||||
err_rec.mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data.err_addr = &err_rec;
|
err_data.err_addr = &err_rec;
|
||||||
err_data.err_addr_cnt = 1;
|
amdgpu_umc_fill_error_record(&err_data, m->addr,
|
||||||
|
retired_page, channel_index, umc_inst);
|
||||||
|
|
||||||
if (amdgpu_bad_page_threshold != 0) {
|
if (amdgpu_bad_page_threshold != 0) {
|
||||||
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
||||||
|
|
|
@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
||||||
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
|
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
|
||||||
|
uint64_t err_addr,
|
||||||
|
uint64_t retired_page,
|
||||||
|
uint32_t channel_index,
|
||||||
|
uint32_t umc_inst)
|
||||||
|
{
|
||||||
|
struct eeprom_table_record *err_rec =
|
||||||
|
&err_data->err_addr[err_data->err_addr_cnt];
|
||||||
|
|
||||||
|
err_rec->address = err_addr;
|
||||||
|
/* page frame address is saved */
|
||||||
|
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||||
|
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||||
|
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||||
|
err_rec->cu = 0;
|
||||||
|
err_rec->mem_channel = channel_index;
|
||||||
|
err_rec->mcumc_id = umc_inst;
|
||||||
|
|
||||||
|
err_data->err_addr_cnt++;
|
||||||
|
}
|
||||||
|
|
|
@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
|
||||||
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
||||||
struct amdgpu_irq_src *source,
|
struct amdgpu_irq_src *source,
|
||||||
struct amdgpu_iv_entry *entry);
|
struct amdgpu_iv_entry *entry);
|
||||||
|
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
|
||||||
|
uint64_t err_addr,
|
||||||
|
uint64_t retired_page,
|
||||||
|
uint32_t channel_index,
|
||||||
|
uint32_t umc_inst);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
||||||
{
|
{
|
||||||
uint32_t lsb, mc_umc_status_addr;
|
uint32_t lsb, mc_umc_status_addr;
|
||||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||||
struct eeprom_table_record *err_rec;
|
|
||||||
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||||
|
|
||||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||||
|
@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
|
||||||
|
|
||||||
/* calculate error address if ue/ce error is detected */
|
/* calculate error address if ue/ce error is detected */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||||
|
@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1) {
|
== 1)
|
||||||
err_rec->address = err_addr;
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
/* page frame address is saved */
|
retired_page, channel_index, umc_inst);
|
||||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec->cu = 0;
|
|
||||||
err_rec->mem_channel = channel_index;
|
|
||||||
err_rec->mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data->err_addr_cnt++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear umc status */
|
/* clear umc status */
|
||||||
|
|
|
@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
uint32_t umc_inst)
|
uint32_t umc_inst)
|
||||||
{
|
{
|
||||||
uint64_t mc_umc_status, err_addr, retired_page;
|
uint64_t mc_umc_status, err_addr, retired_page;
|
||||||
struct eeprom_table_record *err_rec;
|
|
||||||
uint32_t channel_index;
|
uint32_t channel_index;
|
||||||
uint32_t eccinfo_table_idx;
|
uint32_t eccinfo_table_idx;
|
||||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
|
@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
if (!err_data->err_addr)
|
if (!err_data->err_addr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
|
||||||
|
|
||||||
/* calculate error address if ue/ce error is detected */
|
/* calculate error address if ue/ce error is detected */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||||
|
@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1) {
|
== 1)
|
||||||
err_rec->address = err_addr;
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
/* page frame address is saved */
|
retired_page, channel_index, umc_inst);
|
||||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec->cu = 0;
|
|
||||||
err_rec->mem_channel = channel_index;
|
|
||||||
err_rec->mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data->err_addr_cnt++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
||||||
{
|
{
|
||||||
uint32_t mc_umc_status_addr;
|
uint32_t mc_umc_status_addr;
|
||||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||||
struct eeprom_table_record *err_rec;
|
|
||||||
uint32_t channel_index;
|
uint32_t channel_index;
|
||||||
|
|
||||||
mc_umc_status_addr =
|
mc_umc_status_addr =
|
||||||
|
@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
|
||||||
|
|
||||||
channel_index =
|
channel_index =
|
||||||
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||||
|
|
||||||
|
@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1) {
|
== 1)
|
||||||
err_rec->address = err_addr;
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
/* page frame address is saved */
|
retired_page, channel_index, umc_inst);
|
||||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec->cu = 0;
|
|
||||||
err_rec->mem_channel = channel_index;
|
|
||||||
err_rec->mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data->err_addr_cnt++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear umc status */
|
/* clear umc status */
|
||||||
|
|
|
@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
uint32_t umc_inst)
|
uint32_t umc_inst)
|
||||||
{
|
{
|
||||||
uint64_t mc_umc_status, err_addr, retired_page;
|
uint64_t mc_umc_status, err_addr, retired_page;
|
||||||
struct eeprom_table_record *err_rec;
|
|
||||||
uint32_t channel_index;
|
uint32_t channel_index;
|
||||||
uint32_t eccinfo_table_idx;
|
uint32_t eccinfo_table_idx;
|
||||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
|
@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
if (!err_data->err_addr)
|
if (!err_data->err_addr)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
|
||||||
|
|
||||||
/* calculate error address if ue/ce error is detected */
|
/* calculate error address if ue/ce error is detected */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||||
|
@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1) {
|
== 1)
|
||||||
err_rec->address = err_addr;
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
/* page frame address is saved */
|
retired_page, channel_index, umc_inst);
|
||||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec->cu = 0;
|
|
||||||
err_rec->mem_channel = channel_index;
|
|
||||||
err_rec->mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data->err_addr_cnt++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
||||||
{
|
{
|
||||||
uint32_t lsb, mc_umc_status_addr;
|
uint32_t lsb, mc_umc_status_addr;
|
||||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||||
struct eeprom_table_record *err_rec;
|
|
||||||
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||||
|
|
||||||
mc_umc_status_addr =
|
mc_umc_status_addr =
|
||||||
|
@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
|
||||||
|
|
||||||
/* calculate error address if ue/ce error is detected */
|
/* calculate error address if ue/ce error is detected */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||||
|
@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1) {
|
== 1)
|
||||||
err_rec->address = err_addr;
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
/* page frame address is saved */
|
retired_page, channel_index, umc_inst);
|
||||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
|
||||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
|
||||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
|
||||||
err_rec->cu = 0;
|
|
||||||
err_rec->mem_channel = channel_index;
|
|
||||||
err_rec->mcumc_id = umc_inst;
|
|
||||||
|
|
||||||
err_data->err_addr_cnt++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear umc status */
|
/* clear umc status */
|
||||||
|
|
Loading…
Add table
Reference in a new issue