drm/amdgpu: add umc_fill_error_record to simplify the code
Create a common amdgpu_umc_fill_error_record function for all versions of UMC and clean up the related code. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
9a17696049
commit
400013b268
6 changed files with 46 additions and 93 deletions
|
@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
|
|||
}
|
||||
|
||||
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
||||
|
||||
err_rec.address = address;
|
||||
err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec.ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
|
||||
err_data.err_addr = &err_rec;
|
||||
err_data.err_addr_cnt = 1;
|
||||
amdgpu_umc_fill_error_record(&err_data, address,
|
||||
(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
|
||||
|
||||
if (amdgpu_bad_page_threshold != 0) {
|
||||
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
||||
|
@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
|
|||
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
|
||||
umc_inst, ch_inst);
|
||||
|
||||
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
||||
|
||||
/*
|
||||
* Translate UMC channel address to Physical address
|
||||
*/
|
||||
|
@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
|
|||
ADDR_OF_256B_BLOCK(channel_index) |
|
||||
OFFSET_IN_256B_BLOCK(m->addr);
|
||||
|
||||
err_rec.address = m->addr;
|
||||
err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec.ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec.cu = 0;
|
||||
err_rec.mem_channel = channel_index;
|
||||
err_rec.mcumc_id = umc_inst;
|
||||
|
||||
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
|
||||
err_data.err_addr = &err_rec;
|
||||
err_data.err_addr_cnt = 1;
|
||||
amdgpu_umc_fill_error_record(&err_data, m->addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
|
||||
if (amdgpu_bad_page_threshold != 0) {
|
||||
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
||||
|
|
|
@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
|||
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
|
||||
uint64_t err_addr,
|
||||
uint64_t retired_page,
|
||||
uint32_t channel_index,
|
||||
uint32_t umc_inst)
|
||||
{
|
||||
struct eeprom_table_record *err_rec =
|
||||
&err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
|
|
|
@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
|
|||
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
/*
 * Fill the record at err_data->err_addr[err_data->err_addr_cnt] and bump
 * err_addr_cnt. retired_page is passed as an unshifted address: the
 * implementation applies AMDGPU_GPU_PAGE_SHIFT itself.
 */
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
|
||||
uint64_t err_addr,
|
||||
uint64_t retired_page,
|
||||
uint32_t channel_index,
|
||||
uint32_t umc_inst);
|
||||
#endif
|
||||
|
|
|
@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
|||
{
|
||||
uint32_t lsb, mc_umc_status_addr;
|
||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||
struct eeprom_table_record *err_rec;
|
||||
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||
|
||||
if (adev->asic_type == CHIP_ARCTURUS) {
|
||||
|
@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
|||
return;
|
||||
}
|
||||
|
||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
/* calculate error address if ue/ce error is detected */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
|
@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
|
|||
|
||||
/* we only save ue error information currently, ce is skipped */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||
== 1) {
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
== 1)
|
||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
}
|
||||
|
||||
/* clear umc status */
|
||||
|
|
|
@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
uint32_t umc_inst)
|
||||
{
|
||||
uint64_t mc_umc_status, err_addr, retired_page;
|
||||
struct eeprom_table_record *err_rec;
|
||||
uint32_t channel_index;
|
||||
uint32_t eccinfo_table_idx;
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
if (!err_data->err_addr)
|
||||
return;
|
||||
|
||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
/* calculate error address if ue/ce error is detected */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
|
@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
|
||||
/* we only save ue error information currently, ce is skipped */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||
== 1) {
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
== 1)
|
||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
|||
{
|
||||
uint32_t mc_umc_status_addr;
|
||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||
struct eeprom_table_record *err_rec;
|
||||
uint32_t channel_index;
|
||||
|
||||
mc_umc_status_addr =
|
||||
|
@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
|||
return;
|
||||
}
|
||||
|
||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
channel_index =
|
||||
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||
|
||||
|
@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
|||
|
||||
/* we only save ue error information currently, ce is skipped */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||
== 1) {
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
== 1)
|
||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
}
|
||||
|
||||
/* clear umc status */
|
||||
|
|
|
@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
uint32_t umc_inst)
|
||||
{
|
||||
uint64_t mc_umc_status, err_addr, retired_page;
|
||||
struct eeprom_table_record *err_rec;
|
||||
uint32_t channel_index;
|
||||
uint32_t eccinfo_table_idx;
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
if (!err_data->err_addr)
|
||||
return;
|
||||
|
||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
/* calculate error address if ue/ce error is detected */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
|
@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||
|
||||
/* we only save ue error information currently, ce is skipped */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||
== 1) {
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
== 1)
|
||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
|||
{
|
||||
uint32_t lsb, mc_umc_status_addr;
|
||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
||||
struct eeprom_table_record *err_rec;
|
||||
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
|
||||
|
||||
mc_umc_status_addr =
|
||||
|
@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
|||
return;
|
||||
}
|
||||
|
||||
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
|
||||
|
||||
/* calculate error address if ue/ce error is detected */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
|
||||
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
|
||||
|
@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
|
|||
|
||||
/* we only save ue error information currently, ce is skipped */
|
||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||
== 1) {
|
||||
err_rec->address = err_addr;
|
||||
/* page frame address is saved */
|
||||
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
|
||||
err_rec->ts = (uint64_t)ktime_get_real_seconds();
|
||||
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
|
||||
err_rec->cu = 0;
|
||||
err_rec->mem_channel = channel_index;
|
||||
err_rec->mcumc_id = umc_inst;
|
||||
|
||||
err_data->err_addr_cnt++;
|
||||
}
|
||||
== 1)
|
||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||
retired_page, channel_index, umc_inst);
|
||||
}
|
||||
|
||||
/* clear umc status */
|
||||
|
|
Loading…
Add table
Reference in a new issue