drm/amdgpu: Fix RAS function interface
The correctable and uncorrectable error counts are both calculated at each invocation of this function. Therefore, it is highly inefficient to return just one of them based on a Boolean input: if the caller wants both, twice the work would be done. (And this work is O(n^3) on Vega20.)

Fix this "interface" to simply return what it has calculated--both values. Let the caller choose what it wants to record, inspect, or use.

Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
2871e10199
commit
a46751fbcd
2 changed files with 18 additions and 10 deletions
|
@ -1043,29 +1043,36 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the total error counts on all IPs */
|
/* get the total error counts on all IPs */
|
||||||
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||||
bool is_ce)
|
unsigned long *ce_count,
|
||||||
|
unsigned long *ue_count)
|
||||||
{
|
{
|
||||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||||
struct ras_manager *obj;
|
struct ras_manager *obj;
|
||||||
struct ras_err_data data = {0, 0};
|
unsigned long ce, ue;
|
||||||
|
|
||||||
if (!adev->ras_enabled || !con)
|
if (!adev->ras_enabled || !con)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
|
ce = 0;
|
||||||
|
ue = 0;
|
||||||
list_for_each_entry(obj, &con->head, node) {
|
list_for_each_entry(obj, &con->head, node) {
|
||||||
struct ras_query_if info = {
|
struct ras_query_if info = {
|
||||||
.head = obj->head,
|
.head = obj->head,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (amdgpu_ras_query_error_status(adev, &info))
|
if (amdgpu_ras_query_error_status(adev, &info))
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
data.ce_count += info.ce_count;
|
ce += info.ce_count;
|
||||||
data.ue_count += info.ue_count;
|
ue += info.ue_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
return is_ce ? data.ce_count : data.ue_count;
|
if (ce_count)
|
||||||
|
*ce_count = ce;
|
||||||
|
|
||||||
|
if (ue_count)
|
||||||
|
*ue_count = ue;
|
||||||
}
|
}
|
||||||
/* query/inject/cure end */
|
/* query/inject/cure end */
|
||||||
|
|
||||||
|
|
|
@ -485,8 +485,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||||
void amdgpu_ras_resume(struct amdgpu_device *adev);
|
void amdgpu_ras_resume(struct amdgpu_device *adev);
|
||||||
void amdgpu_ras_suspend(struct amdgpu_device *adev);
|
void amdgpu_ras_suspend(struct amdgpu_device *adev);
|
||||||
|
|
||||||
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||||
bool is_ce);
|
unsigned long *ce_count,
|
||||||
|
unsigned long *ue_count);
|
||||||
|
|
||||||
/* error handling functions */
|
/* error handling functions */
|
||||||
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||||
|
|
Loading…
Add table
Reference in a new issue