drm/amdkfd: Fix the warning of array-index-out-of-bounds
For some GPUs with more CUs, the original sibling_map[32] in struct crat_subtype_cache is not large enough to hold the cache information when creating the VCRAT table. To fix this problem, skip filling the struct crat_subtype_cache info and instead fill struct kfd_cache_properties directly. Signed-off-by: Ma Jun <Jun.Ma2@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
cfa61b8f9e
commit
c0cc999f3c
4 changed files with 286 additions and 296 deletions
|
@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_gpu_processor_id(
|
||||||
return current_id;
|
return current_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Static table to describe GPU Cache information */
|
|
||||||
struct kfd_gpu_cache_info {
|
|
||||||
uint32_t cache_size;
|
|
||||||
uint32_t cache_level;
|
|
||||||
uint32_t flags;
|
|
||||||
/* Indicates how many Compute Units share this cache
|
|
||||||
* within a SA. Value = 1 indicates the cache is not shared
|
|
||||||
*/
|
|
||||||
uint32_t num_cu_shared;
|
|
||||||
};
|
|
||||||
|
|
||||||
static struct kfd_gpu_cache_info kaveri_cache_info[] = {
|
static struct kfd_gpu_cache_info kaveri_cache_info[] = {
|
||||||
{
|
{
|
||||||
|
@ -1119,9 +1109,13 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
|
||||||
props->cachelines_per_tag = cache->lines_per_tag;
|
props->cachelines_per_tag = cache->lines_per_tag;
|
||||||
props->cache_assoc = cache->associativity;
|
props->cache_assoc = cache->associativity;
|
||||||
props->cache_latency = cache->cache_latency;
|
props->cache_latency = cache->cache_latency;
|
||||||
|
|
||||||
memcpy(props->sibling_map, cache->sibling_map,
|
memcpy(props->sibling_map, cache->sibling_map,
|
||||||
sizeof(props->sibling_map));
|
sizeof(props->sibling_map));
|
||||||
|
|
||||||
|
/* set the sibling_map_size as 32 for CRAT from ACPI */
|
||||||
|
props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
|
||||||
|
|
||||||
if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
|
if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
|
||||||
props->cache_type |= HSA_CACHE_TYPE_DATA;
|
props->cache_type |= HSA_CACHE_TYPE_DATA;
|
||||||
if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
|
if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
|
||||||
|
@ -1338,125 +1332,6 @@ err:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
|
||||||
static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
|
|
||||||
struct kfd_gpu_cache_info *pcache_info,
|
|
||||||
struct kfd_cu_info *cu_info,
|
|
||||||
int mem_available,
|
|
||||||
int cu_bitmask,
|
|
||||||
int cache_type, unsigned int cu_processor_id,
|
|
||||||
int cu_block)
|
|
||||||
{
|
|
||||||
unsigned int cu_sibling_map_mask;
|
|
||||||
int first_active_cu;
|
|
||||||
|
|
||||||
/* First check if enough memory is available */
|
|
||||||
if (sizeof(struct crat_subtype_cache) > mem_available)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
cu_sibling_map_mask = cu_bitmask;
|
|
||||||
cu_sibling_map_mask >>= cu_block;
|
|
||||||
cu_sibling_map_mask &=
|
|
||||||
((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
|
||||||
first_active_cu = ffs(cu_sibling_map_mask);
|
|
||||||
|
|
||||||
/* CU could be inactive. In case of shared cache find the first active
|
|
||||||
* CU, and in case of non-shared cache check if the CU is inactive. If
|
|
||||||
* inactive, skip it
|
|
||||||
*/
|
|
||||||
if (first_active_cu) {
|
|
||||||
memset(pcache, 0, sizeof(struct crat_subtype_cache));
|
|
||||||
pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
|
|
||||||
pcache->length = sizeof(struct crat_subtype_cache);
|
|
||||||
pcache->flags = pcache_info[cache_type].flags;
|
|
||||||
pcache->processor_id_low = cu_processor_id
|
|
||||||
+ (first_active_cu - 1);
|
|
||||||
pcache->cache_level = pcache_info[cache_type].cache_level;
|
|
||||||
pcache->cache_size = pcache_info[cache_type].cache_size;
|
|
||||||
|
|
||||||
/* Sibling map is w.r.t processor_id_low, so shift out
|
|
||||||
* inactive CU
|
|
||||||
*/
|
|
||||||
cu_sibling_map_mask =
|
|
||||||
cu_sibling_map_mask >> (first_active_cu - 1);
|
|
||||||
|
|
||||||
pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
|
|
||||||
pcache->sibling_map[1] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
|
||||||
pcache->sibling_map[2] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
|
||||||
pcache->sibling_map[3] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
|
||||||
static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
|
|
||||||
struct kfd_gpu_cache_info *pcache_info,
|
|
||||||
struct kfd_cu_info *cu_info,
|
|
||||||
int mem_available,
|
|
||||||
int cache_type, unsigned int cu_processor_id)
|
|
||||||
{
|
|
||||||
unsigned int cu_sibling_map_mask;
|
|
||||||
int first_active_cu;
|
|
||||||
int i, j, k;
|
|
||||||
|
|
||||||
/* First check if enough memory is available */
|
|
||||||
if (sizeof(struct crat_subtype_cache) > mem_available)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
|
|
||||||
cu_sibling_map_mask &=
|
|
||||||
((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
|
||||||
first_active_cu = ffs(cu_sibling_map_mask);
|
|
||||||
|
|
||||||
/* CU could be inactive. In case of shared cache find the first active
|
|
||||||
* CU, and in case of non-shared cache check if the CU is inactive. If
|
|
||||||
* inactive, skip it
|
|
||||||
*/
|
|
||||||
if (first_active_cu) {
|
|
||||||
memset(pcache, 0, sizeof(struct crat_subtype_cache));
|
|
||||||
pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
|
|
||||||
pcache->length = sizeof(struct crat_subtype_cache);
|
|
||||||
pcache->flags = pcache_info[cache_type].flags;
|
|
||||||
pcache->processor_id_low = cu_processor_id
|
|
||||||
+ (first_active_cu - 1);
|
|
||||||
pcache->cache_level = pcache_info[cache_type].cache_level;
|
|
||||||
pcache->cache_size = pcache_info[cache_type].cache_size;
|
|
||||||
|
|
||||||
/* Sibling map is w.r.t processor_id_low, so shift out
|
|
||||||
* inactive CU
|
|
||||||
*/
|
|
||||||
cu_sibling_map_mask =
|
|
||||||
cu_sibling_map_mask >> (first_active_cu - 1);
|
|
||||||
k = 0;
|
|
||||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
|
||||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine;
|
|
||||||
j++) {
|
|
||||||
pcache->sibling_map[k] =
|
|
||||||
(uint8_t)(cu_sibling_map_mask & 0xFF);
|
|
||||||
pcache->sibling_map[k+1] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
|
||||||
pcache->sibling_map[k+2] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
|
||||||
pcache->sibling_map[k+3] =
|
|
||||||
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
|
||||||
k += 4;
|
|
||||||
cu_sibling_map_mask =
|
|
||||||
cu_info->cu_bitmap[i % 4][j + i / 4];
|
|
||||||
cu_sibling_map_mask &= (
|
|
||||||
(1 << pcache_info[cache_type].num_cu_shared)
|
|
||||||
- 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define KFD_MAX_CACHE_TYPES 6
|
|
||||||
|
|
||||||
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
|
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
|
||||||
struct kfd_gpu_cache_info *pcache_info)
|
struct kfd_gpu_cache_info *pcache_info)
|
||||||
|
@ -1530,231 +1405,133 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
|
int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
|
||||||
* tables
|
|
||||||
*
|
|
||||||
* @kdev - [IN] GPU device
|
|
||||||
* @gpu_processor_id - [IN] GPU processor ID to which these caches
|
|
||||||
* associate
|
|
||||||
* @available_size - [IN] Amount of memory available in pcache
|
|
||||||
* @cu_info - [IN] Compute Unit info obtained from KGD
|
|
||||||
* @pcache - [OUT] memory into which cache data is to be filled in.
|
|
||||||
* @size_filled - [OUT] amount of data used up in pcache.
|
|
||||||
* @num_of_entries - [OUT] number of caches added
|
|
||||||
*/
|
|
||||||
static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
|
||||||
int gpu_processor_id,
|
|
||||||
int available_size,
|
|
||||||
struct kfd_cu_info *cu_info,
|
|
||||||
struct crat_subtype_cache *pcache,
|
|
||||||
int *size_filled,
|
|
||||||
int *num_of_entries)
|
|
||||||
{
|
{
|
||||||
struct kfd_gpu_cache_info *pcache_info;
|
|
||||||
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
|
|
||||||
int num_of_cache_types = 0;
|
int num_of_cache_types = 0;
|
||||||
int i, j, k;
|
|
||||||
int ct = 0;
|
|
||||||
int mem_available = available_size;
|
|
||||||
unsigned int cu_processor_id;
|
|
||||||
int ret;
|
|
||||||
unsigned int num_cu_shared;
|
|
||||||
|
|
||||||
switch (kdev->adev->asic_type) {
|
switch (kdev->adev->asic_type) {
|
||||||
case CHIP_KAVERI:
|
case CHIP_KAVERI:
|
||||||
pcache_info = kaveri_cache_info;
|
*pcache_info = kaveri_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
|
num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_HAWAII:
|
case CHIP_HAWAII:
|
||||||
pcache_info = hawaii_cache_info;
|
*pcache_info = hawaii_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
|
num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_CARRIZO:
|
case CHIP_CARRIZO:
|
||||||
pcache_info = carrizo_cache_info;
|
*pcache_info = carrizo_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
|
num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_TONGA:
|
case CHIP_TONGA:
|
||||||
pcache_info = tonga_cache_info;
|
*pcache_info = tonga_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
|
num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_FIJI:
|
case CHIP_FIJI:
|
||||||
pcache_info = fiji_cache_info;
|
*pcache_info = fiji_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
|
num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
pcache_info = polaris10_cache_info;
|
*pcache_info = polaris10_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
|
num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
pcache_info = polaris11_cache_info;
|
*pcache_info = polaris11_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
|
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
pcache_info = polaris12_cache_info;
|
*pcache_info = polaris12_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
||||||
break;
|
break;
|
||||||
case CHIP_VEGAM:
|
case CHIP_VEGAM:
|
||||||
pcache_info = vegam_cache_info;
|
*pcache_info = vegam_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
switch (KFD_GC_VERSION(kdev)) {
|
switch (KFD_GC_VERSION(kdev)) {
|
||||||
case IP_VERSION(9, 0, 1):
|
case IP_VERSION(9, 0, 1):
|
||||||
pcache_info = vega10_cache_info;
|
*pcache_info = vega10_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
|
num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(9, 2, 1):
|
case IP_VERSION(9, 2, 1):
|
||||||
pcache_info = vega12_cache_info;
|
*pcache_info = vega12_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
|
num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(9, 4, 0):
|
case IP_VERSION(9, 4, 0):
|
||||||
case IP_VERSION(9, 4, 1):
|
case IP_VERSION(9, 4, 1):
|
||||||
pcache_info = vega20_cache_info;
|
*pcache_info = vega20_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
|
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(9, 4, 2):
|
case IP_VERSION(9, 4, 2):
|
||||||
pcache_info = aldebaran_cache_info;
|
*pcache_info = aldebaran_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
|
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(9, 1, 0):
|
case IP_VERSION(9, 1, 0):
|
||||||
case IP_VERSION(9, 2, 2):
|
case IP_VERSION(9, 2, 2):
|
||||||
pcache_info = raven_cache_info;
|
*pcache_info = raven_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
|
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(9, 3, 0):
|
case IP_VERSION(9, 3, 0):
|
||||||
pcache_info = renoir_cache_info;
|
*pcache_info = renoir_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
|
num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 1, 10):
|
case IP_VERSION(10, 1, 10):
|
||||||
case IP_VERSION(10, 1, 2):
|
case IP_VERSION(10, 1, 2):
|
||||||
case IP_VERSION(10, 1, 3):
|
case IP_VERSION(10, 1, 3):
|
||||||
case IP_VERSION(10, 1, 4):
|
case IP_VERSION(10, 1, 4):
|
||||||
pcache_info = navi10_cache_info;
|
*pcache_info = navi10_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
|
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 1, 1):
|
case IP_VERSION(10, 1, 1):
|
||||||
pcache_info = navi14_cache_info;
|
*pcache_info = navi14_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
|
num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 0):
|
case IP_VERSION(10, 3, 0):
|
||||||
pcache_info = sienna_cichlid_cache_info;
|
*pcache_info = sienna_cichlid_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
|
num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 2):
|
case IP_VERSION(10, 3, 2):
|
||||||
pcache_info = navy_flounder_cache_info;
|
*pcache_info = navy_flounder_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
|
num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 4):
|
case IP_VERSION(10, 3, 4):
|
||||||
pcache_info = dimgrey_cavefish_cache_info;
|
*pcache_info = dimgrey_cavefish_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
|
num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 1):
|
case IP_VERSION(10, 3, 1):
|
||||||
pcache_info = vangogh_cache_info;
|
*pcache_info = vangogh_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
|
num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 5):
|
case IP_VERSION(10, 3, 5):
|
||||||
pcache_info = beige_goby_cache_info;
|
*pcache_info = beige_goby_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
|
num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 3):
|
case IP_VERSION(10, 3, 3):
|
||||||
pcache_info = yellow_carp_cache_info;
|
*pcache_info = yellow_carp_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
|
num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 6):
|
case IP_VERSION(10, 3, 6):
|
||||||
pcache_info = gc_10_3_6_cache_info;
|
*pcache_info = gc_10_3_6_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
|
num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(10, 3, 7):
|
case IP_VERSION(10, 3, 7):
|
||||||
pcache_info = gfx1037_cache_info;
|
*pcache_info = gfx1037_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
|
num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
|
||||||
break;
|
break;
|
||||||
case IP_VERSION(11, 0, 0):
|
case IP_VERSION(11, 0, 0):
|
||||||
case IP_VERSION(11, 0, 1):
|
case IP_VERSION(11, 0, 1):
|
||||||
case IP_VERSION(11, 0, 2):
|
case IP_VERSION(11, 0, 2):
|
||||||
case IP_VERSION(11, 0, 3):
|
case IP_VERSION(11, 0, 3):
|
||||||
pcache_info = cache_info;
|
|
||||||
num_of_cache_types =
|
num_of_cache_types =
|
||||||
kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
|
kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
pcache_info = dummy_cache_info;
|
*pcache_info = dummy_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
|
num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
|
||||||
pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
|
pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return num_of_cache_types;
|
||||||
*size_filled = 0;
|
|
||||||
*num_of_entries = 0;
|
|
||||||
|
|
||||||
/* For each type of cache listed in the kfd_gpu_cache_info table,
|
|
||||||
* go through all available Compute Units.
|
|
||||||
* The [i,j,k] loop will
|
|
||||||
* if kfd_gpu_cache_info.num_cu_shared = 1
|
|
||||||
* will parse through all available CU
|
|
||||||
* If (kfd_gpu_cache_info.num_cu_shared != 1)
|
|
||||||
* then it will consider only one CU from
|
|
||||||
* the shared unit
|
|
||||||
*/
|
|
||||||
|
|
||||||
for (ct = 0; ct < num_of_cache_types; ct++) {
|
|
||||||
cu_processor_id = gpu_processor_id;
|
|
||||||
if (pcache_info[ct].cache_level == 1) {
|
|
||||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
|
||||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
|
|
||||||
for (k = 0; k < cu_info->num_cu_per_sh;
|
|
||||||
k += pcache_info[ct].num_cu_shared) {
|
|
||||||
ret = fill_in_l1_pcache(pcache,
|
|
||||||
pcache_info,
|
|
||||||
cu_info,
|
|
||||||
mem_available,
|
|
||||||
cu_info->cu_bitmap[i % 4][j + i / 4],
|
|
||||||
ct,
|
|
||||||
cu_processor_id,
|
|
||||||
k);
|
|
||||||
|
|
||||||
if (ret < 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!ret) {
|
|
||||||
pcache++;
|
|
||||||
(*num_of_entries)++;
|
|
||||||
mem_available -= sizeof(*pcache);
|
|
||||||
(*size_filled) += sizeof(*pcache);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Move to next CU block */
|
|
||||||
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
|
|
||||||
cu_info->num_cu_per_sh) ?
|
|
||||||
pcache_info[ct].num_cu_shared :
|
|
||||||
(cu_info->num_cu_per_sh - k);
|
|
||||||
cu_processor_id += num_cu_shared;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = fill_in_l2_l3_pcache(pcache,
|
|
||||||
pcache_info,
|
|
||||||
cu_info,
|
|
||||||
mem_available,
|
|
||||||
ct,
|
|
||||||
cu_processor_id);
|
|
||||||
|
|
||||||
if (ret < 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!ret) {
|
|
||||||
pcache++;
|
|
||||||
(*num_of_entries)++;
|
|
||||||
mem_available -= sizeof(*pcache);
|
|
||||||
(*size_filled) += sizeof(*pcache);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool kfd_ignore_crat(void)
|
static bool kfd_ignore_crat(void)
|
||||||
|
@ -2313,8 +2090,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
||||||
struct kfd_cu_info cu_info;
|
struct kfd_cu_info cu_info;
|
||||||
int avail_size = *size;
|
int avail_size = *size;
|
||||||
uint32_t total_num_of_cu;
|
uint32_t total_num_of_cu;
|
||||||
int num_of_cache_entries = 0;
|
|
||||||
int cache_mem_filled = 0;
|
|
||||||
uint32_t nid = 0;
|
uint32_t nid = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
@ -2415,31 +2190,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
||||||
crat_table->length += sizeof(struct crat_subtype_memory);
|
crat_table->length += sizeof(struct crat_subtype_memory);
|
||||||
crat_table->total_entries++;
|
crat_table->total_entries++;
|
||||||
|
|
||||||
/* TODO: Fill in cache information. This information is NOT readily
|
|
||||||
* available in KGD
|
|
||||||
*/
|
|
||||||
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
|
|
||||||
sub_type_hdr->length);
|
|
||||||
ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
|
|
||||||
avail_size,
|
|
||||||
&cu_info,
|
|
||||||
(struct crat_subtype_cache *)sub_type_hdr,
|
|
||||||
&cache_mem_filled,
|
|
||||||
&num_of_cache_entries);
|
|
||||||
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
crat_table->length += cache_mem_filled;
|
|
||||||
crat_table->total_entries += num_of_cache_entries;
|
|
||||||
avail_size -= cache_mem_filled;
|
|
||||||
|
|
||||||
/* Fill in Subtype: IO_LINKS
|
/* Fill in Subtype: IO_LINKS
|
||||||
* Only direct links are added here which is Link from GPU to
|
* Only direct links are added here which is Link from GPU to
|
||||||
* its NUMA node. Indirect links are added by userspace.
|
* its NUMA node. Indirect links are added by userspace.
|
||||||
*/
|
*/
|
||||||
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
|
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
|
||||||
cache_mem_filled);
|
sub_type_hdr->length);
|
||||||
ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
|
ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
|
||||||
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
|
(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
|
||||||
|
|
||||||
|
|
|
@ -295,6 +295,18 @@ struct crat_subtype_generic {
|
||||||
|
|
||||||
struct kfd_dev;
|
struct kfd_dev;
|
||||||
|
|
||||||
|
/* Static table to describe GPU Cache information */
|
||||||
|
struct kfd_gpu_cache_info {
|
||||||
|
uint32_t cache_size;
|
||||||
|
uint32_t cache_level;
|
||||||
|
uint32_t flags;
|
||||||
|
/* Indicates how many Compute Units share this cache
|
||||||
|
* within a SA. Value = 1 indicates the cache is not shared
|
||||||
|
*/
|
||||||
|
uint32_t num_cu_shared;
|
||||||
|
};
|
||||||
|
int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
|
||||||
|
|
||||||
int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
|
int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
|
||||||
void kfd_destroy_crat_image(void *crat_image);
|
void kfd_destroy_crat_image(void *crat_image);
|
||||||
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
|
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
|
||||||
|
|
|
@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
|
||||||
|
|
||||||
/* Making sure that the buffer is an empty string */
|
/* Making sure that the buffer is an empty string */
|
||||||
buffer[0] = 0;
|
buffer[0] = 0;
|
||||||
|
|
||||||
cache = container_of(attr, struct kfd_cache_properties, attr);
|
cache = container_of(attr, struct kfd_cache_properties, attr);
|
||||||
if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
|
if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
@ -379,8 +378,9 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
|
||||||
sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
|
sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
|
||||||
sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
|
sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
|
||||||
sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
|
sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
|
||||||
|
|
||||||
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
|
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
|
||||||
for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
|
for (i = 0; i < cache->sibling_map_size; i++)
|
||||||
for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
|
for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
|
||||||
/* Check each bit */
|
/* Check each bit */
|
||||||
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
|
offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
|
||||||
|
@ -1197,7 +1197,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
|
||||||
struct kfd_iolink_properties *iolink;
|
struct kfd_iolink_properties *iolink;
|
||||||
struct kfd_iolink_properties *p2plink;
|
struct kfd_iolink_properties *p2plink;
|
||||||
|
|
||||||
down_write(&topology_lock);
|
|
||||||
list_for_each_entry(dev, &topology_device_list, list) {
|
list_for_each_entry(dev, &topology_device_list, list) {
|
||||||
/* Discrete GPUs need their own topology device list
|
/* Discrete GPUs need their own topology device list
|
||||||
* entries. Don't assign them to CPU/APU nodes.
|
* entries. Don't assign them to CPU/APU nodes.
|
||||||
|
@ -1221,7 +1220,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
up_write(&topology_lock);
|
|
||||||
return out_dev;
|
return out_dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1592,6 +1590,221 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
||||||
|
static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
|
||||||
|
struct kfd_gpu_cache_info *pcache_info,
|
||||||
|
struct kfd_cu_info *cu_info,
|
||||||
|
int cu_bitmask,
|
||||||
|
int cache_type, unsigned int cu_processor_id,
|
||||||
|
int cu_block)
|
||||||
|
{
|
||||||
|
unsigned int cu_sibling_map_mask;
|
||||||
|
int first_active_cu;
|
||||||
|
struct kfd_cache_properties *pcache = NULL;
|
||||||
|
|
||||||
|
cu_sibling_map_mask = cu_bitmask;
|
||||||
|
cu_sibling_map_mask >>= cu_block;
|
||||||
|
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||||
|
first_active_cu = ffs(cu_sibling_map_mask);
|
||||||
|
|
||||||
|
/* CU could be inactive. In case of shared cache find the first active
|
||||||
|
* CU, and in case of non-shared cache check if the CU is inactive. If
|
||||||
|
* inactive, skip it
|
||||||
|
*/
|
||||||
|
if (first_active_cu) {
|
||||||
|
pcache = kfd_alloc_struct(pcache);
|
||||||
|
if (!pcache)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
memset(pcache, 0, sizeof(struct kfd_cache_properties));
|
||||||
|
pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
|
||||||
|
pcache->cache_level = pcache_info[cache_type].cache_level;
|
||||||
|
pcache->cache_size = pcache_info[cache_type].cache_size;
|
||||||
|
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_CPU;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
|
||||||
|
|
||||||
|
/* Sibling map is w.r.t processor_id_low, so shift out
|
||||||
|
* inactive CU
|
||||||
|
*/
|
||||||
|
cu_sibling_map_mask =
|
||||||
|
cu_sibling_map_mask >> (first_active_cu - 1);
|
||||||
|
|
||||||
|
pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
|
||||||
|
pcache->sibling_map[1] =
|
||||||
|
(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
||||||
|
pcache->sibling_map[2] =
|
||||||
|
(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
||||||
|
pcache->sibling_map[3] =
|
||||||
|
(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
||||||
|
|
||||||
|
pcache->sibling_map_size = 4;
|
||||||
|
*props_ext = pcache;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
|
||||||
|
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
|
||||||
|
struct kfd_gpu_cache_info *pcache_info,
|
||||||
|
struct kfd_cu_info *cu_info,
|
||||||
|
int cache_type, unsigned int cu_processor_id)
|
||||||
|
{
|
||||||
|
unsigned int cu_sibling_map_mask;
|
||||||
|
int first_active_cu;
|
||||||
|
int i, j, k;
|
||||||
|
struct kfd_cache_properties *pcache = NULL;
|
||||||
|
|
||||||
|
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
|
||||||
|
cu_sibling_map_mask &=
|
||||||
|
((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||||
|
first_active_cu = ffs(cu_sibling_map_mask);
|
||||||
|
|
||||||
|
/* CU could be inactive. In case of shared cache find the first active
|
||||||
|
* CU, and in case of non-shared cache check if the CU is inactive. If
|
||||||
|
* inactive, skip it
|
||||||
|
*/
|
||||||
|
if (first_active_cu) {
|
||||||
|
pcache = kfd_alloc_struct(pcache);
|
||||||
|
if (!pcache)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
memset(pcache, 0, sizeof(struct kfd_cache_properties));
|
||||||
|
pcache->processor_id_low = cu_processor_id
|
||||||
|
+ (first_active_cu - 1);
|
||||||
|
pcache->cache_level = pcache_info[cache_type].cache_level;
|
||||||
|
pcache->cache_size = pcache_info[cache_type].cache_size;
|
||||||
|
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_CPU;
|
||||||
|
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
|
||||||
|
pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
|
||||||
|
|
||||||
|
/* Sibling map is w.r.t processor_id_low, so shift out
|
||||||
|
* inactive CU
|
||||||
|
*/
|
||||||
|
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
|
||||||
|
k = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||||
|
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
|
||||||
|
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
|
||||||
|
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
||||||
|
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
||||||
|
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
||||||
|
k += 4;
|
||||||
|
|
||||||
|
cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
|
||||||
|
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pcache->sibling_map_size = k;
|
||||||
|
*props_ext = pcache;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define KFD_MAX_CACHE_TYPES 6
|
||||||
|
|
||||||
|
/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
|
||||||
|
* tables
|
||||||
|
*/
|
||||||
|
void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
|
||||||
|
{
|
||||||
|
struct kfd_gpu_cache_info *pcache_info = NULL;
|
||||||
|
int i, j, k;
|
||||||
|
int ct = 0;
|
||||||
|
unsigned int cu_processor_id;
|
||||||
|
int ret;
|
||||||
|
unsigned int num_cu_shared;
|
||||||
|
struct kfd_cu_info cu_info;
|
||||||
|
struct kfd_cu_info *pcu_info;
|
||||||
|
int gpu_processor_id;
|
||||||
|
struct kfd_cache_properties *props_ext;
|
||||||
|
int num_of_entries = 0;
|
||||||
|
int num_of_cache_types = 0;
|
||||||
|
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
|
||||||
|
|
||||||
|
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
|
||||||
|
pcu_info = &cu_info;
|
||||||
|
|
||||||
|
gpu_processor_id = dev->node_props.simd_id_base;
|
||||||
|
|
||||||
|
pcache_info = cache_info;
|
||||||
|
num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
|
||||||
|
if (!num_of_cache_types) {
|
||||||
|
pr_warn("no cache info found\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For each type of cache listed in the kfd_gpu_cache_info table,
|
||||||
|
* go through all available Compute Units.
|
||||||
|
* The [i,j,k] loop will
|
||||||
|
* if kfd_gpu_cache_info.num_cu_shared = 1
|
||||||
|
* will parse through all available CU
|
||||||
|
* If (kfd_gpu_cache_info.num_cu_shared != 1)
|
||||||
|
* then it will consider only one CU from
|
||||||
|
* the shared unit
|
||||||
|
*/
|
||||||
|
for (ct = 0; ct < num_of_cache_types; ct++) {
|
||||||
|
cu_processor_id = gpu_processor_id;
|
||||||
|
if (pcache_info[ct].cache_level == 1) {
|
||||||
|
for (i = 0; i < pcu_info->num_shader_engines; i++) {
|
||||||
|
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
|
||||||
|
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
|
||||||
|
|
||||||
|
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
|
||||||
|
pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
|
||||||
|
cu_processor_id, k);
|
||||||
|
|
||||||
|
if (ret < 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!ret) {
|
||||||
|
num_of_entries++;
|
||||||
|
list_add_tail(&props_ext->list, &dev->cache_props);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Move to next CU block */
|
||||||
|
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
|
||||||
|
pcu_info->num_cu_per_sh) ?
|
||||||
|
pcache_info[ct].num_cu_shared :
|
||||||
|
(pcu_info->num_cu_per_sh - k);
|
||||||
|
cu_processor_id += num_cu_shared;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
|
||||||
|
pcu_info, ct, cu_processor_id);
|
||||||
|
|
||||||
|
if (ret < 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!ret) {
|
||||||
|
num_of_entries++;
|
||||||
|
list_add_tail(&props_ext->list, &dev->cache_props);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev->node_props.caches_count += num_of_entries;
|
||||||
|
pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
|
||||||
|
}
|
||||||
|
|
||||||
int kfd_topology_add_device(struct kfd_dev *gpu)
|
int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||||
{
|
{
|
||||||
uint32_t gpu_id;
|
uint32_t gpu_id;
|
||||||
|
@ -1616,9 +1829,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||||
* CRAT to create a new topology device. Once created assign the gpu to
|
* CRAT to create a new topology device. Once created assign the gpu to
|
||||||
* that topology device
|
* that topology device
|
||||||
*/
|
*/
|
||||||
|
down_write(&topology_lock);
|
||||||
dev = kfd_assign_gpu(gpu);
|
dev = kfd_assign_gpu(gpu);
|
||||||
if (!dev) {
|
if (!dev) {
|
||||||
down_write(&topology_lock);
|
|
||||||
proximity_domain = ++topology_crat_proximity_domain;
|
proximity_domain = ++topology_crat_proximity_domain;
|
||||||
|
|
||||||
res = kfd_create_crat_image_virtual(&crat_image, &image_size,
|
res = kfd_create_crat_image_virtual(&crat_image, &image_size,
|
||||||
|
@ -1630,6 +1843,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||||
topology_crat_proximity_domain--;
|
topology_crat_proximity_domain--;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = kfd_parse_crat_table(crat_image,
|
res = kfd_parse_crat_table(crat_image,
|
||||||
&temp_topology_device_list,
|
&temp_topology_device_list,
|
||||||
proximity_domain);
|
proximity_domain);
|
||||||
|
@ -1643,23 +1857,28 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||||
kfd_topology_update_device_list(&temp_topology_device_list,
|
kfd_topology_update_device_list(&temp_topology_device_list,
|
||||||
&topology_device_list);
|
&topology_device_list);
|
||||||
|
|
||||||
/* Update the SYSFS tree, since we added another topology
|
|
||||||
* device
|
|
||||||
*/
|
|
||||||
res = kfd_topology_update_sysfs();
|
|
||||||
up_write(&topology_lock);
|
|
||||||
|
|
||||||
if (!res)
|
|
||||||
sys_props.generation_count++;
|
|
||||||
else
|
|
||||||
pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
|
|
||||||
gpu_id, res);
|
|
||||||
dev = kfd_assign_gpu(gpu);
|
dev = kfd_assign_gpu(gpu);
|
||||||
if (WARN_ON(!dev)) {
|
if (WARN_ON(!dev)) {
|
||||||
res = -ENODEV;
|
res = -ENODEV;
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fill the cache affinity information here for the GPUs
|
||||||
|
* using VCRAT
|
||||||
|
*/
|
||||||
|
kfd_fill_cache_non_crat_info(dev, gpu);
|
||||||
|
|
||||||
|
/* Update the SYSFS tree, since we added another topology
|
||||||
|
* device
|
||||||
|
*/
|
||||||
|
res = kfd_topology_update_sysfs();
|
||||||
|
if (!res)
|
||||||
|
sys_props.generation_count++;
|
||||||
|
else
|
||||||
|
pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
|
||||||
|
gpu_id, res);
|
||||||
}
|
}
|
||||||
|
up_write(&topology_lock);
|
||||||
|
|
||||||
dev->gpu_id = gpu_id;
|
dev->gpu_id = gpu_id;
|
||||||
gpu->id = gpu_id;
|
gpu->id = gpu_id;
|
||||||
|
|
|
@ -80,6 +80,8 @@ struct kfd_mem_properties {
|
||||||
struct attribute attr;
|
struct attribute attr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define CACHE_SIBLINGMAP_SIZE 64
|
||||||
|
|
||||||
struct kfd_cache_properties {
|
struct kfd_cache_properties {
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
uint32_t processor_id_low;
|
uint32_t processor_id_low;
|
||||||
|
@ -90,10 +92,11 @@ struct kfd_cache_properties {
|
||||||
uint32_t cache_assoc;
|
uint32_t cache_assoc;
|
||||||
uint32_t cache_latency;
|
uint32_t cache_latency;
|
||||||
uint32_t cache_type;
|
uint32_t cache_type;
|
||||||
uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
|
uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE];
|
||||||
struct kfd_dev *gpu;
|
struct kfd_dev *gpu;
|
||||||
struct kobject *kobj;
|
struct kobject *kobj;
|
||||||
struct attribute attr;
|
struct attribute attr;
|
||||||
|
uint32_t sibling_map_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kfd_iolink_properties {
|
struct kfd_iolink_properties {
|
||||||
|
|
Loading…
Add table
Reference in a new issue