Generate the mask of enabled L3 banks for the GT and store it with the
rest of the GT topology in a representation that is consistent across
platforms.

For now the L3 bank mask is only printed in the debug log, so developers
can easily determine the fusing characteristics of the machines they are
debugging. Later it can replace existing driver code that requires the
L3 bank count (rather than the mask), and the mask can easily be exposed
to user space in a new query if needed.

v2: Better naming of variable and function (Matt Roper)

Bspec: 52545, 52546, 62482
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240410123723.7-2-francois.dugast@intel.com
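
As a minimal sketch of the intended follow-up (not part of this patch;
the helper name below is hypothetical), the stored mask could replace
bank-count code via bitmap_weight():

	static unsigned int xe_gt_l3_bank_count(struct xe_gt *gt)
	{
		/* Count of enabled banks, derived from the stored mask */
		return bitmap_weight(gt->fuse_topo.l3_bank_mask,
				     XE_MAX_L3_BANK_MASK_BITS);
	}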
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_topology.h"

#include <linux/bitmap.h>

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"

static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{
	va_list argp;
	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
	int i;

	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
		numregs = XE_MAX_DSS_FUSE_REGS;

	va_start(argp, numregs);
	for (i = 0; i < numregs; i++)
		fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg));
	va_end(argp);

	bitmap_from_arr32(mask, fuse_val, numregs * 32);
}

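/*
 * Layout note (added for illustration): the variadic registers are read
 * in argument order, so the first register fills bits 0..31 of the
 * bitmap, the second bits 32..63, and so on.
 */
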
static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
	u32 val = 0;
	int i;

	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);

	/*
	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
	 * of enable).
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		reg_val = ~reg_val & XELP_EU_MASK;

	/* On PVC, one bit = one EU */
	if (GRAPHICS_VERx100(xe) == 1260) {
		val = reg_val;
	} else {
		/* All other platforms, one bit = 2 EU */
		for (i = 0; i < fls(reg_val); i++)
			if (reg_val & BIT(i))
				val |= 0x3 << 2 * i;
	}

	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}

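/*
 * Worked example (added for illustration): on a non-PVC platform, a fuse
 * value of 0b0101 (EU pairs 0 and 2 enabled) expands to val = 0b00110011,
 * i.e. each fuse bit becomes two adjacent EU mask bits.
 */
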
/**
 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
 * @xe: device
 * @dst: destination
 * @pattern: pattern to replicate
 * @patternbits: size of the pattern, in bits
 * @mask: mask describing where to replicate the pattern
 *
 * This is used to compute the L3 bank masks, in a generic format, on the
 * various platforms where the register encoding of L3 nodes and banks
 * differs from the driver's internal representation.
 *
 * Example 1:
 * ----------
 * @pattern =    0b1111
 *                 └┬─┘
 * @patternbits =    4 (bits)
 * @mask = 0b0101
 *           ││││
 *           │││└────────────────── 0b1111 (=1×0b1111)
 *           ││└──────────── 0b0000    │   (=0×0b1111)
 *           │└────── 0b1111    │      │   (=1×0b1111)
 *           └ 0b0000    │      │      │   (=0×0b1111)
 *                  │    │      │      │
 * @dst =      0b0000 0b1111 0b0000 0b1111
 *
 * Example 2:
 * ----------
 * @pattern =    0b11111111
 *                 └┬─────┘
 * @patternbits =    8 (bits)
 * @mask = 0b10
 *           ││
 *           │└────────── 0b00000000 (=0×0b11111111)
 *           └ 0b11111111      │     (=1×0b11111111)
 *                  │          │
 * @dst =      0b11111111 0b00000000
 */
static void
gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
			 xe_l3_bank_mask_t pattern, int patternbits,
			 unsigned long mask)
{
	unsigned long bit;

	xe_assert(xe, fls(mask) <= patternbits);
	for_each_set_bit(bit, &mask, 32) {
		xe_l3_bank_mask_t shifted_pattern = {};

		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
				  XE_MAX_L3_BANK_MASK_BITS);
		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
	}
}

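/*
 * Concrete call (added for illustration): pattern = 0b1111,
 * patternbits = 4, mask = 0b1001 yields dst = 0b1111000000001111: the
 * four-bit pattern lands in slots 0 and 3, slots 1 and 2 stay clear.
 */
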
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);

	if (GRAPHICS_VER(xe) >= 20) {
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}

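/*
 * Worked DG2 example (added for illustration): MEML3_EN = 0b0011 means
 * mem L3 nodes 0 and 1 are present, so the 8-bank pattern 0xff is
 * replicated into the two low byte slots and l3_bank_mask ends up as
 * 0xffff.
 */
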
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		*geometry_regs = 0;
		*compute_regs = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		*geometry_regs = 1;
		*compute_regs = 1;
	} else {
		*geometry_regs = 1;
		*compute_regs = 0;
	}
}

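/*
 * Mapping note (added for reference, best-effort): IP version above 20
 * is Xe3 or later (three fuse registers each), 12.60 is PVC with
 * compute-only DSS, >= 12.50 covers the Xe_HP generation, and older
 * platforms expose a single geometry fuse register.
 */
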
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}

void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);

	drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
		   gt->fuse_topo.l3_bank_mask);
}

/*
 * Used to obtain the index of the first DSS. Can start searching from the
 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
 * groupsize and groupnum are non-zero.
 */
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
{
	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}

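/*
 * Example (added for illustration): with groupsize = 8 and groupnum = 2,
 * the search starts at bit 16 and returns the index of the first enabled
 * DSS at or above it, or XE_MAX_DSS_FUSE_BITS if no bit is set there.
 */
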
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}

/**
 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
 * @gt: GT to check
 * @quad: Which quadrant of the DSS space to check
 *
 * Since Xe_HP platforms can have up to four CCS engines, those engines
 * are each logically associated with a quarter of the possible DSS. If there
 * are no DSS present in one of the four quadrants of the DSS space, the
 * corresponding CCS engine is also not available for use.
 *
 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
 */
bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
{
	struct xe_device *xe = gt_to_xe(gt);
	xe_dss_mask_t all_dss;
	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;

	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);

	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;

	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);

	return quad_first < (quad + 1) * dss_per_quad;
}
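
/*
 * Worked example (added for illustration): with one geometry and one
 * compute fuse register, dss_per_quad = 32 * 1 / 4 = 8; for quad = 2 the
 * search starts at DSS 16, and a first enabled DSS at index 17 (< 24)
 * means the matching CCS engine remains usable.
 */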