drm/i915/selftests: Unroll the CS frequency loop
Having noticed that MI_BB_START is incurring a memory stall (see the correlation with uncore frequency), we have to unroll the loop in order to diminish the impact of the MI_BB_START on the instruction throughput.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200421171351.19575-1-chris@chris-wilson.co.uk
This commit is contained in:
parent
bd3ec9e758
commit
33883310cd
1 changed file with 19 additions and 12 deletions
|
@@ -49,14 +49,17 @@ create_spin_counter(struct intel_engine_cs *engine,
|
||||||
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
|
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
|
||||||
struct drm_i915_gem_object *obj;
|
struct drm_i915_gem_object *obj;
|
||||||
struct i915_vma *vma;
|
struct i915_vma *vma;
|
||||||
|
unsigned long end;
|
||||||
u32 *base, *cs;
|
u32 *base, *cs;
|
||||||
int loop, i;
|
int loop, i;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
obj = i915_gem_object_create_internal(vm->i915, 4096);
|
obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
|
||||||
if (IS_ERR(obj))
|
if (IS_ERR(obj))
|
||||||
return ERR_CAST(obj);
|
return ERR_CAST(obj);
|
||||||
|
|
||||||
|
end = obj->base.size / sizeof(u32) - 1;
|
||||||
|
|
||||||
vma = i915_vma_instance(obj, vm, NULL);
|
vma = i915_vma_instance(obj, vm, NULL);
|
||||||
if (IS_ERR(vma)) {
|
if (IS_ERR(vma)) {
|
||||||
i915_gem_object_put(obj);
|
i915_gem_object_put(obj);
|
||||||
|
@@ -90,27 +93,31 @@ create_spin_counter(struct intel_engine_cs *engine,
|
||||||
|
|
||||||
loop = cs - base;
|
loop = cs - base;
|
||||||
|
|
||||||
*cs++ = MI_MATH(4);
|
/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
|
||||||
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
|
for (i = 0; i < 1024; i++) {
|
||||||
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
|
*cs++ = MI_MATH(4);
|
||||||
*cs++ = MI_MATH_ADD;
|
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
|
||||||
*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
|
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
|
||||||
|
*cs++ = MI_MATH_ADD;
|
||||||
|
*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
|
||||||
|
|
||||||
if (srm) {
|
if (srm) {
|
||||||
*cs++ = MI_STORE_REGISTER_MEM_GEN8;
|
*cs++ = MI_STORE_REGISTER_MEM_GEN8;
|
||||||
*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
|
*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
|
||||||
*cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
|
*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
|
||||||
*cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
|
*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*cs++ = MI_BATCH_BUFFER_START_GEN8;
|
*cs++ = MI_BATCH_BUFFER_START_GEN8;
|
||||||
*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
|
*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
|
||||||
*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
|
*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
|
||||||
|
GEM_BUG_ON(cs - base > end);
|
||||||
|
|
||||||
i915_gem_object_flush_map(obj);
|
i915_gem_object_flush_map(obj);
|
||||||
|
|
||||||
*cancel = base + loop;
|
*cancel = base + loop;
|
||||||
*counter = srm ? memset32(base + 1000, 0, 1) : NULL;
|
*counter = srm ? memset32(base + end, 0, 1) : NULL;
|
||||||
return vma;
|
return vma;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue