core: - EDID cleanups - scheduler error handling fixes - managed: add drmm_release_action() with tests - add ratelimited drm debug print - DPCD PSR early transport macro - DP tunneling and bandwidth allocation helpers - remove built-in edids - dp: Avoid AUX transfers on powered-down displays - dp: Add VSC SDP helpers cross drivers: - use new drm print helpers - switch to ->read_edid callback - gem: add stats for shared buffers plus updates to amdgpu, i915, xe syncobj: - fixes to waiting and sleeping ttm: - add tests - fix errno codes - simply busy-placement handling - fix page decryption media: - tc358743: fix v4l device registration video: - move all kernel parameters for video behind CONFIG_VIDEO sound: - remove <drm/drm_edid.h> include from header ci: - add tests for msm - fix apq8016 runner efifb: - use copy of global screen_info state vesafb: - use copy of global screen_info state simplefb: - fix logging bridge: - ite-6505: fix DP link-training bug - samsung-dsim: fix error checking in probe - samsung-dsim: add bsh-smm-s2/pro boards - tc358767: fix regmap usage - imx: add i.MX8MP HDMI PVI plus DT bindings - imx: add i.MX8MP HDMI TX plus DT bindings - sii902x: fix probing and unregistration - tc358767: limit pixel PLL input range - switch to new drm_bridge_read_edid() interface panel: - ltk050h3146w: error-handling fixes - panel-edp: support delay between power-on and enable; use put_sync in unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49 V8.0, BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings - add BOE TH101MB31IG002-28A plus DT bindings - add EDT ETML1010G3DRA plus DT bindings - add Novatek NT36672E LCD DSI plus DT bindings - nt36523: support 120Hz timings, fix includes - simple: fix display timings on RK32FN48H - visionox-vtdr6130: fix initialization - add Powkiddy RGB10MAX3 plus DT bindings - st7703: support panel rotation 
plus DT bindings - add Himax HX83112A plus DT bindings - ltk500hd1829: add support for ltk101b4029w and admatec 9904370 - simple: add BOE BP082WX1-100 8.2" panel plus DT bindungs panel-orientation-quirks: - GPD Win Mini amdgpu: - Validate DMABuf imports in compute VMs - Add RAS ACA framework - PSP 13 fixes - Misc code cleanups - Replay fixes - Atom interpretor PS, WS bounds checking - DML2 fixes - Audio fixes - DCN 3.5 Z state fixes - Remove deprecated ida_simple usage - UBSAN fixes - RAS fixes - Enable seq64 infrastructure - DC color block enablement - Documentation updates - DC documentation updates - DMCUB updates - ATHUB 4.1 support - LSDMA 7.0 support - JPEG DPG support - IH 7.0 support - HDP 7.0 support - VCN 5.0 support - SMU 13.0.6 updates - NBIO 7.11 updates - SDMA 6.1 updates - MMHUB 3.3 updates - DCN 3.5.1 support - NBIF 6.3.1 support - VPE 6.1.1 support amdkfd: - Validate DMABuf imports in compute VMs - SVM fixes - Trap handler updates and enhancements - Fix cache size reporting - Relocate the trap handler radeon: - Atom interpretor PS, WS bounds checking - Misc code cleanups xe: - new query for GuC submission version - Remove unused persistent exec_queues - Add vram frequency sysfs attributes - Add the flag XE_VM_BIND_FLAG_DUMPABLE - Drop pre-production workarounds - Drop kunit tests for unsupported platforms - Start pumbling SR-IOV support with memory based interrupts for VF - Allow to map BO in GGTT with PAT index corresponding to XE_CACHE_UC to work with memory based interrupts - Add GuC Doorbells Manager as prep work SR-IOV - Implement additional workarounds for xe2 and MTL - Program a few registers according to perfomance guide spec for Xe2 - Fix remaining 32b build issues and enable it back - Fix build with CONFIG_DEBUG_FS=n - Fix warnings from GuC ABI headers - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF - Release mmap mappings on rpm suspend - Disable mid-thread preemption when not properly supported by hardware - Fix xe_exec 
by reserving extra fence slot for CPU bind - Fix xe_exec with full long running exec queue - Canonicalize addresses where needed for Xe2 and add to devcoredum - Toggle USM support for Xe2 - Only allow 1 ufence per exec / bind IOCTL - Add GuC firmware loading for Lunar Lake - Add XE_VMA_PTE_64K VMA flag i915: - Add more ADL-N PCI IDs - Enable fastboot also on older platforms - Early transport for panel replay and PSR - New ARL PCI IDs - DP TPS4 PHY test pattern support - Unify and improve VSC SDP for PSR and non-PSR cases - Refactor memory regions and improve debug logging - Rework global state serialization - Remove unused CDCLK divider fields - Unify HDCP connector logging format - Use display instead of graphics version in display code - Move VBT and opregion debugfs next to the implementation - Abstract opregion interface, use opaque type - MTL fixes - HPD handling fixes - Add GuC submission interface version query - Atomically invalidate userptr on mmu-notifier - Update handling of MMIO triggered reports - Don't make assumptions about intel_wakeref_t type - Extend driver code of Xe_LPG to Xe_LPG+ - Add flex arrays to struct i915_syncmap - Allow for very slow HuC loading - DP tunneling and bandwidth allocation support msm: - Correct bindings for MSM8976 and SM8650 platforms - Start migration of MDP5 platforms to DPU driver - X1E80100 MDSS support - DPU: - Improve DSC allocation, fixing several important corner cases - Add support for SDM630/SDM660 platforms - Simplify dpu_encoder_phys_ops - Apply fixes targeting DSC support with a single DSC encoder - Apply fixes for HCTL_EN timing configuration - X1E80100 support - Add support for YUV420 over DP - GPU: - fix sc7180 UBWC config - fix a7xx LLC config - new gpu support: a305B, a750, a702 - machine support: SM7150 (different power levels than other a618) - a7xx devcoredump support habanalabs: - configure IRQ affinity according to NUMA node - move HBM MMU page tables inside the HBM - improve device reset - check 
extended PCIe errors ivpu: - updates to firmware API - refactor BO allocation imx: - use devm_ functions during init hisilicon: - fix EDID includes mgag200: - improve ioremap usage - convert to struct drm_edid - Work around PCI write bursts nouveau: - disp: use kmemdup() - fix EDID includes - documentation fixes qaic: - fixes to BO handling - make use of DRM managed release - fix order of remove operations rockchip: - analogix_dp: get encoder port from DT - inno_hdmi: support HDMI for RK3128 - lvds: error-handling fixes ssd130x: - support SSD133x plus DT bindings tegra: - fix error handling tilcdc: - make use of DRM managed release v3d: - show memory stats in debugfs - Support display MMU page size vc4: - fix error handling in plane prepare_fb - fix framebuffer test in plane helpers virtio: - add venus capset defines vkms: - fix OOB access when programming the LUT - Kconfig improvements vmwgfx: - unmap surface before changing plane state - fix memory leak in error handling - documentation fixes - list command SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid - fix null-pointer deref in execbuf - refactor display-mode probing - fix fencing for creating cursor MOBs - fix cursor-memory lifetime xlnx: - fix live video input for ZynqMP DPSUB lima: - fix memory leak loongson: - fail if no VRAM present meson: - switch to new drm_bridge_read_edid() interface renesas: - add RZ/G2L DU support plus DT bindings mxsfb: - Use managed mode config sun4i: - HDMI: updates to atomic mode setting mediatek: - Add display driver for MT8188 VDOSYS1 - DSI driver cleanups - Filter modes according to hardware capability - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip etnaviv: - enhancements for NPU and MRT support -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmXxI+AACgkQDHTzWXnE hr5isxAApZ+DxesDbV8bd91KXL03vTfJtM5xVQuZoDzrr20KdTvu2EfQcCFnAUjl YtY05U9arDT4Txq5nX70Xc6I5M9HN6lqSUfsWhI6xUcR9TUollPbYwEu8IdoMaCG 
TRnspkiheye+DLFY6omLNH2aG1/k1IIefVWKaClFpbNPaaSHREDiY7/rkmErMBIS hrN13+6IVzX7+6fmNgHugUfdyawDJ8J9Nsc8T3Zlioljq3p+VbtStLsGeABTHSEJ MX18FwbGllI+QcXvaXM8gIg8NYKvSx/ZtlvKTpyPpTjZT3i3BpY+7yJqWDRQhiGM VTX7di1f90yWgzlYE5T33MW7Imvw3q04N7qYJ+Z1LHD/A8VyjwPUKLeul8P9ousT 0qQLSQsnuXH5AMLDh8IeLG/i0hAMWJ2UbProFSAFhd/UQHP7QOm2mmCsf79me9It qKFn6QZKvAKGZk/myTbQIVAmQWrDCpKq4i1aoKXEvcEuQUtM1lPvmMVsStVEfG+y ACaI24zSJACViH6rfhVzr74giwZX/ay0NSXqwRXfD5kX8fXb050LxLGW93iYZoHv FpdT2C8oTS1A5nsZpoxwVP35euUsp7D4J5YYbrZder2m0s0DDCVLMqdFrSVNdWDM 4ZQRiY3wCiJjSS8dpwppW0uaVGjtnGQnjQ5sQrIw0vKkwxee0TQ= =WLj9 -----END PGP SIGNATURE----- Merge tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel Pull drm updates from Dave Airlie: "Highlights are usual, more AMD IP blocks for future hw, i915/xe changes, Displayport tunnelling support for i915, msm YUV over DP changes, new tests for ttm, but its mostly a lot of stuff all over the place from lots of people. core: - EDID cleanups - scheduler error handling fixes - managed: add drmm_release_action() with tests - add ratelimited drm debug print - DPCD PSR early transport macro - DP tunneling and bandwidth allocation helpers - remove built-in edids - dp: Avoid AUX transfers on powered-down displays - dp: Add VSC SDP helpers cross drivers: - use new drm print helpers - switch to ->read_edid callback - gem: add stats for shared buffers plus updates to amdgpu, i915, xe syncobj: - fixes to waiting and sleeping ttm: - add tests - fix errno codes - simply busy-placement handling - fix page decryption media: - tc358743: fix v4l device registration video: - move all kernel parameters for video behind CONFIG_VIDEO sound: - remove <drm/drm_edid.h> include from header ci: - add tests for msm - fix apq8016 runner efifb: - use copy of global screen_info state vesafb: - use copy of global screen_info state simplefb: - fix logging bridge: - ite-6505: fix DP link-training bug - samsung-dsim: fix error checking in probe - samsung-dsim: add bsh-smm-s2/pro 
boards - tc358767: fix regmap usage - imx: add i.MX8MP HDMI PVI plus DT bindings - imx: add i.MX8MP HDMI TX plus DT bindings - sii902x: fix probing and unregistration - tc358767: limit pixel PLL input range - switch to new drm_bridge_read_edid() interface panel: - ltk050h3146w: error-handling fixes - panel-edp: support delay between power-on and enable; use put_sync in unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49 V8.0, BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings - add BOE TH101MB31IG002-28A plus DT bindings - add EDT ETML1010G3DRA plus DT bindings - add Novatek NT36672E LCD DSI plus DT bindings - nt36523: support 120Hz timings, fix includes - simple: fix display timings on RK32FN48H - visionox-vtdr6130: fix initialization - add Powkiddy RGB10MAX3 plus DT bindings - st7703: support panel rotation plus DT bindings - add Himax HX83112A plus DT bindings - ltk500hd1829: add support for ltk101b4029w and admatec 9904370 - simple: add BOE BP082WX1-100 8.2" panel plus DT bindungs panel-orientation-quirks: - GPD Win Mini amdgpu: - Validate DMABuf imports in compute VMs - Add RAS ACA framework - PSP 13 fixes - Misc code cleanups - Replay fixes - Atom interpretor PS, WS bounds checking - DML2 fixes - Audio fixes - DCN 3.5 Z state fixes - Remove deprecated ida_simple usage - UBSAN fixes - RAS fixes - Enable seq64 infrastructure - DC color block enablement - Documentation updates - DC documentation updates - DMCUB updates - ATHUB 4.1 support - LSDMA 7.0 support - JPEG DPG support - IH 7.0 support - HDP 7.0 support - VCN 5.0 support - SMU 13.0.6 updates - NBIO 7.11 updates - SDMA 6.1 updates - MMHUB 3.3 updates - DCN 3.5.1 support - NBIF 6.3.1 support - VPE 6.1.1 support amdkfd: - Validate DMABuf imports in compute VMs - SVM fixes - Trap handler updates and enhancements - Fix cache size reporting - Relocate the trap handler radeon: - Atom 
interpretor PS, WS bounds checking - Misc code cleanups xe: - new query for GuC submission version - Remove unused persistent exec_queues - Add vram frequency sysfs attributes - Add the flag XE_VM_BIND_FLAG_DUMPABLE - Drop pre-production workarounds - Drop kunit tests for unsupported platforms - Start pumbling SR-IOV support with memory based interrupts for VF - Allow to map BO in GGTT with PAT index corresponding to XE_CACHE_UC to work with memory based interrupts - Add GuC Doorbells Manager as prep work SR-IOV - Implement additional workarounds for xe2 and MTL - Program a few registers according to perfomance guide spec for Xe2 - Fix remaining 32b build issues and enable it back - Fix build with CONFIG_DEBUG_FS=n - Fix warnings from GuC ABI headers - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF - Release mmap mappings on rpm suspend - Disable mid-thread preemption when not properly supported by hardware - Fix xe_exec by reserving extra fence slot for CPU bind - Fix xe_exec with full long running exec queue - Canonicalize addresses where needed for Xe2 and add to devcoredum - Toggle USM support for Xe2 - Only allow 1 ufence per exec / bind IOCTL - Add GuC firmware loading for Lunar Lake - Add XE_VMA_PTE_64K VMA flag i915: - Add more ADL-N PCI IDs - Enable fastboot also on older platforms - Early transport for panel replay and PSR - New ARL PCI IDs - DP TPS4 PHY test pattern support - Unify and improve VSC SDP for PSR and non-PSR cases - Refactor memory regions and improve debug logging - Rework global state serialization - Remove unused CDCLK divider fields - Unify HDCP connector logging format - Use display instead of graphics version in display code - Move VBT and opregion debugfs next to the implementation - Abstract opregion interface, use opaque type - MTL fixes - HPD handling fixes - Add GuC submission interface version query - Atomically invalidate userptr on mmu-notifier - Update handling of MMIO triggered reports - Don't make assumptions 
about intel_wakeref_t type - Extend driver code of Xe_LPG to Xe_LPG+ - Add flex arrays to struct i915_syncmap - Allow for very slow HuC loading - DP tunneling and bandwidth allocation support msm: - Correct bindings for MSM8976 and SM8650 platforms - Start migration of MDP5 platforms to DPU driver - X1E80100 MDSS support - DPU: - Improve DSC allocation, fixing several important corner cases - Add support for SDM630/SDM660 platforms - Simplify dpu_encoder_phys_ops - Apply fixes targeting DSC support with a single DSC encoder - Apply fixes for HCTL_EN timing configuration - X1E80100 support - Add support for YUV420 over DP - GPU: - fix sc7180 UBWC config - fix a7xx LLC config - new gpu support: a305B, a750, a702 - machine support: SM7150 (different power levels than other a618) - a7xx devcoredump support habanalabs: - configure IRQ affinity according to NUMA node - move HBM MMU page tables inside the HBM - improve device reset - check extended PCIe errors ivpu: - updates to firmware API - refactor BO allocation imx: - use devm_ functions during init hisilicon: - fix EDID includes mgag200: - improve ioremap usage - convert to struct drm_edid - Work around PCI write bursts nouveau: - disp: use kmemdup() - fix EDID includes - documentation fixes qaic: - fixes to BO handling - make use of DRM managed release - fix order of remove operations rockchip: - analogix_dp: get encoder port from DT - inno_hdmi: support HDMI for RK3128 - lvds: error-handling fixes ssd130x: - support SSD133x plus DT bindings tegra: - fix error handling tilcdc: - make use of DRM managed release v3d: - show memory stats in debugfs - Support display MMU page size vc4: - fix error handling in plane prepare_fb - fix framebuffer test in plane helpers virtio: - add venus capset defines vkms: - fix OOB access when programming the LUT - Kconfig improvements vmwgfx: - unmap surface before changing plane state - fix memory leak in error handling - documentation fixes - list command 
SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid - fix null-pointer deref in execbuf - refactor display-mode probing - fix fencing for creating cursor MOBs - fix cursor-memory lifetime xlnx: - fix live video input for ZynqMP DPSUB lima: - fix memory leak loongson: - fail if no VRAM present meson: - switch to new drm_bridge_read_edid() interface renesas: - add RZ/G2L DU support plus DT bindings mxsfb: - Use managed mode config sun4i: - HDMI: updates to atomic mode setting mediatek: - Add display driver for MT8188 VDOSYS1 - DSI driver cleanups - Filter modes according to hardware capability - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip etnaviv: - enhancements for NPU and MRT support" * tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel: (1420 commits) drm/amd/display: Removed redundant @ symbol to fix kernel-doc warnings in -next repo drm/amd/pm: wait for completion of the EnableGfxImu message drm/amdgpu/soc21: add mode2 asic reset for SMU IP v14.0.1 drm/amdgpu: add smu 14.0.1 support drm/amdgpu: add VPE 6.1.1 discovery support drm/amdgpu/vpe: add VPE 6.1.1 support drm/amdgpu/vpe: don't emit cond exec command under collaborate mode drm/amdgpu/vpe: add collaborate mode support for VPE drm/amdgpu/vpe: add PRED_EXE and COLLAB_SYNC OPCODE drm/amdgpu/vpe: add multi instance VPE support drm/amdgpu/discovery: add nbif v6_3_1 ip block drm/amdgpu: Add nbif v6_3_1 ip block support drm/amdgpu: Add pcie v6_1_0 ip headers (v5) drm/amdgpu: Add nbif v6_3_1 ip headers (v5) arch/powerpc: Remove <linux/fb.h> from backlight code macintosh/via-pmu-backlight: Include <linux/backlight.h> fbdev/chipsfb: Include <linux/backlight.h> drm/etnaviv: Restore some id values drm/amdkfd: make kfd_class constant drm/amdgpu: add ring timeout information in devcoredump ...
464 lines
12 KiB
C
464 lines
12 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2021 Intel Corporation
|
|
*/
|
|
|
|
#include "xe_execlist.h"
|
|
|
|
#include <drm/drm_managed.h>
|
|
|
|
#include "instructions/xe_mi_commands.h"
|
|
#include "regs/xe_engine_regs.h"
|
|
#include "regs/xe_gpu_commands.h"
|
|
#include "regs/xe_gt_regs.h"
|
|
#include "regs/xe_lrc_layout.h"
|
|
#include "xe_assert.h"
|
|
#include "xe_bo.h"
|
|
#include "xe_device.h"
|
|
#include "xe_exec_queue.h"
|
|
#include "xe_gt.h"
|
|
#include "xe_hw_fence.h"
|
|
#include "xe_lrc.h"
|
|
#include "xe_macros.h"
|
|
#include "xe_mmio.h"
|
|
#include "xe_mocs.h"
|
|
#include "xe_ring_ops_types.h"
|
|
#include "xe_sched_job.h"
|
|
|
|
/* NOTE(review): not referenced in the visible part of this file. */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Bit position and width of the software context ID that __start_lrc()
 * ORs into the 64-bit LRC descriptor. The XEHP_ variants are the ones
 * selected when GRAPHICS_VERx100() >= 1250.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

/* Descriptor bit masks derived from the shift/width pairs above. */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
|
|
|
|
|
|
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
|
|
u32 ctx_id)
|
|
{
|
|
struct xe_gt *gt = hwe->gt;
|
|
struct xe_device *xe = gt_to_xe(gt);
|
|
u64 lrc_desc;
|
|
|
|
lrc_desc = xe_lrc_descriptor(lrc);
|
|
|
|
if (GRAPHICS_VERx100(xe) >= 1250) {
|
|
xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
|
|
lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
|
|
} else {
|
|
xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
|
|
lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
|
|
}
|
|
|
|
if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
|
|
xe_mmio_write32(hwe->gt, RCU_MODE,
|
|
_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
|
|
|
|
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
|
|
lrc->ring.old_tail = lrc->ring.tail;
|
|
|
|
/*
|
|
* Make sure the context image is complete before we submit it to HW.
|
|
*
|
|
* Ostensibly, writes (including the WCB) should be flushed prior to
|
|
* an uncached write such as our mmio register access, the empirical
|
|
* evidence (esp. on Braswell) suggests that the WC write into memory
|
|
* may not be visible to the HW prior to the completion of the UC
|
|
* register write and that we may begin execution from the context
|
|
* before its image is complete leading to invalid PD chasing.
|
|
*/
|
|
wmb();
|
|
|
|
xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
|
|
xe_bo_ggtt_addr(hwe->hwsp));
|
|
xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
|
|
xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
|
|
_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
|
|
|
|
xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
|
|
lower_32_bits(lrc_desc));
|
|
xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
|
|
upper_32_bits(lrc_desc));
|
|
xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
|
|
EL_CTRL_LOAD);
|
|
}
|
|
|
|
static void __xe_execlist_port_start(struct xe_execlist_port *port,
|
|
struct xe_execlist_exec_queue *exl)
|
|
{
|
|
struct xe_device *xe = gt_to_xe(port->hwe->gt);
|
|
int max_ctx = FIELD_MAX(SW_CTX_ID);
|
|
|
|
if (GRAPHICS_VERx100(xe) >= 1250)
|
|
max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
|
|
|
|
xe_execlist_port_assert_held(port);
|
|
|
|
if (port->running_exl != exl || !exl->has_run) {
|
|
port->last_ctx_id++;
|
|
|
|
/* 0 is reserved for the kernel context */
|
|
if (port->last_ctx_id > max_ctx)
|
|
port->last_ctx_id = 1;
|
|
}
|
|
|
|
__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
|
|
port->running_exl = exl;
|
|
exl->has_run = true;
|
|
}
|
|
|
|
static void __xe_execlist_port_idle(struct xe_execlist_port *port)
|
|
{
|
|
u32 noop[2] = { MI_NOOP, MI_NOOP };
|
|
|
|
xe_execlist_port_assert_held(port);
|
|
|
|
if (!port->running_exl)
|
|
return;
|
|
|
|
xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
|
|
__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
|
|
port->running_exl = NULL;
|
|
}
|
|
|
|
static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
|
|
{
|
|
struct xe_lrc *lrc = exl->q->lrc;
|
|
|
|
return lrc->ring.tail == lrc->ring.old_tail;
|
|
}
|
|
|
|
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
|
|
{
|
|
struct xe_execlist_exec_queue *exl = NULL;
|
|
int i;
|
|
|
|
xe_execlist_port_assert_held(port);
|
|
|
|
for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
|
|
while (!list_empty(&port->active[i])) {
|
|
exl = list_first_entry(&port->active[i],
|
|
struct xe_execlist_exec_queue,
|
|
active_link);
|
|
list_del(&exl->active_link);
|
|
|
|
if (xe_execlist_is_idle(exl)) {
|
|
exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
|
|
continue;
|
|
}
|
|
|
|
list_add_tail(&exl->active_link, &port->active[i]);
|
|
__xe_execlist_port_start(port, exl);
|
|
return;
|
|
}
|
|
}
|
|
|
|
__xe_execlist_port_idle(port);
|
|
}
|
|
|
|
static u64 read_execlist_status(struct xe_hw_engine *hwe)
|
|
{
|
|
struct xe_gt *gt = hwe->gt;
|
|
u32 hi, lo;
|
|
|
|
lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
|
|
hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
|
|
|
|
return lo | (u64)hi << 32;
|
|
}
|
|
|
|
static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
|
|
{
|
|
u64 status;
|
|
|
|
xe_execlist_port_assert_held(port);
|
|
|
|
status = read_execlist_status(port->hwe);
|
|
if (status & BIT(7))
|
|
return;
|
|
|
|
__xe_execlist_port_start_next_active(port);
|
|
}
|
|
|
|
static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
|
|
u16 intr_vec)
|
|
{
|
|
struct xe_execlist_port *port = hwe->exl_port;
|
|
|
|
spin_lock(&port->lock);
|
|
xe_execlist_port_irq_handler_locked(port);
|
|
spin_unlock(&port->lock);
|
|
}
|
|
|
|
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
|
|
enum xe_exec_queue_priority priority)
|
|
{
|
|
xe_execlist_port_assert_held(port);
|
|
|
|
if (port->running_exl && port->running_exl->active_priority >= priority)
|
|
return;
|
|
|
|
__xe_execlist_port_start_next_active(port);
|
|
}
|
|
|
|
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
|
|
{
|
|
struct xe_execlist_port *port = exl->port;
|
|
enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
|
|
|
|
XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
|
|
XE_WARN_ON(priority < 0);
|
|
XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
|
|
|
|
spin_lock_irq(&port->lock);
|
|
|
|
if (exl->active_priority != priority &&
|
|
exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
|
|
/* Priority changed, move it to the right list */
|
|
list_del(&exl->active_link);
|
|
exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
|
|
}
|
|
|
|
if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
|
|
exl->active_priority = priority;
|
|
list_add_tail(&exl->active_link, &port->active[priority]);
|
|
}
|
|
|
|
xe_execlist_port_wake_locked(exl->port, priority);
|
|
|
|
spin_unlock_irq(&port->lock);
|
|
}
|
|
|
|
static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
|
|
{
|
|
struct xe_execlist_port *port =
|
|
container_of(timer, struct xe_execlist_port, irq_fail);
|
|
|
|
spin_lock_irq(&port->lock);
|
|
xe_execlist_port_irq_handler_locked(port);
|
|
spin_unlock_irq(&port->lock);
|
|
|
|
port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
|
|
add_timer(&port->irq_fail);
|
|
}
|
|
|
|
struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
|
|
struct xe_hw_engine *hwe)
|
|
{
|
|
struct drm_device *drm = &xe->drm;
|
|
struct xe_execlist_port *port;
|
|
int i;
|
|
|
|
port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
|
|
if (!port)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
port->hwe = hwe;
|
|
|
|
spin_lock_init(&port->lock);
|
|
for (i = 0; i < ARRAY_SIZE(port->active); i++)
|
|
INIT_LIST_HEAD(&port->active[i]);
|
|
|
|
port->last_ctx_id = 1;
|
|
port->running_exl = NULL;
|
|
|
|
hwe->irq_handler = xe_execlist_port_irq_handler;
|
|
|
|
/* TODO: Fix the interrupt code so it doesn't race like mad */
|
|
timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
|
|
port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
|
|
add_timer(&port->irq_fail);
|
|
|
|
return port;
|
|
}
|
|
|
|
void xe_execlist_port_destroy(struct xe_execlist_port *port)
|
|
{
|
|
del_timer(&port->irq_fail);
|
|
|
|
/* Prevent an interrupt while we're destroying */
|
|
spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock);
|
|
port->hwe->irq_handler = NULL;
|
|
spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock);
|
|
}
|
|
|
|
static struct dma_fence *
|
|
execlist_run_job(struct drm_sched_job *drm_job)
|
|
{
|
|
struct xe_sched_job *job = to_xe_sched_job(drm_job);
|
|
struct xe_exec_queue *q = job->q;
|
|
struct xe_execlist_exec_queue *exl = job->q->execlist;
|
|
|
|
q->ring_ops->emit_job(job);
|
|
xe_execlist_make_active(exl);
|
|
|
|
return dma_fence_get(job->fence);
|
|
}
|
|
|
|
/* drm_sched free_job hook: drop a reference on the wrapping xe_sched_job. */
static void execlist_job_free(struct drm_sched_job *drm_job)
{
	xe_sched_job_put(to_xe_sched_job(drm_job));
}
|
|
|
|
static const struct drm_sched_backend_ops drm_sched_ops = {
|
|
.run_job = execlist_run_job,
|
|
.free_job = execlist_job_free,
|
|
};
|
|
|
|
static int execlist_exec_queue_init(struct xe_exec_queue *q)
|
|
{
|
|
struct drm_gpu_scheduler *sched;
|
|
struct xe_execlist_exec_queue *exl;
|
|
struct xe_device *xe = gt_to_xe(q->gt);
|
|
int err;
|
|
|
|
xe_assert(xe, !xe_device_uc_enabled(xe));
|
|
|
|
drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
|
|
|
|
exl = kzalloc(sizeof(*exl), GFP_KERNEL);
|
|
if (!exl)
|
|
return -ENOMEM;
|
|
|
|
exl->q = q;
|
|
|
|
err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
|
|
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
|
|
XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
|
|
NULL, NULL, q->hwe->name,
|
|
gt_to_xe(q->gt)->drm.dev);
|
|
if (err)
|
|
goto err_free;
|
|
|
|
sched = &exl->sched;
|
|
err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
|
|
if (err)
|
|
goto err_sched;
|
|
|
|
exl->port = q->hwe->exl_port;
|
|
exl->has_run = false;
|
|
exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
|
|
q->execlist = exl;
|
|
q->entity = &exl->entity;
|
|
|
|
xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
|
|
|
|
return 0;
|
|
|
|
err_sched:
|
|
drm_sched_fini(&exl->sched);
|
|
err_free:
|
|
kfree(exl);
|
|
return err;
|
|
}
|
|
|
|
static void execlist_exec_queue_fini_async(struct work_struct *w)
|
|
{
|
|
struct xe_execlist_exec_queue *ee =
|
|
container_of(w, struct xe_execlist_exec_queue, fini_async);
|
|
struct xe_exec_queue *q = ee->q;
|
|
struct xe_execlist_exec_queue *exl = q->execlist;
|
|
struct xe_device *xe = gt_to_xe(q->gt);
|
|
unsigned long flags;
|
|
|
|
xe_assert(xe, !xe_device_uc_enabled(xe));
|
|
|
|
spin_lock_irqsave(&exl->port->lock, flags);
|
|
if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
|
|
list_del(&exl->active_link);
|
|
spin_unlock_irqrestore(&exl->port->lock, flags);
|
|
|
|
drm_sched_entity_fini(&exl->entity);
|
|
drm_sched_fini(&exl->sched);
|
|
kfree(exl);
|
|
|
|
xe_exec_queue_fini(q);
|
|
}
|
|
|
|
/* exec_queue_ops.kill hook: empty stub (marked NIY in the original). */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}
|
|
|
|
static void execlist_exec_queue_fini(struct xe_exec_queue *q)
|
|
{
|
|
INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
|
|
queue_work(system_unbound_wq, &q->execlist->fini_async);
|
|
}
|
|
|
|
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
|
|
enum xe_exec_queue_priority priority)
|
|
{
|
|
/* NIY */
|
|
return 0;
|
|
}
|
|
|
|
/*
 * exec_queue_ops.set_timeslice hook: stub (marked NIY in the original) that
 * ignores @timeslice_us and reports success.
 */
static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q,
					     u32 timeslice_us)
{
	/* NIY */
	return 0;
}
|
|
|
|
static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
|
|
u32 preempt_timeout_us)
|
|
{
|
|
/* NIY */
|
|
return 0;
|
|
}
|
|
|
|
/*
 * exec_queue_ops.suspend hook: stub (marked NIY in the original) that does
 * nothing and reports success.
 */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
|
|
|
|
/* exec_queue_ops.suspend_wait hook: empty stub (marked NIY in the original). */
static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
}
|
|
|
|
/* exec_queue_ops.resume hook: empty stub (marked NIY in the original). */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}
|
|
|
|
static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
|
|
{
|
|
/* NIY */
|
|
return false;
|
|
}
|
|
|
|
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
|
|
.init = execlist_exec_queue_init,
|
|
.kill = execlist_exec_queue_kill,
|
|
.fini = execlist_exec_queue_fini,
|
|
.set_priority = execlist_exec_queue_set_priority,
|
|
.set_timeslice = execlist_exec_queue_set_timeslice,
|
|
.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
|
|
.suspend = execlist_exec_queue_suspend,
|
|
.suspend_wait = execlist_exec_queue_suspend_wait,
|
|
.resume = execlist_exec_queue_resume,
|
|
.reset_status = execlist_exec_queue_reset_status,
|
|
};
|
|
|
|
int xe_execlist_init(struct xe_gt *gt)
|
|
{
|
|
/* GuC submission enabled, nothing to do */
|
|
if (xe_device_uc_enabled(gt_to_xe(gt)))
|
|
return 0;
|
|
|
|
gt->exec_queue_ops = &execlist_exec_queue_ops;
|
|
|
|
return 0;
|
|
}
|