1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/drivers/gpu/drm/xe/xe_execlist.c
Linus Torvalds 480e035fc4 drm for 6.9:
core:
 - EDID cleanups
 - scheduler error handling fixes
 - managed: add drmm_release_action() with tests
 - add ratelimited drm debug print
 - DPCD PSR early transport macro
 - DP tunneling and bandwidth allocation helpers
 - remove built-in edids
 - dp: Avoid AUX transfers on powered-down displays
 - dp: Add VSC SDP helpers
 
 cross drivers:
 - use new drm print helpers
 - switch to ->read_edid callback
 - gem: add stats for shared buffers plus updates to amdgpu, i915, xe
 
 syncobj:
 - fixes to waiting and sleeping
 
 ttm:
 - add tests
 - fix errno codes
 - simplify busy-placement handling
 - fix page decryption
 
 media:
 - tc358743: fix v4l device registration
 
 video:
 - move all kernel parameters for video behind CONFIG_VIDEO
 
 sound:
 - remove <drm/drm_edid.h> include from header
 
 ci:
 - add tests for msm
 - fix apq8016 runner
 
 efifb:
 - use copy of global screen_info state
 
 vesafb:
 - use copy of global screen_info state
 
 simplefb:
 - fix logging
 
 bridge:
 - ite-6505: fix DP link-training bug
 - samsung-dsim: fix error checking in probe
 - samsung-dsim: add bsh-smm-s2/pro boards
 - tc358767: fix regmap usage
 - imx: add i.MX8MP HDMI PVI plus DT bindings
 - imx: add i.MX8MP HDMI TX plus DT bindings
 - sii902x: fix probing and unregistration
 - tc358767: limit pixel PLL input range
 - switch to new drm_bridge_read_edid() interface
 
 panel:
 - ltk050h3146w: error-handling fixes
 - panel-edp: support delay between power-on and enable; use put_sync in
   unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49 V8.0,
   BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings
 - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings
 - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings
 - add BOE TH101MB31IG002-28A plus DT bindings
 - add EDT ETML1010G3DRA plus DT bindings
 - add Novatek NT36672E LCD DSI plus DT bindings
 - nt36523: support 120Hz timings, fix includes
 - simple: fix display timings on RK32FN48H
 - visionox-vtdr6130: fix initialization
 - add Powkiddy RGB10MAX3 plus DT bindings
 - st7703: support panel rotation plus DT bindings
 - add Himax HX83112A plus DT bindings
 - ltk500hd1829: add support for ltk101b4029w and admatec 9904370
 - simple: add BOE BP082WX1-100 8.2" panel plus DT bindings
 
 panel-orientation-quirks:
 - GPD Win Mini
 
 amdgpu:
 - Validate DMABuf imports in compute VMs
 - Add RAS ACA framework
 - PSP 13 fixes
 - Misc code cleanups
 - Replay fixes
 - Atom interpreter PS, WS bounds checking
 - DML2 fixes
 - Audio fixes
 - DCN 3.5 Z state fixes
 - Remove deprecated ida_simple usage
 - UBSAN fixes
 - RAS fixes
 - Enable seq64 infrastructure
 - DC color block enablement
 - Documentation updates
 - DC documentation updates
 - DMCUB updates
 - ATHUB 4.1 support
 - LSDMA 7.0 support
 - JPEG DPG support
 - IH 7.0 support
 - HDP 7.0 support
 - VCN 5.0 support
 - SMU 13.0.6 updates
 - NBIO 7.11 updates
 - SDMA 6.1 updates
 - MMHUB 3.3 updates
 - DCN 3.5.1 support
 - NBIF 6.3.1 support
 - VPE 6.1.1 support
 
 amdkfd:
 - Validate DMABuf imports in compute VMs
 - SVM fixes
 - Trap handler updates and enhancements
 - Fix cache size reporting
 - Relocate the trap handler
 
 radeon:
 - Atom interpreter PS, WS bounds checking
 - Misc code cleanups
 
 xe:
 - new query for GuC submission version
 - Remove unused persistent exec_queues
 - Add vram frequency sysfs attributes
 - Add the flag XE_VM_BIND_FLAG_DUMPABLE
 - Drop pre-production workarounds
 - Drop kunit tests for unsupported platforms
 - Start plumbing SR-IOV support with memory based interrupts for VF
 - Allow to map BO in GGTT with PAT index corresponding to
   XE_CACHE_UC to work with memory based interrupts
 - Add GuC Doorbells Manager as prep work SR-IOV
 - Implement additional workarounds for xe2 and MTL
 - Program a few registers according to performance guide spec for Xe2
 - Fix remaining 32b build issues and enable it back
 - Fix build with CONFIG_DEBUG_FS=n
 - Fix warnings from GuC ABI headers
 - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF
 - Release mmap mappings on rpm suspend
 - Disable mid-thread preemption when not properly supported by hardware
 - Fix xe_exec by reserving extra fence slot for CPU bind
 - Fix xe_exec with full long running exec queue
 - Canonicalize addresses where needed for Xe2 and add to devcoredump
 - Toggle USM support for Xe2
 - Only allow 1 ufence per exec / bind IOCTL
 - Add GuC firmware loading for Lunar Lake
 - Add XE_VMA_PTE_64K VMA flag
 
 i915:
 - Add more ADL-N PCI IDs
 - Enable fastboot also on older platforms
 - Early transport for panel replay and PSR
 - New ARL PCI IDs
 - DP TPS4 PHY test pattern support
 - Unify and improve VSC SDP for PSR and non-PSR cases
 - Refactor memory regions and improve debug logging
 - Rework global state serialization
 - Remove unused CDCLK divider fields
 - Unify HDCP connector logging format
 - Use display instead of graphics version in display code
 - Move VBT and opregion debugfs next to the implementation
 - Abstract opregion interface, use opaque type
 - MTL fixes
 - HPD handling fixes
 - Add GuC submission interface version query
 - Atomically invalidate userptr on mmu-notifier
 - Update handling of MMIO triggered reports
 - Don't make assumptions about intel_wakeref_t type
 - Extend driver code of Xe_LPG to Xe_LPG+
 - Add flex arrays to struct i915_syncmap
 - Allow for very slow HuC loading
 - DP tunneling and bandwidth allocation support
 
 msm:
 - Correct bindings for MSM8976 and SM8650 platforms
 - Start migration of MDP5 platforms to DPU driver
 - X1E80100 MDSS support
 - DPU:
 - Improve DSC allocation, fixing several important corner cases
 - Add support for SDM630/SDM660 platforms
 - Simplify dpu_encoder_phys_ops
 - Apply fixes targeting DSC support with a single DSC encoder
 - Apply fixes for HCTL_EN timing configuration
 - X1E80100 support
 - Add support for YUV420 over DP
 - GPU:
 - fix sc7180 UBWC config
 - fix a7xx LLC config
 - new gpu support: a305B, a750, a702
 - machine support: SM7150 (different power levels than other a618)
 - a7xx devcoredump support
 
 habanalabs:
 - configure IRQ affinity according to NUMA node
 - move HBM MMU page tables inside the HBM
 - improve device reset
 - check extended PCIe errors
 
 ivpu:
 - updates to firmware API
 - refactor BO allocation
 
 imx:
 - use devm_ functions during init
 
 hisilicon:
 - fix EDID includes
 
 mgag200:
 - improve ioremap usage
 - convert to struct drm_edid
 - Work around PCI write bursts
 
 nouveau:
 - disp: use kmemdup()
 - fix EDID includes
 - documentation fixes
 
 qaic:
 - fixes to BO handling
 - make use of DRM managed release
 - fix order of remove operations
 
 rockchip:
 - analogix_dp: get encoder port from DT
 - inno_hdmi: support HDMI for RK3128
 - lvds: error-handling fixes
 
 ssd130x:
 - support SSD133x plus DT bindings
 
 tegra:
 - fix error handling
 
 tilcdc:
 - make use of DRM managed release
 
 v3d:
 - show memory stats in debugfs
 - Support display MMU page size
 
 vc4:
 - fix error handling in plane prepare_fb
 - fix framebuffer test in plane helpers
 
 virtio:
 - add venus capset defines
 
 vkms:
 - fix OOB access when programming the LUT
 - Kconfig improvements
 
 vmwgfx:
 - unmap surface before changing plane state
 - fix memory leak in error handling
 - documentation fixes
 - list command SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid
 - fix null-pointer deref in execbuf
 - refactor display-mode probing
 - fix fencing for creating cursor MOBs
 - fix cursor-memory lifetime
 
 xlnx:
 - fix live video input for ZynqMP DPSUB
 
 lima:
 - fix memory leak
 
 loongson:
 - fail if no VRAM present
 
 meson:
 - switch to new drm_bridge_read_edid() interface
 
 renesas:
 - add RZ/G2L DU support plus DT bindings
 
 mxsfb:
 - Use managed mode config
 
 sun4i:
 - HDMI: updates to atomic mode setting
 
 mediatek:
 - Add display driver for MT8188 VDOSYS1
 - DSI driver cleanups
 - Filter modes according to hardware capability
 - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip
 
 etnaviv:
 - enhancements for NPU and MRT support
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmXxI+AACgkQDHTzWXnE
 hr5isxAApZ+DxesDbV8bd91KXL03vTfJtM5xVQuZoDzrr20KdTvu2EfQcCFnAUjl
 YtY05U9arDT4Txq5nX70Xc6I5M9HN6lqSUfsWhI6xUcR9TUollPbYwEu8IdoMaCG
 TRnspkiheye+DLFY6omLNH2aG1/k1IIefVWKaClFpbNPaaSHREDiY7/rkmErMBIS
 hrN13+6IVzX7+6fmNgHugUfdyawDJ8J9Nsc8T3Zlioljq3p+VbtStLsGeABTHSEJ
 MX18FwbGllI+QcXvaXM8gIg8NYKvSx/ZtlvKTpyPpTjZT3i3BpY+7yJqWDRQhiGM
 VTX7di1f90yWgzlYE5T33MW7Imvw3q04N7qYJ+Z1LHD/A8VyjwPUKLeul8P9ousT
 0qQLSQsnuXH5AMLDh8IeLG/i0hAMWJ2UbProFSAFhd/UQHP7QOm2mmCsf79me9It
 qKFn6QZKvAKGZk/myTbQIVAmQWrDCpKq4i1aoKXEvcEuQUtM1lPvmMVsStVEfG+y
 ACaI24zSJACViH6rfhVzr74giwZX/ay0NSXqwRXfD5kX8fXb050LxLGW93iYZoHv
 FpdT2C8oTS1A5nsZpoxwVP35euUsp7D4J5YYbrZder2m0s0DDCVLMqdFrSVNdWDM
 4ZQRiY3wCiJjSS8dpwppW0uaVGjtnGQnjQ5sQrIw0vKkwxee0TQ=
 =WLj9
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel

Pull drm updates from Dave Airlie:
 "Highlights are usual, more AMD IP blocks for future hw, i915/xe
  changes, DisplayPort tunnelling support for i915, msm YUV over DP
  changes, new tests for ttm, but it's mostly a lot of stuff all over the
  place from lots of people.

  core:
   - EDID cleanups
   - scheduler error handling fixes
   - managed: add drmm_release_action() with tests
   - add ratelimited drm debug print
   - DPCD PSR early transport macro
   - DP tunneling and bandwidth allocation helpers
   - remove built-in edids
   - dp: Avoid AUX transfers on powered-down displays
   - dp: Add VSC SDP helpers

  cross drivers:
   - use new drm print helpers
   - switch to ->read_edid callback
   - gem: add stats for shared buffers plus updates to amdgpu, i915, xe

  syncobj:
   - fixes to waiting and sleeping

  ttm:
   - add tests
   - fix errno codes
   - simplify busy-placement handling
   - fix page decryption

  media:
   - tc358743: fix v4l device registration

  video:
   - move all kernel parameters for video behind CONFIG_VIDEO

  sound:
   - remove <drm/drm_edid.h> include from header

  ci:
   - add tests for msm
   - fix apq8016 runner

  efifb:
   - use copy of global screen_info state

  vesafb:
   - use copy of global screen_info state

  simplefb:
   - fix logging

  bridge:
   - ite-6505: fix DP link-training bug
   - samsung-dsim: fix error checking in probe
   - samsung-dsim: add bsh-smm-s2/pro boards
   - tc358767: fix regmap usage
   - imx: add i.MX8MP HDMI PVI plus DT bindings
   - imx: add i.MX8MP HDMI TX plus DT bindings
   - sii902x: fix probing and unregistration
   - tc358767: limit pixel PLL input range
   - switch to new drm_bridge_read_edid() interface

  panel:
   - ltk050h3146w: error-handling fixes
   - panel-edp: support delay between power-on and enable; use put_sync
     in unprepare; support Mediatek MT8173 Chromebooks, BOE NV116WHM-N49
     V8.0, BOE NV122WUM-N41, CSO MNC207QS1-1 plus DT bindings
   - panel-lvds: support EDT ETML0700Z9NDHA plus DT bindings
   - panel-novatek: FRIDA FRD400B25025-A-CTK plus DT bindings
   - add BOE TH101MB31IG002-28A plus DT bindings
   - add EDT ETML1010G3DRA plus DT bindings
   - add Novatek NT36672E LCD DSI plus DT bindings
   - nt36523: support 120Hz timings, fix includes
   - simple: fix display timings on RK32FN48H
   - visionox-vtdr6130: fix initialization
   - add Powkiddy RGB10MAX3 plus DT bindings
   - st7703: support panel rotation plus DT bindings
   - add Himax HX83112A plus DT bindings
   - ltk500hd1829: add support for ltk101b4029w and admatec 9904370
   - simple: add BOE BP082WX1-100 8.2" panel plus DT bindings

  panel-orientation-quirks:
   - GPD Win Mini

  amdgpu:
   - Validate DMABuf imports in compute VMs
   - Add RAS ACA framework
   - PSP 13 fixes
   - Misc code cleanups
   - Replay fixes
   - Atom interpreter PS, WS bounds checking
   - DML2 fixes
   - Audio fixes
   - DCN 3.5 Z state fixes
   - Remove deprecated ida_simple usage
   - UBSAN fixes
   - RAS fixes
   - Enable seq64 infrastructure
   - DC color block enablement
   - Documentation updates
   - DC documentation updates
   - DMCUB updates
   - ATHUB 4.1 support
   - LSDMA 7.0 support
   - JPEG DPG support
   - IH 7.0 support
   - HDP 7.0 support
   - VCN 5.0 support
   - SMU 13.0.6 updates
   - NBIO 7.11 updates
   - SDMA 6.1 updates
   - MMHUB 3.3 updates
   - DCN 3.5.1 support
   - NBIF 6.3.1 support
   - VPE 6.1.1 support

  amdkfd:
   - Validate DMABuf imports in compute VMs
   - SVM fixes
   - Trap handler updates and enhancements
   - Fix cache size reporting
   - Relocate the trap handler

  radeon:
   - Atom interpreter PS, WS bounds checking
   - Misc code cleanups

  xe:
   - new query for GuC submission version
   - Remove unused persistent exec_queues
   - Add vram frequency sysfs attributes
   - Add the flag XE_VM_BIND_FLAG_DUMPABLE
   - Drop pre-production workarounds
   - Drop kunit tests for unsupported platforms
   - Start plumbing SR-IOV support with memory based interrupts for VF
   - Allow to map BO in GGTT with PAT index corresponding to XE_CACHE_UC
     to work with memory based interrupts
   - Add GuC Doorbells Manager as prep work SR-IOV
   - Implement additional workarounds for xe2 and MTL
   - Program a few registers according to performance guide spec for Xe2
   - Fix remaining 32b build issues and enable it back
   - Fix build with CONFIG_DEBUG_FS=n
   - Fix warnings from GuC ABI headers
   - Introduce Relay Communication for SR-IOV for VF <-> GuC <-> PF
   - Release mmap mappings on rpm suspend
   - Disable mid-thread preemption when not properly supported by
     hardware
   - Fix xe_exec by reserving extra fence slot for CPU bind
   - Fix xe_exec with full long running exec queue
   - Canonicalize addresses where needed for Xe2 and add to devcoredump
   - Toggle USM support for Xe2
   - Only allow 1 ufence per exec / bind IOCTL
   - Add GuC firmware loading for Lunar Lake
   - Add XE_VMA_PTE_64K VMA flag

  i915:
   - Add more ADL-N PCI IDs
   - Enable fastboot also on older platforms
   - Early transport for panel replay and PSR
   - New ARL PCI IDs
   - DP TPS4 PHY test pattern support
   - Unify and improve VSC SDP for PSR and non-PSR cases
   - Refactor memory regions and improve debug logging
   - Rework global state serialization
   - Remove unused CDCLK divider fields
   - Unify HDCP connector logging format
   - Use display instead of graphics version in display code
   - Move VBT and opregion debugfs next to the implementation
   - Abstract opregion interface, use opaque type
   - MTL fixes
   - HPD handling fixes
   - Add GuC submission interface version query
   - Atomically invalidate userptr on mmu-notifier
   - Update handling of MMIO triggered reports
   - Don't make assumptions about intel_wakeref_t type
   - Extend driver code of Xe_LPG to Xe_LPG+
   - Add flex arrays to struct i915_syncmap
   - Allow for very slow HuC loading
   - DP tunneling and bandwidth allocation support

  msm:
   - Correct bindings for MSM8976 and SM8650 platforms
   - Start migration of MDP5 platforms to DPU driver
   - X1E80100 MDSS support
   - DPU:
      - Improve DSC allocation, fixing several important corner cases
      - Add support for SDM630/SDM660 platforms
      - Simplify dpu_encoder_phys_ops
      - Apply fixes targeting DSC support with a single DSC encoder
      - Apply fixes for HCTL_EN timing configuration
      - X1E80100 support
      - Add support for YUV420 over DP
   - GPU:
      - fix sc7180 UBWC config
      - fix a7xx LLC config
      - new gpu support: a305B, a750, a702
      - machine support: SM7150 (different power levels than other a618)
      - a7xx devcoredump support

  habanalabs:
   - configure IRQ affinity according to NUMA node
   - move HBM MMU page tables inside the HBM
   - improve device reset
   - check extended PCIe errors

  ivpu:
   - updates to firmware API
   - refactor BO allocation

  imx:
   - use devm_ functions during init

  hisilicon:
   - fix EDID includes

  mgag200:
   - improve ioremap usage
   - convert to struct drm_edid
   - Work around PCI write bursts

  nouveau:
   - disp: use kmemdup()
   - fix EDID includes
   - documentation fixes

  qaic:
   - fixes to BO handling
   - make use of DRM managed release
   - fix order of remove operations

  rockchip:
   - analogix_dp: get encoder port from DT
   - inno_hdmi: support HDMI for RK3128
   - lvds: error-handling fixes

  ssd130x:
   - support SSD133x plus DT bindings

  tegra:
   - fix error handling

  tilcdc:
   - make use of DRM managed release

  v3d:
   - show memory stats in debugfs
   - Support display MMU page size

  vc4:
   - fix error handling in plane prepare_fb
   - fix framebuffer test in plane helpers

  virtio:
   - add venus capset defines

  vkms:
   - fix OOB access when programming the LUT
   - Kconfig improvements

  vmwgfx:
   - unmap surface before changing plane state
   - fix memory leak in error handling
   - documentation fixes
   - list command SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 as invalid
   - fix null-pointer deref in execbuf
   - refactor display-mode probing
   - fix fencing for creating cursor MOBs
   - fix cursor-memory lifetime

  xlnx:
   - fix live video input for ZynqMP DPSUB

  lima:
   - fix memory leak

  loongson:
   - fail if no VRAM present

  meson:
   - switch to new drm_bridge_read_edid() interface

  renesas:
   - add RZ/G2L DU support plus DT bindings

  mxsfb:
   - Use managed mode config

  sun4i:
   - HDMI: updates to atomic mode setting

  mediatek:
   - Add display driver for MT8188 VDOSYS1
   - DSI driver cleanups
   - Filter modes according to hardware capability
   - Fix a null pointer crash in mtk_drm_crtc_finish_page_flip

  etnaviv:
   - enhancements for NPU and MRT support"

* tag 'drm-next-2024-03-13' of https://gitlab.freedesktop.org/drm/kernel: (1420 commits)
  drm/amd/display: Removed redundant @ symbol to fix kernel-doc warnings in -next repo
  drm/amd/pm: wait for completion of the EnableGfxImu message
  drm/amdgpu/soc21: add mode2 asic reset for SMU IP v14.0.1
  drm/amdgpu: add smu 14.0.1 support
  drm/amdgpu: add VPE 6.1.1 discovery support
  drm/amdgpu/vpe: add VPE 6.1.1 support
  drm/amdgpu/vpe: don't emit cond exec command under collaborate mode
  drm/amdgpu/vpe: add collaborate mode support for VPE
  drm/amdgpu/vpe: add PRED_EXE and COLLAB_SYNC OPCODE
  drm/amdgpu/vpe: add multi instance VPE support
  drm/amdgpu/discovery: add nbif v6_3_1 ip block
  drm/amdgpu: Add nbif v6_3_1 ip block support
  drm/amdgpu: Add pcie v6_1_0 ip headers (v5)
  drm/amdgpu: Add nbif v6_3_1 ip headers (v5)
  arch/powerpc: Remove <linux/fb.h> from backlight code
  macintosh/via-pmu-backlight: Include <linux/backlight.h>
  fbdev/chipsfb: Include <linux/backlight.h>
  drm/etnaviv: Restore some id values
  drm/amdkfd: make kfd_class constant
  drm/amdgpu: add ring timeout information in devcoredump
  ...
2024-03-13 18:34:05 -07:00

464 lines
12 KiB
C

// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include "xe_execlist.h"
#include <drm/drm_managed.h>
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
/*
 * NOTE(review): XE_EXECLIST_HANG_LIMIT is not referenced anywhere in the
 * visible part of this file - confirm whether it is still needed.
 */
#define XE_EXECLIST_HANG_LIMIT 1
/*
 * Shift and width of the software context ID field that __start_lrc() merges
 * into the 64-bit LRC descriptor. The plain values are used when
 * GRAPHICS_VERx100() < 1250, the XEHP_ values otherwise.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16
/* 64-bit masks built from the shift/width pairs above */
#define SW_CTX_ID \
GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
SW_CTX_ID_SHIFT)
#define XEHP_SW_CTX_ID \
GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
XEHP_SW_CTX_ID_SHIFT)
/*
 * Hand @lrc to @hwe through the execlist submit queue registers.
 *
 * @ctx_id is merged into the LRC descriptor as the software context ID, using
 * the field layout that matches the graphics version. The current ring tail
 * is stored in the context image (and remembered as old_tail) before the
 * descriptor is written to the SQ_CONTENTS registers and loaded with
 * EL_CTRL_LOAD.
 */
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u32 base = hwe->mmio_base;
	u64 desc = xe_lrc_descriptor(lrc);

	/* Pick the context ID field layout for this graphics version */
	if (GRAPHICS_VERx100(xe) < 1250) {
		xe_gt_assert(gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * The context image has to be complete before the HW sees it.
	 *
	 * In theory an uncached write such as the MMIO accesses below should
	 * flush earlier writes (including the WCB). Empirical evidence
	 * (especially on Braswell) suggests otherwise: a WC write to memory
	 * may not be visible to the HW before the UC register write
	 * completes, so execution could start from an incomplete context
	 * image and end up chasing invalid PDs. Hence the explicit barrier.
	 */
	wmb();

	xe_mmio_write32(gt, RING_HWS_PGA(base), xe_bo_ggtt_addr(hwe->hwsp));
	/* NOTE(review): presumably a posting read for the write above - confirm */
	xe_mmio_read32(gt, RING_HWS_PGA(base));
	xe_mmio_write32(gt, RING_MODE(base),
			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));

	/* Descriptor low half, then high half, then trigger the load */
	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(base),
			lower_32_bits(desc));
	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(base),
			upper_32_bits(desc));
	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(base), EL_CTRL_LOAD);
}
/*
 * Start @exl on @port. Caller must hold the port lock.
 *
 * A new context ID is taken from port->last_ctx_id unless @exl is already the
 * running queue and has run before. IDs wrap back to 1 once they exceed the
 * largest value the descriptor field can hold.
 */
static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int ctx_id_limit;

	if (GRAPHICS_VERx100(xe) >= 1250)
		ctx_id_limit = FIELD_MAX(XEHP_SW_CTX_ID);
	else
		ctx_id_limit = FIELD_MAX(SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (!(port->running_exl == exl && exl->has_run)) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context, so wrap to 1 */
		if (port->last_ctx_id > ctx_id_limit)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}
/*
 * Switch @port over to the engine's kernel context. Caller must hold the
 * port lock.
 *
 * Does nothing when no exec queue is recorded as running. Otherwise two
 * MI_NOOPs are written to the kernel LRC ring and that LRC is started with
 * context ID 0.
 */
static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 nops[2] = { MI_NOOP, MI_NOOP };
	struct xe_lrc *kernel_lrc;

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	kernel_lrc = &port->hwe->kernel_lrc;
	xe_lrc_write_ring(kernel_lrc, nops, sizeof(nops));
	__start_lrc(port->hwe, kernel_lrc, 0);
	port->running_exl = NULL;
}
/*
 * An exec queue counts as idle when its ring tail has not moved since the
 * tail was last recorded in old_tail (done by __start_lrc()).
 */
static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *ring_lrc = exl->q->lrc;

	return ring_lrc->ring.old_tail == ring_lrc->ring.tail;
}
/*
 * Pick the next exec queue to run on @port. Caller must hold the port lock.
 *
 * The per-priority active lists are scanned from the highest index down.
 * Idle queues found at the head of a list are dropped from it and marked
 * XE_EXEC_QUEUE_PRIORITY_UNSET. The first non-idle queue is rotated to the
 * tail of its list and started. If nothing is runnable the port is idled.
 */
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	int prio;

	xe_execlist_port_assert_held(port);

	for (prio = ARRAY_SIZE(port->active) - 1; prio >= 0; prio--) {
		struct list_head *bucket = &port->active[prio];

		while (!list_empty(bucket)) {
			struct xe_execlist_exec_queue *exl;

			exl = list_first_entry(bucket,
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (!xe_execlist_is_idle(exl)) {
				/* Re-queue at the tail, then run it */
				list_add_tail(&exl->active_link, bucket);
				__xe_execlist_port_start(port, exl);
				return;
			}

			exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
		}
	}

	__xe_execlist_port_idle(port);
}
/*
 * Read the engine's execlist status as one 64-bit value. The LO register is
 * read first and forms bits 31:0, the HI register forms bits 63:32.
 */
static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 base = hwe->mmio_base;
	u64 low, high;

	low = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(base));
	high = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(base));

	return (high << 32) | low;
}
/*
 * Interrupt work for @port. Caller must hold the port lock.
 *
 * The next active exec queue is only started when bit 7 of the execlist
 * status is clear.
 * NOTE(review): bit 7 presumably means the execlist is still busy - confirm
 * against the register specification.
 */
static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	xe_execlist_port_assert_held(port);

	if (!(read_execlist_status(port->hwe) & BIT(7)))
		__xe_execlist_port_start_next_active(port);
}
/*
 * hwe->irq_handler callback installed by xe_execlist_port_create(). Takes
 * the port lock and runs the locked handler. @intr_vec is not used.
 */
static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *exl_port = hwe->exl_port;

	spin_lock(&exl_port->lock);
	xe_execlist_port_irq_handler_locked(exl_port);
	spin_unlock(&exl_port->lock);
}
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
enum xe_exec_queue_priority priority)
{
xe_execlist_port_assert_held(port);
if (port->running_exl && port->running_exl->active_priority >= priority)
return;
__xe_execlist_port_start_next_active(port);
}
/*
 * Put @exl on its port's active list for the queue's current scheduling
 * priority, then wake the port.
 *
 * Warns if the priority is unset, negative or beyond the active list array.
 * If the queue is already on a list for a different priority it is taken off
 * that list first. The port lock is taken with interrupts disabled for the
 * whole update.
 */
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET &&
	    exl->active_priority != priority) {
		/* Queued under a stale priority: unlink so it gets re-added */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(port, priority);

	spin_unlock_irq(&port->lock);
}
static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
struct xe_execlist_port *port =
container_of(timer, struct xe_execlist_port, irq_fail);
spin_lock_irq(&port->lock);
xe_execlist_port_irq_handler_locked(port);
spin_unlock_irq(&port->lock);
port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
add_timer(&port->irq_fail);
}
/*
 * Create the execlist port for @hwe.
 *
 * The port is allocated with drmm_kzalloc() against @xe's drm device. Its
 * lock and per-priority active lists are initialised, the execlist interrupt
 * handler is installed on @hwe, and a timer that polls the port every
 * 1000 ms is started.
 *
 * Returns the new port, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct xe_execlist_port *port;
	int prio;

	port = drmm_kzalloc(&xe->drm, sizeof(*port), GFP_KERNEL);
	if (!port)
		return ERR_PTR(-ENOMEM);

	port->hwe = hwe;

	spin_lock_init(&port->lock);
	for (prio = 0; prio < ARRAY_SIZE(port->active); prio++)
		INIT_LIST_HEAD(&port->active[prio]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;
}
void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
del_timer(&port->irq_fail);
/* Prevent an interrupt while we're destroying */
spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
port->hwe->irq_handler = NULL;
spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
}
/*
 * drm_sched run_job callback: emit the job through the queue's ring ops,
 * make the exec queue active on its port, and return a new reference to the
 * job's fence.
 */
static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return dma_fence_get(job->fence);
}
/* drm_sched free_job callback: drop a reference on the xe job */
static void execlist_job_free(struct drm_sched_job *drm_job)
{
	xe_sched_job_put(to_xe_sched_job(drm_job));
}
/* Scheduler callbacks handed to drm_sched_init() for each execlist exec queue */
static const struct drm_sched_backend_ops drm_sched_ops = {
.run_job = execlist_run_job,
.free_job = execlist_job_free,
};
/*
 * Exec queue ops .init: allocate and set up the execlist state for @q.
 *
 * Creates a dedicated DRM scheduler and scheduler entity for the queue,
 * binds the queue to its engine's execlist port and assigns the queue name.
 * Asserts that GuC submission is disabled.
 *
 * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
 * error from drm_sched_init() / drm_sched_entity_init().
 */
static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_execlist_exec_queue *exl;
	struct drm_gpu_scheduler *sched;
	int ret;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	/* The job limit is the ring size divided by MAX_JOB_SIZE_BYTES */
	ret = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
			     q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
			     NULL, NULL, q->hwe->name,
			     xe->drm.dev);
	if (ret)
		goto err_free_exl;

	sched = &exl->sched;
	ret = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (ret)
		goto err_fini_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_fini_sched:
	drm_sched_fini(&exl->sched);
err_free_exl:
	kfree(exl);
	return ret;
}
static void execlist_exec_queue_fini_async(struct work_struct *w)
{
struct xe_execlist_exec_queue *ee =
container_of(w, struct xe_execlist_exec_queue, fini_async);
struct xe_exec_queue *q = ee->q;
struct xe_execlist_exec_queue *exl = q->execlist;
struct xe_device *xe = gt_to_xe(q->gt);
unsigned long flags;
xe_assert(xe, !xe_device_uc_enabled(xe));
spin_lock_irqsave(&exl->port->lock, flags);
if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
list_del(&exl->active_link);
spin_unlock_irqrestore(&exl->port->lock, flags);
drm_sched_entity_fini(&exl->entity);
drm_sched_fini(&exl->sched);
kfree(exl);
xe_exec_queue_fini(q);
}
/* Exec queue ops .kill: not implemented yet for execlists, does nothing */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}
/*
 * Exec queue ops .fini: defer teardown of @q's execlist state to
 * execlist_exec_queue_fini_async(), queued on system_unbound_wq.
 */
static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
queue_work(system_unbound_wq, &q->execlist->fini_async);
}
/*
 * Exec queue ops .set_priority: not implemented yet for execlists.
 * @priority is ignored and 0 is returned.
 */
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}
/*
 * Exec queue ops .set_timeslice: not implemented yet for execlists.
 * @timeslice_us is ignored and 0 is returned.
 */
static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q,
					     u32 timeslice_us)
{
	/* NIY */
	return 0;
}
/*
 * Exec queue ops .set_preempt_timeout: not implemented yet for execlists.
 * @preempt_timeout_us is ignored and 0 is returned.
 */
static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						   u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}
/*
 * Exec queue ops .suspend: not implemented yet for execlists.
 * Returns 0 without doing anything.
 */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}
/* Exec queue ops .suspend_wait: not implemented yet for execlists, does nothing */
static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
}
/* Exec queue ops .resume: not implemented yet for execlists, does nothing */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}
/*
 * Exec queue ops .reset_status: not implemented yet for execlists.
 * Always returns false.
 */
static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}
/*
 * Exec queue backend for execlist submission; installed on a GT by
 * xe_execlist_init() when GuC submission is not enabled. The callbacks whose
 * bodies are marked NIY are stubs that do nothing (returning 0 or false
 * where a value is required).
 */
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.init = execlist_exec_queue_init,
.kill = execlist_exec_queue_kill,
.fini = execlist_exec_queue_fini,
.set_priority = execlist_exec_queue_set_priority,
.set_timeslice = execlist_exec_queue_set_timeslice,
.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
.suspend = execlist_exec_queue_suspend,
.suspend_wait = execlist_exec_queue_suspend_wait,
.resume = execlist_exec_queue_resume,
.reset_status = execlist_exec_queue_reset_status,
};
/*
 * Select the execlist exec queue backend for @gt.
 *
 * The execlist ops are only installed when GuC submission is not enabled;
 * with GuC submission enabled there is nothing to do. Always returns 0.
 */
int xe_execlist_init(struct xe_gt *gt)
{
	if (!xe_device_uc_enabled(gt_to_xe(gt)))
		gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}