Avoid holding gpu lock when calling runpm, to avoid this lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.4.3-debug+ #14 Not tainted ------------------------------------------------------ ring0/373 is trying to acquire lock: ffffffead86efb98 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x70/0x98 but task is already holding lock: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&gpu->lock){+.+.}-{3:3}: __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 msm_job_run+0x7c/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 -> #3 (dma_fence_map){++++}-{0:0}: __dma_fence_might_wait+0x74/0xc0 dma_resv_lockdep+0x1f0/0x2e8 do_one_initcall+0xb4/0x214 kernel_init_freeable+0x338/0x33c kernel_init+0x30/0x134 ret_from_fork+0x10/0x20 -> #2 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}: fs_reclaim_acquire+0x7c/0x9c slab_pre_alloc_hook.constprop.0+0x40/0x250 __kmem_cache_alloc_node+0x60/0x18c kmalloc_node_trace+0x40/0x84 alloc_worker+0x2c/0x64 init_rescuer+0x34/0xe0 workqueue_init+0x168/0x1fc kernel_init_freeable+0x15c/0x33c kernel_init+0x30/0x134 ret_from_fork+0x10/0x20 -> #1 (fs_reclaim){+.+.}-{0:0}: __fs_reclaim_acquire+0x3c/0x48 fs_reclaim_acquire+0x50/0x9c slab_pre_alloc_hook.constprop.0+0x40/0x250 __kmem_cache_alloc_node+0x60/0x18c kmalloc_trace+0x44/0x88 clk_rcg2_dfs_determine_rate+0x60/0x214 clk_core_determine_round_nolock+0xb8/0xf0 clk_core_round_rate_nolock+0x84/0x118 clk_core_round_rate_nolock+0xd8/0x118 clk_round_rate+0x6c/0xd0 geni_se_clk_tbl_get+0x78/0xc0 geni_se_clk_freq_match+0x44/0xe4 get_spi_clk_cfg+0x50/0xf4 geni_spi_set_clock_and_bw+0x54/0x104 spi_geni_prepare_message+0x130/0x174 __spi_pump_transfer_message+0x200/0x4d8 __spi_sync+0x13c/0x23c spi_sync_locked+0x18/0x24 do_cros_ec_pkt_xfer_spi+0x124/0x3f0 
cros_ec_xfer_high_pri_work+0x28/0x3c kthread_worker_fn+0x14c/0x27c kthread+0xf0/0x100 ret_from_fork+0x10/0x20 -> #0 (prepare_lock){+.+.}-{3:3}: __lock_acquire+0xdf8/0x109c lock_acquire+0x234/0x284 __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 clk_prepare_lock+0x70/0x98 clk_prepare+0x24/0x50 clk_bulk_prepare+0x50/0x9c a6xx_gmu_resume+0x94/0x800 [msm] a6xx_gmu_pm_resume+0x38/0x158 [msm] adreno_runtime_resume+0x2c/0x38 [msm] pm_generic_runtime_resume+0x30/0x44 __rpm_callback+0x4c/0x134 rpm_callback+0x78/0x7c rpm_resume+0x3a4/0x46c __pm_runtime_resume+0x78/0xbc pm_runtime_get_sync.isra.0+0x14/0x20 [msm] msm_gpu_submit+0x4c/0x12c [msm] msm_job_run+0x88/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 other info that might help us debug this: Chain exists of: prepare_lock --> dma_fence_map --> &gpu->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&gpu->lock); lock(dma_fence_map); lock(&gpu->lock); lock(prepare_lock); *** DEADLOCK *** 2 locks held by ring0/373: #0: ffffffead875ae50 (dma_fence_map){++++}-{0:0}, at: drm_sched_main+0x54/0x354 [gpu_sched] #1: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm] stack backtrace: CPU: 2 PID: 373 Comm: ring0 Not tainted 6.4.3-debug+ #14 Hardware name: Google Villager (rev1+) with LTE (DT) Call trace: dump_backtrace+0xb4/0xf0 show_stack+0x20/0x30 dump_stack_lvl+0x60/0x84 dump_stack+0x18/0x24 print_circular_bug+0x1cc/0x234 check_noncircular+0x78/0xac __lock_acquire+0xdf8/0x109c lock_acquire+0x234/0x284 __mutex_lock+0xc8/0x388 mutex_lock_nested+0x2c/0x38 clk_prepare_lock+0x70/0x98 clk_prepare+0x24/0x50 clk_bulk_prepare+0x50/0x9c a6xx_gmu_resume+0x94/0x800 [msm] a6xx_gmu_pm_resume+0x38/0x158 [msm] adreno_runtime_resume+0x2c/0x38 [msm] pm_generic_runtime_resume+0x30/0x44 __rpm_callback+0x4c/0x134 rpm_callback+0x78/0x7c rpm_resume+0x3a4/0x46c __pm_runtime_resume+0x78/0xbc pm_runtime_get_sync.isra.0+0x14/0x20 [msm] msm_gpu_submit+0x4c/0x12c [msm] 
msm_job_run+0x88/0x128 [msm] drm_sched_main+0x264/0x354 [gpu_sched] kthread+0xf0/0x100 ret_from_fork+0x10/0x20 Signed-off-by: Rob Clark <robdclark@chromium.org> Patchwork: https://patchwork.freedesktop.org/patch/552298/
131 lines
3 KiB
C
131 lines
3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2013 Red Hat
|
|
* Author: Rob Clark <robdclark@gmail.com>
|
|
*/
|
|
|
|
#include "msm_ringbuffer.h"
|
|
#include "msm_gpu.h"
|
|
|
|
static uint num_hw_submissions = 8;
|
|
MODULE_PARM_DESC(num_hw_submissions, "The max # of jobs to write into ringbuffer (default 8)");
|
|
module_param(num_hw_submissions, uint, 0600);
|
|
|
|
static struct dma_fence *msm_job_run(struct drm_sched_job *job)
|
|
{
|
|
struct msm_gem_submit *submit = to_msm_submit(job);
|
|
struct msm_fence_context *fctx = submit->ring->fctx;
|
|
struct msm_gpu *gpu = submit->gpu;
|
|
struct msm_drm_private *priv = gpu->dev->dev_private;
|
|
int i;
|
|
|
|
msm_fence_init(submit->hw_fence, fctx);
|
|
|
|
submit->seqno = submit->hw_fence->seqno;
|
|
|
|
mutex_lock(&priv->lru.lock);
|
|
|
|
for (i = 0; i < submit->nr_bos; i++) {
|
|
struct drm_gem_object *obj = submit->bos[i].obj;
|
|
|
|
msm_gem_unpin_active(obj);
|
|
submit->bos[i].flags &= ~BO_PINNED;
|
|
}
|
|
|
|
mutex_unlock(&priv->lru.lock);
|
|
|
|
msm_gpu_submit(gpu, submit);
|
|
|
|
return dma_fence_get(submit->hw_fence);
|
|
}
|
|
|
|
/*
 * Scheduler ->free_job() hook: clean up the scheduler job, then drop a
 * reference on the msm submit that embeds it.
 */
static void msm_job_free(struct drm_sched_job *job)
{
	struct msm_gem_submit *owner;

	/* Resolve the containing submit before the job itself is cleaned up. */
	owner = to_msm_submit(job);

	drm_sched_job_cleanup(job);
	msm_gem_submit_put(owner);
}
|
|
|
|
static const struct drm_sched_backend_ops msm_sched_ops = {
|
|
.run_job = msm_job_run,
|
|
.free_job = msm_job_free
|
|
};
|
|
|
|
/*
 * msm_ringbuffer_new - allocate and set up one GPU ringbuffer
 * @gpu:          GPU the ring belongs to
 * @id:           ring index; used in the BO name ("ring%d") and in the fence
 *                context name ("gpu-ring-%d")
 * @memptrs:      stored in ring->memptrs; the address of its ->fence member
 *                is handed to the ring's fence context
 * @memptrs_iova: stored in ring->memptrs_iova (presumably the GPU address of
 *                @memptrs - confirm against callers)
 *
 * Allocates the ring structure and its backing kernel BO, initialises the
 * ring pointers, creates the DRM scheduler for the ring and finally the
 * fence context.
 *
 * Returns the new ring on success or an ERR_PTR() on failure.  On failure
 * everything set up so far is released through msm_ringbuffer_destroy().
 */
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
		void *memptrs, uint64_t memptrs_iova)
{
	struct msm_ringbuffer *ring;
	long sched_timeout;
	char name[32];
	int ret;

	/* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */
	BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ));

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring) {
		ret = -ENOMEM;
		goto fail;
	}

	ring->gpu = gpu;
	ring->id = id;

	ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ,
		check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY),
		gpu->aspace, &ring->bo, &ring->iova);

	if (IS_ERR(ring->start)) {
		ret = PTR_ERR(ring->start);
		/* Never leave an error cookie behind in the ring */
		ring->start = NULL;
		goto fail;
	}

	msm_gem_object_set_name(ring->bo, "ring%d", id);

	/*
	 * The size is divided by four before being added to ->start
	 * (presumably because ->start points at 32-bit words - confirm
	 * against the struct definition).
	 */
	ring->end   = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2);
	ring->next  = ring->start;
	ring->cur   = ring->start;

	ring->memptrs = memptrs;
	ring->memptrs_iova = memptrs_iova;

	/* currently managing hangcheck ourselves: */
	sched_timeout = MAX_SCHEDULE_TIMEOUT;

	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
			num_hw_submissions, 0, sched_timeout,
			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
	if (ret) {
		goto fail;
	}

	INIT_LIST_HEAD(&ring->submits);
	spin_lock_init(&ring->submit_lock);
	spin_lock_init(&ring->preempt_lock);

	snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);

	ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
	/*
	 * Do not hand back a ring whose fence context could not be created:
	 * msm_job_run() passes ring->fctx straight to msm_fence_init().
	 * NOTE(review): assumes msm_fence_context_alloc() reports failure
	 * with an ERR_PTR(), like msm_gem_kernel_new() above - confirm.
	 */
	if (IS_ERR(ring->fctx)) {
		ret = PTR_ERR(ring->fctx);
		/* Keep the cleanup path from freeing an error cookie */
		ring->fctx = NULL;
		goto fail;
	}

	return ring;

fail:
	/*
	 * msm_ringbuffer_destroy() ignores a NULL ring, so the kzalloc()
	 * failure lands here safely.
	 * NOTE(review): failures that happen before drm_sched_init() still
	 * reach drm_sched_fini() through msm_ringbuffer_destroy(); confirm
	 * that is safe on a zero-initialised scheduler.
	 */
	msm_ringbuffer_destroy(ring);
	return ERR_PTR(ret);
}
|
|
|
|
void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
|
|
{
|
|
if (IS_ERR_OR_NULL(ring))
|
|
return;
|
|
|
|
drm_sched_fini(&ring->sched);
|
|
|
|
msm_fence_context_free(ring->fctx);
|
|
|
|
msm_gem_kernel_put(ring->bo, ring->gpu->aspace);
|
|
|
|
kfree(ring);
|
|
}
|