Even if nothing currently parses amdgpu's coredump files, any tool that eventually does will benefit from a version field that tells it how to read the file. Create a version number, displayed at the top of the coredump file, to be incremented whenever the file format or content changes. Signed-off-by: André Almeida <andrealmeid@igalia.com> Reviewed-by: Shashank Sharma <shashank.sharma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
243 lines
6.5 KiB
C
243 lines
6.5 KiB
C
/*
|
|
* Copyright 2021 Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include <linux/devcoredump.h>
|
|
#include <generated/utsrelease.h>
|
|
|
|
#include "amdgpu_reset.h"
|
|
#include "aldebaran.h"
|
|
#include "sienna_cichlid.h"
|
|
#include "smu_v13_0_10.h"
|
|
|
|
int amdgpu_reset_init(struct amdgpu_device *adev)
|
|
{
|
|
int ret = 0;
|
|
|
|
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
|
|
case IP_VERSION(13, 0, 2):
|
|
case IP_VERSION(13, 0, 6):
|
|
ret = aldebaran_reset_init(adev);
|
|
break;
|
|
case IP_VERSION(11, 0, 7):
|
|
ret = sienna_cichlid_reset_init(adev);
|
|
break;
|
|
case IP_VERSION(13, 0, 10):
|
|
ret = smu_v13_0_10_reset_init(adev);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int amdgpu_reset_fini(struct amdgpu_device *adev)
|
|
{
|
|
int ret = 0;
|
|
|
|
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
|
|
case IP_VERSION(13, 0, 2):
|
|
case IP_VERSION(13, 0, 6):
|
|
ret = aldebaran_reset_fini(adev);
|
|
break;
|
|
case IP_VERSION(11, 0, 7):
|
|
ret = sienna_cichlid_reset_fini(adev);
|
|
break;
|
|
case IP_VERSION(13, 0, 10):
|
|
ret = smu_v13_0_10_reset_fini(adev);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
|
|
struct amdgpu_reset_context *reset_context)
|
|
{
|
|
struct amdgpu_reset_handler *reset_handler = NULL;
|
|
|
|
if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
|
|
reset_handler = adev->reset_cntl->get_reset_handler(
|
|
adev->reset_cntl, reset_context);
|
|
if (!reset_handler)
|
|
return -EOPNOTSUPP;
|
|
|
|
return reset_handler->prepare_hwcontext(adev->reset_cntl,
|
|
reset_context);
|
|
}
|
|
|
|
int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
|
|
struct amdgpu_reset_context *reset_context)
|
|
{
|
|
int ret;
|
|
struct amdgpu_reset_handler *reset_handler = NULL;
|
|
|
|
if (adev->reset_cntl)
|
|
reset_handler = adev->reset_cntl->get_reset_handler(
|
|
adev->reset_cntl, reset_context);
|
|
if (!reset_handler)
|
|
return -EOPNOTSUPP;
|
|
|
|
ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return reset_handler->restore_hwcontext(adev->reset_cntl,
|
|
reset_context);
|
|
}
|
|
|
|
|
|
void amdgpu_reset_destroy_reset_domain(struct kref *ref)
|
|
{
|
|
struct amdgpu_reset_domain *reset_domain = container_of(ref,
|
|
struct amdgpu_reset_domain,
|
|
refcount);
|
|
if (reset_domain->wq)
|
|
destroy_workqueue(reset_domain->wq);
|
|
|
|
kvfree(reset_domain);
|
|
}
|
|
|
|
struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
|
|
char *wq_name)
|
|
{
|
|
struct amdgpu_reset_domain *reset_domain;
|
|
|
|
reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
|
|
if (!reset_domain) {
|
|
DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
|
|
return NULL;
|
|
}
|
|
|
|
reset_domain->type = type;
|
|
kref_init(&reset_domain->refcount);
|
|
|
|
reset_domain->wq = create_singlethread_workqueue(wq_name);
|
|
if (!reset_domain->wq) {
|
|
DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
|
|
amdgpu_reset_put_reset_domain(reset_domain);
|
|
return NULL;
|
|
|
|
}
|
|
|
|
atomic_set(&reset_domain->in_gpu_reset, 0);
|
|
atomic_set(&reset_domain->reset_res, 0);
|
|
init_rwsem(&reset_domain->sem);
|
|
|
|
return reset_domain;
|
|
}
|
|
|
|
void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
|
|
{
|
|
atomic_set(&reset_domain->in_gpu_reset, 1);
|
|
down_write(&reset_domain->sem);
|
|
}
|
|
|
|
|
|
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
|
|
{
|
|
atomic_set(&reset_domain->in_gpu_reset, 0);
|
|
up_write(&reset_domain->sem);
|
|
}
|
|
|
|
#ifndef CONFIG_DEV_COREDUMP
|
|
/*
 * Stub used when CONFIG_DEV_COREDUMP is not defined: no coredump is
 * produced and all arguments are ignored.
 */
void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		     struct amdgpu_reset_context *reset_context)
{
	/* Intentionally empty. */
}
|
|
#else
|
|
static ssize_t
|
|
amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
|
|
void *data, size_t datalen)
|
|
{
|
|
struct drm_printer p;
|
|
struct amdgpu_coredump_info *coredump = data;
|
|
struct drm_print_iterator iter;
|
|
int i;
|
|
|
|
iter.data = buffer;
|
|
iter.offset = 0;
|
|
iter.start = offset;
|
|
iter.remain = count;
|
|
|
|
p = drm_coredump_printer(&iter);
|
|
|
|
drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
|
|
drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
|
|
drm_printf(&p, "kernel: " UTS_RELEASE "\n");
|
|
drm_printf(&p, "module: " KBUILD_MODNAME "\n");
|
|
drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
|
|
coredump->reset_time.tv_nsec);
|
|
|
|
if (coredump->reset_task_info.pid)
|
|
drm_printf(&p, "process_name: %s PID: %d\n",
|
|
coredump->reset_task_info.process_name,
|
|
coredump->reset_task_info.pid);
|
|
|
|
if (coredump->reset_vram_lost)
|
|
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
|
|
if (coredump->adev->reset_info.num_regs) {
|
|
drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
|
|
|
|
for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
|
|
drm_printf(&p, "0x%08x: 0x%08x\n",
|
|
coredump->adev->reset_info.reset_dump_reg_list[i],
|
|
coredump->adev->reset_info.reset_dump_reg_value[i]);
|
|
}
|
|
|
|
return count - iter.remain;
|
|
}
|
|
|
|
/**
 * amdgpu_devcoredump_free - free the coredump bookkeeping data
 * @data: pointer previously registered with dev_coredumpm()
 *
 * Frees @data with kfree().
 */
static void amdgpu_devcoredump_free(void *data)
{
	struct amdgpu_coredump_info *coredump = data;

	kfree(coredump);
}
|
|
|
|
/**
 * amdgpu_coredump - capture reset information and register a device coredump
 * @adev: amdgpu device that was reset
 * @vram_lost: whether VRAM was lost; recorded in the dump
 * @reset_context: reset context; its job's VM task info is recorded when
 *                 both the job and its VM are set
 *
 * Allocates a struct amdgpu_coredump_info with GFP_NOWAIT, fills it in
 * (device, VRAM-lost flag, optional task info, timestamp) and passes it
 * to dev_coredumpm() with amdgpu_devcoredump_read() and
 * amdgpu_devcoredump_free() as callbacks. If the allocation fails, an
 * error is logged and no coredump is registered.
 */
void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		     struct amdgpu_reset_context *reset_context)
{
	struct drm_device *ddev = adev_to_drm(adev);
	struct amdgpu_coredump_info *info;

	info = kzalloc(sizeof(*info), GFP_NOWAIT);
	if (!info) {
		DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
		return;
	}

	info->adev = adev;
	info->reset_vram_lost = vram_lost;

	/* Task info is only available when the reset is tied to a job with a VM. */
	if (reset_context->job && reset_context->job->vm)
		info->reset_task_info = reset_context->job->vm->task_info;

	ktime_get_ts64(&info->reset_time);

	/* info is released through amdgpu_devcoredump_free(). */
	dev_coredumpm(ddev->dev, THIS_MODULE, info, 0, GFP_NOWAIT,
		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
}
|
|
#endif
|