The reset domain now contains the register access semaphore, so it needs to remain present for as long as any device in a hive needs it; it therefore cannot be bound to the XGMI hive life cycle. Address this by making the reset domain refcounted and pointed to by each member of the hive and by the hive itself. v4: Fix crash on boot with an XGMI hive by adding a type to reset_domain. XGMI will only create a new reset_domain if the previous one was of the single-device type, meaning it's the first boot. Otherwise it will take a refcount on the existing reset_domain from the amdgpu device. Add a wrapper around reset_domain->refcount get/put and a wrapper around sending to the reset wq (Lijo) Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Link: https://www.spinics.net/lists/amd-gfx/msg74121.html
75 lines
2.7 KiB
C
75 lines
2.7 KiB
C
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef __AMDGPU_XGMI_H__
|
|
#define __AMDGPU_XGMI_H__
|
|
|
|
#include <drm/task_barrier.h>
|
|
#include "amdgpu_psp.h"
|
|
|
|
|
|
struct amdgpu_hive_info {
|
|
struct kobject kobj;
|
|
uint64_t hive_id;
|
|
struct list_head device_list;
|
|
struct list_head node;
|
|
atomic_t number_devices;
|
|
struct mutex hive_lock;
|
|
int hi_req_count;
|
|
struct amdgpu_device *hi_req_gpu;
|
|
struct task_barrier tb;
|
|
enum {
|
|
AMDGPU_XGMI_PSTATE_MIN,
|
|
AMDGPU_XGMI_PSTATE_MAX_VEGA20,
|
|
AMDGPU_XGMI_PSTATE_UNKNOWN
|
|
} pstate;
|
|
|
|
struct amdgpu_reset_domain *reset_domain;
|
|
};
|
|
|
|
/*
 * Descriptor for one PCS RAS error field: a printable name together with a
 * mask/shift pair.
 *
 * NOTE(review): presumably the field value is extracted from a status
 * register as (value & pcs_err_mask) >> pcs_err_shift - confirm against the
 * users of this struct.
 */
struct amdgpu_pcs_ras_field {
	const char *err_name;		/* name of the error field */
	uint32_t pcs_err_mask;		/* bit mask of the field */
	uint32_t pcs_err_shift;		/* bit shift of the field */
};

/*
 * Exported XGMI interface.
 *
 * NOTE(review): the implementations are not part of this header; the one-line
 * summaries below are inferred from the names and signatures and should be
 * confirmed against the definitions.
 */

/* RAS callback table for XGMI, defined elsewhere. */
extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;

/* Presumably looks up (and takes a reference on) the hive @adev belongs to. */
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
/* Presumably drops a reference obtained via amdgpu_get_xgmi_hive(). */
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
/* Returns an int status; presumably 0 on success - TODO confirm. */
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive,
				struct amdgpu_device *adev);
/* Returns an int status; presumably 0 on success - TODO confirm. */
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
/* Returns an int status; presumably 0 on success - TODO confirm. */
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
/* Returns an int status; presumably 0 on success - TODO confirm. */
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
/* Presumably the XGMI hop count between @adev and @peer_adev. */
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
			       struct amdgpu_device *peer_adev);
/* Presumably the number of XGMI links between @adev and @peer_adev. */
int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
			      struct amdgpu_device *peer_adev);
/* Presumably translates @addr into an address relative to @adev. */
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
					   uint64_t addr);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
|
|
struct amdgpu_device *bo_adev)
|
|
{
|
|
return (adev != bo_adev &&
|
|
adev->gmc.xgmi.hive_id &&
|
|
adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
|
|
}
|
|
|
|
#endif
|