summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2020-03-17 15:43:41 -0400
committerAlex Deucher <alexander.deucher@amd.com>2020-04-22 18:11:46 -0400
commitd84a430d9f7b1ce6baedf1305106d0ae706aca76 (patch)
treebbed9c0ce3c43692244ee9033dc685be34c842f2 /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
parent9f65693577d976792e6dbad9ad42105b129259c6 (diff)
drm/amdgpu: fix race between pstate and remote buffer map
Vega20 arbitrates pstate at hive level and not device level. Last peer to remote buffer unmap could drop P-State while another process is still remote buffer mapped. With this fix, P-States still needs to be disabled for now as SMU bug was discovered on synchronous P2P transfers. This should be fixed in the next FW update. Signed-off-by: Jonathan Kim <Jonathan.Kim@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c68
1 files changed, 36 insertions, 32 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 8c3215505e78..54d8a3e7e75c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -373,7 +373,13 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
if (lock)
mutex_lock(&tmp->hive_lock);
- tmp->pstate = -1;
+ tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
+ tmp->hi_req_gpu = NULL;
+ /*
+ * hive pstate on boot is high in vega20 so we have to go to low
+ * pstate on after boot.
+ */
+ tmp->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
mutex_unlock(&xgmi_mutex);
return tmp;
@@ -383,50 +389,51 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
- struct amdgpu_device *tmp_adev;
- bool update_hive_pstate = true;
- bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
+ struct amdgpu_device *request_adev = hive->hi_req_gpu ?
+ hive->hi_req_gpu : adev;
+ bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
+ bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
- if (!hive)
+ /* fw bug so temporarily disable pstate switching */
+ if (!hive || adev->asic_type == CHIP_VEGA20)
return 0;
mutex_lock(&hive->hive_lock);
- if (hive->pstate == pstate) {
- adev->pstate = is_high_pstate ? pstate : adev->pstate;
+ if (is_hi_req)
+ hive->hi_req_count++;
+ else
+ hive->hi_req_count--;
+
+ /*
+ * Vega20 only needs single peer to request pstate high for the hive to
+ * go high but all peers must request pstate low for the hive to go low
+ */
+ if (hive->pstate == pstate ||
+ (!is_hi_req && hive->hi_req_count && !init_low))
goto out;
- }
- dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
+ dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate);
- ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
+ ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
if (ret) {
- dev_err(adev->dev,
+ dev_err(request_adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
- adev->gmc.xgmi.node_id,
- adev->gmc.xgmi.hive_id, ret);
+ request_adev->gmc.xgmi.node_id,
+ request_adev->gmc.xgmi.hive_id, ret);
goto out;
}
- /* Update device pstate */
- adev->pstate = pstate;
-
- /*
- * Update the hive pstate only all devices of the hive
- * are in the same pstate
- */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- if (tmp_adev->pstate != adev->pstate) {
- update_hive_pstate = false;
- break;
- }
- }
- if (update_hive_pstate || is_high_pstate)
+ if (init_low)
+ hive->pstate = hive->hi_req_count ?
+ hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
+ else {
hive->pstate = pstate;
-
+ hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
+ adev : NULL;
+ }
out:
mutex_unlock(&hive->hive_lock);
-
return ret;
}
@@ -507,9 +514,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
- /* Set default device pstate */
- adev->pstate = -1;
-
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);