drm/amdkfd: Fix CU occupancy for GFX 9.4.3

author Mukul Joshi <mukul.joshi@amd.com>

Fri, 20 Sep 2024 18:59:29 +0000 (14:59 -0400)

committer Alex Deucher <alexander.deucher@amd.com>

Wed, 25 Sep 2024 16:56:07 +0000 (12:56 -0400)
author Mukul Joshi <mukul.joshi@amd.com>
Fri, 20 Sep 2024 18:59:29 +0000 (14:59 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 25 Sep 2024 16:56:07 +0000 (12:56 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

index 26b1f37c316e38266d87367c0cd5c3a8d328bd7b..3bc0cbf45bc59ac12ed8a0c26ef70a18a5f32dbe 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -963,14 +963,14 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
          */
         pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
         queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
-       soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
-       reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst,
+       soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+       reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
                                   mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
         wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
         if (wave_cnt != 0) {
                 queue_cnt->wave_cnt += wave_cnt;
                 queue_cnt->doorbell_off =
-                       (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) &
+                       (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
                          CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
                          CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
         }
@@ -1033,7 +1033,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
         DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
  
         lock_spi_csq_mutexes(adev);
-       soc15_grbm_select(adev, 1, 0, 0, 0, inst);
+       soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
  
         /*
          * Iterate through the shader engines and arrays of the device
@@ -1046,7 +1046,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
         se_cnt = adev->gfx.config.max_shader_engines;
         for (se_idx = 0; se_idx < se_cnt; se_idx++) {
                 amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
-               queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
+               queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
  
                 /*
                  * Assumption: queue map encodes following schema: four
@@ -1071,7 +1071,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
         }
  
         amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
-       soc15_grbm_select(adev, 0, 0, 0, 0, inst);
+       soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
         unlock_spi_csq_mutexes(adev);
  
         /* Update the output parameters and return */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 29578550b47818719b7288dc42d1e3c80b32a994..648f40091aa39567bc03e40cf496ae6d71300aa2 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -3542,15 +3542,19 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
  
  bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
                                  struct qcm_process_device *qpd,
-                                int doorbell_off)
+                                int doorbell_off, u32 *queue_format)
  {
         struct queue *q;
         bool r = false;
  
+       if (!queue_format)
+               return r;
+
         dqm_lock(dqm);
  
         list_for_each_entry(q, &qpd->queues_list, list) {
                 if (q->properties.doorbell_off == doorbell_off) {
+                       *queue_format = q->properties.format;
                         r = true;
                         goto out;
                 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h

index 80be2036abeaca0699265aba8c4d037f269c7282..09ab36f8e8c69e9f0103e7f1b80e1edb5e7a50c6 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -326,7 +326,7 @@ int debug_map_and_unlock(struct device_queue_manager *dqm);
  int debug_refresh_runlist(struct device_queue_manager *dqm);
  bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
                                  struct qcm_process_device *qpd,
-                                int doorbell_off);
+                                int doorbell_off, u32 *queue_format);
  
  static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

index d73841268c9ba79d6ad9da9ef0ff8d361e91ff15..d07acf1b2f93c342ac49a894ace4851bcb1e01fb 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -272,6 +272,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
         struct kfd_process_device *pdd = NULL;
         int i;
         struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
+       u32 queue_format;
  
         memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
  
@@ -292,14 +293,27 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
         wave_cnt = 0;
         max_waves_per_cu = 0;
  
+       /*
+        * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+        * For AQL queues, because of cooperative dispatch we multiply the wave count
+        * by number of XCCs in the partition to get the total wave counts across all
+        * XCCs in the partition.
+        * For PM4 queues, there is no cooperative dispatch, so wave_cnt stays as it is.
+        */
         dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
-                       &max_waves_per_cu, 0);
+                       &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
  
         for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
                 if (cu_occupancy[i].wave_cnt != 0 &&
                     kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
-                                               cu_occupancy[i].doorbell_off))
-                       wave_cnt += cu_occupancy[i].wave_cnt;
+                                               cu_occupancy[i].doorbell_off,
+                                               &queue_format)) {
+                       if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+                               wave_cnt += cu_occupancy[i].wave_cnt;
+                       else
+                               wave_cnt += (NUM_XCC(dev->xcc_mask) *
+                                               cu_occupancy[i].wave_cnt);
+               }
         }
  
         /* Translate wave count to number of compute units */
author	Mukul Joshi <mukul.joshi@amd.com>
	Fri, 20 Sep 2024 18:59:29 +0000 (14:59 -0400)
committer	Alex Deucher <alexander.deucher@amd.com>
	Wed, 25 Sep 2024 16:56:07 +0000 (12:56 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_process.c		patch \| blob \| history