git.dujemihanovic.xyz Git - linux.git/commitdiff
drm/xe: Always check force_wake_get return code
author: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Mon, 18 Mar 2024 15:49:24 +0000 (08:49 -0700)
committer: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Wed, 20 Mar 2024 21:13:58 +0000 (14:13 -0700)
A force_wake_get failure means that the HW might not be awake for the
access we're doing; this can lead to an immediate error or it can be a
more subtle problem (e.g. a register read might return an incorrect
value that is still valid, leading the driver to make a wrong choice
instead of flagging an error).
We avoid logging an error from within the force_wake function because
callers might handle or tolerate the failure, but this only works if all
callers check the return code. The majority already do, but a few do not.
These mainly fall into 3 categories, each of which is handled
differently:

1) error capture: in this case we want to continue the capture, but we
   log an info message in dmesg to notify the user that the capture
   might have incorrect data.

2) ioctl: in this case we return a -EIO error to userspace.

3) unabortable actions: these are scenarios where we can't simply abort
   and retry and so it's better to just try it anyway because there is a
   chance the HW is awake even with the failure. In this case we throw a
   warning so we know there was a forcewake problem if something fails
   down the line.

v2: use gt_WARN_ON where appropriate

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240318154924.3453513-1-daniele.ceraolospurio@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c
drivers/gpu/drm/xe/xe_gsc.c
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
drivers/gpu/drm/xe/xe_guc.c
drivers/gpu/drm/xe/xe_guc_pc.c
drivers/gpu/drm/xe/xe_guc_submit.c
drivers/gpu/drm/xe/xe_query.c

index 0fcd30680323643f44ea238d4ee6736b7cb8937d..7d3aa6bd3524259386409f084165afbeaa2cc1e6 100644 (file)
@@ -13,6 +13,7 @@
 #include "xe_exec_queue.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_submit.h"
 #include "xe_hw_engine.h"
@@ -64,7 +65,9 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
 {
        struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
 
-       xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+       /* keep going if fw fails as we still want to save the memory and SW data */
+       if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+               xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
        xe_vm_snapshot_capture_delayed(ss->vm);
        xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
        xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
@@ -180,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
                }
        }
 
-       xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+       /* keep going if fw fails as we still want to save the memory and SW data */
+       if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
+               xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
 
        coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
        coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
index 09f6e789992100a5cbf3118df25056ca08043411..8339b0b49dfb754614230280c700436d766c7378 100644 (file)
@@ -326,7 +326,7 @@ static void gsc_work(struct work_struct *work)
        spin_unlock_irq(&gsc->lock);
 
        xe_pm_runtime_get(xe);
-       xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+       xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
 
        if (actions & GSC_ACTION_ER_COMPLETE) {
                ret = gsc_er_complete(gt);
index a3c4ffba679d5c9a6c1b5a5573a6196e1e5da3d7..25b4111097bcd19e28a5b0154efb0a1e82f0dd5f 100644 (file)
@@ -247,7 +247,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 
                xe_gt_tlb_invalidation_wait(gt, seqno);
        } else if (xe_device_uc_enabled(xe)) {
-               xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+               xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
                if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
                        xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
                                        PVC_GUC_TLB_INV_DESC1_INVALIDATE);
index fa11cad8386c67d90c94d2553571cb8fd6bdc4f9..c558d978462ae030076df7fb7c3242699001d388 100644 (file)
@@ -240,10 +240,11 @@ static void guc_write_params(struct xe_guc *guc)
 static void guc_fini(struct drm_device *drm, void *arg)
 {
        struct xe_guc *guc = arg;
+       struct xe_gt *gt = guc_to_gt(guc);
 
-       xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+       xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
        xe_uc_fini_hw(&guc_to_gt(guc)->uc);
-       xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+       xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 }
 
 /**
index eb174547c8f2ddbe26aa3a2a0bad26af1e583bc8..786acaf719314cb9969192037d8012845a7c7524 100644 (file)
@@ -927,7 +927,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
                return;
        }
 
-       xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+       XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
        XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
        XE_WARN_ON(xe_guc_pc_stop(pc));
        xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
index 4a2b8e6b81b842dad1f12769a721584cc7b6565f..a71be57f0fc2332eece7d65d038004d4e56b88af 100644 (file)
@@ -833,7 +833,9 @@ static void simple_error_capture(struct xe_exec_queue *q)
                        }
                }
 
-               xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+               if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL))
+                       xe_gt_info(guc_to_gt(guc),
+                                  "failed to get forcewake for error capture");
                xe_guc_ct_print(&guc->ct, &p, true);
                guc_exec_queue_print(q, &p);
                for_each_hw_engine(hwe, guc_to_gt(guc), id) {
index e80321b349188f02c999a4f59c53d5bc0f59f7a6..fcd8680d2ccc56686eff394e676de1816acd8843 100644 (file)
@@ -148,7 +148,8 @@ query_engine_cycles(struct xe_device *xe,
        if (!hwe)
                return -EINVAL;
 
-       xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+       if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+               return -EIO;
 
        __read_timestamps(gt,
                          RING_TIMESTAMP(hwe->mmio_base),