From 6689c167ae14c312972e89be1121e933e4de0001 Mon Sep 17 00:00:00 2001 From: "McAulay, Alistair" Date: Fri, 15 Aug 2014 18:51:35 +0100 Subject: [PATCH] drm/i915: Rework GPU reset sequence to match driver load & thaw This patch is to address Daniels concerns over different code during reset: http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html "The reason for aiming as hard as possible to use the exact same code for driver load, gpu reset and runtime pm/system resume is that we've simply seen too many bugs due to slight variations and unintended omissions." Tested using igt drv_hangman. V2: Cleaner way of preventing check_wedge returning -EAGAIN V3: Clean the last_context during reset, to ensure do_switch() does the MI_SET_CONTEXT. As per review. Signed-off-by: McAulay, Alistair Reviewed-by: Mika Kuoppala [danvet: Rebase over ctx->ppgtt rework and extend the comment in check_wedge a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 6 +++ drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_gem.c | 8 +++- drivers/gpu/drm/i915/i915_gem_context.c | 33 +++---------- drivers/gpu/drm/i915/i915_gem_gtt.c | 63 +++---------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- 6 files changed, 30 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ff4db249cc72..683be99117c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -844,7 +844,13 @@ int i915_reset(struct drm_device *dev) !dev_priv->ums.mm_suspended) { dev_priv->ums.mm_suspended = 0; + /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */ + dev_priv->gpu_error.reload_in_reset = true; + ret = i915_gem_init_hw(dev); + + dev_priv->gpu_error.reload_in_reset = false; + mutex_unlock(&dev->struct_mutex); if (ret) { DRM_ERROR("Failed hw init on reset %d\n", ret); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bcf8783dbc2e..9c3677e4448e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1239,6 +1239,9 @@ struct i915_gpu_error { /* For missed irq/seqno simulation. */ unsigned int test_irq_rings; + + /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */ + bool reload_in_reset; }; enum modeset_restore { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f1bb69377a35..fcd7dde6e444 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1085,7 +1085,13 @@ i915_gem_check_wedge(struct i915_gpu_error *error, if (i915_terminally_wedged(error)) return -EIO; - return -EAGAIN; + /* + * Check if GPU Reset is in progress - we need intel_ring_begin + * to work properly to reinit the hw state while the gpu is + * still marked as reset-in-progress. Handle this with a flag. + */ + if (!error->reload_in_reset) + return -EAGAIN; } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9683e62ec61a..0fdb357f8a5c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -289,34 +289,17 @@ void i915_gem_context_reset(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int i; - /* Prevent the hardware from restoring the last context (which hung) on - * the next switch */ for (i = 0; i < I915_NUM_RINGS; i++) { struct intel_engine_cs *ring = &dev_priv->ring[i]; - struct intel_context *dctx = ring->default_context; struct intel_context *lctx = ring->last_context; - /* Do a fake switch to the default context */ - if (lctx == dctx) - continue; - - if (!lctx) - continue; + if (lctx) { + if (lctx->legacy_hw_ctx.rcs_state && i == RCS) + i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state); - if (dctx->legacy_hw_ctx.rcs_state && i == RCS) { - WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state, - get_context_alignment(dev), 0)); - /* Fake a finish/inactive */ - dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0; - dctx->legacy_hw_ctx.rcs_state->active = 0; + i915_gem_context_unreference(lctx); + ring->last_context = NULL; } - - if (lctx->legacy_hw_ctx.rcs_state && i == RCS) - i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state); - - i915_gem_context_unreference(lctx); - i915_gem_context_reference(dctx); - ring->last_context = dctx; } } @@ -412,10 +395,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv) struct intel_engine_cs *ring; int ret, i; - /* FIXME: We should make this work, even in reset */ - if (i915_reset_in_progress(&dev_priv->gpu_error)) - return 0; - BUG_ON(!dev_priv->ring[RCS].default_context); for_each_ring(ring, dev_priv, i) { @@ -558,7 +537,7 @@ static int do_switch(struct intel_engine_cs *ring, from = ring->last_context; if (to->ppgtt) { - ret = to->ppgtt->switch_mm(to->ppgtt, ring, false); + ret = to->ppgtt->switch_mm(to->ppgtt, ring); if (ret) goto unpin_out; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4db237065610..22ad38bb93f6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -204,19 +204,12 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, - uint64_t val, bool synchronous) + uint64_t val) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret; BUG_ON(entry >= 4); - if (synchronous) { - I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); - I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); - return 0; - } - ret = intel_ring_begin(ring, 6); if (ret) return ret; @@ -233,8 +226,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, } static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring, - bool synchronous) + struct intel_engine_cs *ring) { int i, ret; @@ -243,7 +235,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, for (i = used_pd - 1; i >= 0; i--) { dma_addr_t addr = ppgtt->pd_dma_addr[i]; - ret = gen8_write_pdp(ring, i, addr, synchronous); + ret = gen8_write_pdp(ring, i, addr); if (ret) return ret; } @@ -708,29 +700,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring, - bool synchronous) + struct intel_engine_cs *ring) { - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; int ret; - /* If we're in reset, we can assume the GPU is sufficiently idle to - * manually frob these bits. Ideally we could use the ring functions, - * except our error handling makes it quite difficult (can't use - * intel_ring_begin, ring->flush, or intel_ring_advance) - * - * FIXME: We should try not to special case reset - */ - if (synchronous || - i915_reset_in_progress(&dev_priv->gpu_error)) { - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); - POSTING_READ(RING_PP_DIR_BASE(ring)); - return 0; - } - /* NB: TLBs must be flushed and invalidated before a switch */ ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) @@ -752,29 +725,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, } static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring, - bool synchronous) + struct intel_engine_cs *ring) { - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; int ret; - /* If we're in reset, we can assume the GPU is sufficiently idle to - * manually frob these bits. Ideally we could use the ring functions, - * except our error handling makes it quite difficult (can't use - * intel_ring_begin, ring->flush, or intel_ring_advance) - * - * FIXME: We should try not to special case reset - */ - if (synchronous || - i915_reset_in_progress(&dev_priv->gpu_error)) { - WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); - POSTING_READ(RING_PP_DIR_BASE(ring)); - return 0; - } - /* NB: TLBs must be flushed and invalidated before a switch */ ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) @@ -803,14 +757,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, } static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring, - bool synchronous) + struct intel_engine_cs *ring) { struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (!synchronous) - return 0; I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); @@ -1189,7 +1140,7 @@ int i915_ppgtt_init_hw(struct drm_device *dev) if (ppgtt) { for_each_ring(ring, dev_priv, i) { - ret = ppgtt->switch_mm(ppgtt, ring, true); + ret = ppgtt->switch_mm(ppgtt, ring); if (ret != 0) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6280648d4805..d5c14af51e99 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -264,8 +264,7 @@ struct i915_hw_ppgtt { int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring, - bool synchronous); + struct intel_engine_cs *ring); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; -- 2.39.5