drm/i915: Change HSW WIZ hashing mode to 16x4
[deliverable/linux.git] / drivers / gpu / drm / i915 / intel_pm.c
index 6c435e48f5541e0430bebe63c37031808236e4d5..7da7360fea359077c4e0fd5e47c0bc599f14c075 100644 (file)
@@ -2741,7 +2741,7 @@ intel_alloc_context_page(struct drm_device *dev)
                return NULL;
        }
 
-       ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
+       ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
        if (ret) {
                DRM_ERROR("failed to pin power context: %d\n", ret);
                goto err_unref;
@@ -3003,6 +3003,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
        dev_priv->rps.last_adj = 0;
 }
 
+/* gen6_set_rps is called to update the frequency request, but should also be
+ * called when the range (min_delay and max_delay) is modified so that we can
+ * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
 void gen6_set_rps(struct drm_device *dev, u8 val)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3011,8 +3014,14 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        WARN_ON(val > dev_priv->rps.max_delay);
        WARN_ON(val < dev_priv->rps.min_delay);
 
-       if (val == dev_priv->rps.cur_delay)
+       if (val == dev_priv->rps.cur_delay) {
+               /* min/max delay may still have been modified so be sure to
+                * write the limits value */
+               I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+                          gen6_rps_limits(dev_priv, val));
+
                return;
+       }
 
        gen6_set_rps_thresholds(dev_priv, val);
 
@@ -3038,6 +3047,58 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(val * 50);
 }
 
+/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+ *
+ * * If Gfx is Idle, then
+ * 1. Mask Turbo interrupts
+ * 2. Bring up Gfx clock
+ * 3. Change the freq to Rpn and wait till P-Unit updates freq
+ * 4. Clear the Force GFX CLK ON bit so that Gfx can down
+ * 5. Unmask Turbo interrupts
+*/
+static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
+{
+       /*
+        * When we are idle.  Drop to min voltage state.
+        */
+
+       if (dev_priv->rps.cur_delay <= dev_priv->rps.min_delay)
+               return;
+
+       /* Mask turbo interrupt so that they will not come in between */
+       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+
+       /* Bring up the Gfx clock */
+       I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG) |
+                               VLV_GFX_CLK_FORCE_ON_BIT);
+
+       if (wait_for(((VLV_GFX_CLK_STATUS_BIT &
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG)) != 0), 5)) {
+                       DRM_ERROR("GFX_CLK_ON request timed out\n");
+               return;
+       }
+
+       dev_priv->rps.cur_delay = dev_priv->rps.min_delay;
+
+       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
+                                       dev_priv->rps.min_delay);
+
+       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
+                               & GENFREQSTATUS) == 0, 5))
+               DRM_ERROR("timed out waiting for Punit\n");
+
+       /* Release the Gfx clock */
+       I915_WRITE(VLV_GTLC_SURVIVABILITY_REG,
+               I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
+                               ~VLV_GFX_CLK_FORCE_ON_BIT);
+
+       /* Unmask Up interrupts */
+       dev_priv->rps.rp_up_masked = true;
+       gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+                                               dev_priv->rps.min_delay);
+}
+
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
        struct drm_device *dev = dev_priv->dev;
@@ -3045,7 +3106,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
        mutex_lock(&dev_priv->rps.hw_lock);
        if (dev_priv->rps.enabled) {
                if (IS_VALLEYVIEW(dev))
-                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
+                       vlv_set_rps_idle(dev_priv);
                else
                        gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
                dev_priv->rps.last_adj = 0;
@@ -3135,16 +3196,10 @@ static void valleyview_disable_rps(struct drm_device *dev)
 
 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
 {
-       if (IS_GEN6(dev))
-               DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
-
-       if (IS_HASWELL(dev))
-               DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
-
        DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
-                       (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
-                       (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
-                       (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
+                (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
+                (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
+                (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
 }
 
 int intel_enable_rc6(const struct drm_device *dev)
@@ -3161,14 +3216,10 @@ int intel_enable_rc6(const struct drm_device *dev)
        if (INTEL_INFO(dev)->gen == 5)
                return 0;
 
-       if (IS_HASWELL(dev))
-               return INTEL_RC6_ENABLE;
-
-       /* snb/ivb have more than one rc6 state. */
-       if (INTEL_INFO(dev)->gen == 6)
-               return INTEL_RC6_ENABLE;
+       if (IS_IVYBRIDGE(dev))
+               return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
 
-       return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
+       return INTEL_RC6_ENABLE;
 }
 
 static void gen6_enable_rps_interrupts(struct drm_device *dev)
@@ -3225,10 +3276,10 @@ static void gen8_enable_rps(struct drm_device *dev)
        /* 3: Enable RC6 */
        if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
                rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-       DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
+       intel_print_rc6_info(dev, rc6_mask);
        I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-                       GEN6_RC_CTL_EI_MODE(1) |
-                       rc6_mask);
+                                   GEN6_RC_CTL_EI_MODE(1) |
+                                   rc6_mask);
 
        /* 4 Program defaults and thresholds for RPS*/
        I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
@@ -3270,7 +3321,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
-       u32 rp_state_cap;
+       u32 rp_state_cap, hw_max, hw_min;
        u32 gt_perf_status;
        u32 rc6vids, pcu_mbox, rc6_mask = 0;
        u32 gtfifodbg;
@@ -3299,13 +3350,20 @@ static void gen6_enable_rps(struct drm_device *dev)
        gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
        /* In units of 50MHz */
-       dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
-       dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
+       dev_priv->rps.hw_max = hw_max = rp_state_cap & 0xff;
+       hw_min = (rp_state_cap >> 16) & 0xff;
        dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
        dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
        dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
        dev_priv->rps.cur_delay = 0;
 
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_delay == 0)
+               dev_priv->rps.max_delay = hw_max;
+
+       if (dev_priv->rps.min_delay == 0)
+               dev_priv->rps.min_delay = hw_min;
+
        /* disable the counters and set deterministic thresholds */
        I915_WRITE(GEN6_RC_CONTROL, 0);
 
@@ -3534,7 +3592,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
-       u32 gtfifodbg, val, rc6_mode = 0;
+       u32 gtfifodbg, val, hw_max, hw_min, rc6_mode = 0;
        int i;
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -3596,21 +3654,27 @@ static void valleyview_enable_rps(struct drm_device *dev)
                         vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
                         dev_priv->rps.cur_delay);
 
-       dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
-       dev_priv->rps.hw_max = dev_priv->rps.max_delay;
+       dev_priv->rps.hw_max = hw_max = valleyview_rps_max_freq(dev_priv);
        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_delay),
-                        dev_priv->rps.max_delay);
+                        vlv_gpu_freq(dev_priv, hw_max),
+                        hw_max);
 
        dev_priv->rps.rpe_delay = valleyview_rps_rpe_freq(dev_priv);
        DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
                         vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
                         dev_priv->rps.rpe_delay);
 
-       dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv);
+       hw_min = valleyview_rps_min_freq(dev_priv);
        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay),
-                        dev_priv->rps.min_delay);
+                        vlv_gpu_freq(dev_priv, hw_min),
+                        hw_min);
+
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_delay == 0)
+               dev_priv->rps.max_delay = hw_max;
+
+       if (dev_priv->rps.min_delay == 0)
+               dev_priv->rps.min_delay = hw_min;
 
        DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
                         vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
@@ -3618,6 +3682,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
        valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
 
+       dev_priv->rps.rp_up_masked = false;
+       dev_priv->rps.rp_down_masked = false;
+
        gen6_enable_rps_interrupts(dev);
 
        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
@@ -3826,9 +3893,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = dev_priv->dev;
        unsigned long val;
 
-       if (dev_priv->info->gen != 5)
+       if (INTEL_INFO(dev)->gen != 5)
                return 0;
 
        spin_lock_irq(&mchdev_lock);
@@ -3857,6 +3925,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
 
 static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 {
+       struct drm_device *dev = dev_priv->dev;
        static const struct v_table {
                u16 vd; /* in .1 mil */
                u16 vm; /* in .1 mil */
@@ -3990,7 +4059,7 @@ static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
                { 16000, 14875, },
                { 16125, 15000, },
        };
-       if (dev_priv->info->is_mobile)
+       if (INTEL_INFO(dev)->is_mobile)
                return v_table[pxvid].vm;
        else
                return v_table[pxvid].vd;
@@ -4033,7 +4102,9 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
-       if (dev_priv->info->gen != 5)
+       struct drm_device *dev = dev_priv->dev;
+
+       if (INTEL_INFO(dev)->gen != 5)
                return;
 
        spin_lock_irq(&mchdev_lock);
@@ -4082,9 +4153,10 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = dev_priv->dev;
        unsigned long val;
 
-       if (dev_priv->info->gen != 5)
+       if (INTEL_INFO(dev)->gen != 5)
                return 0;
 
        spin_lock_irq(&mchdev_lock);
@@ -4273,6 +4345,7 @@ void intel_gpu_ips_teardown(void)
        i915_mch_dev = NULL;
        spin_unlock_irq(&mchdev_lock);
 }
+
 static void intel_init_emon(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4588,6 +4661,13 @@ static void gen6_init_clock_gating(struct drm_device *dev)
                I915_WRITE(GEN6_GT_MODE,
                           _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
 
+       /*
+        * BSpec recoomends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        */
+       I915_WRITE(GEN6_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        ilk_init_lp_watermarks(dev);
 
        I915_WRITE(CACHE_MODE_0,
@@ -4615,9 +4695,17 @@ static void gen6_init_clock_gating(struct drm_device *dev)
                   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
                   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-       /* Bspec says we need to always set all mask bits. */
-       I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
-                  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
+       /* WaStripsFansDisableFastClipPerformanceFix:snb */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
+
+       /*
+        * Bspec says:
+        * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
+        * 3DSTATE_SF number of SF output attributes is more than 16."
+        */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
 
        /*
         * According to the spec the following bits should be
@@ -4643,11 +4731,6 @@ static void gen6_init_clock_gating(struct drm_device *dev)
 
        g4x_disable_trickle_feed(dev);
 
-       /* The default value should be 0x200 according to docs, but the two
-        * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
-       I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
-       I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
-
        cpt_init_clock_gating(dev);
 
        gen6_check_mch_setup(dev);
@@ -4782,10 +4865,21 @@ static void haswell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN7_FF_THREAD_MODE,
                   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
 
+       /* enable HiZ Raw Stall Optimization */
+       I915_WRITE(CACHE_MODE_0_GEN7,
+                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+
        /* WaDisable4x2SubspanOptimization:hsw */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        /* WaSwitchSolVfFArbitrationPriority:hsw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -4818,9 +4912,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        if (IS_IVB_GT1(dev))
                I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
                           _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-       else
-               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
-                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
        /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
@@ -4834,10 +4925,13 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        if (IS_IVB_GT1(dev))
                I915_WRITE(GEN7_ROW_CHICKEN2,
                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-       else
+       else {
+               /* must write both registers */
+               I915_WRITE(GEN7_ROW_CHICKEN2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
                I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
                           _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
+       }
 
        /* WaForceL3Serialization:ivb */
        I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
@@ -4859,10 +4953,21 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 
        gen7_setup_fixed_func_scheduler(dev_priv);
 
+       /* enable HiZ Raw Stall Optimization */
+       I915_WRITE(CACHE_MODE_0_GEN7,
+                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+
        /* WaDisable4x2SubspanOptimization:ivb */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
        snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
        snpcr &= ~GEN6_MBC_SNPCR_MASK;
        snpcr |= GEN6_MBC_SNPCR_MED;
@@ -4945,6 +5050,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 
        I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
+       /*
+        * BSpec says this must be set, even though
+        * WaDisable4x2SubspanOptimization isn't listed for VLV.
+        */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
@@ -4959,16 +5068,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
         * Disable clock gating on th GCFG unit to prevent a delay
         * in the reporting of vblank events.
         */
-       I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
-
-       /* Conservative clock gating settings for now */
-       I915_WRITE(0x9400, 0xffffffff);
-       I915_WRITE(0x9404, 0xffffffff);
-       I915_WRITE(0x9408, 0xffffffff);
-       I915_WRITE(0x940c, 0xffffffff);
-       I915_WRITE(0x9410, 0xffffffff);
-       I915_WRITE(0x9414, 0xffffffff);
-       I915_WRITE(0x9418, 0xffffffff);
+       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
This page took 0.031707 seconds and 5 git commands to generate.