intel: Add multisample scaled blitting in blorp engine

In traditional multisampled framebuffer rendering, color samples must be explicitly resolved via BlitFramebuffer before doing the scaled blitting of the framebuffer. So, scaled blitting of a multisample framebuffer takes two separate calls to BlitFramebuffer. This patch implements the functionality of doing multisampled scaled resolve using just one BlitFramebuffer call. Important changes involved in this patch are listed below: - Use float registers to scale and offset texture coordinates. - Change offset computation to consider float coordinates. - Round the scaled coordinates down to nearest integer. - Modify src texture coordinates clipping to account for scaling.. - Linear filter is not yet implemented in blorp. So, don't use blorp engine to do single sampled scaled blitting. V3: Fix nearest filtering issue in scaled blits. Makes failing piglit fbo-blit-stetch test and framebuffer_blit_functionality_magnifying_blit.test in gles3 CTS pass. Observed no piglit, gles3 CTS regressions on sandybridge & ivybridge with this patch. Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com> Reviewed-by: Paul Berry <stereotype441@gmail.com>
author: Anuj Phogat <anuj.phogat@gmail.com> 2013-05-14 17:20:02 -0700
committer: Anuj Phogat <anuj.phogat@gmail.com> 2013-05-30 10:50:30 -0700
commit: 0a70fdfb3ffe2b03c037c64076fe4c6d3e8a0fe5 (patch)
tree: b4776d6e41388a153d1d958771a5067943d87d61
parent: 6e28713a8dceed021bdeaa881e1b4977ac8103ab (diff)
4 files changed, 93 insertions, 74 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 70e3933e11a..51b23dbea12 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -40,9 +40,10 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer,
-                        int src_x0, int src_y0,
-                        int dst_x0, int dst_y0,
-                        int dst_x1, int dst_y1,
+                        float src_x0, float src_y0,
+                        float src_x1, float src_y1,
+                        float dst_x0, float dst_y0,
+                        float dst_x1, float dst_y1,
                         bool mirror_x, bool mirror_y);
 
 bool
@@ -158,11 +159,11 @@ public:
 
 struct brw_blorp_coord_transform_params
 {
-   void setup(GLuint src0, GLuint dst0, GLuint dst1,
+   void setup(GLfloat src0, GLfloat src1, GLfloat dst0, GLfloat dst1,
               bool mirror);
 
-   int32_t multiplier;
-   int32_t offset;
+   float multiplier;
+   float offset;
 };
 
 
@@ -314,9 +315,10 @@ public:
                          unsigned src_level, unsigned src_layer,
                          struct intel_mipmap_tree *dst_mt,
                          unsigned dst_level, unsigned dst_layer,
-                         GLuint src_x0, GLuint src_y0,
-                         GLuint dst_x0, GLuint dst_y0,
-                         GLuint width, GLuint height,
+                         GLfloat src_x0, GLfloat src_y0,
+                         GLfloat src_x1, GLfloat src_y1,
+                         GLfloat dst_x0, GLfloat dst_y0,
+                         GLfloat dst_x1, GLfloat dst_y1,
                          bool mirror_x, bool mirror_y);
 
    virtual uint32_t get_wm_prog(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 60837cae765..0a9aca85e4d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -41,11 +41,11 @@
  * If coord0 > coord1, swap them and invert the "mirror" boolean.
  */
 static inline void
-fixup_mirroring(bool &mirror, GLint &coord0, GLint &coord1)
+fixup_mirroring(bool &mirror, GLfloat &coord0, GLfloat &coord1)
 {
    if (coord0 > coord1) {
       mirror = !mirror;
-      GLint tmp = coord0;
+      GLfloat tmp = coord0;
       coord0 = coord1;
       coord1 = tmp;
    }
@@ -67,9 +67,10 @@ fixup_mirroring(bool &mirror, GLint &coord0, GLint &coord1)
  * coordinates, by swapping the roles of src and dst.
  */
 static inline bool
-clip_or_scissor(bool mirror, GLint &src_x0, GLint &src_x1, GLint &dst_x0,
-                GLint &dst_x1, GLint fb_xmin, GLint fb_xmax)
+clip_or_scissor(bool mirror, GLfloat &src_x0, GLfloat &src_x1, GLfloat &dst_x0,
+                GLfloat &dst_x1, GLfloat fb_xmin, GLfloat fb_xmax)
 {
+   float scale = (float) (src_x1 - src_x0) / (dst_x1 - dst_x0);
    /* If we are going to scissor everything away, stop. */
    if (!(fb_xmin < fb_xmax &&
          dst_x0 < fb_xmax &&
@@ -105,8 +106,8 @@ clip_or_scissor(bool mirror, GLint &src_x0, GLint &src_x1, GLint &dst_x0,
    /* Adjust the source rectangle to remove the pixels corresponding to those
     * that were clipped/scissored out of the destination rectangle.
     */
-   src_x0 += pixels_clipped_left;
-   src_x1 -= pixels_clipped_right;
+   src_x0 += pixels_clipped_left * scale;
+   src_x1 -= pixels_clipped_right * scale;
 
    return true;
 }
@@ -127,9 +128,10 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
                         unsigned src_level, unsigned src_layer,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer,
-                        int src_x0, int src_y0,
-                        int dst_x0, int dst_y0,
-                        int dst_x1, int dst_y1,
+                        float src_x0, float src_y0,
+                        float src_x1, float src_y1,
+                        float dst_x0, float dst_y0,
+                        float dst_x1, float dst_y1,
                         bool mirror_x, bool mirror_y)
 {
    intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_layer);
@@ -139,6 +141,7 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
                                 src_mt, src_level, src_layer,
                                 dst_mt, dst_level, dst_layer,
                                 src_x0, src_y0,
+                                src_x1, src_y1,
                                 dst_x0, dst_y0,
                                 dst_x1, dst_y1,
                                 mirror_x, mirror_y);
@@ -151,8 +154,8 @@ static void
 do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit,
               struct intel_renderbuffer *src_irb,
               struct intel_renderbuffer *dst_irb,
-              GLint srcX0, GLint srcY0,
-              GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+              GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
+              GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
               bool mirror_x, bool mirror_y)
 {
    /* Find source/dst miptrees */
@@ -163,7 +166,8 @@ do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit,
    brw_blorp_blit_miptrees(intel,
                            src_mt, src_irb->mt_level, src_irb->mt_layer,
                            dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
-                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+                           srcX0, srcY0, srcX1, srcY1,
+                           dstX0, dstY0, dstX1, dstY1,
                            mirror_x, mirror_y);
 
    intel_renderbuffer_set_needs_downsample(dst_irb);
@@ -203,8 +207,8 @@ formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
 
 static bool
 try_blorp_blit(struct intel_context *intel,
-               GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
-               GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+               GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
+               GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
                GLenum filter, GLbitfield buffer_bit)
 {
    struct gl_context *ctx = &intel->ctx;
@@ -224,9 +228,13 @@ try_blorp_blit(struct intel_context *intel,
    fixup_mirroring(mirror_y, srcY0, srcY1);
    fixup_mirroring(mirror_y, dstY0, dstY1);
 
-   /* Make sure width and height match */
-   if (srcX1 - srcX0 != dstX1 - dstX0) return false;
-   if (srcY1 - srcY0 != dstY1 - dstY0) return false;
+   /* Linear filtering is not yet implemented in blorp engine. So, fallback
+    * to other blit paths.
+    */
+   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
+        srcY1 - srcY0 != dstY1 - dstY0) &&
+       filter == GL_LINEAR)
+      return false;
 
    /* If the destination rectangle needs to be clipped or scissored, do so.
     */
@@ -278,7 +286,8 @@ try_blorp_blit(struct intel_context *intel,
          dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
 	 if (dst_irb)
             do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
-                          dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+                          srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+                          mirror_x, mirror_y);
       }
       break;
    case GL_DEPTH_BUFFER_BIT:
@@ -289,7 +298,8 @@ try_blorp_blit(struct intel_context *intel,
       if (!formats_match(buffer_bit, src_irb, dst_irb))
          return false;
       do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
-                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+                    mirror_x, mirror_y);
       break;
    case GL_STENCIL_BUFFER_BIT:
       src_irb =
@@ -299,7 +309,8 @@ try_blorp_blit(struct intel_context *intel,
       if (!formats_match(buffer_bit, src_irb, dst_irb))
          return false;
       do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
-                    dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y);
+                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+                    mirror_x, mirror_y);
       break;
    default:
       assert(false);
@@ -347,6 +358,7 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
     */
 
    int srcY1 = srcY0 + height;
+   int srcX1 = srcX0 + width;
    int dstX1 = dstX0 + width;
    int dstY1 = dstY0 + height;
 
@@ -364,7 +376,8 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
    brw_blorp_blit_miptrees(intel,
                            src_mt, src_irb->mt_level, src_irb->mt_layer,
                            dst_mt, dst_image->Level, dst_image->Face,
-                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+                           srcX0, srcY0, srcX1, srcY1,
+                           dstX0, dstY0, dstX1, dstY1,
                            false, mirror_y);
 
    /* If we're copying to a packed depth stencil texture and the source
@@ -380,7 +393,8 @@ brw_blorp_copytexsubimage(struct intel_context *intel,
       brw_blorp_blit_miptrees(intel,
                               src_irb->mt, src_irb->mt_level, src_irb->mt_layer,
                               dst_mt, dst_image->Level, dst_image->Face,
-                              srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+                              srcX0, srcY0, srcX1, srcY1,
+                              dstX0, dstY0, dstX1, dstY1,
                               false, mirror_y);
    }
 
@@ -590,7 +604,7 @@ private:
    void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
    void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
    void kill_if_outside_dst_rect();
-   void translate_dst_to_src(unsigned intel_gen);
+   void translate_dst_to_src();
    void single_to_blend();
    void manual_blend(unsigned num_samples);
    void sample(struct brw_reg dst);
@@ -772,7 +786,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
       kill_if_outside_dst_rect();
 
    /* Next, apply a translation to obtain coordinates in the source image. */
-   translate_dst_to_src(brw->intel.gen);
+   translate_dst_to_src();
 
    /* If the source image is not multisampled, then we want to fetch sample
     * number 0, because that's the only sample there is.
@@ -844,7 +858,7 @@ brw_blorp_blit_program::alloc_push_const_regs(int base_reg)
 #define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
 #define ALLOC_REG(name) \
    this->name = \
-      brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 4)
+      brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 4)
 
    ALLOC_REG(dst_x0);
    ALLOC_REG(dst_x1);
@@ -1322,29 +1336,29 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()
  * coordinates.
  */
 void
-brw_blorp_blit_program::translate_dst_to_src(unsigned intel_gen)
+brw_blorp_blit_program::translate_dst_to_src()
 {
+   struct brw_reg X_f = retype(X, BRW_REGISTER_TYPE_F);
+   struct brw_reg Y_f = retype(Y, BRW_REGISTER_TYPE_F);
+   struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
+   struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
+
    brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
-   /* For mul instruction:
-    * On SNB when both src0 and src1 are of type D or UD, only the low 16 bits
-    * of each element of src0 are used.
-    * On IVB when both src0 and src1 are of type D or UD, only the low 16 bits
-    * of each element of src1 are used.
-    * multiplier can be positive or negative. So keep the multiplier in a src
-    * register which don't get truncated during multiplication.
+   /* Move the UD coordinates to float registers. */
+   brw_MOV(&func, Xp_f, X);
+   brw_MOV(&func, Yp_f, Y);
+   /* Scale and offset */
+   brw_MUL(&func, X_f, Xp_f, x_transform.multiplier);
+   brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
+   brw_ADD(&func, X_f, X_f, x_transform.offset);
+   brw_ADD(&func, Y_f, Y_f, y_transform.offset);
+   /* Round the float coordinates down to nearest integer by moving to
+    * UD registers.
     */
-   if (intel_gen == 6) {
-      brw_MUL(&func, Xp, X, x_transform.multiplier);
-      brw_MUL(&func, Yp, Y, y_transform.multiplier);
-   }
-   else {
-      brw_MUL(&func, Xp, x_transform.multiplier, X);
-      brw_MUL(&func, Yp, y_transform.multiplier, Y);
-   }
-   brw_ADD(&func, Xp, Xp, x_transform.offset);
-   brw_ADD(&func, Yp, Yp, y_transform.offset);
-   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
+   brw_MOV(&func, Xp, X_f);
+   brw_MOV(&func, Yp, Y_f);
    SWAP_XY_AND_XPYP();
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
 }
 
 /**
@@ -1709,25 +1723,32 @@ brw_blorp_blit_program::render_target_write()
 
 
 void
-brw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint dst1,
+brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
+                                        GLfloat dst0, GLfloat dst1,
                                         bool mirror)
 {
+   float scale = (src1 - src0) / (dst1 - dst0);
    if (!mirror) {
       /* When not mirroring a coordinate (say, X), we need:
-       *   x' - src_x0 = x - dst_x0
+       *   src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
        * Therefore:
-       *   x' = 1*x + (src_x0 - dst_x0)
+       *   src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
+       *
+       * blorp program uses "round toward zero" to convert the
+       * transformed floating point coordinates to integer coordinates,
+       * whereas the behaviour we actually want is "round to nearest",
+       * so 0.5 provides the necessary correction.
        */
-      multiplier = 1;
-      offset = (int) (src0 - dst0);
+      multiplier = scale;
+      offset = src0 + (-dst0 + 0.5) * scale;
    } else {
       /* When mirroring X we need:
-       *   x' - src_x0 = dst_x1 - x - 1
+       *   src_x - src_x0 = dst_x1 - dst_x - 0.5
        * Therefore:
-       *   x' = -1*x + (src_x0 + dst_x1 - 1)
+       *   src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
        */
-      multiplier = -1;
-      offset = src0 + dst1 - 1;
+      multiplier = -scale;
+      offset = src0 + (dst1 - 0.5) * scale;
    }
 }
 
@@ -1766,9 +1787,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
                                              unsigned src_level, unsigned src_layer,
                                              struct intel_mipmap_tree *dst_mt,
                                              unsigned dst_level, unsigned dst_layer,
-                                             GLuint src_x0, GLuint src_y0,
-                                             GLuint dst_x0, GLuint dst_y0,
-                                             GLuint dst_x1, GLuint dst_y1,
+                                             GLfloat src_x0, GLfloat src_y0,
+                                             GLfloat src_x1, GLfloat src_y1,
+                                             GLfloat dst_x0, GLfloat dst_y0,
+                                             GLfloat dst_x1, GLfloat dst_y1,
                                              bool mirror_x, bool mirror_y)
 {
    src.set(brw, src_mt, src_level, src_layer);
@@ -1877,8 +1899,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
    y0 = wm_push_consts.dst_y0 = dst_y0;
    x1 = wm_push_consts.dst_x1 = dst_x1;
    y1 = wm_push_consts.dst_y1 = dst_y1;
-   wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x);
-   wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y);
+   wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
+   wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
 
    if (dst.num_samples <= 1 && dst_mt->num_samples > 1) {
       /* We must expand the rectangle we send through the rendering pipeline,
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 972ccf6bc42..9ac25442eec 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -344,13 +344,6 @@ brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
-/** Construct unsigned dword[1] register */
-static inline struct brw_reg
-brw_ud1_reg(unsigned file, unsigned nr, unsigned subnr)
-{
-   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UD), subnr);
-}
-
 static inline struct brw_reg
 brw_imm_reg(unsigned type)
 {
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 5f1e171d0cc..e5fc4258fb5 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -1346,6 +1346,7 @@ intel_miptree_updownsample(struct intel_context *intel,
                            src, 0 /* level */, 0 /* layer */,
                            dst, 0 /* level */, 0 /* layer */,
                            src_x0, src_y0,
+                           width, height,
                            dst_x0, dst_y0,
                            width, height,
                            false, false /*mirror x, y*/);
@@ -1355,6 +1356,7 @@ intel_miptree_updownsample(struct intel_context *intel,
                               src->stencil_mt, 0 /* level */, 0 /* layer */,
                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
                               src_x0, src_y0,
+                              width, height,
                               dst_x0, dst_y0,
                               width, height,
                               false, false /*mirror x, y*/);
author	Anuj Phogat <anuj.phogat@gmail.com>	2013-05-14 17:20:02 -0700
committer	Anuj Phogat <anuj.phogat@gmail.com>	2013-05-30 10:50:30 -0700
commit	0a70fdfb3ffe2b03c037c64076fe4c6d3e8a0fe5 (patch)
tree	b4776d6e41388a153d1d958771a5067943d87d61
parent	6e28713a8dceed021bdeaa881e1b4977ac8103ab (diff)