summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChia-I Wu <olvaffe@gmail.com>2014-01-14 14:51:51 +0800
committerChia-I Wu <olvaffe@gmail.com>2014-01-14 15:43:20 +0800
commit7fdab3b201bd2a011e8e0b0b15aca7b7fb5a7aa5 (patch)
treed95b9eb1481ae53350082bf1db87b6b58d359ad9
parent18645d1533032e0ee64714731977e12ee16d959b (diff)
ilo: disable HiZ for misaligned levels
We need to disable HiZ for non-8x4 aligned levels, except for level 0, layer 0. For the very first layer we can adjust Width and Height fields of 3DSTATE_DEPTH_BUFFER to make it aligned. Specifically, add ILO_TEXTURE_HIZ and set the flag only for properly aligned levels. ilo_texture_can_enable_hiz() is updated to check for the flag. In tex_layout_validate(), align the depth bo to 8x4 so that we can adjust Width/Height of 3DSTATE_DEPTH_BUFFER without introducing out-of-bound access. Finally in rectlist blitter, add the ability to adjust 3DSTATE_DEPTH_BUFFER.
-rw-r--r--src/gallium/drivers/ilo/ilo_blitter_rectlist.c153
-rw-r--r--src/gallium/drivers/ilo/ilo_gpe_gen6.h35
-rw-r--r--src/gallium/drivers/ilo/ilo_resource.c85
-rw-r--r--src/gallium/drivers/ilo/ilo_resource.h24
4 files changed, 215 insertions, 82 deletions
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 015cfa459a5..472ab6a1755 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -32,7 +32,8 @@
#include "ilo_3d.h"
#include "ilo_3d_pipeline.h"
#include "ilo_gpe.h"
-#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components */
+#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and
+ zs_align_surface */
/**
* Set the states that are invariant between all ops.
@@ -223,49 +224,18 @@ ilo_blitter_set_uses(struct ilo_blitter *blitter, uint32_t uses)
}
static void
-hiz_emit_rectlist(struct ilo_blitter *blitter)
+hiz_align_fb(struct ilo_blitter *blitter)
{
- struct ilo_3d *hw3d = blitter->ilo->hw3d;
- struct ilo_3d_pipeline *p = hw3d->pipeline;
-
- ilo_3d_own_render_ring(hw3d);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 313:
- *
- * "If other rendering operations have preceded this clear, a
- * PIPE_CONTROL with write cache flush enabled and Z-inhibit
- * disabled must be issued before the rectangle primitive used for
- * the depth buffer clear operation."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 314:
- *
- * "Depth buffer clear pass must be followed by a PIPE_CONTROL
- * command with DEPTH_STALL bit set and Then followed by Depth
- * FLUSH"
- *
- * But the pipeline has to be flushed both before and after not only
- * because of these workarounds. We need them for reasons such as
- *
- * - we may sample from a texture that was rendered to
- * - we may sample from the fb shortly after
- */
- if (!ilo_cp_empty(p->cp))
- ilo_3d_pipeline_emit_flush(p);
-
- ilo_3d_pipeline_emit_rectlist(p, blitter);
-
- ilo_3d_pipeline_emit_flush(p);
-}
+ unsigned align_w, align_h;
-/**
- * This must be called after ilo_blitter_set_fb().
- */
-static void
-hiz_set_rectlist(struct ilo_blitter *blitter, bool aligned)
-{
- unsigned width = blitter->fb.width;
- unsigned height = blitter->fb.height;
+ switch (blitter->op) {
+ case ILO_BLITTER_RECTLIST_CLEAR_ZS:
+ case ILO_BLITTER_RECTLIST_RESOLVE_Z:
+ break;
+ default:
+ return;
+ break;
+ }
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
@@ -296,38 +266,76 @@ hiz_set_rectlist(struct ilo_blitter *blitter, bool aligned)
* buffer clear operation must be delivered, and depth buffer state
* cannot have changed since the previous depth buffer clear
* operation."
- *
- * Making the RECTLIST aligned to 8x4 is easy. But how about
- * 3DSTATE_DRAWING_RECTANGLE and 3DSTATE_DEPTH_BUFFER? Since we use
- * HALIGN_8 and VALIGN_4 for depth buffers, we can safely align the drawing
- * rectangle, except that the PRM requires the drawing rectangle to be
- * clampped to the render target boundary. For 3DSTATE_DEPTH_BUFFER, we
- * cannot align the Width and Height fields if level or slice is greater
- * than zero.
*/
- if (aligned) {
- switch (blitter->fb.num_samples) {
- case 1:
- width = align(width, 8);
- height = align(height, 4);
- break;
- case 2:
- width = align(width, 4);
- height = align(height, 4);
- break;
- case 4:
- width = align(width, 4);
- height = align(height, 2);
- break;
- case 8:
- default:
- width = align(width, 2);
- height = align(height, 2);
- break;
- }
+ switch (blitter->fb.num_samples) {
+ case 1:
+ align_w = 8;
+ align_h = 4;
+ break;
+ case 2:
+ align_w = 4;
+ align_h = 4;
+ break;
+ case 4:
+ align_w = 4;
+ align_h = 2;
+ break;
+ case 8:
+ default:
+ align_w = 2;
+ align_h = 2;
+ break;
}
- ilo_blitter_set_rectlist(blitter, 0, 0, width, height);
+ if (blitter->fb.width % align_w || blitter->fb.height % align_h) {
+ blitter->fb.width = align(blitter->fb.width, align_w);
+ blitter->fb.height = align(blitter->fb.height, align_h);
+ /* a depth surface is required; pad its Width/Height fields to match */
+ assert(!blitter->fb.dst.is_rt);
+ zs_align_surface(blitter->ilo->dev, align_w, align_h,
+ &blitter->fb.dst.u.zs);
+ }
+}
+
+static void
+hiz_emit_rectlist(struct ilo_blitter *blitter)
+{
+ struct ilo_3d *hw3d = blitter->ilo->hw3d;
+ struct ilo_3d_pipeline *p = hw3d->pipeline;
+
+ hiz_align_fb(blitter);
+
+ ilo_blitter_set_rectlist(blitter, 0, 0,
+ blitter->fb.width, blitter->fb.height);
+
+ ilo_3d_own_render_ring(hw3d);
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313:
+ *
+ * "If other rendering operations have preceded this clear, a
+ * PIPE_CONTROL with write cache flush enabled and Z-inhibit
+ * disabled must be issued before the rectangle primitive used for
+ * the depth buffer clear operation."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "Depth buffer clear pass must be followed by a PIPE_CONTROL
+ * command with DEPTH_STALL bit set and Then followed by Depth
+ * FLUSH"
+ *
+ * But the pipeline has to be flushed both before and after not only
+ * because of these workarounds. We need them for reasons such as
+ *
+ * - we may sample from a texture that was rendered to
+ * - we may sample from the fb shortly after
+ */
+ if (!ilo_cp_empty(p->cp))
+ ilo_3d_pipeline_emit_flush(p);
+
+ ilo_3d_pipeline_emit_rectlist(p, blitter);
+
+ ilo_3d_pipeline_emit_flush(p);
}
static bool
@@ -452,7 +460,6 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
uses |= ILO_BLITTER_USE_CC | ILO_BLITTER_USE_FB_STENCIL;
ilo_blitter_set_uses(blitter, uses);
- hiz_set_rectlist(blitter, true);
hiz_emit_rectlist(blitter);
return true;
@@ -489,7 +496,6 @@ ilo_blitter_rectlist_resolve_z(struct ilo_blitter *blitter,
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
- hiz_set_rectlist(blitter, true);
hiz_emit_rectlist(blitter);
}
@@ -522,6 +528,5 @@ ilo_blitter_rectlist_resolve_hiz(struct ilo_blitter *blitter,
ilo_blitter_set_uses(blitter,
ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
- hiz_set_rectlist(blitter, false);
hiz_emit_rectlist(blitter);
}
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
index 76288d267f9..3c63a7108c9 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
@@ -1491,6 +1491,41 @@ gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
}
static inline void
+zs_align_surface(const struct ilo_dev_info *dev,
+ unsigned align_w, unsigned align_h,
+ struct ilo_zs_surface *zs)
+{
+ unsigned mask, shift_w, shift_h;
+ unsigned width, height;
+ uint32_t dw3;
+
+ ILO_GPE_VALID_GEN(dev, 6, 7.5);
+
+ if (dev->gen >= ILO_GEN(7)) {
+ shift_w = 4;
+ shift_h = 18;
+ mask = 0x3fff;
+ }
+ else {
+ shift_w = 6;
+ shift_h = 19;
+ mask = 0x1fff;
+ }
+
+ dw3 = zs->payload[2];
+
+ /* aligned width and height */
+ width = align(((dw3 >> shift_w) & mask) + 1, align_w);
+ height = align(((dw3 >> shift_h) & mask) + 1, align_h);
+
+ dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) |
+ (width - 1) << shift_w |
+ (height - 1) << shift_h;
+
+ zs->payload[2] = dw3;
+}
+
+static inline void
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
const struct ilo_zs_surface *zs,
struct ilo_cp *cp)
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index 1048fe3d03c..f9a53318613 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -852,6 +852,16 @@ tex_layout_validate(struct tex_layout *layout)
layout->height = align(layout->height, 64);
}
+ /*
+ * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
+ * ilo_texture_can_enable_hiz(), we always return true for the first slice.
+ * To avoid out-of-bound access, we have to pad.
+ */
+ if (layout->hiz) {
+ layout->width = align(layout->width, 8);
+ layout->height = align(layout->height, 4);
+ }
+
assert(layout->width % layout->block_width == 0);
assert(layout->height % layout->block_height == 0);
assert(layout->qpitch % layout->block_height == 0);
@@ -1037,9 +1047,8 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout)
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = layout->templ;
const int hz_align_j = 8;
- unsigned hz_width, hz_height;
+ unsigned hz_width, hz_height, lv;
unsigned long pitch;
- int i;
/*
* See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
@@ -1054,9 +1063,9 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout)
if (templ->target == PIPE_TEXTURE_3D) {
hz_height = 0;
- for (i = 0; i <= templ->last_level; i++) {
- const unsigned h = align(layout->levels[i].h, hz_align_j);
- hz_height += h * layout->levels[i].d;
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ const unsigned h = align(layout->levels[lv].h, hz_align_j);
+ hz_height += h * layout->levels[lv].d;
}
hz_height /= 2;
@@ -1087,6 +1096,72 @@ tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout)
tex->hiz.bo_stride = pitch;
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
+ *
+ * "A rectangle primitive representing the clear area is delivered. The
+ * primitive must adhere to the following restrictions on size:
+ *
+ * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
+ * aligned to an 8x4 pixel block relative to the upper left corner
+ * of the depth buffer, and contain an integer number of these pixel
+ * blocks, and all 8x4 pixels must be lit.
+ *
+ * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
+ * aligned to a 4x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 4x2 pixels
+ * must be lit
+ *
+ * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
+ * aligned to a 2x2 pixel block (8x4 sample block) relative to the
+ * upper left corner of the depth buffer, and contain an integer
+ * number of these pixel blocks, and all samples of the 2x2 pixels
+ * must be list."
+ *
+ * "The following is required when performing a depth buffer resolve:
+ *
+ * - A rectangle primitive of the same size as the previous depth
+ * buffer clear operation must be delivered, and depth buffer state
+ * cannot have changed since the previous depth buffer clear
+ * operation."
+ *
+ * Experiments on Haswell show that depth buffer resolves have the same
+ * alignment requirements, and aligning the RECTLIST primitive and
+ * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The mipmap size must be
+ * aligned.
+ */
+ for (lv = 0; lv <= templ->last_level; lv++) {
+ unsigned align_w = 8, align_h = 4;
+
+ switch (templ->nr_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ align_w /= 2;
+ break;
+ case 4:
+ align_w /= 2;
+ align_h /= 2;
+ break;
+ case 8:
+ default:
+ align_w /= 4;
+ align_h /= 2;
+ break;
+ }
+
+ if (u_minify(templ->width0, lv) % align_w == 0 &&
+ u_minify(templ->height0, lv) % align_h == 0) {
+ const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
+ u_minify(templ->depth0, lv) : templ->array_size;
+
+ ilo_texture_set_slice_flags(tex, lv, 0, num_slices,
+ ILO_TEXTURE_HIZ, ILO_TEXTURE_HIZ);
+ }
+ }
+
return true;
}
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index 125535a2771..fb4fde77d08 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -31,6 +31,7 @@
#include "intel_winsys.h"
#include "ilo_common.h"
+#include "ilo_screen.h"
enum ilo_texture_flags {
ILO_TEXTURE_RENDER_WRITE = 1 << 0,
@@ -40,10 +41,9 @@ enum ilo_texture_flags {
ILO_TEXTURE_BLT_READ = 1 << 4,
ILO_TEXTURE_CPU_READ = 1 << 5,
ILO_TEXTURE_CLEAR = 1 << 6,
+ ILO_TEXTURE_HIZ = 1 << 7,
};
-struct ilo_screen;
-
struct ilo_buffer {
struct pipe_resource base;
@@ -161,7 +161,25 @@ static inline bool
ilo_texture_can_enable_hiz(const struct ilo_texture *tex, unsigned level,
unsigned first_slice, unsigned num_slices)
{
- return (tex->hiz.bo != NULL);
+ const struct ilo_screen *is = ilo_screen(tex->base.screen);
+ const struct ilo_texture_slice *slice =
+ ilo_texture_get_slice(tex, level, first_slice);
+
+ if (!tex->hiz.bo)
+ return false;
+
+ /* we can adjust 3DSTATE_DEPTH_BUFFER for the first slice */
+ if (level == 0 && first_slice == 0 && num_slices == 1)
+ return true;
+
+ /* HiZ is non-mipmapped and non-array on GEN6 */
+ assert(is->dev.gen > ILO_GEN(6));
+
+ /*
+ * Either all or none of the slices in the same level have ILO_TEXTURE_HIZ
+ * set. It suffices to check only the first slice.
+ */
+ return (slice->flags & ILO_TEXTURE_HIZ);
}
#endif /* ILO_RESOURCE_H */