summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2012-10-12 18:46:32 +0200
committerMarek Olšák <maraeo@gmail.com>2012-10-29 12:51:41 +0100
commit96ed6c90eff58ce030c39c2b4db6daf512586b34 (patch)
tree5fc59c951dd1fc7ac3f70354f6f4e3581ba5b8aa
parentb3921e1f53833420e0a0fd581f741744e7957a05 (diff)
r600g: implement texturing with 8x MSAA compressed surfaces for Evergreen
The 2x and 4x MSAA cases are completely broken. The lfdptr instruction returns garbage there. The 8x MSAA case is broken on Cayman, though at least the result looks somewhat correct. Only the 8x MSAA case works on Evergreen and is enabled.
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c8
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h6
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c39
-rw-r--r--src/gallium/drivers/r600/evergreend.h2
-rw-r--r--src/gallium/drivers/r600/r600_asm.c10
-rw-r--r--src/gallium/drivers/r600/r600_asm.h7
-rw-r--r--src/gallium/drivers/r600/r600_blit.c42
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c29
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h21
-rw-r--r--src/gallium/drivers/r600/r600_shader.c133
-rw-r--r--src/gallium/drivers/r600/r600_sq.h3
-rw-r--r--src/gallium/drivers/r600/r600_state.c3
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c4
13 files changed, 270 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4d6cdd7a244..f4ac4aa8685 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -356,12 +356,20 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_sampler_state(pipe, ctx->sampler_state);
pipe->delete_sampler_state(pipe, ctx->sampler_state_linear);
u_upload_destroy(ctx->upload);
FREE(ctx);
}
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+ boolean supported)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+ ctx->has_texture_multisample = supported;
+}
+
static void blitter_set_running_flag(struct blitter_context_priv *ctx)
{
if (ctx->base.running) {
_debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
__LINE__);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index de063937793..c49faaad717 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -132,12 +132,18 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter);
static INLINE
struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
{
return blitter->pipe;
}
+/**
+ * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
+ */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+ boolean supported);
+
/* The default function to draw a rectangle. This can only be used
* inside of the draw_rectangle callback if the driver overrides it. */
void util_blitter_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 96e246a6e68..17b7e9d2c72 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -630,13 +630,13 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
}
if (!util_format_is_supported(format, usage))
return FALSE;
if (sample_count > 1) {
- if (rscreen->info.drm_minor < 19)
+ if (!rscreen->has_msaa)
return FALSE;
switch (sample_count) {
case 2:
case 4:
case 8:
@@ -1071,17 +1071,30 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
else
view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
S_030004_ARRAY_MODE(array_mode));
view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
- if (state->u.tex.last_level && texture->nr_samples <= 1) {
+
+ /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
+ if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
+ /* XXX the 2x and 4x cases are broken. */
+ if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
+ /* disable FMASK (0 = disabled) */
+ view->tex_resource_words[3] = 0;
+ view->skip_mip_address_reloc = true;
+ } else {
+ /* FMASK should be in MIP_ADDRESS for multisample textures */
+ view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
+ }
+ } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
} else {
view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
}
+
view->tex_resource_words[4] = (word4 |
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
S_030010_ENDIAN_SWAP(endian));
view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) |
S_030014_LAST_ARRAY(state->u.tex.last_layer);
if (texture->nr_samples > 1) {
@@ -1579,15 +1592,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
if (!surf->export_16bpc) {
rctx->framebuffer.export_16bpc = false;
}
- /* Cayman can fetch from a compressed MSAA colorbuffer,
- * so it's pointless to track them. */
- if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+ if (rtex->fmask_size && rtex->cmask_size) {
rctx->framebuffer.compressed_cb_mask |= 1 << i;
}
}
/* Update alpha-test state dependencies.
* Alpha-test is done on the first colorbuffer only. */
@@ -2255,19 +2266,21 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
assert(rview);
r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
r600_write_value(cs, (resource_id_base + resource_index) * 8);
r600_write_array(cs, 8, rview->tex_resource_words);
- /* XXX The kernel needs two relocations. This is stupid. */
reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
RADEON_USAGE_READ);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
- r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
- r600_write_value(cs, reloc);
+
+ if (!rview->skip_mip_address_reloc) {
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, reloc);
+ }
}
state->dirty_mask = 0;
}
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
@@ -3342,12 +3355,22 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx)
memset(&blend, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS);
}
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
+{
+ struct pipe_blend_state blend;
+
+ memset(&blend, 0, sizeof(blend));
+ blend.independent_blend_enable = true;
+ blend.rt[0].colormask = 0xf;
+ return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS);
+}
+
void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa = {{0}};
return rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 98df83de918..edb1a55dc8a 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -483,13 +483,13 @@
#define C_028808_MODE 0xFFFFFF8F
#define V_028808_CB_DISABLE 0x00000000
#define V_028808_CB_NORMAL 0x00000001
#define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002
#define V_028808_CB_RESOLVE 0x00000003
#define V_028808_CB_DECOMPRESS 0x00000004
-#define V_028808_CB_FASK_DECOMPRESS 0x00000005
+#define V_028808_CB_FMASK_DECOMPRESS 0x00000005
#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
#define C_028808_ROP3 0xFF00FFFF
#define R_028810_PA_CL_CLIP_CNTL 0x028810
#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0)
#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 51a2e4ee9e5..f04a92062f6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -252,25 +252,29 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
if (tex == NULL)
return NULL;
LIST_INITHEAD(&tex->list);
return tex;
}
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family)
+void r600_bytecode_init(struct r600_bytecode *bc,
+ enum chip_class chip_class,
+ enum radeon_family family,
+ enum r600_msaa_texture_mode msaa_texture_mode)
{
if ((chip_class == R600) &&
(family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
bc->ar_handling = AR_HANDLE_RV6XX;
bc->r6xx_nop_after_rel_dst = 1;
} else {
bc->ar_handling = AR_HANDLE_NORMAL;
bc->r6xx_nop_after_rel_dst = 0;
}
LIST_INITHEAD(&bc->cf);
bc->chip_class = chip_class;
+ bc->msaa_texture_mode = msaa_texture_mode;
}
static int r600_bytecode_add_cf(struct r600_bytecode *bc)
{
struct r600_bytecode_cf *cf = r600_bytecode_cf();
@@ -1733,12 +1737,13 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod
}
/* common to all 3 families */
static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
{
bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+ EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
@@ -2763,13 +2768,14 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
int i, j, r, fs_size;
struct r600_resource *fetch_shader;
assert(count < 32);
memset(&bc, 0, sizeof(bc));
- r600_bytecode_init(&bc, rctx->chip_class, rctx->family);
+ r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
+ rctx->screen->msaa_texture_support);
for (i = 0; i < count; i++) {
if (elements[i].instance_divisor > 1) {
if (rctx->chip_class == CAYMAN) {
for (j = 0; j < 4; j++) {
struct r600_bytecode_alu alu;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 8a9f3189be0..2c7db2cefd7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -59,12 +59,13 @@ struct r600_bytecode_alu {
unsigned index_mode;
};
struct r600_bytecode_tex {
struct list_head list;
unsigned inst;
+ unsigned inst_mod;
unsigned resource_id;
unsigned src_gpr;
unsigned src_rel;
unsigned dst_gpr;
unsigned dst_rel;
unsigned dst_sel_x;
@@ -192,12 +193,13 @@ struct r600_cf_callstack {
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
struct r600_bytecode {
enum chip_class chip_class;
+ enum r600_msaa_texture_mode msaa_texture_mode;
int type;
struct list_head cf;
struct r600_bytecode_cf *cf_last;
unsigned ndw;
unsigned ncf;
unsigned ngpr;
@@ -216,13 +218,16 @@ struct r600_bytecode {
};
/* eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
/* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family);
+void r600_bytecode_init(struct r600_bytecode *bc,
+ enum chip_class chip_class,
+ enum radeon_family family,
+ enum r600_msaa_texture_mode msaa_texture_mode);
void r600_bytecode_clear(struct r600_bytecode *bc);
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex);
int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);
int r600_bytecode_build(struct r600_bytecode *bc);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 8597b8dfcf7..a19248da3a2 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -249,18 +249,35 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
struct r600_texture *rtex,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer)
{
struct r600_context *rctx = (struct r600_context *)ctx;
unsigned layer, level, checked_last_layer, max_layer;
-
- assert(rctx->chip_class != CAYMAN);
+ void *blend_decompress;
if (!rtex->dirty_level_mask)
return;
+ switch (rctx->screen->msaa_texture_support) {
+ case MSAA_TEXTURE_DECOMPRESSED:
+ blend_decompress = rctx->custom_blend_decompress;
+ break;
+ case MSAA_TEXTURE_COMPRESSED:
+ /* XXX the 2x and 4x cases are broken. */
+ if (rtex->resource.b.b.nr_samples == 8)
+ blend_decompress = rctx->custom_blend_fmask_decompress;
+ else
+ blend_decompress = rctx->custom_blend_decompress;
+ break;
+ case MSAA_TEXTURE_SAMPLE_ZERO:
+ default:
+ /* Nothing to do. */
+ rtex->dirty_level_mask = 0;
+ return;
+ }
+
for (level = first_level; level <= last_level; level++) {
if (!(rtex->dirty_level_mask & (1 << level)))
continue;
/* The smaller the mipmap level, the less layers there are
* as far as 3D textures are concerned. */
@@ -275,14 +292,13 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
surf_tmpl.u.tex.first_layer = layer;
surf_tmpl.u.tex.last_layer = layer;
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
r600_blitter_begin(ctx, R600_DECOMPRESS);
- util_blitter_custom_color(rctx->blitter, cbsurf,
- rctx->custom_blend_decompress);
+ util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress);
r600_blitter_end(ctx);
pipe_surface_reference(&cbsurf, NULL);
}
/* The texture will always be dirty if some layers or samples aren't flushed.
@@ -296,19 +312,12 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
void r600_decompress_color_textures(struct r600_context *rctx,
struct r600_samplerview_state *textures)
{
unsigned i;
unsigned mask = textures->compressed_colortex_mask;
- /* Cayman cannot decompress an MSAA colorbuffer,
- * but it can read it compressed, so skip this. */
- assert(rctx->chip_class != CAYMAN);
- if (rctx->chip_class == CAYMAN) {
- return;
- }
-
while (mask) {
struct pipe_sampler_view *view;
struct r600_texture *tex;
i = u_bit_scan(&mask);
@@ -330,24 +339,23 @@ void r600_decompress_color_textures(struct r600_context *rctx,
* rendering. */
static bool r600_decompress_subresource(struct pipe_context *ctx,
struct pipe_resource *tex,
unsigned level,
unsigned first_layer, unsigned last_layer)
{
- struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_texture *rtex = (struct r600_texture*)tex;
if (rtex->is_depth && !rtex->is_flushing_texture) {
if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
return false; /* error */
r600_blit_decompress_depth(ctx, rtex, NULL,
level, level,
first_layer, last_layer,
0, u_max_sample(tex));
- } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+ } else if (rtex->fmask_size && rtex->cmask_size) {
r600_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
return true;
}
@@ -456,12 +464,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
struct pipe_box sbox;
+ bool copy_all_samples;
/* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
r600_copy_buffer(ctx, dst, dstx, src, src_box);
return;
}
@@ -555,22 +564,21 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
src_width0, src_height0);
} else {
src_view = r600_create_sampler_view_custom(ctx, src, &src_templ,
src_widthFL, src_heightFL);
}
+ copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
/* Copy. */
- /* XXX Multisample texturing is unimplemented on Cayman. In the meantime,
- * copy only the first sample (which is the only one that is uncompressed
- * and therefore doesn't return garbage). */
r600_blitter_begin(ctx, R600_COPY_TEXTURE);
util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
abs(src_box->width), abs(src_box->height),
src_view, src_box, src_width0, src_height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
- rctx->chip_class != CAYMAN);
+ copy_all_samples);
r600_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 916fa381a33..7a1e1353553 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -168,12 +168,15 @@ static void r600_destroy_context(struct pipe_context *context)
if (rctx->custom_blend_resolve) {
rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_resolve);
}
if (rctx->custom_blend_decompress) {
rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress);
}
+ if (rctx->custom_blend_fmask_decompress) {
+ rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress);
+ }
util_unreference_framebuffer_state(&rctx->framebuffer.state);
r600_context_fini(rctx);
if (rctx->blitter) {
util_blitter_destroy(rctx->blitter);
@@ -261,12 +264,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
evergreen_init_atom_start_compute_cs(rctx);
if (evergreen_context_init(rctx))
goto fail;
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
+ rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx);
rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
rctx->family == CHIP_PALM ||
rctx->family == CHIP_SUMO ||
rctx->family == CHIP_SUMO2 ||
rctx->family == CHIP_CAICOS ||
rctx->family == CHIP_CAYMAN ||
@@ -286,12 +290,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (!rctx->uploader)
goto fail;
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL)
goto fail;
+ util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
rctx->blitter->draw_rectangle = r600_draw_rectangle;
r600_begin_new_cs(rctx);
r600_get_backend_mask(rctx); /* this emits commands and must be last */
rctx->dummy_pixel_shader =
@@ -390,21 +395,23 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 130;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
/* Supported except the original R600. */
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
/* R600 doesn't support per-MRT blends */
return family == CHIP_R600 ? 0 : 1;
@@ -944,12 +951,32 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
case EVERGREEN:
case CAYMAN:
rscreen->has_streamout = rscreen->info.drm_minor >= 14;
break;
}
+ /* MSAA support. */
+ switch (rscreen->chip_class) {
+ case R600:
+ case R700:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 22;
+ rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
+ break;
+ case EVERGREEN:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+ rscreen->msaa_texture_support =
+ rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED :
+ MSAA_TEXTURE_DECOMPRESSED;
+ break;
+ case CAYMAN:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+ /* We should be able to read compressed MSAA textures, but it doesn't work. */
+ rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
+ break;
+ }
+
if (r600_init_tiling(rscreen)) {
FREE(rscreen);
return NULL;
}
rscreen->screen.destroy = r600_destroy_screen;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 17dab7f23d5..238ab1676f4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -181,19 +181,37 @@ struct r600_pipe_fences {
struct list_head blocks;
/* linked list of freed fences */
struct list_head pool;
pipe_mutex mutex;
};
+enum r600_msaa_texture_mode {
+ /* If the hw can fetch the first sample only (no decompression available).
+ * This means MSAA texturing is not fully implemented. */
+ MSAA_TEXTURE_SAMPLE_ZERO,
+
+ /* If the hw can fetch decompressed MSAA textures.
+ * Supported families: R600, R700, Evergreen.
+ * Cayman cannot use this, because it cannot do the decompression. */
+ MSAA_TEXTURE_DECOMPRESSED,
+
+ /* If the hw can fetch compressed MSAA textures, which means shaders can
+ * read resolved FMASK. This yields the best performance.
+ * Supported families: Evergreen, Cayman. */
+ MSAA_TEXTURE_COMPRESSED
+};
+
struct r600_screen {
struct pipe_screen screen;
struct radeon_winsys *ws;
unsigned family;
enum chip_class chip_class;
struct radeon_info info;
bool has_streamout;
+ bool has_msaa;
+ enum r600_msaa_texture_mode msaa_texture_support;
struct r600_tiling_info tiling_info;
struct r600_pipe_fences fences;
/*for compute global memory binding, we allocate stuff here, instead of
* buffers.
* XXX: Not sure if this is the best place for global_pool. Also,
@@ -202,12 +220,13 @@ struct r600_screen {
};
struct r600_pipe_sampler_view {
struct pipe_sampler_view base;
struct r600_resource *tex_resource;
uint32_t tex_resource_words[8];
+ bool skip_mip_address_reloc;
};
struct r600_rasterizer_state {
struct r600_command_buffer buffer;
boolean flatshade;
boolean two_side;
@@ -369,12 +388,13 @@ struct r600_context {
unsigned max_db; /* for OQ */
/* Miscellaneous state objects. */
void *custom_dsa_flush;
void *custom_blend_resolve;
void *custom_blend_decompress;
+ void *custom_blend_fmask_decompress;
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
void *dummy_pixel_shader;
/* These dummy CMASK and FMASK buffers are used to get around the R6xx hardware
* bug where valid CMASK and FMASK are required to be present to avoid
* a hardlock in certain operations but aren't actually used
@@ -522,12 +542,13 @@ void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
void *evergreen_create_resolve_blend(struct r600_context *rctx);
void *evergreen_create_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
boolean evergreen_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage);
void evergreen_init_color_surface(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c56efda5347..0b586f3aedb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1177,13 +1177,14 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.native_integers = true;
- r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
+ r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+ rscreen->msaa_texture_support);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
@@ -3793,16 +3794,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_tex tex;
struct r600_bytecode_alu alu;
unsigned src_gpr;
int r, i, j;
int opcode;
+ bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
+ inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+ (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+ inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
- const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
- tgsi_tex_src_requires_loading(ctx, 0);
+ const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+ tgsi_tex_src_requires_loading(ctx, 0)) ||
+ read_compressed_msaa;
boolean src_loaded = FALSE;
unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
@@ -4067,12 +4073,133 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
return r;
}
src_loaded = TRUE;
src_gpr = ctx->temp_reg;
}
+ /* Obtain the sample index for reading a compressed MSAA color texture.
+ * To read the FMASK, we use the ldfptr instruction, which tells us
+ * where the samples are stored.
+ * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
+ * which is the identity mapping. Each nibble says which physical sample
+ * should be fetched to get that sample.
+ *
+ * Assume src.z contains the sample index. It should be modified like this:
+ * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
+ * Then fetch the texel with src.
+ */
+ if (read_compressed_msaa) {
+ unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4;
+ unsigned temp = r600_get_temp(ctx);
+ assert(src_loaded);
+
+ /* temp.w = ldfptr() */
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.inst = SQ_TEX_INST_LD;
+ tex.inst_mod = 1; /* to indicate this is ldfptr */
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.src_gpr = src_gpr;
+ tex.dst_gpr = temp;
+ tex.dst_sel_x = 7; /* mask out these components */
+ tex.dst_sel_y = 7;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 0; /* store X */
+ tex.src_sel_x = 0;
+ tex.src_sel_y = 1;
+ tex.src_sel_z = 2;
+ tex.src_sel_w = 3;
+ tex.offset_x = offset_x;
+ tex.offset_y = offset_y;
+ tex.offset_z = offset_z;
+ r = r600_bytecode_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+
+ /* temp.x = sample_index*4 */
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0 ; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.inst = ctx->inst_info->r600_opcode;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = sample_chan;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 4;
+ alu.dst.sel = temp;
+ alu.dst.chan = i;
+ alu.dst.write = i == 0;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = sample_chan;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 4;
+ alu.dst.sel = temp;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* sample_index = temp.w >> temp.x */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
+ alu.src[0].sel = temp;
+ alu.src[0].chan = 3;
+ alu.src[1].sel = temp;
+ alu.src[1].chan = 0;
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = sample_chan;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* sample_index & 0xF */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = sample_chan;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0xF;
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = sample_chan;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+#if 0
+ /* visualize the FMASK */
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = sample_chan;
+ alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+#endif
+ }
+
opcode = ctx->inst_info->r600_opcode;
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 4b2a19a07f7..587f88deb9e 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -372,12 +372,15 @@
#define S_SQ_TEX_WORD0_TEX_INST(x) (((x) & 0x1F) << 0)
#define G_SQ_TEX_WORD0_TEX_INST(x) (((x) >> 0) & 0x1F)
#define C_SQ_TEX_WORD0_TEX_INST 0xFFFFFFE0
#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5)
#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1)
#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF
+#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5)
+#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3)
+#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F
#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7)
#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1)
#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F
#define S_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) & 0xFF) << 8)
#define G_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) >> 8) & 0xFF)
#define C_SQ_TEX_WORD0_RESOURCE_ID 0xFFFF00FF
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 7d07008f16d..1a8d55e8d36 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -582,13 +582,13 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
}
if (!util_format_is_supported(format, usage))
return FALSE;
if (sample_count > 1) {
- if (rscreen->info.drm_minor < 22)
+ if (!rscreen->has_msaa)
return FALSE;
/* R11G11B10 is broken on R6xx. */
if (rscreen->chip_class == R600 &&
format == PIPE_FORMAT_R11G11B10_FLOAT)
return FALSE;
@@ -1985,13 +1985,12 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
assert(rview);
r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
r600_write_value(cs, (resource_id_base + resource_index) * 7);
r600_write_array(cs, 7, rview->tex_resource_words);
- /* XXX The kernel needs two relocations. This is stupid. */
reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
RADEON_USAGE_READ);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 65985c7653d..a4d3e461ef1 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -590,14 +590,14 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
if (rtex->is_depth && !rtex->is_flushing_texture) {
dst->views.compressed_depthtex_mask |= 1 << i;
} else {
dst->views.compressed_depthtex_mask &= ~(1 << i);
}
- /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */
- if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) {
+ /* Track compressed colorbuffers. */
+ if (rtex->cmask_size && rtex->fmask_size) {
dst->views.compressed_colortex_mask |= 1 << i;
} else {
dst->views.compressed_colortex_mask &= ~(1 << i);
}
/* Changing from array to non-arrays textures and vice versa requires