summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/util/u_blitter.c 8
-rw-r--r-- src/gallium/auxiliary/util/u_blitter.h 6
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c 39
-rw-r--r-- src/gallium/drivers/r600/evergreend.h 2
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 10
-rw-r--r-- src/gallium/drivers/r600/r600_asm.h 7
-rw-r--r-- src/gallium/drivers/r600/r600_blit.c 42
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c 29
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 21
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 133
-rw-r--r-- src/gallium/drivers/r600/r600_sq.h 3
-rw-r--r-- src/gallium/drivers/r600/r600_state.c 3
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 4
13 files changed, 270 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4d6cdd7a244..f4ac4aa8685 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -356,12 +356,20 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_sampler_state(pipe, ctx->sampler_state);
pipe->delete_sampler_state(pipe, ctx->sampler_state_linear);
u_upload_destroy(ctx->upload);
FREE(ctx);
}
+/* Store the caller's multisample-texture capability flag in the private
+ * blitter context, replacing whatever value it currently holds. */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+                                          boolean supported)
+{
+   ((struct blitter_context_priv *)blitter)->has_texture_multisample = supported;
+}
+
static void blitter_set_running_flag(struct blitter_context_priv *ctx)
{
if (ctx->base.running) {
_debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
__LINE__);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index de063937793..c49faaad717 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -132,12 +132,18 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter);
static INLINE
struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
{
return blitter->pipe;
}
+/**
+ * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
+ */
+void util_blitter_set_texture_multisample(struct blitter_context *blitter,
+ boolean supported);
+
/* The default function to draw a rectangle. This can only be used
* inside of the draw_rectangle callback if the driver overrides it. */
void util_blitter_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 96e246a6e68..17b7e9d2c72 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -630,13 +630,13 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
}
if (!util_format_is_supported(format, usage))
return FALSE;
if (sample_count > 1) {
- if (rscreen->info.drm_minor < 19)
+ if (!rscreen->has_msaa)
return FALSE;
switch (sample_count) {
case 2:
case 4:
case 8:
@@ -1071,17 +1071,30 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
else
view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
S_030004_ARRAY_MODE(array_mode));
view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
- if (state->u.tex.last_level && texture->nr_samples <= 1) {
+
+ /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
+ if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
+ /* XXX the 2x and 4x cases are broken. */
+ if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
+ /* disable FMASK (0 = disabled) */
+ view->tex_resource_words[3] = 0;
+ view->skip_mip_address_reloc = true;
+ } else {
+ /* FMASK should be in MIP_ADDRESS for multisample textures */
+ view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
+ }
+ } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
} else {
view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
}
+
view->tex_resource_words[4] = (word4 |
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
S_030010_ENDIAN_SWAP(endian));
view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) |
S_030014_LAST_ARRAY(state->u.tex.last_layer);
if (texture->nr_samples > 1) {
@@ -1579,15 +1592,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
}
if (!surf->export_16bpc) {
rctx->framebuffer.export_16bpc = false;
}
- /* Cayman can fetch from a compressed MSAA colorbuffer,
- * so it's pointless to track them. */
- if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+ if (rtex->fmask_size && rtex->cmask_size) {
rctx->framebuffer.compressed_cb_mask |= 1 << i;
}
}
/* Update alpha-test state dependencies.
* Alpha-test is done on the first colorbuffer only. */
@@ -2255,19 +2266,21 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
assert(rview);
r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
r600_write_value(cs, (resource_id_base + resource_index) * 8);
r600_write_array(cs, 8, rview->tex_resource_words);
- /* XXX The kernel needs two relocations. This is stupid. */
reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
RADEON_USAGE_READ);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
- r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
- r600_write_value(cs, reloc);
+
+ if (!rview->skip_mip_address_reloc) {
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, reloc);
+ }
}
state->dirty_mask = 0;
}
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
@@ -3342,12 +3355,22 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx)
memset(&blend, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS);
}
+/* Create the blend state object for the FMASK-decompress pass.
+ * The template is all zeroes except for independent blending and a
+ * colormask of 0xf on rt[0]; V_028808_CB_FMASK_DECOMPRESS is passed as the
+ * mode argument. Returns whatever evergreen_create_blend_state_mode returns. */
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
+{
+	struct pipe_blend_state fmask_blend;
+
+	memset(&fmask_blend, 0, sizeof(fmask_blend));
+	fmask_blend.rt[0].colormask = 0xf;
+	fmask_blend.independent_blend_enable = true;
+
+	return evergreen_create_blend_state_mode(&rctx->context, &fmask_blend,
+						 V_028808_CB_FMASK_DECOMPRESS);
+}
+
void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
{
struct pipe_depth_stencil_alpha_state dsa = {{0}};
return rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
}
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 98df83de918..edb1a55dc8a 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -483,13 +483,13 @@
#define C_028808_MODE 0xFFFFFF8F
#define V_028808_CB_DISABLE 0x00000000
#define V_028808_CB_NORMAL 0x00000001
#define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002
#define V_028808_CB_RESOLVE 0x00000003
#define V_028808_CB_DECOMPRESS 0x00000004
-#define V_028808_CB_FASK_DECOMPRESS 0x00000005
+#define V_028808_CB_FMASK_DECOMPRESS 0x00000005
#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
#define C_028808_ROP3 0xFF00FFFF
#define R_028810_PA_CL_CLIP_CNTL 0x028810
#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0)
#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 51a2e4ee9e5..f04a92062f6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -252,25 +252,29 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
if (tex == NULL)
return NULL;
LIST_INITHEAD(&tex->list);
return tex;
}
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family)
+void r600_bytecode_init(struct r600_bytecode *bc,
+ enum chip_class chip_class,
+ enum radeon_family family,
+ enum r600_msaa_texture_mode msaa_texture_mode)
{
if ((chip_class == R600) &&
(family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
bc->ar_handling = AR_HANDLE_RV6XX;
bc->r6xx_nop_after_rel_dst = 1;
} else {
bc->ar_handling = AR_HANDLE_NORMAL;
bc->r6xx_nop_after_rel_dst = 0;
}
LIST_INITHEAD(&bc->cf);
bc->chip_class = chip_class;
+ bc->msaa_texture_mode = msaa_texture_mode;
}
static int r600_bytecode_add_cf(struct r600_bytecode *bc)
{
struct r600_bytecode_cf *cf = r600_bytecode_cf();
@@ -1733,12 +1737,13 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod
}
/* common to all 3 families */
static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
{
bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
+ EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
@@ -2763,13 +2768,14 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
int i, j, r, fs_size;
struct r600_resource *fetch_shader;
assert(count < 32);
memset(&bc, 0, sizeof(bc));
- r600_bytecode_init(&bc, rctx->chip_class, rctx->family);
+ r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
+ rctx->screen->msaa_texture_support);
for (i = 0; i < count; i++) {
if (elements[i].instance_divisor > 1) {
if (rctx->chip_class == CAYMAN) {
for (j = 0; j < 4; j++) {
struct r600_bytecode_alu alu;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 8a9f3189be0..2c7db2cefd7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -59,12 +59,13 @@ struct r600_bytecode_alu {
unsigned index_mode;
};
struct r600_bytecode_tex {
struct list_head list;
unsigned inst;
+ unsigned inst_mod;
unsigned resource_id;
unsigned src_gpr;
unsigned src_rel;
unsigned dst_gpr;
unsigned dst_rel;
unsigned dst_sel_x;
@@ -192,12 +193,13 @@ struct r600_cf_callstack {
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
struct r600_bytecode {
enum chip_class chip_class;
+ enum r600_msaa_texture_mode msaa_texture_mode;
int type;
struct list_head cf;
struct r600_bytecode_cf *cf_last;
unsigned ndw;
unsigned ncf;
unsigned ngpr;
@@ -216,13 +218,16 @@ struct r600_bytecode {
};
/* eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
/* r600_asm.c */
-void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family);
+void r600_bytecode_init(struct r600_bytecode *bc,
+ enum chip_class chip_class,
+ enum radeon_family family,
+ enum r600_msaa_texture_mode msaa_texture_mode);
void r600_bytecode_clear(struct r600_bytecode *bc);
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex);
int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);
int r600_bytecode_build(struct r600_bytecode *bc);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 8597b8dfcf7..a19248da3a2 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -249,18 +249,35 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
struct r600_texture *rtex,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer)
{
struct r600_context *rctx = (struct r600_context *)ctx;
unsigned layer, level, checked_last_layer, max_layer;
-
- assert(rctx->chip_class != CAYMAN);
+ void *blend_decompress;
if (!rtex->dirty_level_mask)
return;
+ switch (rctx->screen->msaa_texture_support) {
+ case MSAA_TEXTURE_DECOMPRESSED:
+ blend_decompress = rctx->custom_blend_decompress;
+ break;
+ case MSAA_TEXTURE_COMPRESSED:
+ /* XXX the 2x and 4x cases are broken. */
+ if (rtex->resource.b.b.nr_samples == 8)
+ blend_decompress = rctx->custom_blend_fmask_decompress;
+ else
+ blend_decompress = rctx->custom_blend_decompress;
+ break;
+ case MSAA_TEXTURE_SAMPLE_ZERO:
+ default:
+ /* Nothing to do. */
+ rtex->dirty_level_mask = 0;
+ return;
+ }
+
for (level = first_level; level <= last_level; level++) {
if (!(rtex->dirty_level_mask & (1 << level)))
continue;
/* The smaller the mipmap level, the less layers there are
* as far as 3D textures are concerned. */
@@ -275,14 +292,13 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
surf_tmpl.u.tex.first_layer = layer;
surf_tmpl.u.tex.last_layer = layer;
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
r600_blitter_begin(ctx, R600_DECOMPRESS);
- util_blitter_custom_color(rctx->blitter, cbsurf,
- rctx->custom_blend_decompress);
+ util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress);
r600_blitter_end(ctx);
pipe_surface_reference(&cbsurf, NULL);
}
/* The texture will always be dirty if some layers or samples aren't flushed.
@@ -296,19 +312,12 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
void r600_decompress_color_textures(struct r600_context *rctx,
struct r600_samplerview_state *textures)
{
unsigned i;
unsigned mask = textures->compressed_colortex_mask;
- /* Cayman cannot decompress an MSAA colorbuffer,
- * but it can read it compressed, so skip this. */
- assert(rctx->chip_class != CAYMAN);
- if (rctx->chip_class == CAYMAN) {
- return;
- }
-
while (mask) {
struct pipe_sampler_view *view;
struct r600_texture *tex;
i = u_bit_scan(&mask);
@@ -330,24 +339,23 @@ void r600_decompress_color_textures(struct r600_context *rctx,
* rendering. */
static bool r600_decompress_subresource(struct pipe_context *ctx,
struct pipe_resource *tex,
unsigned level,
unsigned first_layer, unsigned last_layer)
{
- struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_texture *rtex = (struct r600_texture*)tex;
if (rtex->is_depth && !rtex->is_flushing_texture) {
if (!r600_init_flushed_depth_texture(ctx, tex, NULL))
return false; /* error */
r600_blit_decompress_depth(ctx, rtex, NULL,
level, level,
first_layer, last_layer,
0, u_max_sample(tex));
- } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
+ } else if (rtex->fmask_size && rtex->cmask_size) {
r600_blit_decompress_color(ctx, rtex, level, level,
first_layer, last_layer);
}
return true;
}
@@ -456,12 +464,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
struct pipe_box sbox;
+ bool copy_all_samples;
/* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
r600_copy_buffer(ctx, dst, dstx, src, src_box);
return;
}
@@ -555,22 +564,21 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
src_width0, src_height0);
} else {
src_view = r600_create_sampler_view_custom(ctx, src, &src_templ,
src_widthFL, src_heightFL);
}
+ copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
/* Copy. */
- /* XXX Multisample texturing is unimplemented on Cayman. In the meantime,
- * copy only the first sample (which is the only one that is uncompressed
- * and therefore doesn't return garbage). */
r600_blitter_begin(ctx, R600_COPY_TEXTURE);
util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
abs(src_box->width), abs(src_box->height),
src_view, src_box, src_width0, src_height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
- rctx->chip_class != CAYMAN);
+ copy_all_samples);
r600_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 916fa381a33..7a1e1353553 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -168,12 +168,15 @@ static void r600_destroy_context(struct pipe_context *context)
if (rctx->custom_blend_resolve) {
rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_resolve);
}
if (rctx->custom_blend_decompress) {
rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress);
}
+ if (rctx->custom_blend_fmask_decompress) {
+ rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress);
+ }
util_unreference_framebuffer_state(&rctx->framebuffer.state);
r600_context_fini(rctx);
if (rctx->blitter) {
util_blitter_destroy(rctx->blitter);
@@ -261,12 +264,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
evergreen_init_atom_start_compute_cs(rctx);
if (evergreen_context_init(rctx))
goto fail;
rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
+ rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx);
rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
rctx->family == CHIP_PALM ||
rctx->family == CHIP_SUMO ||
rctx->family == CHIP_SUMO2 ||
rctx->family == CHIP_CAICOS ||
rctx->family == CHIP_CAYMAN ||
@@ -286,12 +290,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (!rctx->uploader)
goto fail;
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL)
goto fail;
+ util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
rctx->blitter->draw_rectangle = r600_draw_rectangle;
r600_begin_new_cs(rctx);
r600_get_backend_mask(rctx); /* this emits commands and must be last */
rctx->dummy_pixel_shader =
@@ -390,21 +395,23 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 130;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
+
/* Supported except the original R600. */
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
/* R600 doesn't support per-MRT blends */
return family == CHIP_R600 ? 0 : 1;
@@ -944,12 +951,32 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
case EVERGREEN:
case CAYMAN:
rscreen->has_streamout = rscreen->info.drm_minor >= 14;
break;
}
+ /* MSAA support. */
+ switch (rscreen->chip_class) {
+ case R600:
+ case R700:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 22;
+ rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
+ break;
+ case EVERGREEN:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+ rscreen->msaa_texture_support =
+ rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED :
+ MSAA_TEXTURE_DECOMPRESSED;
+ break;
+ case CAYMAN:
+ rscreen->has_msaa = rscreen->info.drm_minor >= 19;
+ /* We should be able to read compressed MSAA textures, but it doesn't work. */
+ rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
+ break;
+ }
+
if (r600_init_tiling(rscreen)) {
FREE(rscreen);
return NULL;
}
rscreen->screen.destroy = r600_destroy_screen;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 17dab7f23d5..238ab1676f4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -181,19 +181,37 @@ struct r600_pipe_fences {
struct list_head blocks;
/* linked list of freed fences */
struct list_head pool;
pipe_mutex mutex;
};
+enum r600_msaa_texture_mode {
+ /* If the hw can fetch the first sample only (no decompression available).
+ * This means MSAA texturing is not fully implemented: in this mode
+ * r600_get_param reports PIPE_CAP_TEXTURE_MULTISAMPLE as 0,
+ * r600_blit_decompress_color only clears dirty_level_mask, and
+ * r600_resource_copy_region does not ask the blitter to copy all samples. */
+ MSAA_TEXTURE_SAMPLE_ZERO,
+
+ /* If the hw can fetch decompressed MSAA textures.
+ * Supported families: R600, R700, Evergreen.
+ * Cayman cannot use this, because it cannot do the decompression.
+ * r600_blit_decompress_color runs the custom_blend_decompress pass
+ * in this mode. */
+ MSAA_TEXTURE_DECOMPRESSED,
+
+ /* If the hw can fetch compressed MSAA textures, which means shaders can
+ * read resolved FMASK. This yields the best performance.
+ * Supported families: Evergreen, Cayman.
+ * In this mode tgsi_tex prepends an ldfptr fetch to TXF on 2D_MSAA and
+ * 2D_ARRAY_MSAA targets to remap the sample index.
+ * NOTE: r600_screen_create currently selects this only for Evergreen with
+ * drm_minor >= 24; Cayman is set to MSAA_TEXTURE_SAMPLE_ZERO there because
+ * compressed reads do not work on it yet. */
+ MSAA_TEXTURE_COMPRESSED
+};
+
struct r600_screen {
struct pipe_screen screen;
struct radeon_winsys *ws;
unsigned family;
enum chip_class chip_class;
struct radeon_info info;
bool has_streamout;
+ bool has_msaa;
+ enum r600_msaa_texture_mode msaa_texture_support;
struct r600_tiling_info tiling_info;
struct r600_pipe_fences fences;
/*for compute global memory binding, we allocate stuff here, instead of
* buffers.
* XXX: Not sure if this is the best place for global_pool. Also,
@@ -202,12 +220,13 @@ struct r600_screen {
};
struct r600_pipe_sampler_view {
struct pipe_sampler_view base;
struct r600_resource *tex_resource;
uint32_t tex_resource_words[8];
+ bool skip_mip_address_reloc;
};
struct r600_rasterizer_state {
struct r600_command_buffer buffer;
boolean flatshade;
boolean two_side;
@@ -369,12 +388,13 @@ struct r600_context {
unsigned max_db; /* for OQ */
/* Miscellaneous state objects. */
void *custom_dsa_flush;
void *custom_blend_resolve;
void *custom_blend_decompress;
+ void *custom_blend_fmask_decompress;
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
void *dummy_pixel_shader;
/* These dummy CMASK and FMASK buffers are used to get around the R6xx hardware
* bug where valid CMASK and FMASK are required to be present to avoid
* a hardlock in certain operations but aren't actually used
@@ -522,12 +542,13 @@ void evergreen_init_state_functions(struct r600_context *rctx);
void evergreen_init_atom_start_cs(struct r600_context *rctx);
void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
void *evergreen_create_resolve_blend(struct r600_context *rctx);
void *evergreen_create_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
boolean evergreen_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage);
void evergreen_init_color_surface(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c56efda5347..0b586f3aedb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1177,13 +1177,14 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.native_integers = true;
- r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
+ r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+ rscreen->msaa_texture_support);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
@@ -3793,16 +3794,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_tex tex;
struct r600_bytecode_alu alu;
unsigned src_gpr;
int r, i, j;
int opcode;
+ bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
+ inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+ (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+ inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
- const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
- tgsi_tex_src_requires_loading(ctx, 0);
+ const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
+ tgsi_tex_src_requires_loading(ctx, 0)) ||
+ read_compressed_msaa;
boolean src_loaded = FALSE;
unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
@@ -4067,12 +4073,133 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
return r;
}
src_loaded = TRUE;
src_gpr = ctx->temp_reg;
}
+	/* Obtain the sample index for reading a compressed MSAA color texture.
+	 * To read the FMASK, we use the ldfptr instruction, which tells us
+	 * where the samples are stored.
+	 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
+	 * which is the identity mapping. Each nibble says which physical sample
+	 * should be fetched to get that sample.
+	 *
+	 * TXF carries the sample index in src.w for both 2D_MSAA and
+	 * 2D_ARRAY_MSAA (for the array target src.z is the layer, so it must
+	 * not be touched). src.w is rewritten like this:
+	 *   src.w = (ldfptr() >> (src.w * 4)) & 0xF;
+	 * Then fetch the texel with src.
+	 */
+	if (read_compressed_msaa) {
+		/* GPR channels are numbered 0..3 (x..w); the sample index is
+		 * always in w. There is no channel 4. */
+		unsigned sample_chan = 3;
+		unsigned temp = r600_get_temp(ctx);
+		assert(src_loaded);
+
+		/* temp.w = ldfptr() */
+		memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+		tex.inst = SQ_TEX_INST_LD;
+		tex.inst_mod = 1; /* to indicate this is ldfptr */
+		tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+		tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+		tex.src_gpr = src_gpr;
+		tex.dst_gpr = temp;
+		tex.dst_sel_x = 7; /* mask out these components */
+		tex.dst_sel_y = 7;
+		tex.dst_sel_z = 7;
+		tex.dst_sel_w = 0; /* store X */
+		tex.src_sel_x = 0;
+		tex.src_sel_y = 1;
+		tex.src_sel_z = 2;
+		tex.src_sel_w = 3;
+		tex.offset_x = offset_x;
+		tex.offset_y = offset_y;
+		tex.offset_z = offset_z;
+		r = r600_bytecode_add_tex(ctx->bc, &tex);
+		if (r)
+			return r;
+
+		/* temp.x = sample_index*4 */
+		if (ctx->bc->chip_class == CAYMAN) {
+			/* The multiply is issued in all four slots; only the
+			 * x result is written back. */
+			for (i = 0 ; i < 4; i++) {
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				/* ctx->inst_info describes the TXF being translated,
+				 * so its opcode is a texture opcode. Request the
+				 * integer multiply explicitly, as the non-Cayman
+				 * path does. */
+				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+				alu.src[0].sel = src_gpr;
+				alu.src[0].chan = sample_chan;
+				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[1].value = 4;
+				alu.dst.sel = temp;
+				alu.dst.chan = i;
+				alu.dst.write = i == 0;
+				if (i == 3)
+					alu.last = 1;
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
+				if (r)
+					return r;
+			}
+		} else {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
+			alu.src[0].sel = src_gpr;
+			alu.src[0].chan = sample_chan;
+			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+			alu.src[1].value = 4;
+			alu.dst.sel = temp;
+			alu.dst.chan = 0;
+			alu.dst.write = 1;
+			alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+
+		/* sample_index = temp.w >> temp.x */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
+		alu.src[0].sel = temp;
+		alu.src[0].chan = 3;
+		alu.src[1].sel = temp;
+		alu.src[1].chan = 0;
+		alu.dst.sel = src_gpr;
+		alu.dst.chan = sample_chan;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		/* sample_index & 0xF */
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
+		alu.src[0].sel = src_gpr;
+		alu.src[0].chan = sample_chan;
+		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+		alu.src[1].value = 0xF;
+		alu.dst.sel = src_gpr;
+		alu.dst.chan = sample_chan;
+		alu.dst.write = 1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+#if 0
+		/* visualize the FMASK */
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+			alu.src[0].sel = src_gpr;
+			alu.src[0].chan = sample_chan;
+			alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+			alu.dst.chan = i;
+			alu.dst.write = 1;
+			alu.last = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		return 0;
+#endif
+	}
+
opcode = ctx->inst_info->r600_opcode;
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 4b2a19a07f7..587f88deb9e 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -372,12 +372,15 @@
#define S_SQ_TEX_WORD0_TEX_INST(x) (((x) & 0x1F) << 0)
#define G_SQ_TEX_WORD0_TEX_INST(x) (((x) >> 0) & 0x1F)
#define C_SQ_TEX_WORD0_TEX_INST 0xFFFFFFE0
#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5)
#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1)
#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF
+#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5)
+#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3)
+#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F
#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7)
#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1)
#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F
#define S_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) & 0xFF) << 8)
#define G_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) >> 8) & 0xFF)
#define C_SQ_TEX_WORD0_RESOURCE_ID 0xFFFF00FF
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 7d07008f16d..1a8d55e8d36 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -582,13 +582,13 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
}
if (!util_format_is_supported(format, usage))
return FALSE;
if (sample_count > 1) {
- if (rscreen->info.drm_minor < 22)
+ if (!rscreen->has_msaa)
return FALSE;
/* R11G11B10 is broken on R6xx. */
if (rscreen->chip_class == R600 &&
format == PIPE_FORMAT_R11G11B10_FLOAT)
return FALSE;
@@ -1985,13 +1985,12 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
assert(rview);
r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
r600_write_value(cs, (resource_id_base + resource_index) * 7);
r600_write_array(cs, 7, rview->tex_resource_words);
- /* XXX The kernel needs two relocations. This is stupid. */
reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
RADEON_USAGE_READ);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
r600_write_value(cs, reloc);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 65985c7653d..a4d3e461ef1 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -590,14 +590,14 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
if (rtex->is_depth && !rtex->is_flushing_texture) {
dst->views.compressed_depthtex_mask |= 1 << i;
} else {
dst->views.compressed_depthtex_mask &= ~(1 << i);
}
- /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */
- if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) {
+ /* Track compressed colorbuffers. */
+ if (rtex->cmask_size && rtex->fmask_size) {
dst->views.compressed_colortex_mask |= 1 << i;
} else {
dst->views.compressed_colortex_mask &= ~(1 << i);
}
/* Changing from array to non-arrays textures and vice versa requires