summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2012-10-12 18:46:32 +0200
committerMarek Olšák <maraeo@gmail.com>2012-10-29 12:51:41 +0100
commit96ed6c90eff58ce030c39c2b4db6daf512586b34 (patch)
tree5fc59c951dd1fc7ac3f70354f6f4e3581ba5b8aa
parentb3921e1f53833420e0a0fd581f741744e7957a05 (diff)
r600g: implement texturing with 8x MSAA compressed surfaces for Evergreen
The 2x and 4x MSAA cases are completely broken. The ldfptr instruction returns garbage there. The 8x MSAA case is broken on Cayman, though at least the result looks somewhat correct. Only the 8x MSAA case works on Evergreen and is enabled.
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c8
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h6
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c39
-rw-r--r--src/gallium/drivers/r600/evergreend.h2
-rw-r--r--src/gallium/drivers/r600/r600_asm.c10
-rw-r--r--src/gallium/drivers/r600/r600_asm.h7
-rw-r--r--src/gallium/drivers/r600/r600_blit.c42
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c29
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h21
-rw-r--r--src/gallium/drivers/r600/r600_shader.c133
-rw-r--r--src/gallium/drivers/r600/r600_sq.h3
-rw-r--r--src/gallium/drivers/r600/r600_state.c3
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c4
13 files changed, 270 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 4d6cdd7a244..f4ac4aa8685 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -359,6 +359,14 @@ void util_blitter_destroy(struct blitter_context *blitter)
359 FREE(ctx); 359 FREE(ctx);
360} 360}
361 361
362void util_blitter_set_texture_multisample(struct blitter_context *blitter,
363 boolean supported)
364{
365 struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
366
367 ctx->has_texture_multisample = supported;
368}
369
362static void blitter_set_running_flag(struct blitter_context_priv *ctx) 370static void blitter_set_running_flag(struct blitter_context_priv *ctx)
363{ 371{
364 if (ctx->base.running) { 372 if (ctx->base.running) {
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index de063937793..c49faaad717 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -135,6 +135,12 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
135 return blitter->pipe; 135 return blitter->pipe;
136} 136}
137 137
138/**
139 * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
140 */
141void util_blitter_set_texture_multisample(struct blitter_context *blitter,
142 boolean supported);
143
138/* The default function to draw a rectangle. This can only be used 144/* The default function to draw a rectangle. This can only be used
139 * inside of the draw_rectangle callback if the driver overrides it. */ 145 * inside of the draw_rectangle callback if the driver overrides it. */
140void util_blitter_draw_rectangle(struct blitter_context *blitter, 146void util_blitter_draw_rectangle(struct blitter_context *blitter,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 96e246a6e68..17b7e9d2c72 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -633,7 +633,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
633 return FALSE; 633 return FALSE;
634 634
635 if (sample_count > 1) { 635 if (sample_count > 1) {
636 if (rscreen->info.drm_minor < 19) 636 if (!rscreen->has_msaa)
637 return FALSE; 637 return FALSE;
638 638
639 switch (sample_count) { 639 switch (sample_count) {
@@ -1074,11 +1074,24 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
1074 S_030004_TEX_DEPTH(depth - 1) | 1074 S_030004_TEX_DEPTH(depth - 1) |
1075 S_030004_ARRAY_MODE(array_mode)); 1075 S_030004_ARRAY_MODE(array_mode));
1076 view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; 1076 view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
1077 if (state->u.tex.last_level && texture->nr_samples <= 1) { 1077
1078 /* TEX_RESOURCE_WORD3.MIP_ADDRESS */
1079 if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) {
1080 /* XXX the 2x and 4x cases are broken. */
1081 if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) {
1082 /* disable FMASK (0 = disabled) */
1083 view->tex_resource_words[3] = 0;
1084 view->skip_mip_address_reloc = true;
1085 } else {
1086 /* FMASK should be in MIP_ADDRESS for multisample textures */
1087 view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8;
1088 }
1089 } else if (state->u.tex.last_level && texture->nr_samples <= 1) {
1078 view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; 1090 view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
1079 } else { 1091 } else {
1080 view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; 1092 view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
1081 } 1093 }
1094
1082 view->tex_resource_words[4] = (word4 | 1095 view->tex_resource_words[4] = (word4 |
1083 S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | 1096 S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
1084 S_030010_ENDIAN_SWAP(endian)); 1097 S_030010_ENDIAN_SWAP(endian));
@@ -1582,9 +1595,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
1582 rctx->framebuffer.export_16bpc = false; 1595 rctx->framebuffer.export_16bpc = false;
1583 } 1596 }
1584 1597
1585 /* Cayman can fetch from a compressed MSAA colorbuffer, 1598 if (rtex->fmask_size && rtex->cmask_size) {
1586 * so it's pointless to track them. */
1587 if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) {
1588 rctx->framebuffer.compressed_cb_mask |= 1 << i; 1599 rctx->framebuffer.compressed_cb_mask |= 1 << i;
1589 } 1600 }
1590 } 1601 }
@@ -2258,13 +2269,15 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
2258 r600_write_value(cs, (resource_id_base + resource_index) * 8); 2269 r600_write_value(cs, (resource_id_base + resource_index) * 8);
2259 r600_write_array(cs, 8, rview->tex_resource_words); 2270 r600_write_array(cs, 8, rview->tex_resource_words);
2260 2271
2261 /* XXX The kernel needs two relocations. This is stupid. */
2262 reloc = r600_context_bo_reloc(rctx, rview->tex_resource, 2272 reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
2263 RADEON_USAGE_READ); 2273 RADEON_USAGE_READ);
2264 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); 2274 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
2265 r600_write_value(cs, reloc); 2275 r600_write_value(cs, reloc);
2266 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); 2276
2267 r600_write_value(cs, reloc); 2277 if (!rview->skip_mip_address_reloc) {
2278 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
2279 r600_write_value(cs, reloc);
2280 }
2268 } 2281 }
2269 state->dirty_mask = 0; 2282 state->dirty_mask = 0;
2270} 2283}
@@ -3345,6 +3358,16 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx)
3345 return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS); 3358 return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS);
3346} 3359}
3347 3360
3361void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx)
3362{
3363 struct pipe_blend_state blend;
3364
3365 memset(&blend, 0, sizeof(blend));
3366 blend.independent_blend_enable = true;
3367 blend.rt[0].colormask = 0xf;
3368 return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS);
3369}
3370
3348void *evergreen_create_db_flush_dsa(struct r600_context *rctx) 3371void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
3349{ 3372{
3350 struct pipe_depth_stencil_alpha_state dsa = {{0}}; 3373 struct pipe_depth_stencil_alpha_state dsa = {{0}};
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 98df83de918..edb1a55dc8a 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -486,7 +486,7 @@
486#define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002 486#define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002
487#define V_028808_CB_RESOLVE 0x00000003 487#define V_028808_CB_RESOLVE 0x00000003
488#define V_028808_CB_DECOMPRESS 0x00000004 488#define V_028808_CB_DECOMPRESS 0x00000004
489#define V_028808_CB_FASK_DECOMPRESS 0x00000005 489#define V_028808_CB_FMASK_DECOMPRESS 0x00000005
490#define S_028808_ROP3(x) (((x) & 0xFF) << 16) 490#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
491#define G_028808_ROP3(x) (((x) >> 16) & 0xFF) 491#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
492#define C_028808_ROP3 0xFF00FFFF 492#define C_028808_ROP3 0xFF00FFFF
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 51a2e4ee9e5..f04a92062f6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -255,7 +255,10 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
255 return tex; 255 return tex;
256} 256}
257 257
258void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family) 258void r600_bytecode_init(struct r600_bytecode *bc,
259 enum chip_class chip_class,
260 enum radeon_family family,
261 enum r600_msaa_texture_mode msaa_texture_mode)
259{ 262{
260 if ((chip_class == R600) && 263 if ((chip_class == R600) &&
261 (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { 264 (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
@@ -268,6 +271,7 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, en
268 271
269 LIST_INITHEAD(&bc->cf); 272 LIST_INITHEAD(&bc->cf);
270 bc->chip_class = chip_class; 273 bc->chip_class = chip_class;
274 bc->msaa_texture_mode = msaa_texture_mode;
271} 275}
272 276
273static int r600_bytecode_add_cf(struct r600_bytecode *bc) 277static int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1736,6 +1740,7 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod
1736static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 1740static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
1737{ 1741{
1738 bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | 1742 bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
1743 EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
1739 S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 1744 S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
1740 S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 1745 S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
1741 S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 1746 S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
@@ -2766,7 +2771,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
2766 assert(count < 32); 2771 assert(count < 32);
2767 2772
2768 memset(&bc, 0, sizeof(bc)); 2773 memset(&bc, 0, sizeof(bc));
2769 r600_bytecode_init(&bc, rctx->chip_class, rctx->family); 2774 r600_bytecode_init(&bc, rctx->chip_class, rctx->family,
2775 rctx->screen->msaa_texture_support);
2770 2776
2771 for (i = 0; i < count; i++) { 2777 for (i = 0; i < count; i++) {
2772 if (elements[i].instance_divisor > 1) { 2778 if (elements[i].instance_divisor > 1) {
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 8a9f3189be0..2c7db2cefd7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -62,6 +62,7 @@ struct r600_bytecode_alu {
62struct r600_bytecode_tex { 62struct r600_bytecode_tex {
63 struct list_head list; 63 struct list_head list;
64 unsigned inst; 64 unsigned inst;
65 unsigned inst_mod;
65 unsigned resource_id; 66 unsigned resource_id;
66 unsigned src_gpr; 67 unsigned src_gpr;
67 unsigned src_rel; 68 unsigned src_rel;
@@ -195,6 +196,7 @@ struct r600_cf_callstack {
195 196
196struct r600_bytecode { 197struct r600_bytecode {
197 enum chip_class chip_class; 198 enum chip_class chip_class;
199 enum r600_msaa_texture_mode msaa_texture_mode;
198 int type; 200 int type;
199 struct list_head cf; 201 struct list_head cf;
200 struct r600_bytecode_cf *cf_last; 202 struct r600_bytecode_cf *cf_last;
@@ -219,7 +221,10 @@ struct r600_bytecode {
219int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); 221int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
220 222
221/* r600_asm.c */ 223/* r600_asm.c */
222void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family); 224void r600_bytecode_init(struct r600_bytecode *bc,
225 enum chip_class chip_class,
226 enum radeon_family family,
227 enum r600_msaa_texture_mode msaa_texture_mode);
223void r600_bytecode_clear(struct r600_bytecode *bc); 228void r600_bytecode_clear(struct r600_bytecode *bc);
224int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); 229int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
225int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); 230int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 8597b8dfcf7..a19248da3a2 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -252,12 +252,29 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
252{ 252{
253 struct r600_context *rctx = (struct r600_context *)ctx; 253 struct r600_context *rctx = (struct r600_context *)ctx;
254 unsigned layer, level, checked_last_layer, max_layer; 254 unsigned layer, level, checked_last_layer, max_layer;
255 255 void *blend_decompress;
256 assert(rctx->chip_class != CAYMAN);
257 256
258 if (!rtex->dirty_level_mask) 257 if (!rtex->dirty_level_mask)
259 return; 258 return;
260 259
260 switch (rctx->screen->msaa_texture_support) {
261 case MSAA_TEXTURE_DECOMPRESSED:
262 blend_decompress = rctx->custom_blend_decompress;
263 break;
264 case MSAA_TEXTURE_COMPRESSED:
265 /* XXX the 2x and 4x cases are broken. */
266 if (rtex->resource.b.b.nr_samples == 8)
267 blend_decompress = rctx->custom_blend_fmask_decompress;
268 else
269 blend_decompress = rctx->custom_blend_decompress;
270 break;
271 case MSAA_TEXTURE_SAMPLE_ZERO:
272 default:
273 /* Nothing to do. */
274 rtex->dirty_level_mask = 0;
275 return;
276 }
277
261 for (level = first_level; level <= last_level; level++) { 278 for (level = first_level; level <= last_level; level++) {
262 if (!(rtex->dirty_level_mask & (1 << level))) 279 if (!(rtex->dirty_level_mask & (1 << level)))
263 continue; 280 continue;
@@ -278,8 +295,7 @@ static void r600_blit_decompress_color(struct pipe_context *ctx,
278 cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); 295 cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
279 296
280 r600_blitter_begin(ctx, R600_DECOMPRESS); 297 r600_blitter_begin(ctx, R600_DECOMPRESS);
281 util_blitter_custom_color(rctx->blitter, cbsurf, 298 util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress);
282 rctx->custom_blend_decompress);
283 r600_blitter_end(ctx); 299 r600_blitter_end(ctx);
284 300
285 pipe_surface_reference(&cbsurf, NULL); 301 pipe_surface_reference(&cbsurf, NULL);
@@ -299,13 +315,6 @@ void r600_decompress_color_textures(struct r600_context *rctx,
299 unsigned i; 315 unsigned i;
300 unsigned mask = textures->compressed_colortex_mask; 316 unsigned mask = textures->compressed_colortex_mask;
301 317
302 /* Cayman cannot decompress an MSAA colorbuffer,
303 * but it can read it compressed, so skip this. */
304 assert(rctx->chip_class != CAYMAN);
305 if (rctx->chip_class == CAYMAN) {
306 return;
307 }
308
309 while (mask) { 318 while (mask) {
310 struct pipe_sampler_view *view; 319 struct pipe_sampler_view *view;
311 struct r600_texture *tex; 320 struct r600_texture *tex;
@@ -333,7 +342,6 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
333 unsigned level, 342 unsigned level,
334 unsigned first_layer, unsigned last_layer) 343 unsigned first_layer, unsigned last_layer)
335{ 344{
336 struct r600_context *rctx = (struct r600_context *)ctx;
337 struct r600_texture *rtex = (struct r600_texture*)tex; 345 struct r600_texture *rtex = (struct r600_texture*)tex;
338 346
339 if (rtex->is_depth && !rtex->is_flushing_texture) { 347 if (rtex->is_depth && !rtex->is_flushing_texture) {
@@ -344,7 +352,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
344 level, level, 352 level, level,
345 first_layer, last_layer, 353 first_layer, last_layer,
346 0, u_max_sample(tex)); 354 0, u_max_sample(tex));
347 } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { 355 } else if (rtex->fmask_size && rtex->cmask_size) {
348 r600_blit_decompress_color(ctx, rtex, level, level, 356 r600_blit_decompress_color(ctx, rtex, level, level,
349 first_layer, last_layer); 357 first_layer, last_layer);
350 } 358 }
@@ -459,6 +467,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
459 struct pipe_sampler_view src_templ, *src_view; 467 struct pipe_sampler_view src_templ, *src_view;
460 unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL; 468 unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
461 struct pipe_box sbox; 469 struct pipe_box sbox;
470 bool copy_all_samples;
462 471
463 /* Handle buffers first. */ 472 /* Handle buffers first. */
464 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { 473 if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -558,16 +567,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
558 src_widthFL, src_heightFL); 567 src_widthFL, src_heightFL);
559 } 568 }
560 569
570 copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
571
561 /* Copy. */ 572 /* Copy. */
562 /* XXX Multisample texturing is unimplemented on Cayman. In the meantime,
563 * copy only the first sample (which is the only one that is uncompressed
564 * and therefore doesn't return garbage). */
565 r600_blitter_begin(ctx, R600_COPY_TEXTURE); 573 r600_blitter_begin(ctx, R600_COPY_TEXTURE);
566 util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty, 574 util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty,
567 abs(src_box->width), abs(src_box->height), 575 abs(src_box->width), abs(src_box->height),
568 src_view, src_box, src_width0, src_height0, 576 src_view, src_box, src_width0, src_height0,
569 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, 577 PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
570 rctx->chip_class != CAYMAN); 578 copy_all_samples);
571 r600_blitter_end(ctx); 579 r600_blitter_end(ctx);
572 580
573 pipe_surface_reference(&dst_view, NULL); 581 pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 916fa381a33..7a1e1353553 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -171,6 +171,9 @@ static void r600_destroy_context(struct pipe_context *context)
171 if (rctx->custom_blend_decompress) { 171 if (rctx->custom_blend_decompress) {
172 rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress); 172 rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress);
173 } 173 }
174 if (rctx->custom_blend_fmask_decompress) {
175 rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress);
176 }
174 util_unreference_framebuffer_state(&rctx->framebuffer.state); 177 util_unreference_framebuffer_state(&rctx->framebuffer.state);
175 178
176 r600_context_fini(rctx); 179 r600_context_fini(rctx);
@@ -264,6 +267,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
264 rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); 267 rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
265 rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx); 268 rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
266 rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx); 269 rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
270 rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx);
267 rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR || 271 rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR ||
268 rctx->family == CHIP_PALM || 272 rctx->family == CHIP_PALM ||
269 rctx->family == CHIP_SUMO || 273 rctx->family == CHIP_SUMO ||
@@ -289,6 +293,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
289 rctx->blitter = util_blitter_create(&rctx->context); 293 rctx->blitter = util_blitter_create(&rctx->context);
290 if (rctx->blitter == NULL) 294 if (rctx->blitter == NULL)
291 goto fail; 295 goto fail;
296 util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
292 rctx->blitter->draw_rectangle = r600_draw_rectangle; 297 rctx->blitter->draw_rectangle = r600_draw_rectangle;
293 298
294 r600_begin_new_cs(rctx); 299 r600_begin_new_cs(rctx);
@@ -393,7 +398,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
393 case PIPE_CAP_COMPUTE: 398 case PIPE_CAP_COMPUTE:
394 case PIPE_CAP_START_INSTANCE: 399 case PIPE_CAP_START_INSTANCE:
395 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 400 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
396 case PIPE_CAP_TEXTURE_MULTISAMPLE:
397 return 1; 401 return 1;
398 402
399 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 403 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -402,6 +406,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
402 case PIPE_CAP_GLSL_FEATURE_LEVEL: 406 case PIPE_CAP_GLSL_FEATURE_LEVEL:
403 return 130; 407 return 130;
404 408
409 case PIPE_CAP_TEXTURE_MULTISAMPLE:
410 return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO;
411
405 /* Supported except the original R600. */ 412 /* Supported except the original R600. */
406 case PIPE_CAP_INDEP_BLEND_ENABLE: 413 case PIPE_CAP_INDEP_BLEND_ENABLE:
407 case PIPE_CAP_INDEP_BLEND_FUNC: 414 case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -947,6 +954,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
947 break; 954 break;
948 } 955 }
949 956
957 /* MSAA support. */
958 switch (rscreen->chip_class) {
959 case R600:
960 case R700:
961 rscreen->has_msaa = rscreen->info.drm_minor >= 22;
962 rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED;
963 break;
964 case EVERGREEN:
965 rscreen->has_msaa = rscreen->info.drm_minor >= 19;
966 rscreen->msaa_texture_support =
967 rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED :
968 MSAA_TEXTURE_DECOMPRESSED;
969 break;
970 case CAYMAN:
971 rscreen->has_msaa = rscreen->info.drm_minor >= 19;
972 /* We should be able to read compressed MSAA textures, but it doesn't work. */
973 rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO;
974 break;
975 }
976
950 if (r600_init_tiling(rscreen)) { 977 if (r600_init_tiling(rscreen)) {
951 FREE(rscreen); 978 FREE(rscreen);
952 return NULL; 979 return NULL;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 17dab7f23d5..238ab1676f4 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -184,6 +184,22 @@ struct r600_pipe_fences {
184 pipe_mutex mutex; 184 pipe_mutex mutex;
185}; 185};
186 186
187enum r600_msaa_texture_mode {
188 /* If the hw can fetch the first sample only (no decompression available).
189 * This means MSAA texturing is not fully implemented. */
190 MSAA_TEXTURE_SAMPLE_ZERO,
191
192 /* If the hw can fetch decompressed MSAA textures.
193 * Supported families: R600, R700, Evergreen.
194 * Cayman cannot use this, because it cannot do the decompression. */
195 MSAA_TEXTURE_DECOMPRESSED,
196
197 /* If the hw can fetch compressed MSAA textures, which means shaders can
198 * read resolved FMASK. This yields the best performance.
199 * Supported families: Evergreen, Cayman. */
200 MSAA_TEXTURE_COMPRESSED
201};
202
187struct r600_screen { 203struct r600_screen {
188 struct pipe_screen screen; 204 struct pipe_screen screen;
189 struct radeon_winsys *ws; 205 struct radeon_winsys *ws;
@@ -191,6 +207,8 @@ struct r600_screen {
191 enum chip_class chip_class; 207 enum chip_class chip_class;
192 struct radeon_info info; 208 struct radeon_info info;
193 bool has_streamout; 209 bool has_streamout;
210 bool has_msaa;
211 enum r600_msaa_texture_mode msaa_texture_support;
194 struct r600_tiling_info tiling_info; 212 struct r600_tiling_info tiling_info;
195 struct r600_pipe_fences fences; 213 struct r600_pipe_fences fences;
196 214
@@ -205,6 +223,7 @@ struct r600_pipe_sampler_view {
205 struct pipe_sampler_view base; 223 struct pipe_sampler_view base;
206 struct r600_resource *tex_resource; 224 struct r600_resource *tex_resource;
207 uint32_t tex_resource_words[8]; 225 uint32_t tex_resource_words[8];
226 bool skip_mip_address_reloc;
208}; 227};
209 228
210struct r600_rasterizer_state { 229struct r600_rasterizer_state {
@@ -372,6 +391,7 @@ struct r600_context {
372 void *custom_dsa_flush; 391 void *custom_dsa_flush;
373 void *custom_blend_resolve; 392 void *custom_blend_resolve;
374 void *custom_blend_decompress; 393 void *custom_blend_decompress;
394 void *custom_blend_fmask_decompress;
375 /* With rasterizer discard, there doesn't have to be a pixel shader. 395 /* With rasterizer discard, there doesn't have to be a pixel shader.
376 * In that case, we bind this one: */ 396 * In that case, we bind this one: */
377 void *dummy_pixel_shader; 397 void *dummy_pixel_shader;
@@ -525,6 +545,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
525void *evergreen_create_db_flush_dsa(struct r600_context *rctx); 545void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
526void *evergreen_create_resolve_blend(struct r600_context *rctx); 546void *evergreen_create_resolve_blend(struct r600_context *rctx);
527void *evergreen_create_decompress_blend(struct r600_context *rctx); 547void *evergreen_create_decompress_blend(struct r600_context *rctx);
548void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
528boolean evergreen_is_format_supported(struct pipe_screen *screen, 549boolean evergreen_is_format_supported(struct pipe_screen *screen,
529 enum pipe_format format, 550 enum pipe_format format,
530 enum pipe_texture_target target, 551 enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c56efda5347..0b586f3aedb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1180,7 +1180,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
1180 ctx.shader = shader; 1180 ctx.shader = shader;
1181 ctx.native_integers = true; 1181 ctx.native_integers = true;
1182 1182
1183 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family); 1183 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
1184 rscreen->msaa_texture_support);
1184 ctx.tokens = tokens; 1185 ctx.tokens = tokens;
1185 tgsi_scan_shader(tokens, &ctx.info); 1186 tgsi_scan_shader(tokens, &ctx.info);
1186 tgsi_parse_init(&ctx.parse, tokens); 1187 tgsi_parse_init(&ctx.parse, tokens);
@@ -3796,10 +3797,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
3796 unsigned src_gpr; 3797 unsigned src_gpr;
3797 int r, i, j; 3798 int r, i, j;
3798 int opcode; 3799 int opcode;
3800 bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED &&
3801 inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
3802 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
3803 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
3799 /* Texture fetch instructions can only use gprs as source. 3804 /* Texture fetch instructions can only use gprs as source.
3800 * Also they cannot negate the source or take the absolute value */ 3805 * Also they cannot negate the source or take the absolute value */
3801 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 3806 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
3802 tgsi_tex_src_requires_loading(ctx, 0); 3807 tgsi_tex_src_requires_loading(ctx, 0)) ||
3808 read_compressed_msaa;
3803 boolean src_loaded = FALSE; 3809 boolean src_loaded = FALSE;
3804 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 3810 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
3805 uint8_t offset_x = 0, offset_y = 0, offset_z = 0; 3811 uint8_t offset_x = 0, offset_y = 0, offset_z = 0;
@@ -4070,6 +4076,127 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
4070 src_gpr = ctx->temp_reg; 4076 src_gpr = ctx->temp_reg;
4071 } 4077 }
4072 4078
4079 /* Obtain the sample index for reading a compressed MSAA color texture.
4080 * To read the FMASK, we use the ldfptr instruction, which tells us
4081 * where the samples are stored.
4082 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
4083 * which is the identity mapping. Each nibble says which physical sample
4084 * should be fetched to get that sample.
4085 *
4086 * Assume src.z contains the sample index. It should be modified like this:
4087 * src.z = (ldfptr() >> (src.z * 4)) & 0xF;
4088 * Then fetch the texel with src.
4089 */
4090 if (read_compressed_msaa) {
4091 unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4;
4092 unsigned temp = r600_get_temp(ctx);
4093 assert(src_loaded);
4094
4095 /* temp.w = ldfptr() */
4096 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
4097 tex.inst = SQ_TEX_INST_LD;
4098 tex.inst_mod = 1; /* to indicate this is ldfptr */
4099 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
4100 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
4101 tex.src_gpr = src_gpr;
4102 tex.dst_gpr = temp;
4103 tex.dst_sel_x = 7; /* mask out these components */
4104 tex.dst_sel_y = 7;
4105 tex.dst_sel_z = 7;
4106 tex.dst_sel_w = 0; /* store X */
4107 tex.src_sel_x = 0;
4108 tex.src_sel_y = 1;
4109 tex.src_sel_z = 2;
4110 tex.src_sel_w = 3;
4111 tex.offset_x = offset_x;
4112 tex.offset_y = offset_y;
4113 tex.offset_z = offset_z;
4114 r = r600_bytecode_add_tex(ctx->bc, &tex);
4115 if (r)
4116 return r;
4117
4118 /* temp.x = sample_index*4 */
4119 if (ctx->bc->chip_class == CAYMAN) {
4120 for (i = 0 ; i < 4; i++) {
4121 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4122 alu.inst = ctx->inst_info->r600_opcode;
4123 alu.src[0].sel = src_gpr;
4124 alu.src[0].chan = sample_chan;
4125 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4126 alu.src[1].value = 4;
4127 alu.dst.sel = temp;
4128 alu.dst.chan = i;
4129 alu.dst.write = i == 0;
4130 if (i == 3)
4131 alu.last = 1;
4132 r = r600_bytecode_add_alu(ctx->bc, &alu);
4133 if (r)
4134 return r;
4135 }
4136 } else {
4137 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4138 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT);
4139 alu.src[0].sel = src_gpr;
4140 alu.src[0].chan = sample_chan;
4141 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4142 alu.src[1].value = 4;
4143 alu.dst.sel = temp;
4144 alu.dst.chan = 0;
4145 alu.dst.write = 1;
4146 alu.last = 1;
4147 r = r600_bytecode_add_alu(ctx->bc, &alu);
4148 if (r)
4149 return r;
4150 }
4151
4152 /* sample_index = temp.w >> temp.x */
4153 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4154 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT);
4155 alu.src[0].sel = temp;
4156 alu.src[0].chan = 3;
4157 alu.src[1].sel = temp;
4158 alu.src[1].chan = 0;
4159 alu.dst.sel = src_gpr;
4160 alu.dst.chan = sample_chan;
4161 alu.dst.write = 1;
4162 alu.last = 1;
4163 r = r600_bytecode_add_alu(ctx->bc, &alu);
4164 if (r)
4165 return r;
4166
4167 /* sample_index & 0xF */
4168 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);
4170 alu.src[0].sel = src_gpr;
4171 alu.src[0].chan = sample_chan;
4172 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
4173 alu.src[1].value = 0xF;
4174 alu.dst.sel = src_gpr;
4175 alu.dst.chan = sample_chan;
4176 alu.dst.write = 1;
4177 alu.last = 1;
4178 r = r600_bytecode_add_alu(ctx->bc, &alu);
4179 if (r)
4180 return r;
4181#if 0
4182 /* visualize the FMASK */
4183 for (i = 0; i < 4; i++) {
4184 memset(&alu, 0, sizeof(struct r600_bytecode_alu));
4185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
4186 alu.src[0].sel = src_gpr;
4187 alu.src[0].chan = sample_chan;
4188 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
4189 alu.dst.chan = i;
4190 alu.dst.write = 1;
4191 alu.last = 1;
4192 r = r600_bytecode_add_alu(ctx->bc, &alu);
4193 if (r)
4194 return r;
4195 }
4196 return 0;
4197#endif
4198 }
4199
4073 opcode = ctx->inst_info->r600_opcode; 4200 opcode = ctx->inst_info->r600_opcode;
4074 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 4201 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
4075 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 4202 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 4b2a19a07f7..587f88deb9e 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -375,6 +375,9 @@
375#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) 375#define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5)
376#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) 376#define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1)
377#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF 377#define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF
378#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5)
379#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3)
380#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F
378#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) 381#define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7)
379#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) 382#define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1)
380#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F 383#define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 7d07008f16d..1a8d55e8d36 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -585,7 +585,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
585 return FALSE; 585 return FALSE;
586 586
587 if (sample_count > 1) { 587 if (sample_count > 1) {
588 if (rscreen->info.drm_minor < 22) 588 if (!rscreen->has_msaa)
589 return FALSE; 589 return FALSE;
590 590
591 /* R11G11B10 is broken on R6xx. */ 591 /* R11G11B10 is broken on R6xx. */
@@ -1988,7 +1988,6 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
1988 r600_write_value(cs, (resource_id_base + resource_index) * 7); 1988 r600_write_value(cs, (resource_id_base + resource_index) * 7);
1989 r600_write_array(cs, 7, rview->tex_resource_words); 1989 r600_write_array(cs, 7, rview->tex_resource_words);
1990 1990
1991 /* XXX The kernel needs two relocations. This is stupid. */
1992 reloc = r600_context_bo_reloc(rctx, rview->tex_resource, 1991 reloc = r600_context_bo_reloc(rctx, rview->tex_resource,
1993 RADEON_USAGE_READ); 1992 RADEON_USAGE_READ);
1994 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); 1993 r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 65985c7653d..a4d3e461ef1 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -593,8 +593,8 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
593 dst->views.compressed_depthtex_mask &= ~(1 << i); 593 dst->views.compressed_depthtex_mask &= ~(1 << i);
594 } 594 }
595 595
596 /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */ 596 /* Track compressed colorbuffers. */
597 if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) { 597 if (rtex->cmask_size && rtex->fmask_size) {
598 dst->views.compressed_colortex_mask |= 1 << i; 598 dst->views.compressed_colortex_mask |= 1 << i;
599 } else { 599 } else {
600 dst->views.compressed_colortex_mask &= ~(1 << i); 600 dst->views.compressed_colortex_mask &= ~(1 << i);