diff options
-rw-r--r-- | src/gallium/auxiliary/util/u_blitter.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_blitter.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 39 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 21 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 133 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 4 |
13 files changed, 270 insertions, 37 deletions
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 4d6cdd7a244..f4ac4aa8685 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -356,12 +356,20 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_sampler_state(pipe, ctx->sampler_state); pipe->delete_sampler_state(pipe, ctx->sampler_state_linear); u_upload_destroy(ctx->upload); FREE(ctx); } +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + + ctx->has_texture_multisample = supported; +} + static void blitter_set_running_flag(struct blitter_context_priv *ctx) { if (ctx->base.running) { _debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n", __LINE__); } diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index de063937793..c49faaad717 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -132,12 +132,18 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter); static INLINE struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter) { return blitter->pipe; } +/** + * Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver. + */ +void util_blitter_set_texture_multisample(struct blitter_context *blitter, + boolean supported); + /* The default function to draw a rectangle. This can only be used * inside of the draw_rectangle callback if the driver overrides it. */ void util_blitter_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 96e246a6e68..17b7e9d2c72 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -630,13 +630,13 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen, } if (!util_format_is_supported(format, usage)) return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 19) + if (!rscreen->has_msaa) return FALSE; switch (sample_count) { case 2: case 4: case 8: @@ -1071,17 +1071,30 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, else view->tex_resource_words[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type); view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) | S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); view->tex_resource_words[2] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; - if (state->u.tex.last_level && texture->nr_samples <= 1) { + + /* TEX_RESOURCE_WORD3.MIP_ADDRESS */ + if (texture->nr_samples > 1 && rscreen->msaa_texture_support == MSAA_TEXTURE_COMPRESSED) { + /* XXX the 2x and 4x cases are broken. */ + if (tmp->is_depth || tmp->resource.b.b.nr_samples != 8) { + /* disable FMASK (0 = disabled) */ + view->tex_resource_words[3] = 0; + view->skip_mip_address_reloc = true; + } else { + /* FMASK should be in MIP_ADDRESS for multisample textures */ + view->tex_resource_words[3] = (tmp->fmask_offset + r600_resource_va(ctx->screen, texture)) >> 8; + } + } else if (state->u.tex.last_level && texture->nr_samples <= 1) { view->tex_resource_words[3] = (tmp->surface.level[1].offset + r600_resource_va(ctx->screen, texture)) >> 8; } else { view->tex_resource_words[3] = (tmp->surface.level[0].offset + r600_resource_va(ctx->screen, texture)) >> 8; } + view->tex_resource_words[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian)); view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) | S_030014_LAST_ARRAY(state->u.tex.last_layer); if (texture->nr_samples > 1) { @@ -1579,15 +1592,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } if (!surf->export_16bpc) { rctx->framebuffer.export_16bpc = false; } - /* Cayman can fetch from a compressed MSAA colorbuffer, - * so it's pointless to track them. */ - if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + if (rtex->fmask_size && rtex->cmask_size) { rctx->framebuffer.compressed_cb_mask |= 1 << i; } } /* Update alpha-test state dependencies. * Alpha-test is done on the first colorbuffer only. */ @@ -2255,19 +2266,21 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, assert(rview); r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0)); r600_write_value(cs, (resource_id_base + resource_index) * 8); r600_write_array(cs, 8, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); - r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, reloc); + + if (!rview->skip_mip_address_reloc) { + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); + r600_write_value(cs, reloc); + } } state->dirty_mask = 0; } static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { @@ -3342,12 +3355,22 @@ void *evergreen_create_decompress_blend(struct r600_context *rctx) memset(&blend, 0, sizeof(blend)); blend.independent_blend_enable = true; blend.rt[0].colormask = 0xf; return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_DECOMPRESS); } +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx) +{ + struct pipe_blend_state blend; + + memset(&blend, 0, sizeof(blend)); + blend.independent_blend_enable = true; + blend.rt[0].colormask = 0xf; + return evergreen_create_blend_state_mode(&rctx->context, &blend, V_028808_CB_FMASK_DECOMPRESS); +} + void *evergreen_create_db_flush_dsa(struct r600_context *rctx) { struct pipe_depth_stencil_alpha_state dsa = {{0}}; return rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 98df83de918..edb1a55dc8a 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -483,13 +483,13 @@ #define C_028808_MODE 0xFFFFFF8F #define V_028808_CB_DISABLE 0x00000000 #define V_028808_CB_NORMAL 0x00000001 #define V_028808_CB_ELIMINATE_FAST_CLEAR 0x00000002 #define V_028808_CB_RESOLVE 0x00000003 #define V_028808_CB_DECOMPRESS 0x00000004 -#define V_028808_CB_FASK_DECOMPRESS 0x00000005 +#define V_028808_CB_FMASK_DECOMPRESS 0x00000005 #define S_028808_ROP3(x) (((x) & 0xFF) << 16) #define G_028808_ROP3(x) (((x) >> 16) & 0xFF) #define C_028808_ROP3 0xFF00FFFF #define R_028810_PA_CL_CLIP_CNTL 0x028810 #define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0) #define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 51a2e4ee9e5..f04a92062f6 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -252,25 +252,29 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void) if (tex == NULL) return NULL; LIST_INITHEAD(&tex->list); return tex; } -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family) +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode) { if ((chip_class == R600) && (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { bc->ar_handling = AR_HANDLE_RV6XX; bc->r6xx_nop_after_rel_dst = 1; } else { bc->ar_handling = AR_HANDLE_NORMAL; bc->r6xx_nop_after_rel_dst = 0; } LIST_INITHEAD(&bc->cf); bc->chip_class = chip_class; + bc->msaa_texture_mode = msaa_texture_mode; } static int r600_bytecode_add_cf(struct r600_bytecode *bc) { struct r600_bytecode_cf *cf = r600_bytecode_cf(); @@ -1733,12 +1737,13 @@ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecod } /* common to all 3 families */ static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | + EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) | S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | @@ -2763,13 +2768,14 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, int i, j, r, fs_size; struct r600_resource *fetch_shader; assert(count < 32); memset(&bc, 0, sizeof(bc)); - r600_bytecode_init(&bc, rctx->chip_class, rctx->family); + r600_bytecode_init(&bc, rctx->chip_class, rctx->family, + rctx->screen->msaa_texture_support); for (i = 0; i < count; i++) { if (elements[i].instance_divisor > 1) { if (rctx->chip_class == CAYMAN) { for (j = 0; j < 4; j++) { struct r600_bytecode_alu alu; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 8a9f3189be0..2c7db2cefd7 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -59,12 +59,13 @@ struct r600_bytecode_alu { unsigned index_mode; }; struct r600_bytecode_tex { struct list_head list; unsigned inst; + unsigned inst_mod; unsigned resource_id; unsigned src_gpr; unsigned src_rel; unsigned dst_gpr; unsigned dst_rel; unsigned dst_sel_x; @@ -192,12 +193,13 @@ struct r600_cf_callstack { #define AR_HANDLE_NORMAL 0 #define AR_HANDLE_RV6XX 1 /* except RV670 */ struct r600_bytecode { enum chip_class chip_class; + enum r600_msaa_texture_mode msaa_texture_mode; int type; struct list_head cf; struct r600_bytecode_cf *cf_last; unsigned ndw; unsigned ncf; unsigned ngpr; @@ -216,13 +218,16 @@ struct r600_bytecode { }; /* eg_asm.c */ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf); /* r600_asm.c */ -void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum radeon_family family); +void r600_bytecode_init(struct r600_bytecode *bc, + enum chip_class chip_class, + enum radeon_family family, + enum r600_msaa_texture_mode msaa_texture_mode); void r600_bytecode_clear(struct r600_bytecode *bc); int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu); int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx); int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex); int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output); int r600_bytecode_build(struct r600_bytecode *bc); diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 8597b8dfcf7..a19248da3a2 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -249,18 +249,35 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, struct r600_texture *rtex, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { struct r600_context *rctx = (struct r600_context *)ctx; unsigned layer, level, checked_last_layer, max_layer; - - assert(rctx->chip_class != CAYMAN); + void *blend_decompress; if (!rtex->dirty_level_mask) return; + switch (rctx->screen->msaa_texture_support) { + case MSAA_TEXTURE_DECOMPRESSED: + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_COMPRESSED: + /* XXX the 2x and 4x cases are broken. */ + if (rtex->resource.b.b.nr_samples == 8) + blend_decompress = rctx->custom_blend_fmask_decompress; + else + blend_decompress = rctx->custom_blend_decompress; + break; + case MSAA_TEXTURE_SAMPLE_ZERO: + default: + /* Nothing to do. */ + rtex->dirty_level_mask = 0; + return; + } + for (level = first_level; level <= last_level; level++) { if (!(rtex->dirty_level_mask & (1 << level))) continue; /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ @@ -275,14 +292,13 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, surf_tmpl.u.tex.first_layer = layer; surf_tmpl.u.tex.last_layer = layer; surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); r600_blitter_begin(ctx, R600_DECOMPRESS); - util_blitter_custom_color(rctx->blitter, cbsurf, - rctx->custom_blend_decompress); + util_blitter_custom_color(rctx->blitter, cbsurf, blend_decompress); r600_blitter_end(ctx); pipe_surface_reference(&cbsurf, NULL); } /* The texture will always be dirty if some layers or samples aren't flushed. @@ -296,19 +312,12 @@ static void r600_blit_decompress_color(struct pipe_context *ctx, void r600_decompress_color_textures(struct r600_context *rctx, struct r600_samplerview_state *textures) { unsigned i; unsigned mask = textures->compressed_colortex_mask; - /* Cayman cannot decompress an MSAA colorbuffer, - * but it can read it compressed, so skip this. */ - assert(rctx->chip_class != CAYMAN); - if (rctx->chip_class == CAYMAN) { - return; - } - while (mask) { struct pipe_sampler_view *view; struct r600_texture *tex; i = u_bit_scan(&mask); @@ -330,24 +339,23 @@ void r600_decompress_color_textures(struct r600_context *rctx, * rendering. */ static bool r600_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *tex, unsigned level, unsigned first_layer, unsigned last_layer) { - struct r600_context *rctx = (struct r600_context *)ctx; struct r600_texture *rtex = (struct r600_texture*)tex; if (rtex->is_depth && !rtex->is_flushing_texture) { if (!r600_init_flushed_depth_texture(ctx, tex, NULL)) return false; /* error */ r600_blit_decompress_depth(ctx, rtex, NULL, level, level, first_layer, last_layer, 0, u_max_sample(tex)); - } else if (rctx->chip_class != CAYMAN && rtex->fmask_size && rtex->cmask_size) { + } else if (rtex->fmask_size && rtex->cmask_size) { r600_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } return true; } @@ -456,12 +464,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL; struct pipe_box sbox; + bool copy_all_samples; /* Handle buffers first. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { r600_copy_buffer(ctx, dst, dstx, src, src_box); return; } @@ -555,22 +564,21 @@ static void r600_resource_copy_region(struct pipe_context *ctx, src_width0, src_height0); } else { src_view = r600_create_sampler_view_custom(ctx, src, &src_templ, src_widthFL, src_heightFL); } + copy_all_samples = rctx->screen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Copy. */ - /* XXX Multisample texturing is unimplemented on Cayman. In the meantime, - * copy only the first sample (which is the only one that is uncompressed - * and therefore doesn't return garbage). */ r600_blitter_begin(ctx, R600_COPY_TEXTURE); util_blitter_blit_generic(rctx->blitter, dst_view, dstx, dsty, abs(src_box->width), abs(src_box->height), src_view, src_box, src_width0, src_height0, PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, - rctx->chip_class != CAYMAN); + copy_all_samples); r600_blitter_end(ctx); pipe_surface_reference(&dst_view, NULL); pipe_sampler_view_reference(&src_view, NULL); } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 916fa381a33..7a1e1353553 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -168,12 +168,15 @@ static void r600_destroy_context(struct pipe_context *context) if (rctx->custom_blend_resolve) { rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_resolve); } if (rctx->custom_blend_decompress) { rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_decompress); } + if (rctx->custom_blend_fmask_decompress) { + rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_fmask_decompress); + } util_unreference_framebuffer_state(&rctx->framebuffer.state); r600_context_fini(rctx); if (rctx->blitter) { util_blitter_destroy(rctx->blitter); @@ -261,12 +264,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void evergreen_init_atom_start_compute_cs(rctx); if (evergreen_context_init(rctx)) goto fail; rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx); rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx); + rctx->custom_blend_fmask_decompress = evergreen_create_fmask_decompress_blend(rctx); rctx->has_vertex_cache = !(rctx->family == CHIP_CEDAR || rctx->family == CHIP_PALM || rctx->family == CHIP_SUMO || rctx->family == CHIP_SUMO2 || rctx->family == CHIP_CAICOS || rctx->family == CHIP_CAYMAN || @@ -286,12 +290,13 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void if (!rctx->uploader) goto fail; rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) goto fail; + util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa); rctx->blitter->draw_rectangle = r600_draw_rectangle; r600_begin_new_cs(rctx); r600_get_backend_mask(rctx); /* this emits commands and must be last */ rctx->dummy_pixel_shader = @@ -390,21 +395,23 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_COMPUTE: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_TEXTURE_MULTISAMPLE: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_GLSL_FEATURE_LEVEL: return 130; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return rscreen->msaa_texture_support != MSAA_TEXTURE_SAMPLE_ZERO; + /* Supported except the original R600. */ case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: /* R600 doesn't support per-MRT blends */ return family == CHIP_R600 ? 0 : 1; @@ -944,12 +951,32 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) case EVERGREEN: case CAYMAN: rscreen->has_streamout = rscreen->info.drm_minor >= 14; break; } + /* MSAA support. */ + switch (rscreen->chip_class) { + case R600: + case R700: + rscreen->has_msaa = rscreen->info.drm_minor >= 22; + rscreen->msaa_texture_support = MSAA_TEXTURE_DECOMPRESSED; + break; + case EVERGREEN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + rscreen->msaa_texture_support = + rscreen->info.drm_minor >= 24 ? MSAA_TEXTURE_COMPRESSED : + MSAA_TEXTURE_DECOMPRESSED; + break; + case CAYMAN: + rscreen->has_msaa = rscreen->info.drm_minor >= 19; + /* We should be able to read compressed MSAA textures, but it doesn't work. */ + rscreen->msaa_texture_support = MSAA_TEXTURE_SAMPLE_ZERO; + break; + } + if (r600_init_tiling(rscreen)) { FREE(rscreen); return NULL; } rscreen->screen.destroy = r600_destroy_screen; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 17dab7f23d5..238ab1676f4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -181,19 +181,37 @@ struct r600_pipe_fences { struct list_head blocks; /* linked list of freed fences */ struct list_head pool; pipe_mutex mutex; }; +enum r600_msaa_texture_mode { + /* If the hw can fetch the first sample only (no decompression available). + * This means MSAA texturing is not fully implemented. */ + MSAA_TEXTURE_SAMPLE_ZERO, + + /* If the hw can fetch decompressed MSAA textures. + * Supported families: R600, R700, Evergreen. + * Cayman cannot use this, because it cannot do the decompression. */ + MSAA_TEXTURE_DECOMPRESSED, + + /* If the hw can fetch compressed MSAA textures, which means shaders can + * read resolved FMASK. This yields the best performance. + * Supported families: Evergreen, Cayman. */ + MSAA_TEXTURE_COMPRESSED +}; + struct r600_screen { struct pipe_screen screen; struct radeon_winsys *ws; unsigned family; enum chip_class chip_class; struct radeon_info info; bool has_streamout; + bool has_msaa; + enum r600_msaa_texture_mode msaa_texture_support; struct r600_tiling_info tiling_info; struct r600_pipe_fences fences; /*for compute global memory binding, we allocate stuff here, instead of * buffers. * XXX: Not sure if this is the best place for global_pool. Also, @@ -202,12 +220,13 @@ struct r600_screen { }; struct r600_pipe_sampler_view { struct pipe_sampler_view base; struct r600_resource *tex_resource; uint32_t tex_resource_words[8]; + bool skip_mip_address_reloc; }; struct r600_rasterizer_state { struct r600_command_buffer buffer; boolean flatshade; boolean two_side; @@ -369,12 +388,13 @@ struct r600_context { unsigned max_db; /* for OQ */ /* Miscellaneous state objects. */ void *custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_decompress; + void *custom_blend_fmask_decompress; /* With rasterizer discard, there doesn't have to be a pixel shader. * In that case, we bind this one: */ void *dummy_pixel_shader; /* These dummy CMASK and FMASK buffers are used to get around the R6xx hardware * bug where valid CMASK and FMASK are required to be present to avoid * a hardlock in certain operations but aren't actually used @@ -522,12 +542,13 @@ void evergreen_init_state_functions(struct r600_context *rctx); void evergreen_init_atom_start_cs(struct r600_context *rctx); void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_context *rctx); void *evergreen_create_resolve_blend(struct r600_context *rctx); void *evergreen_create_decompress_blend(struct r600_context *rctx); +void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx); boolean evergreen_is_format_supported(struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned usage); void evergreen_init_color_surface(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c56efda5347..0b586f3aedb 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1177,13 +1177,14 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, use_llvm = debug_get_bool_option("R600_LLVM", TRUE); #endif ctx.bc = &shader->bc; ctx.shader = shader; ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family); + r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family, + rscreen->msaa_texture_support); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.parse.FullHeader.Processor.Processor; shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; @@ -3793,16 +3794,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_tex tex; struct r600_bytecode_alu alu; unsigned src_gpr; int r, i, j; int opcode; + bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED && + inst->Instruction.Opcode == TGSI_OPCODE_TXF && + (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && - tgsi_tex_src_requires_loading(ctx, 0); + const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && + tgsi_tex_src_requires_loading(ctx, 0)) || + read_compressed_msaa; boolean src_loaded = FALSE; unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; uint8_t offset_x = 0, offset_y = 0, offset_z = 0; src_gpr = tgsi_tex_get_src_gpr(ctx, 0); @@ -4067,12 +4073,133 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r; } src_loaded = TRUE; src_gpr = ctx->temp_reg; } + /* Obtain the sample index for reading a compressed MSAA color texture. + * To read the FMASK, we use the ldfptr instruction, which tells us + * where the samples are stored. + * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * Assume src.z contains the sample index. It should be modified like this: + * src.z = (ldfptr() >> (src.z * 4)) & 0xF; + * Then fetch the texel with src. + */ + if (read_compressed_msaa) { + unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 3 : 4; + unsigned temp = r600_get_temp(ctx); + assert(src_loaded); + + /* temp.w = ldfptr() */ + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.inst = SQ_TEX_INST_LD; + tex.inst_mod = 1; /* to indicate this is ldfptr */ + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + tex.src_gpr = src_gpr; + tex.dst_gpr = temp; + tex.dst_sel_x = 7; /* mask out these components */ + tex.dst_sel_y = 7; + tex.dst_sel_z = 7; + tex.dst_sel_w = 0; /* store X */ + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + tex.offset_x = offset_x; + tex.offset_y = offset_y; + tex.offset_z = offset_z; + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + + /* temp.x = sample_index*4 */ + if (ctx->bc->chip_class == CAYMAN) { + for (i = 0 ; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = i; + alu.dst.write = i == 0; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* sample_index = temp.w >> temp.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT); + alu.src[0].sel = temp; + alu.src[0].chan = 3; + alu.src[1].sel = temp; + alu.src[1].chan = 0; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* sample_index & 0xF */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0xF; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; +#if 0 + /* visualize the FMASK */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + alu.dst.chan = i; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +#endif + } + opcode = ctx->inst_info->r600_opcode; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 4b2a19a07f7..587f88deb9e 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -372,12 +372,15 @@ #define S_SQ_TEX_WORD0_TEX_INST(x) (((x) & 0x1F) << 0) #define G_SQ_TEX_WORD0_TEX_INST(x) (((x) >> 0) & 0x1F) #define C_SQ_TEX_WORD0_TEX_INST 0xFFFFFFE0 #define S_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) & 0x1) << 5) #define G_SQ_TEX_WORD0_BC_FRAC_MODE(x) (((x) >> 5) & 0x1) #define C_SQ_TEX_WORD0_BC_FRAC_MODE 0xFFFFFFDF +#define EG_S_SQ_TEX_WORD0_INST_MOD(x) (((x) & 0x3) << 5) +#define EG_G_SQ_TEX_WORD0_INST_MOD(x) (((x) >> 5) & 0x3) +#define EG_C_SQ_TEX_WORD0_INST_MOD 0xFFFFFF9F #define S_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) #define G_SQ_TEX_WORD0_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) #define C_SQ_TEX_WORD0_FETCH_WHOLE_QUAD 0xFFFFFF7F #define S_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) & 0xFF) << 8) #define G_SQ_TEX_WORD0_RESOURCE_ID(x) (((x) >> 8) & 0xFF) #define C_SQ_TEX_WORD0_RESOURCE_ID 0xFFFF00FF diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d07008f16d..1a8d55e8d36 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -582,13 +582,13 @@ boolean r600_is_format_supported(struct pipe_screen *screen, } if (!util_format_is_supported(format, usage)) return FALSE; if (sample_count > 1) { - if (rscreen->info.drm_minor < 22) + if (!rscreen->has_msaa) return FALSE; /* R11G11B10 is broken on R6xx. */ if (rscreen->chip_class == R600 && format == PIPE_FORMAT_R11G11B10_FLOAT) return FALSE; @@ -1985,13 +1985,12 @@ static void r600_emit_sampler_views(struct r600_context *rctx, assert(rview); r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); r600_write_value(cs, (resource_id_base + resource_index) * 7); r600_write_array(cs, 7, rview->tex_resource_words); - /* XXX The kernel needs two relocations. This is stupid. */ reloc = r600_context_bo_reloc(rctx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 65985c7653d..a4d3e461ef1 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -590,14 +590,14 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, if (rtex->is_depth && !rtex->is_flushing_texture) { dst->views.compressed_depthtex_mask |= 1 << i; } else { dst->views.compressed_depthtex_mask &= ~(1 << i); } - /* Track compressed colorbuffers for Evergreen (Cayman doesn't need this). */ - if (rctx->chip_class != CAYMAN && rtex->cmask_size && rtex->fmask_size) { + /* Track compressed colorbuffers. */ + if (rtex->cmask_size && rtex->fmask_size) { dst->views.compressed_colortex_mask |= 1 << i; } else { dst->views.compressed_colortex_mask &= ~(1 << i); } /* Changing from array to non-arrays textures and vice versa requires |