diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_state.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 284 |
1 file changed, 146 insertions, 138 deletions
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3448eb482ba..1d94da82da0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1,35 +1,22 @@ /* * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * SPDX-License-Identifier: MIT */ + #include "r600_formats.h" #include "r600_shader.h" #include "r600d.h" +#include "r600d_common.h" #include "pipe/p_shader_tokens.h" +#include "util/u_endian.h" #include "util/u_pack_color.h" #include "util/u_memory.h" #include "util/u_framebuffer.h" #include "util/u_dual_blend.h" +#include <assert.h> + static uint32_t r600_translate_blend_function(int blend_func) { switch (blend_func) { @@ -144,13 +131,13 @@ static uint32_t r600_translate_dbformat(enum pipe_format format) static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { return r600_translate_texformat(screen, format, NULL, NULL, NULL, - FALSE) != ~0U; + false) != ~0U; } -static bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format) +static bool r600_is_colorbuffer_format_supported(enum amd_gfx_level chip, enum pipe_format format) { - return r600_translate_colorformat(chip, format, FALSE) != ~0U && - r600_translate_colorswap(format, FALSE) != ~0U; + return r600_translate_colorformat(chip, format, false) != ~0U && + r600_translate_colorswap(format, false) != ~0U; } static bool r600_is_zs_format_supported(enum pipe_format format) @@ -173,6 +160,9 @@ bool r600_is_format_supported(struct pipe_screen *screen, return false; } + if (util_format_get_num_planes(format) > 1) + return false; + if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; @@ -181,7 +171,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, return false; /* R11G11B10 is broken on R6xx. 
*/ - if (rscreen->b.chip_class == R600 && + if (rscreen->b.gfx_level == R600 && format == PIPE_FORMAT_R11G11B10_FLOAT) return false; @@ -202,7 +192,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, if (usage & PIPE_BIND_SAMPLER_VIEW) { if (target == PIPE_BUFFER) { - if (r600_is_vertex_format_supported(format)) + if (r600_is_buffer_format_supported(format, false)) retval |= PIPE_BIND_SAMPLER_VIEW; } else { if (r600_is_sampler_format_supported(screen, format)) @@ -215,7 +205,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && - r600_is_colorbuffer_format_supported(rscreen->b.chip_class, format)) { + r600_is_colorbuffer_format_supported(rscreen->b.gfx_level, format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | @@ -232,7 +222,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, } if ((usage & PIPE_BIND_VERTEX_BUFFER) && - r600_is_vertex_format_supported(format)) { + r600_is_buffer_format_supported(format, true)) { retval |= PIPE_BIND_VERTEX_BUFFER; } @@ -487,7 +477,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); - if (rctx->b.chip_class == R700) { + if (rctx->b.gfx_level == R700) { rs->pa_cl_clip_cntl |= S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard); } @@ -516,7 +506,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, /* workaround possible rendering corruption on RV770 with hyperz together with sample shading */ sc_mode_cntl |= S_028A4C_TILE_COVER_DISABLE(state->multisample && rctx->ps_iter_samples > 1); } - if (rctx->b.chip_class >= R700) { + if (rctx->b.gfx_level >= R700) { sc_mode_cntl |= S_028A4C_FORCE_EOV_REZ_ENABLE(1) | S_028A4C_R700_ZMM_LINE_OFFSET(1) | S_028A4C_R700_VPORT_SCISSOR_ENABLE(1); @@ -563,10 +553,10 @@ static void 
*r600_create_rs_state(struct pipe_context *ctx, state->fill_back != PIPE_POLYGON_MODE_FILL) | S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) | S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)); - if (rctx->b.chip_class == R700) { + if (rctx->b.gfx_level == R700) { r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL, rs->pa_su_sc_mode_cntl); } - if (rctx->b.chip_class == R600) { + if (rctx->b.gfx_level == R600) { r600_store_context_reg(&rs->buffer, R_028350_SX_MISC, S_028350_MULTIPASS(state->rasterizer_discard)); } @@ -674,7 +664,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0; unsigned width, height, depth, offset_level, last_level; - bool do_endian_swap = FALSE; + bool do_endian_swap = false; if (!view) return NULL; @@ -695,7 +685,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, swizzle[2] = state->swizzle_b; swizzle[3] = state->swizzle_a; - if (R600_BIG_ENDIAN) + if (UTIL_ARCH_BIG_ENDIAN) do_endian_swap = !tmp->db_compatible; format = r600_translate_texformat(ctx->screen, state->format, @@ -821,7 +811,7 @@ static void r600_init_color_surface(struct r600_context *rctx, unsigned offset; const struct util_format_description *desc; int i; - bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE; + bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = false; if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) { r600_init_flushed_depth_texture(&rctx->b.b, surf->base.texture, NULL); @@ -854,11 +844,7 @@ static void r600_init_color_surface(struct r600_context *rctx, desc = util_format_description(surf->base.format); - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } + i = util_format_get_first_non_void_channel(surf->base.format); ntype = V_0280A0_NUMBER_UNORM; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -877,10 +863,10 @@ 
static void r600_init_color_surface(struct r600_context *rctx, ntype = V_0280A0_NUMBER_FLOAT; } - if (R600_BIG_ENDIAN) + if (UTIL_ARCH_BIG_ENDIAN) do_endian_swap = !rtex->db_compatible; - format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format, + format = r600_translate_colorformat(rctx->b.gfx_level, surf->base.format, do_endian_swap); assert(format != ~0); @@ -916,7 +902,7 @@ static void r600_init_color_surface(struct r600_context *rctx, /* EXPORT_NORM is an optimization that can be enabled for better * performance in certain cases */ - if (rctx->b.chip_class == R600) { + if (rctx->b.gfx_level == R600) { /* EXPORT_NORM can be enabled if: * - 11-bit or smaller UNORM/SNORM/SRGB * - BLEND_CLAMP is enabled @@ -1122,7 +1108,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, /* Colorbuffers. */ for (i = 0; i < state->nr_cbufs; i++) { /* The resolve buffer must have CMASK and FMASK to prevent hardlocks on R6xx. */ - bool force_cmask_fmask = rctx->b.chip_class == R600 && + bool force_cmask_fmask = rctx->b.gfx_level == R600 && rctx->framebuffer.is_msaa_resolve && i == 1; @@ -1216,7 +1202,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } if (rctx->framebuffer.state.zsbuf) { rctx->framebuffer.atom.num_dw += 16; - } else if (rctx->screen->b.info.drm_minor >= 18) { + } else { rctx->framebuffer.atom.num_dw += 3; } if (rctx->b.family > CHIP_R600 && rctx->b.family < CHIP_RV770) { @@ -1387,10 +1373,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)cb[i]->base.texture, - RADEON_USAGE_READWRITE, - cb[i]->base.texture->nr_samples > 1 ? + RADEON_USAGE_READWRITE | + (cb[i]->base.texture->nr_samples > 1 ? 
RADEON_PRIO_COLOR_BUFFER_MSAA : - RADEON_PRIO_COLOR_BUFFER); + RADEON_PRIO_COLOR_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -1400,10 +1386,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, cb[i]->cb_buffer_fmask, - RADEON_USAGE_READWRITE, - cb[i]->base.texture->nr_samples > 1 ? + RADEON_USAGE_READWRITE | + (cb[i]->base.texture->nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : - RADEON_PRIO_COLOR_BUFFER); + RADEON_PRIO_COLOR_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -1413,10 +1399,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, cb[i]->cb_buffer_cmask, - RADEON_USAGE_READWRITE, - cb[i]->base.texture->nr_samples > 1 ? + RADEON_USAGE_READWRITE | + (cb[i]->base.texture->nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : - RADEON_PRIO_COLOR_BUFFER); + RADEON_PRIO_COLOR_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); } @@ -1452,10 +1438,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)state->zsbuf->texture, - RADEON_USAGE_READWRITE, - surf->base.texture->nr_samples > 1 ? + RADEON_USAGE_READWRITE | + (surf->base.texture->nr_samples > 1 ? 
RADEON_PRIO_DEPTH_BUFFER_MSAA : - RADEON_PRIO_DEPTH_BUFFER); + RADEON_PRIO_DEPTH_BUFFER)); radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2); radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE */ @@ -1470,9 +1456,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a radeon_set_context_reg(cs, R_028D34_DB_PREFETCH_LIMIT, surf->db_prefetch_limit); sbu |= SURFACE_BASE_UPDATE_DEPTH; - } else if (rctx->screen->b.info.drm_minor >= 18) { - /* DRM 2.6.18 allows the INVALID format to disable depth/stencil. - * Older kernels are out of luck. */ + } else { radeon_set_context_reg(cs, R_028010_DB_DEPTH_INFO, S_028010_FORMAT(V_028010_DEPTH_INVALID)); } @@ -1513,7 +1497,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) rctx->ps_iter_samples = min_samples; if (rctx->framebuffer.nr_samples > 1) { r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom); - if (rctx->b.chip_class == R600) + if (rctx->b.gfx_level == R600) r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -1525,7 +1509,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); - if (rctx->b.chip_class == R600) { + if (rctx->b.gfx_level == R600) { radeon_emit(cs, 0xff); /* R_028238_CB_TARGET_MASK */ radeon_emit(cs, 0xff); /* R_02823C_CB_SHADER_MASK */ } else { @@ -1561,7 +1545,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource, - RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META); + RADEON_USAGE_READWRITE | RADEON_PRIO_SEPARATE_META); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 
radeon_emit(cs, reloc_idx); } else { @@ -1578,16 +1562,16 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) | S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE); - if (rctx->b.chip_class >= R700) { + if (rctx->b.gfx_level >= R700) { switch (a->ps_conservative_z) { default: /* fall through */ - case TGSI_FS_DEPTH_LAYOUT_ANY: + case FRAG_DEPTH_LAYOUT_ANY: db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_ANY_Z); break; - case TGSI_FS_DEPTH_LAYOUT_GREATER: + case FRAG_DEPTH_LAYOUT_GREATER: db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_GREATER_THAN_Z); break; - case TGSI_FS_DEPTH_LAYOUT_LESS: + case FRAG_DEPTH_LAYOUT_LESS: db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_LESS_THAN_Z); break; } @@ -1595,7 +1579,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom if (rctx->b.num_occlusion_queries > 0 && !a->occlusion_queries_disabled) { - if (rctx->b.chip_class >= R700) { + if (rctx->b.gfx_level >= R700) { db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); } db_render_override |= S_028D10_NOOP_CULL_DISABLE(1); @@ -1616,7 +1600,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom } else { db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); } - if (rctx->b.chip_class == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) { + if (rctx->b.gfx_level == R600 && rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) { /* sample shading and hyperz causes lockups on R6xx chips */ db_render_override |= S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE); } @@ -1628,7 +1612,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom S_028D0C_COPY_CENTROID(1) | S_028D0C_COPY_SAMPLE(a->copy_sample); - if (rctx->b.chip_class == R600) + if (rctx->b.gfx_level == R600) db_render_override |= 
S_028D10_NOOP_CULL_DISABLE(1); if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || @@ -1666,13 +1650,15 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; - uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; + struct r600_fetch_shader *shader = (struct r600_fetch_shader*)rctx->vertex_fetch_shader.cso; + uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask & shader->buffer_mask; while (dirty_mask) { struct pipe_vertex_buffer *vb; struct r600_resource *rbuffer; unsigned offset; unsigned buffer_index = u_bit_scan(&dirty_mask); + unsigned stride = shader->strides[buffer_index]; vb = &rctx->vertex_buffer_state.vb[buffer_index]; rbuffer = (struct r600_resource*)vb->buffer.resource; @@ -1687,7 +1673,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */ radeon_emit(cs, /* RESOURCEi_WORD2 */ S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(vb->stride)); + S_038008_STRIDE(stride)); radeon_emit(cs, 0); /* RESOURCEi_WORD3 */ radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ @@ -1695,7 +1681,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); + RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER)); } } @@ -1721,13 +1707,13 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, offset = cb->buffer_offset; if (!gs_ring_buffer) { - assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); + assert(buffer_index < R600_MAX_ALU_CONST_BUFFERS); radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 
256)); radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); + RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER)); } radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); @@ -1744,7 +1730,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); + RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); } @@ -1795,7 +1781,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, radeon_emit_array(cs, rview->tex_resource_words, 7); reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource, - RADEON_USAGE_READ, + RADEON_USAGE_READ | r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -1844,6 +1830,20 @@ static void r600_emit_sampler_states(struct r600_context *rctx, enum pipe_texture_target target = PIPE_BUFFER; if (rview) target = rview->base.texture->target; + + /* If seamless cube map is set, set the CAMP_(X|Y|Z) to + * SQ_TEX_WRAP which seems to trigger properly ignoring the + * texture wrap mode */ + if (target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY) { + if (rstate->seamless_cube_map){ + uint32_t mask = ~(S_03C000_CLAMP_X(7) | + S_03C000_CLAMP_Y(7) | + S_03C000_CLAMP_Z(7)); + rstate->tex_sampler_words[0] &= mask; + } + } + if (target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D_ARRAY) { rstate->tex_sampler_words[0] |= S_03C000_TEX_ARRAY_OVERRIDE(1); @@ -1920,7 +1920,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 
0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer, - RADEON_USAGE_READ, + RADEON_USAGE_READ | RADEON_PRIO_SHADER_BINARY)); } @@ -1974,7 +1974,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -1983,7 +1983,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READWRITE, + RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); @@ -2115,7 +2115,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_init_command_buffer(cb, 256); /* R6xx requires this packet at the start of each command buffer */ - if (rctx->b.chip_class == R600) { + if (rctx->b.gfx_level == R600) { r600_store_value(cb, PKT3(PKT3_START_3D_CMDBUF, 0, 0)); r600_store_value(cb, 0); } @@ -2307,7 +2307,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_store_config_reg(cb, R_009714_VC_ENHANCE, 0); - if (rctx->b.chip_class >= R700) { + if (rctx->b.gfx_level >= R700) { r600_store_context_reg(cb, R_028A50_VGT_ENHANCE, 4); r600_store_config_reg(cb, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000); r600_store_config_reg(cb, R_009830_DB_DEBUG, 0); @@ -2388,7 +2388,7 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_028200_PA_SC_WINDOW_OFFSET, 0); r600_store_context_reg(cb, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); - if (rctx->b.chip_class >= 
R700) { + if (rctx->b.gfx_level >= R700) { r600_store_context_reg(cb, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); } @@ -2421,9 +2421,9 @@ void r600_init_atom_start_cs(struct r600_context *rctx) r600_store_context_reg(cb, R_0288A4_SQ_PGM_RESOURCES_FS, 0); - if (rctx->b.chip_class == R700) + if (rctx->b.gfx_level == R700) r600_store_context_reg(cb, R_028350_SX_MISC, 0); - if (rctx->b.chip_class == R700 && rctx->screen->b.has_streamout) + if (rctx->b.gfx_level == R700 && rctx->screen->b.has_streamout) r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf)); r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0); @@ -2446,7 +2446,14 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha unsigned tmp, sid, ufi = 0; int need_linear = 0; unsigned z_export = 0, stencil_export = 0, mask_export = 0; - unsigned sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; + + /* Pull any state we use out of rctx. Make sure that any additional + * state added to this list is also checked in the caller in + * r600_update_derived_state(). + */ + bool sprite_coord_enable = rctx->rasterizer ? rctx->rasterizer->sprite_coord_enable : 0; + bool flatshade = rctx->rasterizer ? 
rctx->rasterizer->flatshade : 0; + bool msaa = rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0; if (!cb->buf) { r600_init_command_buffer(cb, 64); @@ -2456,11 +2463,15 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha r600_store_context_reg_seq(cb, R_028644_SPI_PS_INPUT_CNTL_0, rshader->ninput); for (i = 0; i < rshader->ninput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + const gl_varying_slot varying_slot = rshader->input[i].varying_slot; + + if (varying_slot == VARYING_SLOT_POS) pos_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_FACE && face_index == -1) - face_index = i; - if (rshader->input[i].name == TGSI_SEMANTIC_SAMPLEID) + else if (varying_slot == VARYING_SLOT_FACE) { + if (face_index == -1) + face_index = i; + } + else if (rshader->input[i].system_value == SYSTEM_VALUE_SAMPLE_ID) fixed_pt_position_index = i; sid = rshader->input[i].spi_sid; @@ -2468,18 +2479,17 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha tmp = S_028644_SEMANTIC(sid); /* D3D 9 behaviour. 
GL is undefined */ - if (rshader->input[i].name == TGSI_SEMANTIC_COLOR && rshader->input[i].sid == 0) + if (varying_slot == VARYING_SLOT_COL0) tmp |= S_028644_DEFAULT_VAL(3); - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || + if (varying_slot == VARYING_SLOT_POS || rshader->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || - (rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && - rctx->rasterizer && rctx->rasterizer->flatshade)) + (rshader->input[i].interpolate == TGSI_INTERPOLATE_COLOR && flatshade)) tmp |= S_028644_FLAT_SHADE(1); - if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD || - (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD && - sprite_coord_enable & (1 << rshader->input[i].sid))) { + if (varying_slot == VARYING_SLOT_PNTC || + (varying_slot >= VARYING_SLOT_TEX0 && varying_slot <= VARYING_SLOT_TEX7 && + (sprite_coord_enable & (1 << ((int)varying_slot - (int)VARYING_SLOT_TEX0))))) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -2498,14 +2508,25 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha } db_shader_control = 0; + exports_ps = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + switch (rshader->output[i].frag_result) { + case FRAG_RESULT_DEPTH: z_export = 1; - if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + exports_ps |= 1; + break; + case FRAG_RESULT_STENCIL: stencil_export = 1; - if (rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK && - rctx->framebuffer.nr_samples > 1 && rctx->ps_iter_samples > 0) - mask_export = 1; + exports_ps |= 1; + break; + case FRAG_RESULT_SAMPLE_MASK: + if (msaa) + mask_export = 1; + exports_ps |= 1; + break; + default: + break; + } } db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export); db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export); @@ -2513,14 +2534,6 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha if (rshader->uses_kill) db_shader_control |= 
S_02880C_KILL_ENABLE(1); - exports_ps = 0; - for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || - rshader->output[i].name == TGSI_SEMANTIC_STENCIL || - rshader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { - exports_ps |= 1; - } - } num_cout = rshader->nr_ps_color_exports; exports_ps |= S_028854_EXPORT_COLORS(num_cout); if (!exports_ps) { @@ -2585,8 +2598,8 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha shader->ps_depth_export = z_export | stencil_export | mask_export; shader->sprite_coord_enable = sprite_coord_enable; - if (rctx->rasterizer) - shader->flatshade = rctx->rasterizer->flatshade; + shader->flatshade = flatshade; + shader->msaa = msaa; } void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) @@ -2594,14 +2607,16 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha struct r600_command_buffer *cb = &shader->command_buffer; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10] = {}; - unsigned i, tmp, nparams = 0; + unsigned i; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].spi_sid) { - tmp = rshader->output[i].spi_sid << ((nparams & 3) * 8); - spi_vs_out_id[nparams / 4] |= tmp; - nparams++; - } + const int param = rshader->output[i].export_param; + if (param < 0) + continue; + unsigned *const param_spi_vs_out_id = &spi_vs_out_id[param / 4]; + const unsigned param_shift = (param & 3) * 8; + assert(!(*param_spi_vs_out_id & (0xFFu << param_shift))); + *param_spi_vs_out_id |= (unsigned)rshader->output[i].spi_sid << param_shift; } r600_init_command_buffer(cb, 32); @@ -2611,15 +2626,8 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha r600_store_value(cb, spi_vs_out_id[i]); } - /* Certain attributes (position, psize, etc.) don't count as params. 
- * VS is required to export at least one param and r600_shader_from_tgsi() - * takes care of adding a dummy export. - */ - if (nparams < 1) - nparams = 1; - r600_store_context_reg(cb, R_0286C4_SPI_VS_OUT_CONFIG, - S_0286C4_VS_EXPORT_COUNT(nparams - 1)); + S_0286C4_VS_EXPORT_COUNT(rshader->highest_export_param)); r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS, S_028868_NUM_GPRS(rshader->bc.ngpr) | S_028868_DX10_CLAMP(1) | @@ -2682,7 +2690,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha /* VGT_GS_MODE is written by r600_emit_shader_stages */ r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1); - if (rctx->b.chip_class >= R700) { + if (rctx->b.gfx_level >= R700) { r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, S_028B38_MAX_VERT_OUT(shader->selector->gs_max_out_vertices)); } @@ -2772,7 +2780,7 @@ void *r600_create_decompress_blend(struct r600_context *rctx) void *r600_create_db_flush_dsa(struct r600_context *rctx) { struct pipe_depth_stencil_alpha_state dsa; - boolean quirk = false; + bool quirk = false; if (rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RV635) @@ -2844,7 +2852,7 @@ static inline unsigned r600_array_mode(unsigned mode) } } -static boolean r600_dma_copy_tile(struct r600_context *rctx, +static bool r600_dma_copy_tile(struct r600_context *rctx, struct pipe_resource *dst, unsigned dst_level, unsigned dst_x, @@ -2880,7 +2888,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, slice_tile_max = (rsrc->surface.u.legacy.level[src_level].nblk_x * rsrc->surface.u.legacy.level[src_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? 
slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even - * if the linear destination/source have smaller heigh as the size of the + * if the linear destination/source have smaller height as the size of the * dma packet will be using the copy_height which is always smaller or equal * to the linear height */ @@ -2899,7 +2907,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, slice_tile_max = (rdst->surface.u.legacy.level[dst_level].nblk_x * rdst->surface.u.legacy.level[dst_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even - * if the linear destination/source have smaller heigh as the size of the + * if the linear destination/source have smaller height as the size of the * dma packet will be using the copy_height which is always smaller or equal * to the linear height */ @@ -2915,7 +2923,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, } /* check that we are in dw/base alignment constraint */ if (addr % 4 || base % 256) { - return FALSE; + return false; } /* It's a r6xx/r7xx limitation, the blit must be on 8 boundary for number @@ -2929,8 +2937,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, cheight = cheight > copy_height ? 
copy_height : cheight; size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ - radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ, 0); - radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE, 0); + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ); + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE); radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, 1, 0, size)); radeon_emit(cs, base >> 8); radeon_emit(cs, (detile << 31) | (array_mode << 27) | @@ -2944,7 +2952,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, addr += cheight * pitch; y += cheight; } - return TRUE; + return true; } static void r600_dma_copy(struct pipe_context *ctx, @@ -3043,9 +3051,9 @@ void r600_init_state_functions(struct r600_context *rctx) unsigned id = 1; unsigned i; /* !!! - * To avoid GPU lockup registers must be emited in a specific order + * To avoid GPU lockup registers must be emitted in a specific order * (no kidding ...). The order below is important and have been - * partialy infered from analyzing fglrx command stream. + * partially inferred from analyzing fglrx command stream. * * Don't reorder atom without carefully checking the effect (GPU lockup * or piglit regression). 
@@ -3059,8 +3067,8 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, r600_emit_gs_constant_buffers, 0); r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_FRAGMENT].atom, id++, r600_emit_ps_constant_buffers, 0); - /* sampler must be emited before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change - * does not take effect (TA_CNTL_AUX emited by r600_emit_seamless_cube_map) + /* sampler must be emitted before TA_CNTL_AUX otherwise DISABLE_CUBE_WRAP change + * does not take effect (TA_CNTL_AUX emitted by r600_emit_seamless_cube_map) */ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, r600_emit_vs_sampler_states, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, r600_emit_gs_sampler_states, 0); |