/* * Copyright (C) 2012-2013 Rob Clark * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Authors: * Rob Clark */ #include "pipe/p_state.h" #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_helpers.h" #include "freedreno_resource.h" #include "fd2_emit.h" #include "fd2_blend.h" #include "fd2_context.h" #include "fd2_program.h" #include "fd2_rasterizer.h" #include "fd2_texture.h" #include "fd2_util.h" #include "fd2_zsa.h" /* NOTE: just define the position for const regs statically.. the blob * driver doesn't seem to change these dynamically, and I can't really * think of a good reason to so.. */ #define VS_CONST_BASE 0x20 #define PS_CONST_BASE 0x120 static void emit_constants(struct fd_ringbuffer *ring, uint32_t base, struct fd_constbuf_stateobj *constbuf, struct fd2_shader_stateobj *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t start_base = base; unsigned i; /* emit user constants: */ while (enabled_mask) { unsigned index = ffs(enabled_mask) - 1; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* hmm, sometimes we still seem to end up with consts bound, * even if shader isn't using them, which ends up overwriting * const reg's used for immediates.. this is a hack to work * around that: */ if (shader && ((base - start_base) >= (shader->first_immediate * 4))) break; const uint32_t *dwords; if (cb->user_buffer) { dwords = cb->user_buffer; } else { struct fd_resource *rsc = fd_resource(cb->buffer); dwords = fd_bo_map(rsc->bo); } dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset); OUT_PKT3(ring, CP_SET_CONSTANT, size + 1); OUT_RING(ring, base); for (i = 0; i < size; i++) OUT_RING(ring, *(dwords++)); base += size; enabled_mask &= ~(1 << index); } /* emit shader immediates: */ if (shader) { for (i = 0; i < shader->num_immediates; i++) { OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, start_base + (4 * (shader->first_immediate + i))); OUT_RING(ring, shader->immediates[i].val[0]); OUT_RING(ring, shader->immediates[i].val[1]); OUT_RING(ring, shader->immediates[i].val[2]); OUT_RING(ring, shader->immediates[i].val[3]); base += 4; } } } typedef uint32_t texmask; static texmask emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted) { unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id); static const struct fd2_sampler_stateobj dummy_sampler = {}; static const struct fd2_pipe_sampler_view dummy_view = {}; const struct fd2_sampler_stateobj *sampler; const struct fd2_pipe_sampler_view *view; struct fd_resource *rsc; if (emitted & (1 << const_idx)) return 0; sampler = tex->samplers[samp_id] ? fd2_sampler_stateobj(tex->samplers[samp_id]) : &dummy_sampler; view = tex->textures[samp_id] ? fd2_pipe_sampler_view(tex->textures[samp_id]) : &dummy_view; rsc = view->base.texture ? fd_resource(view->base.texture) : NULL; OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); OUT_RING(ring, sampler->tex0 | view->tex0); if (rsc) OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0); else OUT_RING(ring, 0); OUT_RING(ring, view->tex2); OUT_RING(ring, sampler->tex3 | view->tex3); OUT_RING(ring, sampler->tex4 | view->tex4); if (rsc && rsc->base.last_level) OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0); else OUT_RING(ring, view->tex5); return (1 << const_idx); } static void emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx) { struct fd_texture_stateobj *fragtex = &ctx->tex[PIPE_SHADER_FRAGMENT]; struct fd_texture_stateobj *verttex = &ctx->tex[PIPE_SHADER_VERTEX]; texmask emitted = 0; unsigned i; for (i = 0; i < verttex->num_samplers; i++) if (verttex->samplers[i]) emitted |= emit_texture(ring, ctx, verttex, i, emitted); for (i = 0; i < fragtex->num_samplers; i++) if (fragtex->samplers[i]) emitted |= emit_texture(ring, ctx, fragtex, i, emitted); } void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, struct fd2_vertex_buf *vbufs, uint32_t n) { unsigned i; OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n)); OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); for (i = 0; i < n; i++) { struct fd_resource *rsc = fd_resource(vbufs[i].prsc); OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0); OUT_RING (ring, vbufs[i].size); } } void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) { struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa); struct fd_ringbuffer *ring = ctx->batch->draw; /* NOTE: we probably want to eventually refactor this so each state * object handles emitting it's own state.. although the mapping of * state to registers is not always orthogonal, sometimes a single * register contains bitfields coming from multiple state objects, * so not sure the best way to deal with that yet. */ if (dirty & FD_DIRTY_SAMPLE_MASK) { OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); OUT_RING(ring, ctx->sample_mask); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { struct pipe_stencil_ref *sr = &ctx->stencil_ref; OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); OUT_RING(ring, zsa->rb_depthcontrol); OUT_PKT3(ring, CP_SET_CONSTANT, 4); OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); OUT_RING(ring, zsa->rb_stencilrefmask_bf | A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1])); OUT_RING(ring, zsa->rb_stencilrefmask | A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); OUT_RING(ring, zsa->rb_alpha_ref); } if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) { struct fd2_rasterizer_stateobj *rasterizer = fd2_rasterizer_stateobj(ctx->rasterizer); OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); OUT_RING(ring, rasterizer->pa_cl_clip_cntl); OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl | A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE); OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE)); OUT_RING(ring, rasterizer->pa_su_point_size); OUT_RING(ring, rasterizer->pa_su_point_minmax); OUT_RING(ring, rasterizer->pa_su_line_cntl); OUT_RING(ring, rasterizer->pa_sc_line_stipple); OUT_PKT3(ring, CP_SET_CONSTANT, 6); OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL)); OUT_RING(ring, rasterizer->pa_su_vtx_cntl); OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */ OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */ } /* NOTE: scissor enabled bit is part of rasterizer state: */ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER)) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */ scissor->miny)); OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */ scissor->maxy)); ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */ OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */ OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */ OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */ OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */ OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */ /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */ OUT_PKT3(ring, CP_SET_CONSTANT, 9); OUT_RING(ring, 0x00000184); OUT_RING(ring, fui(ctx->viewport.translate[0])); OUT_RING(ring, fui(ctx->viewport.translate[1])); OUT_RING(ring, fui(ctx->viewport.translate[2])); OUT_RING(ring, fui(0.0f)); OUT_RING(ring, fui(ctx->viewport.scale[0])); OUT_RING(ring, fui(ctx->viewport.scale[1])); OUT_RING(ring, fui(ctx->viewport.scale[2])); OUT_RING(ring, fui(0.0f)); } if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) fd2_program_emit(ctx, ring, &ctx->prog); if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { emit_constants(ring, VS_CONST_BASE * 4, &ctx->constbuf[PIPE_SHADER_VERTEX], (dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL); emit_constants(ring, PS_CONST_BASE * 4, &ctx->constbuf[PIPE_SHADER_FRAGMENT], (dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL); } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) { OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); OUT_RING(ring, blend ? zsa->rb_colorcontrol | blend->rb_colorcontrol : 0); } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { enum pipe_format format = pipe_surface_format(ctx->batch->framebuffer.cbufs[0]); bool has_alpha = util_format_has_alpha(format); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha | COND(has_alpha, blend->rb_blendcontrol_rgb) | COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); OUT_RING(ring, blend ? blend->rb_colormask : 0xf); } if (dirty & FD_DIRTY_BLEND_COLOR) { OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED)); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2])); OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3])); } if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG)) emit_textures(ring, ctx); } /* emit per-context initialization: */ void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring) { if (is_a20x(ctx->screen)) { OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1); OUT_RING(ring, A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) | A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP | A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE | A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3)); /* not sure why this is required */ OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY)); OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); OUT_RING(ring, 0x00000002); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL)); OUT_RING(ring, 0x00000002); } else { OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); OUT_RING(ring, 0x0000003b); } OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1); OUT_RING(ring, 0x00000002); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00007fff); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST)); OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) | A2XX_SQ_VS_CONST_SIZE(0x100)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST)); OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) | A2XX_SQ_PS_CONST_SIZE(0xe0)); OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */ OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */ OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL)); OUT_RING(ring, 0xffffffff); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL)); OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); OUT_RING(ring, 0x00000000); // XXX we change this dynamically for draw/clear.. vs gmem<->mem.. OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS)); OUT_RING(ring, 0x88888888); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK)); OUT_RING(ring, 0xffffffff); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO)); OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) | A2XX_RB_COPY_DEST_INFO_WRITE_RED | A2XX_RB_COPY_DEST_INFO_WRITE_GREEN | A2XX_RB_COPY_DEST_INFO_WRITE_BLUE | A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA); OUT_PKT3(ring, CP_SET_CONSTANT, 3); OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0)); OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */ OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */ OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1); OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); OUT_RING(ring, 0x000005d0); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x5f601000); OUT_RING(ring, 0x00000001); OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1); OUT_RING(ring, 0x00000180); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00000300); OUT_PKT3(ring, CP_SET_SHADER_BASES, 1); OUT_RING(ring, 0x80000180); /* not sure what this form of CP_SET_CONSTANT is.. */ OUT_PKT3(ring, CP_SET_CONSTANT, 13); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x469c4000); OUT_RING(ring, 0x3f800000); OUT_RING(ring, 0x3f000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x40000000); OUT_RING(ring, 0x3f400000); OUT_RING(ring, 0x3ec00000); OUT_RING(ring, 0x3e800000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED | A2XX_RB_COLOR_MASK_WRITE_GREEN | A2XX_RB_COLOR_MASK_WRITE_BLUE | A2XX_RB_COLOR_MASK_WRITE_ALPHA); OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED)); OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */ OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); } static void fd2_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) { __OUT_IB(ring, false, target); } void fd2_emit_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); ctx->emit_ib = fd2_emit_ib; }