summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robclark@freedesktop.org>2014-07-31 15:42:55 -0400
committerRob Clark <robclark@freedesktop.org>2014-11-15 08:30:31 -0500
commit61c68b69d704b5faa5ff9d2b73b24bebf7e19412 (patch)
tree30978f733c49e198ac955e32f07fee9aec1062e7
parent4b1dfcb2c188b6773bf217ae13b71233a1fc9f44 (diff)
freedreno: add adreno 420 support
Very initial support. Basic stuff working (es2gears, es2tri, and maybe about half of glmark2). Expect broken stuff. Still missing: mem->gmem (restore), queries, mipmaps (blob segfaults!), hw binning, etc. Signed-off-by: Rob Clark <robclark@freedesktop.org>
-rw-r--r--src/gallium/drivers/freedreno/Makefile.am1
-rw-r--r--src/gallium/drivers/freedreno/Makefile.sources14
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.c127
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_blend.h53
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.c172
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.h102
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c326
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.h122
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c625
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.h91
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.c415
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.h36
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c480
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.h46
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.c39
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.h36
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c94
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h56
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.c105
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_screen.h36
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c190
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.h68
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_util.c401
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_util.h45
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_zsa.c105
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_zsa.h58
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c6
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.c15
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h2
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cmdline.c3
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c8
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h5
32 files changed, 3870 insertions, 12 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index ee5d506f2a3..df00add1cb0 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -15,6 +15,7 @@ libfreedreno_la_SOURCES = \
$(C_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
+ $(a4xx_SOURCES) \
$(ir3_SOURCES)
noinst_PROGRAMS = ir3_compiler
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index edaaadfb478..9d2710cc0fc 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -89,6 +89,20 @@ a3xx_SOURCES := \
a3xx/fd3_zsa.c \
a3xx/fd3_zsa.h
+a4xx_SOURCES := \
+ a4xx/fd4_blend.c \
+ a4xx/fd4_context.c \
+ a4xx/fd4_draw.c \
+ a4xx/fd4_emit.c \
+ a4xx/fd4_gmem.c \
+ a4xx/fd4_program.c \
+ a4xx/fd4_query.c \
+ a4xx/fd4_rasterizer.c \
+ a4xx/fd4_screen.c \
+ a4xx/fd4_texture.c \
+ a4xx/fd4_util.c \
+ a4xx/fd4_zsa.c
+
ir3_SOURCES := \
ir3/disasm-a3xx.c \
ir3/instr-a3xx.h \
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
new file mode 100644
index 00000000000..f569e9313c3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -0,0 +1,127 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_blend.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+static enum a4xx_rb_blend_opcode
+blend_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return BLEND_DST_PLUS_SRC;
+ case PIPE_BLEND_MIN:
+ return BLEND_MIN_DST_SRC;
+ case PIPE_BLEND_MAX:
+ return BLEND_MAX_DST_SRC;
+ case PIPE_BLEND_SUBTRACT:
+ return BLEND_SRC_MINUS_DST;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BLEND_DST_MINUS_SRC;
+ default:
+ DBG("invalid blend func: %x", func);
+ return 0;
+ }
+}
+
+void *
+fd4_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso)
+{
+ struct fd4_blend_stateobj *so;
+// enum a3xx_rop_code rop = ROP_COPY;
+ bool reads_dest = false;
+ int i;
+
+ if (cso->logicop_enable) {
+// rop = cso->logicop_func; /* maps 1:1 */
+
+ switch (cso->logicop_func) {
+ case PIPE_LOGICOP_NOR:
+ case PIPE_LOGICOP_AND_INVERTED:
+ case PIPE_LOGICOP_AND_REVERSE:
+ case PIPE_LOGICOP_INVERT:
+ case PIPE_LOGICOP_XOR:
+ case PIPE_LOGICOP_NAND:
+ case PIPE_LOGICOP_AND:
+ case PIPE_LOGICOP_EQUIV:
+ case PIPE_LOGICOP_NOOP:
+ case PIPE_LOGICOP_OR_INVERTED:
+ case PIPE_LOGICOP_OR_REVERSE:
+ case PIPE_LOGICOP_OR:
+ reads_dest = true;
+ break;
+ }
+ }
+
+ if (cso->independent_blend_enable) {
+ DBG("Unsupported! independent blend state");
+ return NULL;
+ }
+
+ so = CALLOC_STRUCT(fd4_blend_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+ const struct pipe_rt_blend_state *rt = &cso->rt[i];
+
+ so->rb_mrt[i].blend_control =
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+
+ so->rb_mrt[i].control =
+ 0xc00 | /* XXX ROP_CODE ?? */
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+ if (rt->blend_enable)
+ so->rb_mrt[i].control |=
+ A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+ A4XX_RB_MRT_CONTROL_BLEND |
+ A4XX_RB_MRT_CONTROL_BLEND2;
+
+ if (reads_dest)
+ so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+
+ if (cso->dither)
+ so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
+ }
+
+ return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
new file mode 100644
index 00000000000..68fcf23ff5c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -0,0 +1,53 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_BLEND_H_
+#define FD4_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd4_blend_stateobj {
+ struct pipe_blend_state base;
+ struct {
+ uint32_t control;
+ uint32_t buf_info;
+ uint32_t blend_control;
+ } rb_mrt[8];
+};
+
+static INLINE struct fd4_blend_stateobj *
+fd4_blend_stateobj(struct pipe_blend_state *blend)
+{
+ return (struct fd4_blend_stateobj *)blend;
+}
+
+void * fd4_blend_state_create(struct pipe_context *pctx,
+ const struct pipe_blend_state *cso);
+
+#endif /* FD4_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
new file mode 100644
index 00000000000..2321876dd48
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -0,0 +1,172 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "fd4_context.h"
+#include "fd4_blend.h"
+#include "fd4_draw.h"
+#include "fd4_emit.h"
+#include "fd4_gmem.h"
+#include "fd4_program.h"
+#include "fd4_query.h"
+#include "fd4_rasterizer.h"
+#include "fd4_texture.h"
+#include "fd4_zsa.h"
+
+static void
+fd4_context_destroy(struct pipe_context *pctx)
+{
+ struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
+
+ util_dynarray_fini(&fd4_ctx->rbrc_patches);
+
+ fd_bo_del(fd4_ctx->vs_pvt_mem);
+ fd_bo_del(fd4_ctx->fs_pvt_mem);
+ fd_bo_del(fd4_ctx->vsc_size_mem);
+
+ pctx->delete_vertex_elements_state(pctx, fd4_ctx->solid_vbuf_state.vtx);
+ pctx->delete_vertex_elements_state(pctx, fd4_ctx->blit_vbuf_state.vtx);
+
+ pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
+ pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);
+
+ fd_context_destroy(pctx);
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and
+ * save a tiny bit of memory
+ */
+
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+ static const float init_shader_const[] = {
+ -1.000000, +1.000000, +1.000000,
+ +1.000000, -1.000000, +1.000000,
+ };
+ struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+ pipe_buffer_write(pctx, prsc, 0,
+ sizeof(init_shader_const), init_shader_const);
+ return prsc;
+}
+
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+ struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+ PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+ return prsc;
+}
+
+static const uint8_t primtypes[PIPE_PRIM_MAX] = {
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
+ [PIPE_PRIM_LINES] = DI_PT_LINELIST,
+ [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
+ [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
+ [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
+ [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
+};
+
+struct pipe_context *
+fd4_context_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct fd_screen *screen = fd_screen(pscreen);
+ struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
+ struct pipe_context *pctx;
+
+ if (!fd4_ctx)
+ return NULL;
+
+ pctx = &fd4_ctx->base.base;
+
+ fd4_ctx->base.dev = fd_device_ref(screen->dev);
+ fd4_ctx->base.screen = fd_screen(pscreen);
+
+ pctx->destroy = fd4_context_destroy;
+ pctx->create_blend_state = fd4_blend_state_create;
+ pctx->create_rasterizer_state = fd4_rasterizer_state_create;
+ pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
+
+ fd4_draw_init(pctx);
+ fd4_gmem_init(pctx);
+ fd4_texture_init(pctx);
+ fd4_prog_init(pctx);
+
+ pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
+ if (!pctx)
+ return NULL;
+
+ util_dynarray_init(&fd4_ctx->rbrc_patches);
+
+ fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+ fd4_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+ fd4_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+ /* setup solid_vbuf_state: */
+ fd4_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 1, (struct pipe_vertex_element[]){{
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ fd4_ctx->solid_vbuf_state.vertexbuf.count = 1;
+ fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+ fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->solid_vbuf;
+
+ /* setup blit_vbuf_state: */
+ fd4_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 2, (struct pipe_vertex_element[]){{
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32_FLOAT,
+ }, {
+ .vertex_buffer_index = 1,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ fd4_ctx->blit_vbuf_state.vertexbuf.count = 2;
+ fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+ fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->blit_texcoord_vbuf;
+ fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+ fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd4_ctx->solid_vbuf;
+
+ fd4_query_context_init(pctx);
+
+ return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
new file mode 100644
index 00000000000..87e69fa613a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -0,0 +1,102 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_CONTEXT_H_
+#define FD4_CONTEXT_H_
+
+#include "freedreno_drmif.h"
+
+#include "freedreno_context.h"
+
+#include "ir3_shader.h"
+
+struct fd4_context {
+ struct fd_context base;
+
+ /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
+ * once we know whether or not to use GMEM, and GMEM tile pitch.
+ */
+ struct util_dynarray rbrc_patches;
+
+ struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+
+ /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
+ * could combine it with another allocation.
+ */
+ struct fd_bo *vsc_size_mem;
+
+ /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
+ * vertices:
+ */
+ struct pipe_resource *solid_vbuf;
+
+ /* vertex buf used for mem->gmem tex coords:
+ */
+ struct pipe_resource *blit_texcoord_vbuf;
+
+ /* vertex state for solid_vbuf:
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state solid_vbuf_state;
+
+ /* vertex state for blit_prog:
+ * - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state blit_vbuf_state;
+
+ /* if *any* of bits are set in {v,f}saturate_{s,t,r} */
+ bool vsaturate, fsaturate;
+
+ /* bitmask of sampler which needs coords clamped for vertex
+ * shader:
+ */
+ unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+
+ /* bitmask of sampler which needs coords clamped for frag
+ * shader:
+ */
+ unsigned fsaturate_s, fsaturate_t, fsaturate_r;
+
+ /* some state changes require a different shader variant. Keep
+ * track of this so we know when we need to re-emit shader state
+ * due to variant change. See fixup_shader_state()
+ */
+ struct ir3_shader_key last_key;
+};
+
+static INLINE struct fd4_context *
+fd4_context(struct fd_context *ctx)
+{
+ return (struct fd4_context *)ctx;
+}
+
+struct pipe_context *
+fd4_context_create(struct pipe_screen *pscreen, void *priv);
+
+#endif /* FD4_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
new file mode 100644
index 00000000000..2427a8b8a5b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -0,0 +1,326 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd4_draw.h"
+#include "fd4_context.h"
+#include "fd4_emit.h"
+#include "fd4_program.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+
+static void
+draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd4_emit *emit)
+{
+ const struct pipe_draw_info *info = emit->info;
+
+ fd4_emit_state(ctx, ring, emit);
+
+ if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
+ fd4_emit_vertex_bufs(ring, emit);
+
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
+
+ OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+ info->restart_index : 0xffffffff);
+
+ fd4_draw_emit(ctx, ring,
+ emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+ info);
+}
+
+/* fixup dirty shader state in case some "unrelated" (from the state-
+ * tracker's perspective) state change causes us to switch to a
+ * different variant.
+ */
+static void
+fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct ir3_shader_key *last_key = &fd4_ctx->last_key;
+
+ if (!ir3_shader_key_equal(last_key, key)) {
+ ctx->dirty |= FD_DIRTY_PROG;
+
+ if (last_key->has_per_samp || key->has_per_samp) {
+ if ((last_key->vsaturate_s != key->vsaturate_s) ||
+ (last_key->vsaturate_t != key->vsaturate_t) ||
+ (last_key->vsaturate_r != key->vsaturate_r))
+ ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+
+ if ((last_key->fsaturate_s != key->fsaturate_s) ||
+ (last_key->fsaturate_t != key->fsaturate_t) ||
+ (last_key->fsaturate_r != key->fsaturate_r))
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+ }
+
+ if (last_key->color_two_side != key->color_two_side)
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
+ if (last_key->half_precision != key->half_precision)
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
+ if (last_key->alpha != key->alpha)
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
+ fd4_ctx->last_key = *key;
+ }
+}
+
+static void
+fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd4_emit emit = {
+ .vtx = &ctx->vtx,
+ .prog = &ctx->prog,
+ .info = info,
+ .key = {
+ /* do binning pass first: */
+ .binning_pass = true,
+ .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ .alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
+ // TODO set .half_precision based on render target format,
+ // ie. float16 and smaller use half, float32 use full..
+ .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+ .has_per_samp = fd4_ctx->fsaturate || fd4_ctx->vsaturate,
+ .vsaturate_s = fd4_ctx->vsaturate_s,
+ .vsaturate_t = fd4_ctx->vsaturate_t,
+ .vsaturate_r = fd4_ctx->vsaturate_r,
+ .fsaturate_s = fd4_ctx->fsaturate_s,
+ .fsaturate_t = fd4_ctx->fsaturate_t,
+ .fsaturate_r = fd4_ctx->fsaturate_r,
+ },
+ .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+ };
+ unsigned dirty;
+
+ fixup_shader_state(ctx, &emit.key);
+
+ dirty = ctx->dirty;
+ emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+ draw_impl(ctx, ctx->binning_ring, &emit);
+
+ /* and now regular (non-binning) pass: */
+ emit.key.binning_pass = false;
+ emit.dirty = dirty;
+ emit.vp = NULL; /* we changed key so need to refetch vp */
+ draw_impl(ctx, ctx->ring, &emit);
+}
+
+/* clear operations ignore viewport state, so we need to reset it
+ * based on framebuffer state:
+ */
+static void
+reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
+{
+ float half_width = pfb->width * 0.5f;
+ float half_height = pfb->height * 0.5f;
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 4);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(half_width));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(half_width));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(half_height));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-half_height));
+}
+
+static void
+fd4_clear(struct fd_context *ctx, unsigned buffers,
+ const union pipe_color_union *color, double depth, unsigned stencil)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ unsigned dirty = ctx->dirty;
+ unsigned ce, i;
+ struct fd4_emit emit = {
+ .vtx = &fd4_ctx->solid_vbuf_state,
+ .prog = &ctx->solid_prog,
+ .key = {
+ .half_precision = true,
+ },
+ };
+ uint32_t colr = 0;
+
+ if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
+ colr = pack_rgba(pfb->cbufs[0]->format, color->f);
+
+ dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
+ dirty |= FD_DIRTY_PROG;
+ emit.dirty = dirty;
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ /* emit generic state now: */
+ fd4_emit_state(ctx, ring, &emit);
+ reset_viewport(ring, pfb);
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
+
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
+ ctx->dirty |= FD_DIRTY_VIEWPORT;
+ } else {
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ 0xff000000 | // XXX ???
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+ OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+ A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+ } else {
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
+ OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+ OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+ OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+ }
+
+ if (buffers & PIPE_CLEAR_COLOR) {
+ OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+ ce = 0xf;
+ } else {
+ ce = 0x0;
+ }
+
+ for (i = 0; i < 8; i++) {
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
+ A4XX_RB_MRT_CONTROL_B11 |
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+ A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
+ }
+
+ fd4_emit_vertex_bufs(ring, &emit);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, 0x0); /* XXX GRAS_ALPHA_CONTROL */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
+ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */
+ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */
+ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */
+ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */
+
+ /* until fastclear works: */
+ fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
+
+ OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
+ OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
+
+ OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
+ OUT_RING(ring, 0x00000001);
+
+ fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
+
+ OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
+ OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+void
+fd4_draw_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->draw_vbo = fd4_draw_vbo;
+ ctx->clear = fd4_clear;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
new file mode 100644
index 00000000000..f775cc77795
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -0,0 +1,122 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_DRAW_H_
+#define FD4_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+void fd4_draw_init(struct pipe_context *pctx);
+
+/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
+
+static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
+ enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+ enum pc_di_vis_cull_mode vis_cull_mode)
+{
+ return (prim_type << 0) |
+ (source_select << 6) |
+ ((index_size & 1) << 11) |
+ ((index_size >> 1) << 13) |
+ (vis_cull_mode << 8);
+}
+
+static inline void
+fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_primtype primtype,
+ enum pc_di_vis_cull_mode vismode,
+ enum pc_di_src_sel src_sel, uint32_t count,
+ enum pc_di_index_size idx_type,
+ uint32_t idx_size, uint32_t idx_offset,
+ struct fd_bo *idx_bo)
+{
+ /* for debug after a lock up, write a unique counter value
+ * to scratch7 for each draw, to make it easier to match up
+ * register dumps to cmdstream. The combination of IB
+ * (scratch6) and DRAW is enough to "triangulate" the
+ * particular draw that caused lockup.
+ */
+ emit_marker(ring, 7);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_bo ? 6 : 3);
+ if (vismode == USE_VISIBILITY) {
+ /* leave vis mode blank for now, it will be patched up when
+ * we know if we are binning or not
+ */
+ OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+ &ctx->draw_patches);
+ } else {
+ OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+ }
+ OUT_RING(ring, 0x1); /* XXX */
+ OUT_RING(ring, count); /* NumIndices */
+ if (idx_bo) {
+ OUT_RING(ring, 0x0); /* XXX */
+ OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
+ OUT_RING (ring, idx_size);
+ }
+
+ emit_marker(ring, 7);
+
+ fd_reset_wfi(ctx);
+}
+
+static inline void
+fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum pc_di_vis_cull_mode vismode,
+ const struct pipe_draw_info *info)
+{
+ struct pipe_index_buffer *idx = &ctx->indexbuf;
+ struct fd_bo *idx_bo = NULL;
+ enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
+ enum pc_di_src_sel src_sel;
+ uint32_t idx_size, idx_offset;
+
+ if (info->indexed) {
+ assert(!idx->user_buffer);
+
+ idx_bo = fd_resource(idx->buffer)->bo;
+ idx_type = size2indextype(idx->index_size);
+ idx_size = idx->index_size * info->count;
+ idx_offset = idx->offset + (info->start * idx->index_size);
+ src_sel = DI_SRC_SEL_DMA;
+ } else {
+ idx_bo = NULL;
+ idx_type = INDEX_SIZE_IGN;
+ idx_size = 0;
+ idx_offset = 0;
+ src_sel = DI_SRC_SEL_AUTO_INDEX;
+ }
+
+ fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+ info->count, idx_type, idx_size, idx_offset, idx_bo);
+}
+
+#endif /* FD4_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
new file mode 100644
index 00000000000..1a0986a1925
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -0,0 +1,625 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+
+#include "freedreno_resource.h"
+
+#include "fd4_emit.h"
+#include "fd4_blend.h"
+#include "fd4_context.h"
+#include "fd4_program.h"
+#include "fd4_rasterizer.h"
+#include "fd4_texture.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+/* regid: base const register
+ * prsc or dwords: buffer containing constant values
+ * sizedwords: size of const value buffer
+ */
+void
+fd4_emit_constant(struct fd_ringbuffer *ring,
+ enum adreno_state_block sb,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc)
+{
+ uint32_t i, sz;
+ enum adreno_state_src src;
+
+ if (prsc) {
+ sz = 0;
+ src = 0x2; // TODO ??
+ } else {
+ sz = sizedwords;
+ src = SS_DIRECT;
+ }
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+ CP_LOAD_STATE_0_STATE_SRC(src) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
+ if (prsc) {
+ struct fd_bo *bo = fd_resource(prsc)->bo;
+ OUT_RELOC(ring, bo, offset,
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+ } else {
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+ dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
+ }
+ for (i = 0; i < sz; i++) {
+ OUT_RING(ring, dwords[i]);
+ }
+}
+
+static void
+emit_constants(struct fd_ringbuffer *ring,
+ enum adreno_state_block sb,
+ struct fd_constbuf_stateobj *constbuf,
+ struct ir3_shader_variant *shader)
+{
+ uint32_t enabled_mask = constbuf->enabled_mask;
+ uint32_t first_immediate;
+ uint32_t base = 0;
+
+ // XXX TODO only emit dirty consts.. but we need to keep track if
+ // they are clobbered by a clear, gmem2mem, or mem2gmem..
+ constbuf->dirty_mask = enabled_mask;
+
+ /* in particular, with binning shader we may end up with unused
+ * consts, ie. we could end up w/ constlen that is smaller
+ * than first_immediate. In that case truncate the user consts
+ * early to avoid HLSQ lockup caused by writing too many consts
+ */
+ first_immediate = MIN2(shader->first_immediate, shader->constlen);
+
+ /* emit user constants: */
+ while (enabled_mask) {
+ unsigned index = ffs(enabled_mask) - 1;
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+ // I expect that size should be a multiple of vec4's:
+ assert(size == align(size, 4));
+
+ /* gallium could leave const buffers bound above what the
+ * current shader uses.. don't let that confuse us.
+ */
+ if (base >= (4 * first_immediate))
+ break;
+
+ if (constbuf->dirty_mask & (1 << index)) {
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (4 * first_immediate) - base);
+ fd4_emit_constant(ring, sb, base,
+ cb->buffer_offset, size,
+ cb->user_buffer, cb->buffer);
+ constbuf->dirty_mask &= ~(1 << index);
+ }
+
+ base += size;
+ enabled_mask &= ~(1 << index);
+ }
+
+ /* emit shader immediates: */
+ if (shader) {
+ int size = shader->immediates_count;
+ base = shader->first_immediate;
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, shader->constlen) - base;
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0) {
+ fd4_emit_constant(ring, sb, base,
+ 0, size, shader->immediates[0].val, NULL);
+ }
+ }
+}
+
+static void
+emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+{
+ unsigned i;
+
+ if (tex->num_samplers > 0) {
+ /* output sampler state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_samplers; i++) {
+ static const struct fd4_sampler_stateobj dummy_sampler = {};
+ const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
+ fd4_sampler_stateobj(tex->samplers[i]) :
+ &dummy_sampler;
+ OUT_RING(ring, sampler->texsamp0);
+ OUT_RING(ring, sampler->texsamp1);
+ }
+ /* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? */
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ if (tex->num_textures > 0) {
+ /* emit texture state: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+ OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ for (i = 0; i < tex->num_textures; i++) {
+ static const struct fd4_pipe_sampler_view dummy_view = {};
+ const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
+ fd4_pipe_sampler_view(tex->textures[i]) :
+ &dummy_view;
+ struct fd_resource *rsc = view->tex_resource;
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
+ OUT_RING(ring, view->texconst0);
+ OUT_RING(ring, view->texconst1);
+ OUT_RING(ring, view->texconst2);
+ OUT_RING(ring, view->texconst3);
+ OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+}
+
+/* emit texture state for mem->gmem restore operation.. eventually it would
+ * be good to get rid of this and use normal CSO/etc state for more of these
+ * special cases..
+ */
+void
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+{
+ /* TODO */
+}
+
+
+void
+fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+{
+ uint32_t i, j, last = 0;
+ uint32_t total_in = 0;
+ const struct fd_vertex_state *vtx = emit->vtx;
+ struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+ unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
+
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (vtx->vtx->num_elements == 0)
+ return;
+
+ for (i = 0; i < n; i++)
+ if (vp->inputs[i].compmask)
+ last = i;
+
+ for (i = 0, j = 0; i <= last; i++) {
+ if (vp->inputs[i].compmask) {
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ const struct pipe_vertex_buffer *vb =
+ &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+ struct fd_resource *rsc = fd_resource(vb->buffer);
+ enum pipe_format pfmt = elem->src_format;
+ enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
+ bool switchnext = (i != last);
+ uint32_t fs = util_format_get_blocksize(pfmt);
+ uint32_t off = vb->buffer_offset + elem->src_offset;
+ uint32_t size = fd_bo_size(rsc->bo) - off;
+ debug_assert(fmt != ~0);
+
+ OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+ COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+ OUT_RELOC(ring, rsc->bo, off, 0, 0);
+ OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+ A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
+ A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in += vp->inputs[i].ncomp;
+ j++;
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
+ OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+ 0xa0000 | /* XXX */
+ A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
+ A4XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+ A4XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+ OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
+ OUT_RING(ring, 0x0000fc00); /* XXX VFD_CONTROL_3 */
+ OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
+}
+
+void
+fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd4_emit *emit)
+{
+ struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+ struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
+ uint32_t dirty = emit->dirty;
+
+ emit_marker(ring, 5);
+
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
+ uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
+
+ /* I suppose if we needed to (which I don't *think* we need
+ * to), we could emit this for binning pass too. But we
+ * would need to keep a different patch-list for binning
+ * vs render pass.
+ */
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, zsa->gras_alpha_control);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, zsa->rb_stencil_control);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, zsa->rb_stencilrefmask |
+ A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+ OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+ A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+ }
+
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+ uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_depth_control;
+ if (fp->writes_pos) {
+ val |= A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
+ val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+ if (fp->has_kill) {
+ val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, val);
+ }
+
+ if (dirty & FD_DIRTY_RASTERIZER) {
+ struct fd4_rasterizer_stateobj *rasterizer =
+ fd4_rasterizer_stateobj(ctx->rasterizer);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, rasterizer->gras_su_mode_control |
+ A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
+ OUT_RING(ring, rasterizer->gras_su_point_minmax);
+ OUT_RING(ring, rasterizer->gras_su_point_size);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+ OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+ }
+
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+ ->gras_cl_clip_cntl;
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, val);
+ }
+
+ /* NOTE: since primitive_restart is not actually part of any
+ * state object, we need to make sure that we always emit
+ * PRIM_VTX_CNTL.. either that or be more clever and detect
+ * when it changes.
+ */
+ if (emit->info) {
+ uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+ ->pc_prim_vtx_cntl;
+
+ val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
+ val |= COND(fp->total_in > 0, A4XX_PC_PRIM_VTX_CNTL_VAROUT);
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
+ OUT_RING(ring, val);
+ OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
+ }
+
+ if (dirty & FD_DIRTY_SCISSOR) {
+ struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
+
+ ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
+ ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
+ ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
+ ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+ }
+
+ if (dirty & FD_DIRTY_PROG)
+ fd4_program_emit(ring, emit);
+
+ if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
+ /* evil hack to deal sanely with clear path: */
+ (emit->prog == &ctx->prog)) {
+ fd_wfi(ctx, ring);
+ emit_constants(ring, SB_VERT_SHADER,
+ &ctx->constbuf[PIPE_SHADER_VERTEX],
+ (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+ if (!emit->key.binning_pass) {
+ emit_constants(ring, SB_FRAG_SHADER,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ }
+ }
+
+ if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
+ struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
+ uint32_t i;
+
+ for (i = 0; i < 8; i++) {
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].control);
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+ OUT_RING(ring, blend->rb_mrt[i].blend_control);
+ }
+ }
+
+ if (dirty & FD_DIRTY_VERTTEX) {
+ if (vp->has_samp)
+ emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
+ else
+ dirty &= ~FD_DIRTY_VERTTEX;
+ }
+
+ if (dirty & FD_DIRTY_FRAGTEX) {
+ if (fp->has_samp)
+ emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
+ else
+ dirty &= ~FD_DIRTY_FRAGTEX;
+ }
+
+ ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ */
+void
+fd4_emit_restore(struct fd_context *ctx)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+
+ OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+ OUT_RING(ring, 0x00000006);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+ OUT_RING(ring, 0x0000003a);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
+ OUT_RING(ring, 0x00000007);
+
+ OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000012);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
+ OUT_RING(ring, 0x00000006);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
+ OUT_RING(ring, 0x00040000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+ OUT_RING(ring, 0x00001000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F3, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F4, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F5, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F6, 1);
+ OUT_RING(ring, 0x3c007fff);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
+ OUT_RING(ring, 0x3f800000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
+ OUT_RING(ring, 0x0000001d);
+
+ OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
+ OUT_RING(ring, 0x00000001);
+
+ OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_2381, 1);
+ OUT_RING(ring, 0x00000010);
+
+ OUT_PKT0(ring, REG_A4XX_UNKNOWN_23A0, 1);
+ OUT_RING(ring, 0x00000010);
+
+ /* we don't use this yet.. probably best to disable.. */
+ OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
+ OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
+ CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
+ CP_SET_DRAW_STATE_0_GROUP_ID(0));
+ OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
+ OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
+ OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
+ OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
+ OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
+ A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
+ OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+ A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+
+ OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
+ OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf));
+
+ OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
+ OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE);
+
+ ctx->needs_rb_fbd = true;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
new file mode 100644
index 00000000000..c5fb24d8d13
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -0,0 +1,91 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_EMIT_H
+#define FD4_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd4_util.h"
+#include "fd4_program.h"
+#include "ir3_shader.h"
+
+struct fd_ringbuffer;
+enum adreno_state_block;
+
+void fd4_emit_constant(struct fd_ringbuffer *ring,
+ enum adreno_state_block sb,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc);
+
+void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+ struct pipe_surface *psurf);
+
+/* grouped together emit-state for prog/vertex/state emit: */
+struct fd4_emit {
+ const struct fd_vertex_state *vtx;
+ const struct fd_program_stateobj *prog;
+ const struct pipe_draw_info *info;
+ struct ir3_shader_key key;
+ uint32_t dirty;
+ bool rasterflat;
+
+ /* cached to avoid repeated lookups of same variants: */
+ struct ir3_shader_variant *vp, *fp;
+ /* TODO: other shader stages.. */
+};
+
+static inline struct ir3_shader_variant *
+fd4_emit_get_vp(struct fd4_emit *emit)
+{
+ if (!emit->vp) {
+ struct fd4_shader_stateobj *so = emit->prog->vp;
+ emit->vp = ir3_shader_variant(so->shader, emit->key);
+ }
+ return emit->vp;
+}
+
+static inline struct ir3_shader_variant *
+fd4_emit_get_fp(struct fd4_emit *emit)
+{
+ if (!emit->fp) {
+ struct fd4_shader_stateobj *so = emit->prog->fp;
+ emit->fp = ir3_shader_variant(so->shader, emit->key);
+ }
+ return emit->fp;
+}
+
+void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+
+void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd4_emit *emit);
+
+void fd4_emit_restore(struct fd_context *ctx);
+
+#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
new file mode 100644
index 00000000000..8cb6bc4cb6c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -0,0 +1,415 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_draw.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd4_gmem.h"
+#include "fd4_context.h"
+#include "fd4_draw.h"
+#include "fd4_emit.h"
+#include "fd4_program.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+static const struct ir3_shader_key key = {
+ // XXX should set this based on render target format! We don't
+ // want half_precision if float32 render target!!!
+ .half_precision = true,
+};
+
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+ struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++) {
+ enum a4xx_color_fmt format = 0;
+ enum a3xx_color_swap swap = WZYX;
+ struct fd_resource *rsc = NULL;
+ struct fd_resource_slice *slice = NULL;
+ uint32_t stride = 0;
+ uint32_t base = 0;
+ uint32_t layer_offset = 0;
+
+ if ((i < nr_bufs) && bufs[i]) {
+ struct pipe_surface *psurf = bufs[i];
+
+ rsc = fd_resource(psurf->texture);
+ slice = &rsc->slices[psurf->u.tex.level];
+ format = fd4_pipe2color(psurf->format);
+ swap = fd4_pipe2swap(psurf->format);
+
+ debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+ layer_offset = slice->size0 * psurf->u.tex.first_layer;
+
+ if (bin_w) {
+ stride = bin_w * rsc->cpp;
+
+ if (bases) {
+ base = bases[i];
+ }
+ } else {
+ stride = slice->pitch * rsc->cpp;
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
+ OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+ 0x80 | /* XXX not on gmem2mem?? tile-mode? */
+ A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+ A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+ if (bin_w || (i >= nr_bufs)) {
+ OUT_RING(ring, base);
+ } else {
+ OUT_RELOCW(ring, rsc->bo,
+ slice->offset + layer_offset, 0, -1);
+ }
+ OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
+ }
+}
+
+static uint32_t
+depth_base(struct fd_context *ctx)
+{
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ uint32_t cpp = 4;
+ if (pfb->cbufs[0]) {
+ struct fd_resource *rsc =
+ fd_resource(pfb->cbufs[0]->texture);
+ cpp = rsc->cpp;
+ }
+ return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
+}
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+static void
+emit_gmem2mem_surf(struct fd_context *ctx,
+ uint32_t base, struct pipe_surface *psurf)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct fd_resource *rsc = fd_resource(psurf->texture);
+ struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+
+ OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
+ OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
+ A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
+ OUT_RELOCW(ring, rsc->bo, slice->offset, 0, 0); /* RB_COPY_DEST_BASE */
+ OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
+ A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
+ A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+ A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+ A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
+
+ fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
+}
+
+static void
+fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ struct fd4_emit emit = {
+ .vtx = &fd4_ctx->solid_vbuf_state,
+ .prog = &ctx->solid_prog,
+ .key = key,
+ };
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+ OUT_RING(ring, 0xff000000 |
+ A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+ fd_wfi(ctx, ring);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+ OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+ 0xa); /* XXX */
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+ OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+ OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, 0x00000002);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+ OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+ A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+
+ OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
+ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
+
+ fd4_program_emit(ring, &emit);
+ fd4_emit_vertex_bufs(ring, &emit);
+
+ if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ uint32_t base = depth_base(ctx);
+ emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
+ }
+
+ if (ctx->resolve & FD_BUFFER_COLOR) {
+ emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+ A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+ A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+ A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* transfer from system memory to gmem */
+
+static void
+fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
+{
+ /* TODO */
+}
+
+static void
+patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
+{
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
+ *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
+ }
+ util_dynarray_resize(&ctx->draw_patches, 0);
+}
+
+static void
+patch_rbrc(struct fd_context *ctx, uint32_t val)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ unsigned i;
+ for (i = 0; i < fd_patch_num_elements(&fd4_ctx->rbrc_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&fd4_ctx->rbrc_patches, i);
+ *patch->cs = patch->val | val;
+ }
+ util_dynarray_resize(&fd4_ctx->rbrc_patches, 0);
+}
+
+static void
+update_vsc_pipe(struct fd_context *ctx)
+{
+ struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_ringbuffer *ring = ctx->ring;
+ int i;
+
+ OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
+ OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+ A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+ A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+ A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ if (!pipe->bo) {
+ pipe->bo = fd_bo_new(ctx->dev, 0x40000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+ }
+ OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
+ for (i = 0; i < 8; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
+ }
+}
+
+/* before first tile */
+static void
+fd4_emit_tile_init(struct fd_context *ctx)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ uint32_t rb_render_control;
+
+ fd4_emit_restore(ctx);
+
+ OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
+ OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+ A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+ A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
+ 0x00010000); /* XXX */
+
+ update_vsc_pipe(ctx);
+ patch_draws(ctx, IGNORE_VISIBILITY);
+
+ rb_render_control = 0; // XXX or BINNING_PASS.. but maybe we can emit only from gmem
+ patch_rbrc(ctx, rb_render_control);
+}
+
+/* before mem2gmem */
+static void
+fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ uint32_t reg;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+ reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
+ if (pfb->zsbuf) {
+ reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+ }
+ OUT_RING(ring, reg);
+ if (pfb->zsbuf) {
+ OUT_RING(ring, A4XX_RB_DEPTH_PITCH(gmem->bin_w));
+ OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(gmem->bin_w));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
+ if (pfb->zsbuf) {
+ OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
+ fd_pipe2depth(pfb->zsbuf->format)));
+ }
+
+ if (ctx->needs_rb_fbd) {
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+ A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+ ctx->needs_rb_fbd = false;
+ }
+}
+
+/* before IB to rendering cmds: */
+static void
+fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
+{
+ struct fd_ringbuffer *ring = ctx->ring;
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+ uint32_t x1 = tile->xoff;
+ uint32_t y1 = tile->yoff;
+ uint32_t x2 = tile->xoff + tile->bin_w - 1;
+ uint32_t y2 = tile->yoff + tile->bin_h - 1;
+
+ OUT_PKT3(ring, CP_SET_BIN, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
+ OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
+
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+
+ /* setup scissor/offset for current tile: */
+ OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
+ OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
+ A4XX_RB_BIN_OFFSET_Y(tile->yoff));
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
+ OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
+ A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
+}
+
+void
+fd4_gmem_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ ctx->emit_tile_init = fd4_emit_tile_init;
+ ctx->emit_tile_prep = fd4_emit_tile_prep;
+ ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
+ ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
+ ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h
new file mode 100644
index 00000000000..8964714d79b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_GMEM_H_
+#define FD4_GMEM_H_
+
+#include "pipe/p_context.h"
+
+void fd4_gmem_init(struct pipe_context *pctx);
+
+#endif /* FD4_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
new file mode 100644
index 00000000000..591a1d87012
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -0,0 +1,480 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "freedreno_program.h"
+
+#include "fd4_program.h"
+#include "fd4_emit.h"
+#include "fd4_texture.h"
+#include "fd4_util.h"
+
+static void
+delete_shader_stateobj(struct fd4_shader_stateobj *so)
+{
+ ir3_shader_destroy(so->shader);
+ free(so);
+}
+
+static struct fd4_shader_stateobj *
+create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+ enum shader_t type)
+{
+ struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
+ so->shader = ir3_shader_create(pctx, cso->tokens, type);
+ return so;
+}
+
+static void *
+fd4_fp_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
+{
+ return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
+}
+
+static void
+fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+ struct fd4_shader_stateobj *so = hwcso;
+ delete_shader_stateobj(so);
+}
+
+static void *
+fd4_vp_state_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso)
+{
+ return create_shader_stateobj(pctx, cso, SHADER_VERTEX);
+}
+
+static void
+fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+ struct fd4_shader_stateobj *so = hwcso;
+ delete_shader_stateobj(so);
+}
+
+static void
+emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
+{
+ const struct ir3_info *si = &so->info;
+ enum adreno_state_block sb;
+ enum adreno_state_src src;
+ uint32_t i, sz, *bin;
+
+ if (so->type == SHADER_VERTEX) {
+ sb = SB_VERT_SHADER;
+ } else {
+ sb = SB_FRAG_SHADER;
+ }
+
+ if (fd_mesa_debug & FD_DBG_DIRECT) {
+ sz = si->sizedwords;
+ src = SS_DIRECT;
+ bin = fd_bo_map(so->bo);
+ } else {
+ sz = 0;
+ src = 2; // enums different on a4xx..
+ bin = NULL;
+ }
+
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+ CP_LOAD_STATE_0_STATE_SRC(src) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+ if (bin) {
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+ } else {
+ OUT_RELOC(ring, so->bo, 0,
+ CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+ }
+ for (i = 0; i < sz; i++) {
+ OUT_RING(ring, bin[i]);
+ }
+}
+
+struct stage {
+ const struct ir3_shader_variant *v;
+ const struct ir3_info *i;
+ /* const sizes are in units of 4 * vec4 */
+ uint8_t constoff;
+ uint8_t constlen;
+ /* instr sizes are in units of 16 instructions */
+ uint8_t instroff;
+ uint8_t instrlen;
+};
+
+enum {
+ VS = 0,
+ FS = 1,
+ HS = 2,
+ DS = 3,
+ GS = 4,
+ MAX_STAGES
+};
+
+static void
+setup_stages(struct fd4_emit *emit, struct stage *s)
+{
+ unsigned i;
+
+ s[VS].v = fd4_emit_get_vp(emit);
+
+ if (emit->key.binning_pass) {
+ /* use dummy stateobj to simplify binning vs non-binning: */
+ static const struct ir3_shader_variant binning_fp = {};
+ s[FS].v = &binning_fp;
+ } else {
+ s[FS].v = fd4_emit_get_fp(emit);
+ }
+
+ s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
+
+ for (i = 0; i < MAX_STAGES; i++) {
+ if (s[i].v) {
+ s[i].i = &s[i].v->info;
+ /* constlen is in units of 4 * vec4: */
+ s[i].constlen = align(s[i].v->constlen, 4) / 4;
+ /* instrlen is already in units of 16 instr.. although
+ * probably we should ditch that and not make the compiler
+ * care about instruction group size of a3xx vs a4xx
+ */
+ s[i].instrlen = s[i].v->instrlen;
+ } else {
+ s[i].i = NULL;
+ s[i].constlen = 0;
+ s[i].instrlen = 0;
+ }
+ }
+
+ /* NOTE: at least for gles2, blob partitions VS at bottom of const
+ * space and FS taking entire remaining space. We probably don't
+ * need to do that the same way, but for now mimic what the blob
+ * does to make it easier to diff against register values from blob
+ */
+ s[VS].constlen = 66;
+ s[FS].constlen = 128 - s[VS].constlen;
+ s[VS].instroff = 0;
+ s[VS].constoff = 0;
+ s[FS].instroff = 64 - s[FS].instrlen;
+ s[FS].constoff = s[VS].constlen;
+ s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
+ s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
+}
+
+void
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+{
+ struct stage s[MAX_STAGES];
+ uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+ int constmode;
+ int i, j, k;
+
+ setup_stages(emit, s);
+
+ /* blob seems to always use constmode currently: */
+ constmode = 1;
+
+ pos_regid = ir3_find_output_regid(s[VS].v,
+ ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
+ posz_regid = ir3_find_output_regid(s[FS].v,
+ ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
+ psize_regid = ir3_find_output_regid(s[VS].v,
+ ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+ color_regid = ir3_find_output_regid(s[FS].v,
+ ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+
+ /* we could probably divide this up into things that need to be
+ * emitted if frag-prog is dirty vs if vert-prog is dirty..
+ */
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
+ OUT_RING(ring, 0x00000003);
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 4);
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+ A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
+ A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+ /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
+ * flush some caches? I think we only need to set those
+ * bits if we have updated const or shader..
+ */
+ A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+ A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+ 0xfcfc0000 | /* XXX */
+ A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
+ COND(s[FS].v->frag_coord, A4XX_HLSQ_CONTROL_1_REG_ZWCOORD));
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid));
+
+ OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
+ OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
+ A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
+ A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
+ OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
+ A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
+ A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
+ OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
+ A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
+ A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
+ OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
+ A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
+ A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
+ OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
+ A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
+ A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
+ OUT_RING(ring, 0x140010 | /* XXX */
+ COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));
+
+ OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
+ OUT_RING(ring, 0x1c3); /* XXX SP_INSTR_CACHE_CTRL */
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
+ OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
+ OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+ A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+ A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+ A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
+ A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+ A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+ COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
+ A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
+ OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
+ A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
+ A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));
+
+ for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);
+
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count) {
+ k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+ reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
+ reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
+ }
+
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count) {
+ k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+ reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
+ reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
+ }
+
+ OUT_RING(ring, reg);
+ }
+
+ for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
+ uint32_t reg = 0;
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
+
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count)
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count)
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count)
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
+ j = ir3_next_varying(s[FS].v, j);
+ if (j < s[FS].v->inputs_count)
+ reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);
+
+ OUT_RING(ring, reg);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+ A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
+ OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
+ OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
+ OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+ COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
+ A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+ A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+ A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+ A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+ COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+ OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
+ 0x80000000 | /* XXX */
+ COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING));
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
+ OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+ A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
+ if (emit->key.binning_pass)
+ OUT_RING(ring, 0x00000000);
+ else
+ OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
+
+ OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+ A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+ A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
+ OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+ A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+ OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL2, 1);
+ OUT_RING(ring, A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(0) |
+ COND(s[FS].v->total_in > 0, A4XX_RB_MSAA_CONTROL2_VARYING));
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
+ if (s[FS].v->writes_pos) {
+ OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
+ A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
+ } else {
+ OUT_RING(ring, 0x00000001);
+ }
+
+ OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
+// XXX do we need to patch? or update when RT format changes.. maybe
+// move this to emit??
+ A4XX_SP_FS_MRT_REG_MRTFORMAT(RB4_R8G8B8A8_UNORM) | // XXX patch?
+ COND(s[FS].v->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+
+ if (emit->key.binning_pass) {
+ OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+ OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
+ 0x40000000 | /* XXX */
+ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, 0x00000000);
+ } else {
+ uint32_t vinterp[8] = {0}, flatshade[2] = {0};
+
+ /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
+ uint32_t interp = s[FS].v->inputs[j].interpolate;
+ if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
+ ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+ /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+ * instead.. rather than -8 everywhere else..
+ */
+ uint32_t loc = s[FS].v->inputs[j].inloc - 8;
+
+ /* currently assuming varyings aligned to 4 (not
+ * packed):
+ */
+ debug_assert((loc % 4) == 0);
+
+ for (i = 0; i < 4; i++, loc++) {
+ vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+ flatshade[loc / 32] |= 1 << (loc % 32);
+ }
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+ OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
+ A4XX_VPC_ATTR_THRDASSIGN(1) |
+ COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
+ 0x40000000 | /* XXX */
+ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+ OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
+ A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));
+
+ OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
+
+ OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (i = 0; i < 8; i++)
+ OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ }
+
+ emit_shader(ring, s[VS].v);
+
+ if (!emit->key.binning_pass)
+ emit_shader(ring, s[FS].v);
+}
+
+/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
+static void
+fix_blit_fp(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ struct fd4_shader_stateobj *so = ctx->blit_prog.fp;
+
+ so->shader->vpsrepl[0] = 0x99999999;
+ so->shader->vpsrepl[1] = 0x99999999;
+ so->shader->vpsrepl[2] = 0x99999999;
+ so->shader->vpsrepl[3] = 0x99999999;
+}
+
+void
+fd4_prog_init(struct pipe_context *pctx)
+{
+ pctx->create_fs_state = fd4_fp_state_create;
+ pctx->delete_fs_state = fd4_fp_state_delete;
+
+ pctx->create_vs_state = fd4_vp_state_create;
+ pctx->delete_vs_state = fd4_vp_state_delete;
+
+ fd_prog_init(pctx);
+
+ fix_blit_fp(pctx);
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
new file mode 100644
index 00000000000..52306a4c60d
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
@@ -0,0 +1,46 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_PROGRAM_H_
+#define FD4_PROGRAM_H_
+
+#include "pipe/p_context.h"
+#include "freedreno_context.h"
+#include "ir3_shader.h"
+
+struct fd4_shader_stateobj {
+ struct ir3_shader *shader;
+};
+
+struct fd4_emit;
+
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+
+void fd4_prog_init(struct pipe_context *pctx);
+
+#endif /* FD4_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
new file mode 100644
index 00000000000..9a50626aaeb
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -0,0 +1,39 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_query_hw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+#include "fd4_query.h"
+#include "fd4_util.h"
+
+void fd4_query_context_init(struct pipe_context *pctx)
+{
+ /* TODO */
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.h b/src/gallium/drivers/freedreno/a4xx/fd4_query.h
new file mode 100644
index 00000000000..a2e91569a46
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_QUERY_H_
+#define FD4_QUERY_H_
+
+#include "pipe/p_context.h"
+
+void fd4_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD4_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
new file mode 100644
index 00000000000..b363cb79a28
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -0,0 +1,94 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_rasterizer.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+void *
+fd4_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct fd4_rasterizer_stateobj *so;
+ float psize_min, psize_max;
+
+ so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 8192;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
+
+/*
+ if (cso->line_stipple_enable) {
+ ??? TODO line stipple
+ }
+ TODO cso->half_pixel_center
+ if (cso->multisample)
+ TODO
+*/
+ so->gras_cl_clip_cntl = 0x80000; /* ??? */
+ so->gras_su_point_minmax =
+ A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
+ A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
+ so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+ so->gras_su_poly_offset_scale =
+ A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+ so->gras_su_poly_offset_offset =
+ A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+
+ so->gras_su_mode_control =
+ A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+
+ if (cso->cull_face & PIPE_FACE_FRONT)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+ if (cso->cull_face & PIPE_FACE_BACK)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+ if (!cso->front_ccw)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
+ if (!cso->flatshade_first)
+ so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+
+ if (cso->offset_tri)
+ so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+
+ return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
new file mode 100644
index 00000000000..06c728f2f1f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_RASTERIZER_H_
+#define FD4_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd4_rasterizer_stateobj {
+ struct pipe_rasterizer_state base;
+ uint32_t gras_su_point_minmax;
+ uint32_t gras_su_point_size;
+ uint32_t gras_su_poly_offset_scale;
+ uint32_t gras_su_poly_offset_offset;
+
+ uint32_t gras_su_mode_control;
+ uint32_t gras_cl_clip_cntl;
+ uint32_t pc_prim_vtx_cntl;
+};
+
+static INLINE struct fd4_rasterizer_stateobj *
+fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+ return (struct fd4_rasterizer_stateobj *)rast;
+}
+
+void * fd4_rasterizer_state_create(struct pipe_context *pctx,
+ const struct pipe_rasterizer_state *cso);
+
+#endif /* FD4_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
new file mode 100644
index 00000000000..8ee246b611c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd4_screen.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+static boolean
+fd4_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned usage)
+{
+ unsigned retval = 0;
+
+ if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+ (sample_count > 1) || /* TODO add MSAA */
+ !util_format_is_supported(format, usage)) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+ util_format_name(format), target, sample_count, usage);
+ return FALSE;
+ }
+
+ if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+ (fd4_pipe2vtx(format) != ~0)) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ (fd4_pipe2tex(format) != ~0)) {
+ retval |= PIPE_BIND_SAMPLER_VIEW;
+ }
+
+ if ((usage & (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED)) &&
+ (fd4_pipe2color(format) != ~0) &&
+ (fd4_pipe2tex(format) != ~0)) {
+ retval |= usage & (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED);
+ }
+
+ if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+ (fd_pipe2depth(format) != ~0) &&
+ (fd4_pipe2tex(format) != ~0)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+ (fd_pipe2index(format) != ~0)) {
+ retval |= PIPE_BIND_INDEX_BUFFER;
+ }
+
+ if (usage & PIPE_BIND_TRANSFER_READ)
+ retval |= PIPE_BIND_TRANSFER_READ;
+ if (usage & PIPE_BIND_TRANSFER_WRITE)
+ retval |= PIPE_BIND_TRANSFER_WRITE;
+
+ if (retval != usage) {
+ DBG("not supported: format=%s, target=%d, sample_count=%d, "
+ "usage=%x, retval=%x", util_format_name(format),
+ target, sample_count, usage, retval);
+ }
+
+ return retval == usage;
+}
+
+void
+fd4_screen_init(struct pipe_screen *pscreen)
+{
+ pscreen->context_create = fd4_context_create;
+ pscreen->is_format_supported = fd4_screen_is_format_supported;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.h b/src/gallium/drivers/freedreno/a4xx/fd4_screen.h
new file mode 100644
index 00000000000..09b68ef20c4
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_SCREEN_H_
+#define FD4_SCREEN_H_
+
+#include "pipe/p_screen.h"
+
+void fd4_screen_init(struct pipe_screen *pscreen);
+
+#endif /* FD4_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
new file mode 100644
index 00000000000..fc9c8735815
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -0,0 +1,190 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "fd4_texture.h"
+#include "fd4_util.h"
+
+/* TODO do we need to emulate clamp-to-edge like a3xx? */
+static enum a4xx_tex_clamp
+tex_clamp(unsigned wrap)
+{
+ /* hardware probably supports more, but we can't coax all the
+ * wrap/clamp modes out of the GLESv2 blob driver.
+ *
+ * TODO once we have basics working, go back and just try
+ * different values and see what happens
+ */
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return A4XX_TEX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return A4XX_TEX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+// TODO
+// return A4XX_TEX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+// TODO
+// return A4XX_TEX_MIRROR_CLAMP;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return A4XX_TEX_MIRROR_REPEAT;
+ default:
+ DBG("invalid wrap: %u", wrap);
+ return 0;
+ }
+}
+
+static enum a4xx_tex_filter
+tex_filter(unsigned filter)
+{
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return A4XX_TEX_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return A4XX_TEX_LINEAR;
+ default:
+ DBG("invalid filter: %u", filter);
+ return 0;
+ }
+}
+
+static void *
+fd4_sampler_state_create(struct pipe_context *pctx,
+ const struct pipe_sampler_state *cso)
+{
+ struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ so->texsamp0 =
+ A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
+ A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
+ A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
+ A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
+ A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+
+ if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ so->texsamp1 =
+ A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
+ A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+ } else {
+ so->texsamp1 = 0x00000000;
+ }
+
+ if (cso->compare_mode)
+ so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+ return so;
+}
+
+static enum a4xx_tex_type
+tex_type(unsigned target)
+{
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A4XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A4XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A4XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A4XX_TEX_CUBE;
+ }
+}
+
+static struct pipe_sampler_view *
+fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+ const struct pipe_sampler_view *cso)
+{
+ struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
+ struct fd_resource *rsc = fd_resource(prsc);
+ unsigned lvl = cso->u.tex.first_level;
+
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+ pipe_reference(NULL, &prsc->reference);
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
+
+ so->tex_resource = rsc;
+
+ so->texconst0 =
+ A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
+ fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+ cso->swizzle_b, cso->swizzle_a);
+
+ so->texconst1 =
+ A4XX_TEX_CONST_1_WIDTH(prsc->width0) |
+ A4XX_TEX_CONST_1_HEIGHT(prsc->height0);
+ so->texconst2 =
+ A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+
+ switch (prsc->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_3D:
+ so->texconst3 =
+ A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+ break;
+ default:
+ so->texconst3 = 0x00000000;
+ break;
+ }
+
+ return &so->base;
+}
+
+void
+fd4_texture_init(struct pipe_context *pctx)
+{
+ pctx->create_sampler_state = fd4_sampler_state_create;
+ pctx->bind_sampler_states = fd_sampler_states_bind;
+ pctx->create_sampler_view = fd4_sampler_view_create;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
new file mode 100644
index 00000000000..3592b1049b1
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_TEXTURE_H_
+#define FD4_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_resource.h"
+
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+struct fd4_sampler_stateobj {
+ struct pipe_sampler_state base;
+ uint32_t texsamp0, texsamp1;
+};
+
+static INLINE struct fd4_sampler_stateobj *
+fd4_sampler_stateobj(struct pipe_sampler_state *samp)
+{
+ return (struct fd4_sampler_stateobj *)samp;
+}
+
+struct fd4_pipe_sampler_view {
+ struct pipe_sampler_view base;
+ struct fd_resource *tex_resource;
+ uint32_t texconst0, texconst1, texconst2, texconst3;
+};
+
+static INLINE struct fd4_pipe_sampler_view *
+fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+ return (struct fd4_pipe_sampler_view *)pview;
+}
+
+unsigned fd4_get_const_idx(struct fd_context *ctx,
+ struct fd_texture_stateobj *tex, unsigned samp_id);
+
+void fd4_texture_init(struct pipe_context *pctx);
+
+#endif /* FD4_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_util.c b/src/gallium/drivers/freedreno/a4xx/fd4_util.c
new file mode 100644
index 00000000000..ddff977f0e5
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_util.c
@@ -0,0 +1,401 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "fd4_util.h"
+
+/* convert pipe format to vertex buffer format: */
+enum a4xx_vtx_fmt
+fd4_pipe2vtx(enum pipe_format format)
+{
+ switch (format) {
+ /* 8-bit buffers. */
+ case PIPE_FORMAT_R8_UNORM:
+ return VFMT4_NORM_UBYTE_8;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return VFMT4_NORM_BYTE_8;
+
+ case PIPE_FORMAT_R8_UINT:
+ case PIPE_FORMAT_R8_USCALED:
+ return VFMT4_UBYTE_8;
+
+ case PIPE_FORMAT_R8_SINT:
+ case PIPE_FORMAT_R8_SSCALED:
+ return VFMT4_BYTE_8;
+
+ /* 16-bit buffers. */
+ case PIPE_FORMAT_R16_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ return VFMT4_NORM_USHORT_16;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return VFMT4_NORM_SHORT_16;
+
+ case PIPE_FORMAT_R16_UINT:
+ case PIPE_FORMAT_R16_USCALED:
+ return VFMT4_USHORT_16;
+
+ case PIPE_FORMAT_R16_SINT:
+ case PIPE_FORMAT_R16_SSCALED:
+ return VFMT4_SHORT_16;
+
+ case PIPE_FORMAT_R16_FLOAT:
+ return VFMT4_FLOAT_16;
+
+ case PIPE_FORMAT_R8G8_UNORM:
+ return VFMT4_NORM_UBYTE_8_8;
+
+ case PIPE_FORMAT_R8G8_SNORM:
+ return VFMT4_NORM_BYTE_8_8;
+
+ case PIPE_FORMAT_R8G8_UINT:
+ case PIPE_FORMAT_R8G8_USCALED:
+ return VFMT4_UBYTE_8_8;
+
+ case PIPE_FORMAT_R8G8_SINT:
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return VFMT4_BYTE_8_8;
+
+ /* 24-bit buffers. */
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return VFMT4_NORM_UBYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return VFMT4_NORM_BYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_UINT:
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return VFMT4_UBYTE_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8_SINT:
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return VFMT4_BYTE_8_8_8;
+
+ /* 32-bit buffers. */
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return VFMT4_NORM_UBYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return VFMT4_NORM_BYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_UINT:
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return VFMT4_UBYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R8G8B8A8_SINT:
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return VFMT4_BYTE_8_8_8_8;
+
+ case PIPE_FORMAT_R16G16_SSCALED:
+ case PIPE_FORMAT_R16G16_SINT:
+ return VFMT4_SHORT_16_16;
+
+ case PIPE_FORMAT_R16G16_FLOAT:
+ return VFMT4_FLOAT_16_16;
+
+ case PIPE_FORMAT_R16G16_UINT:
+ case PIPE_FORMAT_R16G16_USCALED:
+ return VFMT4_USHORT_16_16;
+
+ case PIPE_FORMAT_R16G16_UNORM:
+ return VFMT4_NORM_USHORT_16_16;
+
+ case PIPE_FORMAT_R16G16_SNORM:
+ return VFMT4_NORM_SHORT_16_16;
+
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ return VFMT4_NORM_UINT_10_10_10_2;
+
+ case PIPE_FORMAT_R10G10B10A2_SNORM:
+ return VFMT4_NORM_INT_10_10_10_2;
+
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_R10G10B10A2_USCALED:
+ return VFMT4_UINT_10_10_10_2;
+
+ case PIPE_FORMAT_R10G10B10A2_SSCALED:
+ return VFMT4_INT_10_10_10_2;
+
+ /* 48-bit buffers. */
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ return VFMT4_FLOAT_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16_SINT:
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return VFMT4_SHORT_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16_UINT:
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return VFMT4_USHORT_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return VFMT4_NORM_SHORT_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return VFMT4_NORM_USHORT_16_16_16;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ return VFMT4_FLOAT_32;
+
+ case PIPE_FORMAT_R32_FIXED:
+ return VFMT4_FIXED_32;
+
+ /* 64-bit buffers. */
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return VFMT4_NORM_USHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return VFMT4_NORM_SHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_UINT:
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return VFMT4_USHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R16G16B16A16_SINT:
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return VFMT4_SHORT_16_16_16_16;
+
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return VFMT4_FLOAT_32_32;
+
+ case PIPE_FORMAT_R32G32_FIXED:
+ return VFMT4_FIXED_32_32;
+
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ return VFMT4_FLOAT_16_16_16_16;
+
+ /* 96-bit buffers. */
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return VFMT4_FLOAT_32_32_32;
+
+ case PIPE_FORMAT_R32G32B32_FIXED:
+ return VFMT4_FIXED_32_32_32;
+
+ /* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return VFMT4_FLOAT_32_32_32_32;
+
+ case PIPE_FORMAT_R32G32B32A32_FIXED:
+ return VFMT4_FIXED_32_32_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_SINT:
+ case PIPE_FORMAT_R32G32B32A32_UINT:
+
+ case PIPE_FORMAT_R32_UINT:
+ case PIPE_FORMAT_R32_SINT:
+ case PIPE_FORMAT_A32_UINT:
+ case PIPE_FORMAT_A32_SINT:
+ case PIPE_FORMAT_L32_UINT:
+ case PIPE_FORMAT_L32_SINT:
+ case PIPE_FORMAT_I32_UINT:
+ case PIPE_FORMAT_I32_SINT:
+
+ case PIPE_FORMAT_R32G32_SINT:
+ case PIPE_FORMAT_R32G32_UINT:
+ case PIPE_FORMAT_L32A32_UINT:
+ case PIPE_FORMAT_L32A32_SINT:
+*/
+
+ default:
+ return ~0;
+ }
+}
+
+/* convert pipe format to texture sampler format: */
+enum a4xx_tex_fmt
+fd4_pipe2tex(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return TFMT4_NORM_UINT_8;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ case PIPE_FORMAT_R8G8B8X8_SRGB:
+ return TFMT4_NORM_UINT_8_8_8_8;
+
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return TFMT4_NORM_UINT_X8Z24;
+
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return TFMT4_NORM_UINT_8_8_8_8;
+
+// case PIPE_FORMAT_Z16_UNORM:
+// return TFMT4_NORM_UINT_8_8;
+//
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ case PIPE_FORMAT_R16G16B16X16_FLOAT:
+ return TFMT4_FLOAT_16_16_16_16;
+
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R32G32B32X32_FLOAT:
+ return TFMT4_FLOAT_32_32_32_32;
+
+ // TODO add more..
+
+ default:
+ return ~0;
+ }
+}
+
+/* convert pipe format to MRT / copydest format used for render-target: */
+enum a4xx_color_fmt
+fd4_pipe2color(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return RB4_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return RB4_Z16_UNORM;
+
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ /* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
+ return RB4_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ return RB4_A8_UNORM;
+//
+// case PIPE_FORMAT_R16G16B16A16_FLOAT:
+// case PIPE_FORMAT_R16G16B16X16_FLOAT:
+// return RB4_R16G16B16A16_FLOAT;
+//
+// case PIPE_FORMAT_R32G32B32A32_FLOAT:
+// case PIPE_FORMAT_R32G32B32X32_FLOAT:
+// return RB4_R32G32B32A32_FLOAT;
+
+ // TODO add more..
+
+ default:
+ return ~0;
+ }
+}
+
+/* we need to special case a bit the depth/stencil restore, because we are
+ * using the texture sampler to blit into the depth/stencil buffer, *not*
+ * into a color buffer. Otherwise fd4_tex_swiz() will do the wrong thing,
+ * as it is assuming that you are sampling into normal render target..
+ */
+enum pipe_format
+fd4_gmem_restore_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z16_UNORM:
+ return PIPE_FORMAT_B8G8R8A8_UNORM;
+ default:
+ return format;
+ }
+}
+
+/* TODO share w/ a3xx?? */
+enum a3xx_color_swap
+fd4_pipe2swap(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ return WXYZ;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_SRGB:
+ case PIPE_FORMAT_X8R8G8B8_SRGB:
+ return ZYXW;
+
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_SRGB:
+ case PIPE_FORMAT_X8B8G8R8_SRGB:
+ return XYZW;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ default:
+ return WZYX;
+ }
+}
+
+static inline enum a4xx_tex_swiz
+tex_swiz(unsigned swiz)
+{
+ switch (swiz) {
+ default:
+ case PIPE_SWIZZLE_RED: return A4XX_TEX_X;
+ case PIPE_SWIZZLE_GREEN: return A4XX_TEX_Y;
+ case PIPE_SWIZZLE_BLUE: return A4XX_TEX_Z;
+ case PIPE_SWIZZLE_ALPHA: return A4XX_TEX_W;
+ case PIPE_SWIZZLE_ZERO: return A4XX_TEX_ZERO;
+ case PIPE_SWIZZLE_ONE: return A4XX_TEX_ONE;
+ }
+}
+
+uint32_t
+fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+ unsigned swizzle_b, unsigned swizzle_a)
+{
+ const struct util_format_description *desc =
+ util_format_description(format);
+ unsigned char swiz[4] = {
+ swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+ }, rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
+ A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_util.h b/src/gallium/drivers/freedreno/a4xx/fd4_util.h
new file mode 100644
index 00000000000..359882f599e
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_util.h
@@ -0,0 +1,45 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_UTIL_H_
+#define FD4_UTIL_H_
+
+#include "freedreno_util.h"
+
+#include "a4xx.xml.h"
+
+enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format);
+enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
+enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
+enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
+enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
+
+uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+ unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+#endif /* FD4_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c
new file mode 100644
index 00000000000..6f09ec9d047
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_zsa.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+void *
+fd4_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct fd4_zsa_stateobj *so;
+
+ so = CALLOC_STRUCT(fd4_zsa_stateobj);
+ if (!so)
+ return NULL;
+
+ so->base = *cso;
+
+ so->rb_depth_control |=
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+
+ if (cso->depth.enabled)
+ so->rb_depth_control |=
+ A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+
+ if (cso->depth.writemask)
+ so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+
+ if (cso->stencil[0].enabled) {
+ const struct pipe_stencil_state *s = &cso->stencil[0];
+
+ so->rb_stencil_control |=
+ A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
+ A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+ A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+ A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+ so->rb_stencilrefmask |=
+ 0xff000000 | /* ??? */
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+ A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+
+ if (cso->stencil[1].enabled) {
+ const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+ so->rb_stencil_control |=
+ A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+ A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+ A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+ A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+ so->rb_stencilrefmask_bf |=
+ 0xff000000 | /* ??? */
+ A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
+ A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
+ }
+ }
+
+ if (cso->alpha.enabled) {
+ so->gras_alpha_control =
+ A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
+ so->rb_alpha_control =
+ A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+ A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
+ so->rb_depth_control |=
+ A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+
+ so->rb_render_control = 0x8; /* XXX */
+
+ return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
new file mode 100644
index 00000000000..aea12047c49
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
@@ -0,0 +1,58 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_ZSA_H_
+#define FD4_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+struct fd4_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state base;
+ uint32_t gras_alpha_control;
+ uint32_t rb_alpha_control;
+ uint32_t rb_render_control;
+ uint32_t rb_depth_control;
+ uint32_t rb_stencil_control;
+ uint32_t rb_stencilrefmask;
+ uint32_t rb_stencilrefmask_bf;
+};
+
+static INLINE struct fd4_zsa_stateobj *
+fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
+{
+ return (struct fd4_zsa_stateobj *)zsa;
+}
+
+void * fd4_zsa_state_create(struct pipe_context *pctx,
+ const struct pipe_depth_stencil_alpha_state *cso);
+
+#endif /* FD4_ZSA_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index e873af92943..ce105b8786b 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -52,6 +52,7 @@
#include "a2xx/fd2_screen.h"
#include "a3xx/fd3_screen.h"
+#include "a4xx/fd4_screen.h"
/* XXX this should go away */
#include "state_tracker/drm_driver.h"
@@ -514,7 +515,7 @@ fd_screen_create(struct fd_device *dev)
* before enabling:
*
* If you have a different adreno version, feel free to add it to one
- * of the two cases below and see what happens. And if it works, please
+ * of the cases below and see what happens. And if it works, please
* send a patch ;-)
*/
switch (screen->gpu_id) {
@@ -525,6 +526,9 @@ fd_screen_create(struct fd_device *dev)
case 330:
fd3_screen_init(pscreen);
break;
+ case 420:
+ fd4_screen_init(pscreen);
+ break;
default:
debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
goto fail;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 60d4e4a15d5..41112460155 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -540,7 +540,8 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr,
emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
};
-void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
+void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
+ uint32_t gpu_id)
{
uint32_t *ptr, *dwords;
uint32_t i;
@@ -550,11 +551,15 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
info->max_const = -1;
info->instrs_count = 0;
- /* need a integer number of instruction "groups" (sets of four
- * instructions), so pad out w/ NOPs if needed:
- * (each instruction is 64bits)
+ /* need a integer number of instruction "groups" (sets of 16
+ * instructions on a4xx or sets of 4 instructions on a3xx),
+ * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
*/
- info->sizedwords = 2 * align(shader->instrs_count, 4);
+ if (gpu_id >= 400) {
+ info->sizedwords = 2 * align(shader->instrs_count, 16);
+ } else {
+ info->sizedwords = 2 * align(shader->instrs_count, 4);
+ }
ptr = dwords = calloc(4, info->sizedwords);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 8a5e9fd687c..06bad6e26fc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -264,7 +264,7 @@ struct ir3_block {
struct ir3 * ir3_create(void);
void ir3_destroy(struct ir3 *shader);
void * ir3_assemble(struct ir3 *shader,
- struct ir3_info *info);
+ struct ir3_info *info, uint32_t gpu_id);
void * ir3_alloc(struct ir3 *shader, int sz);
struct ir3_block * ir3_block_create(struct ir3 *shader,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 7de29f33d88..f28ce27a00d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -49,7 +49,8 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
// for debug, dump some before/after info:
- bin = ir3_assemble(so->ir, &info);
+ // TODO make gpu_id configurable on cmdline
+ bin = ir3_assemble(so->ir, &info, 320);
if (fd_mesa_debug & FD_DBG_DISASM) {
struct ir3_block *block = so->ir->block;
struct ir3_register *reg;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 1f7e869d9f3..0c74f2f26f2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -56,7 +56,7 @@ assemble_variant(struct ir3_shader_variant *v)
struct fd_context *ctx = fd_context(v->shader->pctx);
uint32_t sz, *bin;
- bin = ir3_assemble(v->ir, &v->info);
+ bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id);
sz = v->info.sizedwords * 4;
v->bo = fd_bo_new(ctx->dev, sz,
@@ -67,7 +67,11 @@ assemble_variant(struct ir3_shader_variant *v)
free(bin);
- v->instrlen = v->info.sizedwords / 8;
+ if (ctx->screen->gpu_id >= 400) {
+ v->instrlen = v->info.sizedwords / (2 * 16);
+ } else {
+ v->instrlen = v->info.sizedwords / (2 * 4);
+ }
/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 3d51603fcfb..f70886e2d3b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -111,7 +111,8 @@ struct ir3_shader_variant {
struct ir3 *ir;
/* the instructions length is in units of instruction groups
- * (4 instructions, 8 dwords):
+ * (4 instructions for a3xx, 16 instructions for a4xx.. each
+ * instruction is 2 dwords):
*/
unsigned instrlen;
@@ -203,7 +204,7 @@ struct ir3_shader {
/* so far, only used for blit_prog shader.. values for
* VPC_VARYING_PS_REPL[i].MODE
*/
- uint32_t vpsrepl[4];
+ uint32_t vpsrepl[8];
};