summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarl Worth <cworth@cworth.org>2013-11-26 16:25:06 -0800
committerCarl Worth <cworth@cworth.org>2013-11-26 16:25:06 -0800
commit44831bff0feec310ca786e5d101d3b5e42a69f74 (patch)
tree036a9c542dc8909bcad2a92662fcfed4d984b37d
parent08d7c08512090acd612635e722abc6e9582b69c8 (diff)
parent4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 (diff)
Merge branch '9.2-freedreno' of git://github.com/freedreno/mesa into 9.2
This allows the freedreno driver to build with current mesa.
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h93
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_emit.c4
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_gmem.c12
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h439
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c600
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.c11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c24
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_gmem.c31
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c6
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c25
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_util.c5
-rw-r--r--src/gallium/drivers/freedreno/a3xx/ir-a3xx.h3
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h319
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_gmem.c42
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c18
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c1
-rw-r--r--src/gallium/drivers/freedreno/freedreno_state.c2
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h49
21 files changed, 1340 insertions, 359 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index bee01f1c7e1..35463864b95 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a2xx.xml ( 30127 bytes, from 2013-05-05 18:29:35)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -236,56 +238,6 @@ enum sq_tex_filter {
#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1
-#define REG_A2XX_CP_RB_BASE 0x000001c0
-
-#define REG_A2XX_CP_RB_CNTL 0x000001c1
-
-#define REG_A2XX_CP_RB_RPTR_ADDR 0x000001c3
-
-#define REG_A2XX_CP_RB_RPTR 0x000001c4
-
-#define REG_A2XX_CP_RB_WPTR 0x000001c5
-
-#define REG_A2XX_CP_RB_WPTR_DELAY 0x000001c6
-
-#define REG_A2XX_CP_RB_RPTR_WR 0x000001c7
-
-#define REG_A2XX_CP_RB_WPTR_BASE 0x000001c8
-
-#define REG_A2XX_CP_QUEUE_THRESHOLDS 0x000001d5
-
-#define REG_A2XX_SCRATCH_UMSK 0x000001dc
-
-#define REG_A2XX_SCRATCH_ADDR 0x000001dd
-
-#define REG_A2XX_CP_STATE_DEBUG_INDEX 0x000001ec
-
-#define REG_A2XX_CP_STATE_DEBUG_DATA 0x000001ed
-
-#define REG_A2XX_CP_INT_CNTL 0x000001f2
-
-#define REG_A2XX_CP_INT_STATUS 0x000001f3
-
-#define REG_A2XX_CP_INT_ACK 0x000001f4
-
-#define REG_A2XX_CP_ME_CNTL 0x000001f6
-
-#define REG_A2XX_CP_ME_STATUS 0x000001f7
-
-#define REG_A2XX_CP_ME_RAM_WADDR 0x000001f8
-
-#define REG_A2XX_CP_ME_RAM_RADDR 0x000001f9
-
-#define REG_A2XX_CP_ME_RAM_DATA 0x000001fa
-
-#define REG_A2XX_CP_DEBUG 0x000001fc
-
-#define REG_A2XX_CP_CSQ_RB_STAT 0x000001fd
-
-#define REG_A2XX_CP_CSQ_IB1_STAT 0x000001fe
-
-#define REG_A2XX_CP_CSQ_IB2_STAT 0x000001ff
-
#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395
#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397
@@ -338,11 +290,32 @@ enum sq_tex_filter {
#define REG_A2XX_CP_STAT 0x0000047f
-#define REG_A2XX_SCRATCH_REG0 0x00000578
-
-#define REG_A2XX_SCRATCH_REG2 0x0000057a
-
#define REG_A2XX_RBBM_STATUS 0x000005d0
+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f
+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0
+static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val)
+{
+ return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK;
+}
+#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020
+#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100
+#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200
+#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400
+#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800
+#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000
+#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000
+#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000
+#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000
+#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000
+#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000
+#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000
+#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000
+#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000
+#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000
+#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000
+#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000
+#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000
+#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000
#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01
#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
@@ -358,13 +331,13 @@ static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val)
return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK;
}
-#define REG_A2XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0))
+static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
-#define REG_A2XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0))
+static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
-#define REG_A2XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0))
+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
-#define REG_A2XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0))
+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index b03390ec436..35511ba2bd3 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
OUT_RING(ring, sampler->tex0 | view->tex0);
- OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
+ OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0);
OUT_RING(ring, view->tex2);
OUT_RING(ring, sampler->tex3 | view->tex3);
OUT_RING(ring, sampler->tex4);
@@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
for (i = 0; i < n; i++) {
struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
OUT_RING (ring, vbufs[i].size);
}
}
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index e239eedb2e0..89f5a4d5259 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -70,7 +70,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base,
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
- OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */
OUT_RING(ring, /* RB_COPY_DEST_INFO */
A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
@@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
A2XX_SQ_TEX_0_PITCH(rsc->pitch));
OUT_RELOC(ring, rsc->bo, 0,
- fd2_pipe2surface(psurf->format) | 0x800);
+ fd2_pipe2surface(psurf->format) | 0x800, 0);
OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
OUT_RING(ring, 0x01000000 | // XXX
@@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
y0 = ((float)yoff) / ((float)pfb->height);
y1 = ((float)yoff + bin_h) / ((float)pfb->height);
OUT_PKT3(ring, CP_MEM_WRITE, 9);
- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
@@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx)
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
uint32_t reg;
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
@@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
@@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index c7f5085d032..d183516067b 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -130,6 +132,13 @@ enum a3xx_tex_fmt {
TFMT_NORM_USHORT_5551 = 6,
TFMT_NORM_USHORT_4444 = 7,
TFMT_NORM_UINT_X8Z24 = 10,
+ TFMT_NORM_UINT_NV12_UV_TILED = 17,
+ TFMT_NORM_UINT_NV12_Y_TILED = 19,
+ TFMT_NORM_UINT_NV12_UV = 21,
+ TFMT_NORM_UINT_NV12_Y = 23,
+ TFMT_NORM_UINT_I420_Y = 24,
+ TFMT_NORM_UINT_I420_U = 26,
+ TFMT_NORM_UINT_I420_V = 27,
TFMT_NORM_UINT_2_10_10_10 = 41,
TFMT_NORM_UINT_A8 = 44,
TFMT_NORM_UINT_L8_A8 = 47,
@@ -207,6 +216,37 @@ enum a3xx_tex_swiz {
A3XX_TEX_ONE = 5,
};
+enum a3xx_tex_type {
+ A3XX_TEX_1D = 0,
+ A3XX_TEX_2D = 1,
+ A3XX_TEX_CUBE = 2,
+ A3XX_TEX_3D = 3,
+};
+
+#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001
+#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002
+#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004
+#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008
+#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010
+#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020
+#define A3XX_INT0_VFD_ERROR 0x00000040
+#define A3XX_INT0_CP_SW_INT 0x00000080
+#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100
+#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200
+#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400
+#define A3XX_INT0_CP_HW_FAULT 0x00000800
+#define A3XX_INT0_CP_DMA 0x00001000
+#define A3XX_INT0_CP_IB2_INT 0x00002000
+#define A3XX_INT0_CP_IB1_INT 0x00004000
+#define A3XX_INT0_CP_RB_INT 0x00008000
+#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000
+#define A3XX_INT0_CP_RB_DONE_TS 0x00020000
+#define A3XX_INT0_CP_VS_DONE_TS 0x00040000
+#define A3XX_INT0_CP_PS_DONE_TS 0x00080000
+#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000
+#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000
+#define A3XX_INT0_MISC_HANG_DETECT 0x01000000
+#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000
#define REG_A3XX_RBBM_HW_VERSION 0x00000000
#define REG_A3XX_RBBM_HW_RELEASE 0x00000001
@@ -230,6 +270,27 @@ enum a3xx_tex_swiz {
#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e
#define REG_A3XX_RBBM_STATUS 0x00000030
+#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001
+#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002
+#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004
+#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000
+#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000
+#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000
+#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000
+#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000
+#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000
+#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000
+#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000
+#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000
+#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000
+#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000
+#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000
+#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000
+#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000
+#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000
+#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000
+#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000
+#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000
#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033
@@ -251,20 +312,202 @@ enum a3xx_tex_swiz {
#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080
+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085
+
+#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086
+
+#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087
+
#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088
+#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090
+
+#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095
+
+#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096
+
+#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097
+
+#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098
+
+#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099
+
+#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a
+
+#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b
+
+#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c
+
+#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5
+
+#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6
+
+#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7
+
+#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8
+
+#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9
+
+#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca
+
+#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb
+
+#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc
+
+#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd
+
+#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce
+
+#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf
+
+#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0
+
+#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1
+
+#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2
+
+#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3
+
+#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4
+
+#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5
+
+#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6
+
+#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7
+
+#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8
+
+#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9
+
+#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da
+
+#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db
+
+#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc
+
+#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd
+
+#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de
+
+#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df
+
#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0
#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1
+#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2
+
+#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3
+
+#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4
+
+#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb
+
#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec
#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed
#define REG_A3XX_RBBM_RBBM_CTL 0x00000100
-#define REG_A3XX_RBBM_RBBM_CTL 0x00000100
-
#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111
#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112
@@ -287,22 +530,20 @@ enum a3xx_tex_swiz {
#define REG_A3XX_CP_MEQ_DATA 0x000001db
+#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445
+
#define REG_A3XX_CP_HW_FAULT 0x0000045c
#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e
#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f
-#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0))
+static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; }
-#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0))
+static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; }
#define REG_A3XX_CP_AHB_FAULT 0x0000054d
-#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a
-
-#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b
-
#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040
#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000
#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
@@ -528,9 +769,9 @@ static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val)
#define REG_A3XX_UNKNOWN_20C3 0x000020c3
-#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0))
+static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
-#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0))
+static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020
@@ -553,7 +794,7 @@ static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
}
-#define REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0))
+static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; }
#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f
#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0
static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val)
@@ -579,7 +820,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
}
-#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0))
+static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; }
#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0
#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4
static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
@@ -587,7 +828,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
}
-#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0))
+static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; }
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
@@ -627,12 +868,60 @@ static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000
#define REG_A3XX_RB_BLEND_RED 0x000020e4
+#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff
+#define A3XX_RB_BLEND_RED_UINT__SHIFT 0
+static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val)
+{
+ return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK;
+}
+#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000
+#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16
+static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val)
+{
+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK;
+}
#define REG_A3XX_RB_BLEND_GREEN 0x000020e5
+#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff
+#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0
+static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val)
+{
+ return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK;
+}
+#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000
+#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16
+static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val)
+{
+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK;
+}
#define REG_A3XX_RB_BLEND_BLUE 0x000020e6
+#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff
+#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0
+static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val)
+{
+ return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK;
+}
+#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000
+#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16
+static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val)
+{
+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK;
+}
#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7
+#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff
+#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0
+static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val)
+{
+ return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK;
+}
+#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000
+#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16
+static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val)
+{
+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK;
+}
#define REG_A3XX_UNKNOWN_20E8 0x000020e8
@@ -1063,9 +1352,9 @@ static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245
-#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0))
+static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; }
-#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0))
+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; }
#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f
#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0
static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
@@ -1092,11 +1381,11 @@ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
}
-#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0))
+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; }
-#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; }
-#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; }
#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f
#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0
static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
@@ -1173,13 +1462,13 @@ static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
}
-#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; }
-#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; }
-#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; }
-#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0))
+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; }
#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a
@@ -1293,9 +1582,9 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
}
-#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
-#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
@@ -1321,9 +1610,9 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
}
-#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
-#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
@@ -1480,9 +1769,9 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec
-#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
-#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff
#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0
static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
@@ -1491,9 +1780,9 @@ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
}
#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100
-#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
-#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0))
+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0
static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
@@ -1607,9 +1896,9 @@ static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02
-#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0))
+static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
-#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0))
+static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff
#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0
static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
@@ -1635,26 +1924,46 @@ static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK;
}
-#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0))
+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
-#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0))
+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d
+#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48
+
+#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49
+
+#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a
+
+#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b
+
#define REG_A3XX_UNKNOWN_0C81 0x00000c81
-#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0))
+#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88
+
+#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89
-#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0))
+#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a
-#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0))
+#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b
-#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0))
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
-#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0))
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; }
#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0
+#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6
+
+#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7
+
#define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0
#define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff
#define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0
@@ -1669,18 +1978,46 @@ static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val)
return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK;
}
-#define REG_A3XX_UNKNOWN_0E00 0x00000e00
+#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00
+
+#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01
+
+#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02
+
+#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03
+
+#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04
+
+#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05
#define REG_A3XX_UNKNOWN_0E43 0x00000e43
#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44
+#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45
+
#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61
#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62
+#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64
+
+#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65
+
#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82
+#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84
+
+#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85
+
+#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86
+
+#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87
+
+#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88
+
+#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89
+
#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0
#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff
#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0
@@ -1724,6 +2061,18 @@ static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_op
#define REG_A3XX_UNKNOWN_0F03 0x00000f03
+#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04
+
+#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05
+
+#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06
+
+#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07
+
+#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08
+
+#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09
+
#define REG_A3XX_TEX_SAMP_0 0x00000000
#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c
#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2
@@ -1791,6 +2140,12 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
{
return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
}
+#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000
+#define A3XX_TEX_CONST_0_TYPE__SHIFT 30
+static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
+{
+ return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK;
+}
#define REG_A3XX_TEX_CONST_1 0x00000001
#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index eabe21cb7e9..51154113de6 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg)
return num;
}
-static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+static void regmask_set(regmask_t regmask, struct ir3_register *reg,
+ unsigned wrmask)
{
- unsigned idx = regmask_idx(reg);
- regmask[idx / 8] |= 1 << (idx % 8);
+ unsigned i;
+ for (i = 0; i < 4; i++) {
+ if (wrmask & (1 << i)) {
+ unsigned idx = regmask_idx(reg) + i;
+ regmask[idx / 8] |= 1 << (idx % 8);
+ }
+ }
}
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
@@ -91,6 +97,7 @@ struct fd3_compile_context {
unsigned next_inloc;
unsigned num_internal_temps;
+ struct tgsi_src_register internal_temps[6];
/* track registers which need to synchronize w/ "complex alu" cat3
* instruction pipeline:
@@ -128,9 +135,16 @@ struct fd3_compile_context {
* up the vector operation
*/
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
};
+
+static void vectorize(struct fd3_compile_context *ctx,
+ struct ir3_instruction *instr, struct tgsi_dst_register *dst,
+ int nsrcs, ...);
+static void create_mov(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *dst, struct tgsi_src_register *src);
+
static unsigned
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
const struct tgsi_token *tokens)
@@ -154,19 +168,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
/* Immediates go after constants: */
ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
ctx->base_reg[TGSI_FILE_IMMEDIATE] =
- ctx->info.file_count[TGSI_FILE_CONSTANT];
+ ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
/* Temporaries after outputs after inputs: */
ctx->base_reg[TGSI_FILE_INPUT] = 0;
ctx->base_reg[TGSI_FILE_OUTPUT] =
- ctx->info.file_count[TGSI_FILE_INPUT];
+ ctx->info.file_max[TGSI_FILE_INPUT] + 1;
ctx->base_reg[TGSI_FILE_TEMPORARY] =
- ctx->info.file_count[TGSI_FILE_INPUT] +
- ctx->info.file_count[TGSI_FILE_OUTPUT];
+ ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
+ ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
- ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
- ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
+ ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, tokens);
if (ret != TGSI_PARSE_OK)
@@ -178,6 +192,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
}
static void
+compile_error(struct fd3_compile_context *ctx, const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ _debug_vprintf(format, ap);
+ va_end(ap);
+ tgsi_dump(ctx->tokens, 0);
+ assert(0);
+}
+
+#define compile_assert(ctx, cond) do { \
+ if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
+ } while (0)
+
+static void
compile_free(struct fd3_compile_context *ctx)
{
tgsi_parse_free(&ctx->parser);
@@ -193,6 +222,24 @@ struct instr_translater {
unsigned arg;
};
+static unsigned
+src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
+{
+ unsigned flags = 0;
+
+ if (regmask_get(ctx->needs_ss, reg)) {
+ flags |= IR3_INSTR_SS;
+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+ }
+
+ if (regmask_get(ctx->needs_sy, reg)) {
+ flags |= IR3_INSTR_SY;
+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+ }
+
+ return flags;
+}
+
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
@@ -205,9 +252,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
num = dst->Index + ctx->base_reg[dst->File];
break;
default:
- DBG("unsupported dst register file: %s",
+ compile_error(ctx, "unsupported dst register file: %s\n",
tgsi_file_name(dst->File));
- assert(0);
break;
}
@@ -234,14 +280,17 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
flags |= IR3_REG_CONST;
num = src->Index + ctx->base_reg[src->File];
break;
+ case TGSI_FILE_OUTPUT:
+ /* NOTE: we should only end up w/ OUTPUT file for things like
+ * clamp()'ing saturated dst instructions
+ */
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
num = src->Index + ctx->base_reg[src->File];
break;
default:
- DBG("unsupported src register file: %s",
+ compile_error(ctx, "unsupported src register file: %s\n",
tgsi_file_name(src->File));
- assert(0);
break;
}
@@ -254,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
reg = ir3_reg_create(instr, regid(num, chan), flags);
- if (regmask_get(ctx->needs_ss, reg)) {
- instr->flags |= IR3_INSTR_SS;
- memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
- }
-
- if (regmask_get(ctx->needs_sy, reg)) {
- instr->flags |= IR3_INSTR_SY;
- memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
- }
+ instr->flags |= src_flags(ctx, reg);
return reg;
}
@@ -285,11 +326,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.
*/
-static void
+static struct tgsi_src_register *
get_internal_temp(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst,
- struct tgsi_src_register *tmp_src)
+ struct tgsi_dst_register *tmp_dst)
{
+ struct tgsi_src_register *tmp_src;
int n;
tmp_dst->File = TGSI_FILE_TEMPORARY;
@@ -299,23 +340,78 @@ get_internal_temp(struct fd3_compile_context *ctx,
/* assign next temporary: */
n = ctx->num_internal_temps++;
+ compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
+ tmp_src = &ctx->internal_temps[n];
- tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
+ tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
src_from_dst(tmp_src, tmp_dst);
+
+ return tmp_src;
}
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
* replicate their results)
*/
-static void
+static struct tgsi_src_register *
get_internal_temp_repl(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst,
- struct tgsi_src_register *tmp_src)
+ struct tgsi_dst_register *tmp_dst)
{
- get_internal_temp(ctx, tmp_dst, tmp_src);
+ struct tgsi_src_register *tmp_src =
+ get_internal_temp(ctx, tmp_dst);
tmp_src->SwizzleX = tmp_src->SwizzleY =
tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
+ return tmp_src;
+}
+
+static inline bool
+is_const(struct tgsi_src_register *src)
+{
+ return (src->File == TGSI_FILE_CONSTANT) ||
+ (src->File == TGSI_FILE_IMMEDIATE);
+}
+
+static type_t
+get_ftype(struct fd3_compile_context *ctx)
+{
+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+}
+
+static type_t
+get_utype(struct fd3_compile_context *ctx)
+{
+ return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+}
+
+static unsigned
+src_swiz(struct tgsi_src_register *src, int chan)
+{
+ switch (chan) {
+ case 0: return src->SwizzleX;
+ case 1: return src->SwizzleY;
+ case 2: return src->SwizzleZ;
+ case 3: return src->SwizzleW;
+ }
+ assert(0);
+ return 0;
+}
+
+/* for instructions that cannot take a const register as src, if needed
+ * generate a move to temporary gpr:
+ */
+static struct tgsi_src_register *
+get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
+{
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+
+ compile_assert(ctx, is_const(src));
+
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
+
+ create_mov(ctx, &tmp_dst, src);
+
+ return tmp_src;
}
static void
@@ -365,30 +461,11 @@ get_immediate(struct fd3_compile_context *ctx,
reg->SwizzleW = swiz2tgsi[swiz];
}
-static type_t
-get_type(struct fd3_compile_context *ctx)
-{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
-}
-
-static unsigned
-src_swiz(struct tgsi_src_register *src, int chan)
-{
- switch (chan) {
- case 0: return src->SwizzleX;
- case 1: return src->SwizzleY;
- case 2: return src->SwizzleZ;
- case 3: return src->SwizzleW;
- }
- assert(0);
- return 0;
-}
-
static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
{
- type_t type_mov = get_type(ctx);
+ type_t type_mov = get_ftype(ctx);
unsigned i;
for (i = 0; i < 4; i++) {
@@ -404,7 +481,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
ir3_instr_create(ctx->ir, 0, OPC_NOP);
}
}
+}
+static void
+create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_src_register src;
+
+ src_from_dst(&src, dst);
+
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
+
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+ vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
+}
+
+static void
+create_clamp_imm(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *dst,
+ uint32_t minval, uint32_t maxval)
+{
+ struct tgsi_src_register minconst, maxconst;
+
+ get_immediate(ctx, &minconst, minval);
+ get_immediate(ctx, &maxconst, maxval);
+
+ create_clamp(ctx, dst, &minconst, &maxconst);
}
static struct tgsi_dst_register *
@@ -415,7 +520,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_src_register *src = &inst->Src[i].Register;
if ((src->File == dst->File) && (src->Index == dst->Index)) {
- get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
+ ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
ctx->tmp_dst.WriteMask = dst->WriteMask;
dst = &ctx->tmp_dst;
break;
@@ -430,7 +535,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
{
/* if necessary, add mov back into original dst: */
if (dst != &inst->Dst[0].Register) {
- create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
+ create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
}
}
@@ -478,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
cur->regs[j+1]->num =
regid(cur->regs[j+1]->num >> 2,
src_swiz(src, i));
+ cur->flags |= src_flags(ctx, cur->regs[j+1]);
}
va_end(ap);
}
@@ -496,6 +602,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
* native instructions:
*/
+static inline void
+get_swiz(unsigned *swiz, struct tgsi_src_register *src)
+{
+ swiz[0] = src->SwizzleX;
+ swiz[1] = src->SwizzleY;
+ swiz[2] = src->SwizzleZ;
+ swiz[3] = src->SwizzleW;
+}
+
static void
trans_dotp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
@@ -503,39 +618,35 @@ trans_dotp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
- unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
- unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
+ unsigned swiz0[4];
+ unsigned swiz1[4];
opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
unsigned n = t->arg; /* number of components */
- unsigned i;
+ unsigned i, swapped = 0;
- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
- /* Blob compiler never seems to use a const in src1 position for
- * mad.*, although there does seem (according to disassembler
- * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
- * is a const. Not sure if this is a hw bug, or simply that the
- * disassembler lies.
+ /* in particular, can't handle const for src1 for cat3/mad:
*/
- if ((src1->File == TGSI_FILE_IMMEDIATE) ||
- (src1->File == TGSI_FILE_CONSTANT)) {
-
- /* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
- */
- for (i = 0; i < 4; i++)
- swiz1[i] = i;
-
- /* the first mul.f will clobber tmp.x, but that is ok
- * because after that point we no longer need tmp.x:
- */
- create_mov(ctx, &tmp_dst, src1);
- src1 = &tmp_src;
+ if (is_const(src1)) {
+ if (!is_const(src0)) {
+ struct tgsi_src_register *tmp;
+ tmp = src0;
+ src0 = src1;
+ src1 = tmp;
+ swapped = 1;
+ } else {
+ src0 = get_unconst(ctx, src0);
+ }
}
+ get_swiz(swiz0, src0);
+ get_swiz(swiz1, src1);
+
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, swiz0[0]);
@@ -548,29 +659,27 @@ trans_dotp(const struct instr_translater *t,
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, swiz0[i]);
add_src_reg(ctx, instr, src1, swiz1[i]);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
}
/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
if (t->tgsi_opc == TGSI_OPCODE_DPH) {
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, src1, swiz1[i]);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ if (swapped)
+ add_src_reg(ctx, instr, src0, swiz0[i]);
+ else
+ add_src_reg(ctx, instr, src1, swiz1[i]);
+ add_src_reg(ctx, instr, tmp_src, 0);
n++;
}
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
-
- /* pad out to multiple of 4 scalar instructions: */
- for (i = 2 * n; i % 4; i++) {
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
- }
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
- create_mov(ctx, dst, &tmp_src);
+ create_mov(ctx, dst, tmp_src);
}
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
@@ -581,37 +690,39 @@ trans_lrp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst1, tmp_dst2;
- struct tgsi_src_register tmp_src1, tmp_src2;
+ struct tgsi_src_register *tmp_src1, *tmp_src2;
struct tgsi_src_register tmp_const;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
- get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
- get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
+ if (is_const(src0) && is_const(src1))
+ src0 = get_unconst(ctx, src0);
+
+ tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
+ tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
get_immediate(ctx, &tmp_const, fui(1.0));
/* tmp1 = (a * b) */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
- vectorize(ctx, instr, &tmp_dst1, 2,
- &inst->Src[0].Register, 0,
- &inst->Src[1].Register, 0);
+ vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
/* tmp2 = (1 - a) */
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
- vectorize(ctx, instr, &tmp_dst2, 2,
- &tmp_const, 0,
- &inst->Src[0].Register, IR3_REG_NEGATE);
+ vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
+ src0, IR3_REG_NEGATE);
/* tmp2 = tmp2 * c */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
vectorize(ctx, instr, &tmp_dst2, 2,
- &tmp_src2, 0,
+ tmp_src2, 0,
&inst->Src[2].Register, 0);
/* dst = tmp1 + tmp2 */
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
vectorize(ctx, instr, &inst->Dst[0].Register, 2,
- &tmp_src1, 0,
- &tmp_src2, 0);
+ tmp_src1, 0,
+ tmp_src2, 0);
}
/* FRC(x) = x - FLOOR(x) */
@@ -622,9 +733,9 @@ trans_frac(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
/* tmp = FLOOR(x) */
instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
@@ -635,7 +746,7 @@ trans_frac(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
vectorize(ctx, instr, &inst->Dst[0].Register, 2,
&inst->Src[0].Register, 0,
- &tmp_src, IR3_REG_NEGATE);
+ tmp_src, IR3_REG_NEGATE);
}
/* POW(a,b) = EXP2(b * LOG2(a)) */
@@ -647,24 +758,24 @@ trans_pow(const struct instr_translater *t,
struct ir3_instruction *instr;
struct ir3_register *r;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
/* log2 Rtmp, Rsrc0 */
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, src0->SwizzleX);
- regmask_set(ctx->needs_ss, r);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
/* mul.f Rtmp, Rtmp, Rsrc1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
add_src_reg(ctx, instr, src1, src1->SwizzleX);
/* blob compiler seems to ensure there are at least 6 instructions
@@ -676,10 +787,10 @@ trans_pow(const struct instr_translater *t,
/* exp2 Rdst, Rtmp */
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
- regmask_set(ctx->needs_ss, r);
+ add_src_reg(ctx, instr, tmp_src, 0);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
- create_mov(ctx, dst, &tmp_src);
+ create_mov(ctx, dst, tmp_src);
}
/* texture fetch/sample instructions: */
@@ -690,8 +801,6 @@ trans_samp(const struct instr_translater *t,
{
struct ir3_register *r;
struct ir3_instruction *instr;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
struct tgsi_src_register *coord = &inst->Src[0].Register;
struct tgsi_src_register *samp = &inst->Src[1].Register;
unsigned tex = inst->Texture.Texture;
@@ -711,7 +820,7 @@ trans_samp(const struct instr_translater *t,
flags |= IR3_INSTR_P;
break;
default:
- assert(0);
+ compile_assert(ctx, 0);
break;
}
@@ -726,10 +835,13 @@ trans_samp(const struct instr_translater *t,
*/
for (i = 1; (i < 4) && (order[i] >= 0); i++) {
if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
- type_t type_mov = get_type(ctx);
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+
+ type_t type_mov = get_ftype(ctx);
/* need to move things around: */
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
for (j = 0; (j < 4) && (order[j] >= 0); j++) {
instr = ir3_instr_create(ctx->ir, 1, 0);
@@ -740,7 +852,7 @@ trans_samp(const struct instr_translater *t,
src_swiz(coord, order[j]));
}
- coord = &tmp_src;
+ coord = tmp_src;
if (j < 4)
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
@@ -750,7 +862,7 @@ trans_samp(const struct instr_translater *t,
}
instr = ir3_instr_create(ctx->ir, 5, t->opc);
- instr->cat5.type = get_type(ctx);
+ instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
instr->flags |= flags;
@@ -760,10 +872,42 @@ trans_samp(const struct instr_translater *t,
add_src_reg(ctx, instr, coord, coord->SwizzleX);
- regmask_set(ctx->needs_sy, r);
+ regmask_set(ctx->needs_sy, r, r->wrmask);
}
-/* CMP(a,b,c) = (a < 0) ? b : c */
+/*
+ * SEQ(a,b) = (a == b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SNE(a,b) = (a != b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SGE(a,b) = (a >= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * cov.u16f16 dst, tmp0
+ *
+ * SLE(a,b) = (a <= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SGT(a,b) = (a > b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SLT(a,b) = (a < b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * CMP(a,b,c) = (a < 0.0) ? b : c
+ * cmps.f.ge tmp0, a, {0.0}
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, c, tmp0, b
+ */
static void
trans_cmp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
@@ -771,35 +915,94 @@ trans_cmp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
- struct tgsi_src_register constval;
- /* final instruction uses original src1 and src2, so we need get_dst() */
+ struct tgsi_src_register *tmp_src;
+ struct tgsi_src_register constval0, constval1;
+ /* final instruction for CMP() uses orig src1 and src2: */
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *a0, *a1;
+ unsigned condition;
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
- /* cmps.f.ge tmp, src0, 0.0 */
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_EQ;
+ break;
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLT:
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &inst->Src[1].Register; /* b */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGT:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_CMP:
+ get_immediate(ctx, &constval0, fui(0.0));
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &constval0; /* {0.0} */
+ condition = IR3_COND_GE;
+ break;
+ default:
+ compile_assert(ctx, 0);
+ return;
+ }
+
+ if (is_const(a0) && is_const(a1))
+ a0 = get_unconst(ctx, a0);
+
+ /* cmps.f.ge tmp, a0, a1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_GE;
- get_immediate(ctx, &constval, fui(0.0));
- vectorize(ctx, instr, &tmp_dst, 2,
- &inst->Src[0].Register, 0,
- &constval, 0);
+ instr->cat2.condition = condition;
+ vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
- /* add.s tmp, tmp, -1 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
- instr->repeat = 3;
- add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLE:
+ /* cov.u16f16 dst, tmp0 */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_utype(ctx);
+ instr->cat1.dst_type = get_ftype(ctx);
+ vectorize(ctx, instr, dst, 1, tmp_src, 0);
+ break;
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_CMP:
+ /* add.s tmp, tmp, -1 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ instr->repeat = 3;
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+ if (t->tgsi_opc == TGSI_OPCODE_CMP) {
+ /* sel.{f32,f16} dst, src2, tmp, src1 */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &inst->Src[2].Register, 0,
+ tmp_src, 0,
+ &inst->Src[1].Register, 0);
+ } else {
+ get_immediate(ctx, &constval0, fui(0.0));
+ get_immediate(ctx, &constval1, fui(1.0));
+ /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &constval0, 0, tmp_src, 0, &constval1, 0);
+ }
- /* sel.{f32,f16} dst, src2, tmp, src1 */
- instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
- OPC_SEL_F16 : OPC_SEL_F32);
- vectorize(ctx, instr, &inst->Dst[0].Register, 3,
- &inst->Src[2].Register, 0,
- &tmp_src, 0,
- &inst->Src[1].Register, 0);
+ break;
+ }
put_dst(ctx, inst, dst);
}
@@ -858,10 +1061,13 @@ trans_if(const struct instr_translater *t,
get_immediate(ctx, &constval, fui(0.0));
+ if (is_const(src))
+ src = get_unconst(ctx, src);
+
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
- add_src_reg(ctx, instr, &constval, constval.SwizzleX);
add_src_reg(ctx, instr, src, src->SwizzleX);
+ add_src_reg(ctx, instr, &constval, constval.SwizzleX);
instr->cat2.condition = IR3_COND_EQ;
instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
@@ -939,16 +1145,12 @@ instr_cat2(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
unsigned src0_flags = 0;
- instr = ir3_instr_create(ctx->ir, 2, t->opc);
-
switch (t->tgsi_opc) {
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- instr->cat2.condition = t->arg;
- break;
case TGSI_OPCODE_ABS:
src0_flags = IR3_REG_ABS;
break;
@@ -970,48 +1172,65 @@ instr_cat2(const struct instr_translater *t,
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
- vectorize(ctx, instr, dst, 1,
- &inst->Src[0].Register, src0_flags);
+ instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ vectorize(ctx, instr, dst, 1, src0, src0_flags);
break;
default:
- vectorize(ctx, instr, dst, 2,
- &inst->Src[0].Register, src0_flags,
- &inst->Src[1].Register, 0);
+ if (is_const(src0) && is_const(src1))
+ src0 = get_unconst(ctx, src0);
+
+ instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
break;
}
put_dst(ctx, inst, dst);
}
+static bool is_mad(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_U16:
+ case OPC_MADSH_U16:
+ case OPC_MAD_S16:
+ case OPC_MADSH_M16:
+ case OPC_MAD_U24:
+ case OPC_MAD_S24:
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void
instr_cat3(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
- /* Blob compiler never seems to use a const in src1 position..
- * although there does seem (according to disassembler hidden
- * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
- * const. Not sure if this is a hw bug, or simply that the
- * disassembler lies.
+ /* in particular, can't handle const for src1 for cat3..
+ * for mad, we can swap first two src's if needed:
*/
- if ((src1->File == TGSI_FILE_CONSTANT) ||
- (src1->File == TGSI_FILE_IMMEDIATE)) {
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
- create_mov(ctx, &tmp_dst, src1);
- src1 = &tmp_src;
+ if (is_const(src1)) {
+ if (is_mad(t->opc) && !is_const(src0)) {
+ struct tgsi_src_register *tmp;
+ tmp = src0;
+ src0 = src1;
+ src1 = tmp;
+ } else {
+ src0 = get_unconst(ctx, src0);
+ }
}
instr = ir3_instr_create(ctx->ir, 3,
ctx->so->half_precision ? t->hopc : t->opc);
- vectorize(ctx, instr, dst, 3,
- &inst->Src[0].Register, 0,
- src1, 0,
+ vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
}
@@ -1022,15 +1241,20 @@ instr_cat4(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src = &inst->Src[0].Register;
struct ir3_instruction *instr;
+ /* seems like blob compiler avoids const as src.. */
+ if (is_const(src))
+ src = get_unconst(ctx, src);
+
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
instr = ir3_instr_create(ctx->ir, 4, t->opc);
- vectorize(ctx, instr, dst, 1,
- &inst->Src[0].Register, 0);
+ vectorize(ctx, instr, dst, 1, src, 0);
- regmask_set(ctx->needs_ss, instr->regs[0]);
+ regmask_set(ctx->needs_ss, instr->regs[0],
+ inst->Dst[0].Register.WriteMask);
put_dst(ctx, inst, dst);
}
@@ -1051,12 +1275,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
- INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
- INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
INSTR(LRP, trans_lrp),
INSTR(FRC, trans_frac),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
+ INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
INSTR(POW, trans_pow),
@@ -1065,6 +1288,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(SIN, instr_cat4, .opc = OPC_COS),
INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
+ INSTR(SGT, trans_cmp),
+ INSTR(SLT, trans_cmp),
+ INSTR(SGE, trans_cmp),
+ INSTR(SLE, trans_cmp),
+ INSTR(SNE, trans_cmp),
+ INSTR(SEQ, trans_cmp),
INSTR(CMP, trans_cmp),
INSTR(IF, trans_if),
INSTR(ELSE, trans_else),
@@ -1132,7 +1361,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
unsigned name = decl->Semantic.Name;
unsigned i;
- assert(decl->Declaration.Semantic); // TODO is this ever not true?
+ compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true?
DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX
@@ -1152,9 +1381,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->outputs[so->outputs_count++].regid = regid(i + base, 0);
break;
default:
- DBG("unknown VS semantic name: %s",
+ compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[name]);
- assert(0);
}
} else {
switch (name) {
@@ -1162,9 +1390,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->color_regid = regid(decl->Range.First + base, 0);
break;
default:
- DBG("unknown VS semantic name: %s",
+ compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[name]);
- assert(0);
}
}
}
@@ -1223,10 +1450,19 @@ compile_instructions(struct fd3_compile_context *ctx)
t->fxn(t, ctx, inst);
ctx->num_internal_temps = 0;
} else {
- debug_printf("unknown TGSI opc: %s\n",
+ compile_error(ctx, "unknown TGSI opc: %s\n",
tgsi_get_opcode_name(opc));
- tgsi_dump(ctx->tokens, 0);
- assert(0);
+ }
+
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_ZERO_ONE:
+ create_clamp_imm(ctx, &inst->Dst[0].Register,
+ fui(0.0), fui(1.0));
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ create_clamp_imm(ctx, &inst->Dst[0].Register,
+ fui(-1.0), fui(1.0));
+ break;
}
break;
@@ -1253,6 +1489,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so,
so->ir = ir3_shader_create();
+ assert(so->ir);
+
so->color_regid = regid(63,0);
so->pos_regid = regid(63,0);
so->psize_regid = regid(63,0);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 3ae9b2953e4..589aeed4578 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -40,7 +40,18 @@
static void
fd3_context_destroy(struct pipe_context *pctx)
{
+ struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
+
fd3_prog_fini(pctx);
+
+ fd_bo_del(fd3_ctx->vs_pvt_mem);
+ fd_bo_del(fd3_ctx->fs_pvt_mem);
+ fd_bo_del(fd3_ctx->vsc_size_mem);
+ fd_bo_del(fd3_ctx->vsc_pipe_mem);
+
+ pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
+ pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
+
fd_context_destroy(pctx);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index a7a4bf78d62..5e586185ad9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
OUT_RELOC(ring, bo, offset,
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
} else {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
@@ -212,7 +212,7 @@ emit_textures(struct fd_ringbuffer *ring,
for (i = 0; i < tex->num_textures; i++) {
struct fd3_pipe_sampler_view *view =
fd3_pipe_sampler_view(tex->textures[i]);
- OUT_RELOC(ring, view->tex_resource->bo, 0, 0);
+ OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0);
/* I think each entry is a ptr to mipmap level.. for now, just
* pad w/ null's until I get around to actually implementing
* mipmap support..
@@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
0x40000000 | // XXX
- fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA));
- OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
+ fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+ OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
A3XX_TEX_CONST_1_WIDTH(psurf->width) |
A3XX_TEX_CONST_1_HEIGHT(psurf->height));
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
@@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
CP_LOAD_STATE_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RELOC(ring, rsc->bo, 0, 0);
+ OUT_RELOC(ring, rsc->bo, 0, 0, 0);
}
void
@@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0);
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
@@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */
+ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */
+ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
@@ -536,8 +536,8 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */
- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1);
- OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */
+ OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
+ OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */
OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
@@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 1cb170af261..8d2df47c72a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
if (bin_w || (i >= nr_bufs)) {
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
} else {
- OUT_RELOCS(ring, res->bo, 0, 0, -1);
+ OUT_RELOCW(ring, res->bo, 0, 0, -1);
}
OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
@@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A3XX_RB_COPY_CONTROL_MODE(mode) |
A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
- OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RELOCW(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp));
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
@@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
@@ -206,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
}, 1);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- uint32_t base = depth_base(&ctx->gmem) *
- fd_resource(pfb->cbufs[0]->texture)->cpp;
+ uint32_t base = 0;
+ if (pfb->cbufs[0]) {
+ struct fd_resource *rsc =
+ fd_resource(pfb->cbufs[0]->texture);
+ base = depth_base(&ctx->gmem) * rsc->cpp;
+ }
emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
}
@@ -260,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
y1 = ((float)yoff + bin_h) / ((float)pfb->height);
OUT_PKT3(ring, CP_MEM_WRITE, 5);
- OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
+ OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
@@ -383,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx)
A3XX_VSC_PIPE_CONFIG_Y(0) |
A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) |
A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y));
- OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */
+ OUT_RELOC(ring, bo, 0, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */
OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */
for (i = 1; i < 8; i++) {
@@ -402,8 +414,11 @@ static void
fd3_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture);
struct fd_ringbuffer *ring = ctx->ring;
+ uint32_t pitch = 0;
+
+ if (pfb->cbufs[0])
+ pitch = fd_resource(pfb->cbufs[0]->texture)->pitch;
fd3_emit_restore(ctx);
@@ -414,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
fd3_emit_rbrc_tile_state(ring,
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch));
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index b5a027e6503..c6c51b11ee2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -249,7 +249,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
*/
for (i = 0; i < 6; i++) {
OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
- OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */
+ OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER0_SELECT */
OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1);
OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */
@@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */
+ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
#endif
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
@@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
- OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */
+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
#endif
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index ae08b8ac90a..e56325ba1f1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
so->base = *cso;
so->texsamp0 =
+ COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
@@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx,
return so;
}
+static enum a3xx_tex_type
+tex_type(unsigned target)
+{
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A3XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A3XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A3XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A3XX_TEX_CUBE;
+ }
+}
+
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
@@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->tex_resource = rsc;
so->texconst0 =
- 0x40000000 | /* ??? */
+ A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
index a08bc2349eb..6537fb77716 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
@@ -306,10 +306,11 @@ fd3_pipe2swap(enum pipe_format format)
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
return WXYZ;
+
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return WZYX;
-
default:
return WZYX;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
index 2fedc7bee38..61c01a7f528 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
@@ -166,8 +166,7 @@ struct ir3_instruction {
};
};
-/* this is just large to cope w/ the large test *.asm: */
-#define MAX_INSTRS 10240
+#define MAX_INSTRS 1024
struct ir3_shader {
unsigned instrs_count;
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index b1198125e97..61979d458ac 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -113,5 +115,318 @@ enum adreno_rb_depth_format {
DEPTHX_24_8 = 1,
};
+enum adreno_mmu_clnt_beh {
+ BEH_NEVR = 0,
+ BEH_TRAN_RNG = 1,
+ BEH_TRAN_FLT = 2,
+};
+
+#define REG_AXXX_MH_MMU_CONFIG 0x00000040
+#define AXXX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001
+#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002
+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030
+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4
+static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0
+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300
+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00
+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000
+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000
+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000
+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000
+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18
+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000
+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20
+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000
+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22
+static inline uint32_t AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000
+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24
+static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+ return ((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK;
+}
+
+#define REG_AXXX_MH_MMU_VA_RANGE 0x00000041
+
+#define REG_AXXX_MH_MMU_PT_BASE 0x00000042
+
+#define REG_AXXX_MH_MMU_PAGE_FAULT 0x00000043
+
+#define REG_AXXX_MH_MMU_TRAN_ERROR 0x00000044
+
+#define REG_AXXX_MH_MMU_INVALIDATE 0x00000045
+
+#define REG_AXXX_MH_MMU_MPU_BASE 0x00000046
+
+#define REG_AXXX_MH_MMU_MPU_END 0x00000047
+
+#define REG_AXXX_CP_RB_BASE 0x000001c0
+
+#define REG_AXXX_CP_RB_CNTL 0x000001c1
+#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f
+#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0
+static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val)
+{
+ return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK;
+}
+#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00
+#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8
+static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val)
+{
+ return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK;
+}
+#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000
+#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16
+static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val)
+{
+ return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK;
+}
+#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000
+#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000
+#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000
+
+#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3
+#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003
+#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0
+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val)
+{
+ return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK;
+}
+#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc
+#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2
+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val)
+{
+ return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK;
+}
+
+#define REG_AXXX_CP_RB_RPTR 0x000001c4
+
+#define REG_AXXX_CP_RB_WPTR 0x000001c5
+
+#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6
+
+#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7
+
+#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8
+
+#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val)
+{
+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK;
+}
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val)
+{
+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK;
+}
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val)
+{
+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK;
+}
+
+#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6
+
+#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7
+#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f
+#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0
+static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK;
+}
+#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00
+#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8
+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK;
+}
+#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000
+#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16
+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK;
+}
+
+#define REG_AXXX_CP_STQ_AVAIL 0x000001d8
+#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f
+#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0
+static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val)
+{
+ return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK;
+}
+
+#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9
+#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f
+#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0
+static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val)
+{
+ return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK;
+}
+
+#define REG_AXXX_SCRATCH_UMSK 0x000001dc
+#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff
+#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0
+static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val)
+{
+ return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK;
+}
+#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000
+#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16
+static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val)
+{
+ return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK;
+}
+
+#define REG_AXXX_SCRATCH_ADDR 0x000001dd
+
+#define REG_AXXX_CP_ME_RDADDR 0x000001ea
+
+#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec
+
+#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed
+
+#define REG_AXXX_CP_INT_CNTL 0x000001f2
+
+#define REG_AXXX_CP_INT_STATUS 0x000001f3
+
+#define REG_AXXX_CP_INT_ACK 0x000001f4
+
+#define REG_AXXX_CP_ME_CNTL 0x000001f6
+
+#define REG_AXXX_CP_ME_STATUS 0x000001f7
+
+#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8
+
+#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9
+
+#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa
+
+#define REG_AXXX_CP_DEBUG 0x000001fc
+#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000
+#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000
+#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000
+#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000
+#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000
+#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000
+#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000
+#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000
+
+#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd
+#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f
+#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0
+static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000
+#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16
+static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe
+#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f
+#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0
+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000
+#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16
+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff
+#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f
+#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0
+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000
+#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16
+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val)
+{
+ return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_SCRATCH_REG0 0x00000578
+
+#define REG_AXXX_CP_SCRATCH_REG1 0x00000579
+
+#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a
+
+#define REG_AXXX_CP_SCRATCH_REG3 0x0000057b
+
+#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c
+
+#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d
+
+#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e
+
+#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f
+
+#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a
+
+#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b
+
+#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c
+
+#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d
+
+#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e
+
#endif /* ADRENO_COMMON_XML */
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index d3a7baca0e9..94c13f418e7 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36)
- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05)
Copyright (C) 2013 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 44d525b25dd..1d03351f041 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx)
ctx->gmem_reason = 0;
ctx->num_draws = 0;
- fd_resource(pfb->cbufs[0]->texture)->dirty = false;
+ if (pfb->cbufs[0])
+ fd_resource(pfb->cbufs[0]->texture)->dirty = false;
if (pfb->zsbuf)
fd_resource(pfb->zsbuf->texture)->dirty = false;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index b02b8b9f9f9..4a98ab40f9e 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
src_sel, idx_type, IGNORE_VISIBILITY));
OUT_RING(ring, info->count); /* NumIndices */
if (info->indexed) {
- OUT_RELOC(ring, idx_bo, idx_offset, 0);
+ OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
OUT_RING (ring, idx_size);
}
}
@@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
}
DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
ctx->clear(ctx, buffers, color, depth, stencil);
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 12633bd5f38..3d959c65dc7 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -71,12 +71,16 @@ calculate_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_scissor_state *scissor = &ctx->max_scissor;
- uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format);
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
uint32_t gmem_size = ctx->screen->gmemsize_bytes;
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t max_width = 992;
+ uint32_t cpp = 4;
+
+ if (pfb->cbufs[0])
+ cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
if ((gmem->cpp == cpp) &&
!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
@@ -84,10 +88,17 @@ calculate_tiles(struct fd_context *ctx)
return;
}
- minx = scissor->minx & ~31; /* round down to multiple of 32 */
- miny = scissor->miny & ~31;
- width = scissor->maxx - minx;
- height = scissor->maxy - miny;
+ if (fd_mesa_debug & FD_DBG_DSCIS) {
+ minx = 0;
+ miny = 0;
+ width = pfb->width;
+ height = pfb->height;
+ } else {
+ minx = scissor->minx & ~31; /* round down to multiple of 32 */
+ miny = scissor->miny & ~31;
+ width = scissor->maxx - minx;
+ height = scissor->maxy - miny;
+ }
// TODO we probably could optimize this a bit if we know that
// Z or stencil is not enabled for any of the draw calls..
@@ -132,9 +143,7 @@ static void
render_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- uint32_t i, yoff = 0;
-
- yoff= gmem->miny;
+ uint32_t i, yoff = gmem->miny;
ctx->emit_tile_init(ctx);
@@ -143,13 +152,13 @@ render_tiles(struct fd_context *ctx)
uint32_t bh = gmem->bin_h;
/* clip bin height: */
- bh = MIN2(bh, gmem->height - yoff);
+ bh = MIN2(bh, gmem->miny + gmem->height - yoff);
for (j = 0; j < gmem->nbins_x; j++) {
uint32_t bw = gmem->bin_w;
/* clip bin width: */
- bw = MIN2(bw, gmem->width - xoff);
+ bw = MIN2(bw, gmem->minx + gmem->width - xoff);
DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
bh, yoff, bw, xoff);
@@ -205,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
if (sysmem) {
DBG("rendering sysmem (%s/%s)",
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
render_sysmem(ctx);
} else {
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
calculate_tiles(ctx);
+ DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
render_tiles(ctx);
}
@@ -225,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* update timestamps on render targets: */
timestamp = fd_ringbuffer_timestamp(ctx->ring);
- fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
+ if (pfb->cbufs[0])
+ fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
if (pfb->zsbuf)
fd_resource(pfb->zsbuf->texture)->timestamp = timestamp;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 1b1eaa52512..3e051ea9882 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
struct pipe_transfer *ptrans)
{
struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(ptrans->resource);
+ if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ fd_bo_cpu_fini(rsc->bo);
pipe_resource_reference(&ptrans->resource, NULL);
util_slab_free(&ctx->transfer_pool, ptrans);
}
@@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx,
struct fd_resource *rsc = fd_resource(prsc);
struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool);
enum pipe_format format = prsc->format;
+ uint32_t op = 0;
char *buf;
if (!ptrans)
return NULL;
- /* util_slap_alloc() doesn't zero: */
+ /* util_slab_alloc() doesn't zero: */
memset(ptrans, 0, sizeof(*ptrans));
pipe_resource_reference(&ptrans->resource, prsc);
@@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
ptrans->layer_stride = ptrans->stride;
/* some state trackers (at least XA) don't do this.. */
- fd_resource_transfer_flush_region(pctx, ptrans, box);
+ if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ fd_resource_transfer_flush_region(pctx, ptrans, box);
buf = fd_bo_map(rsc->bo);
if (!buf) {
@@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
return NULL;
}
+ if (usage & PIPE_TRANSFER_READ)
+ op |= DRM_FREEDRENO_PREP_READ;
+
+ if (usage & PIPE_TRANSFER_WRITE)
+ op |= DRM_FREEDRENO_PREP_WRITE;
+
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
+
*pptrans = ptrans;
return buf +
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 52d51c22966..36ef8b0bc53 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = {
{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
{"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"},
+ {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"},
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 2f5d52c017c..f5290a9af2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
unsigned i;
DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush,
- cso->cbufs[0], cso->zsbuf);
+ framebuffer->cbufs[0], framebuffer->zsbuf);
fd_context_render(pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index f18f0fee989..7bbbe8016b5 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -33,8 +33,10 @@
#include <freedreno_ringbuffer.h>
#include "pipe/p_format.h"
+#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_half.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
@@ -47,10 +49,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
enum adreno_stencil_op fd_stencil_op(unsigned op);
-#define FD_DBG_MSGS 0x1
-#define FD_DBG_DISASM 0x2
-#define FD_DBG_DCLEAR 0x4
-#define FD_DBG_DGMEM 0x8
+#define FD_DBG_MSGS 0x01
+#define FD_DBG_DISASM 0x02
+#define FD_DBG_DCLEAR 0x04
+#define FD_DBG_DGMEM 0x08
+#define FD_DBG_DSCIS 0x10
extern int fd_mesa_debug;
#define DBG(fmt, ...) \
@@ -77,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
(1 << 14);
}
+
+static inline enum pipe_format
+pipe_surface_format(struct pipe_surface *psurf)
+{
+ if (!psurf)
+ return PIPE_FORMAT_NONE;
+ return psurf->format;
+}
+
#define LOG_DWORDS 0
@@ -92,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
- uint32_t offset, uint32_t or)
+ uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring,
- (uint32_t)(ring->cur - ring->last_start), bo, offset);
+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
+ (uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
- fd_ringbuffer_emit_reloc(ring, bo, offset, or);
+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = FD_RELOC_READ,
+ .offset = offset,
+ .or = or,
+ .shift = shift,
+ });
}
-/* shifted reloc: */
static inline void
-OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo,
+OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RELOCS %04x: %p+%u << %d", ring,
+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
- fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, shift);
+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = FD_RELOC_READ | FD_RELOC_WRITE,
+ .offset = offset,
+ .or = or,
+ .shift = shift,
+ });
}
static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
@@ -143,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
- fd_ringbuffer_emit_reloc_ring(ring, start);
+ fd_ringbuffer_emit_reloc_ring(ring, start, end);
OUT_RING(ring, fd_ringmarker_dwords(start, end));
}