diff options
author | Carl Worth <cworth@cworth.org> | 2013-11-26 16:25:06 -0800 |
---|---|---|
committer | Carl Worth <cworth@cworth.org> | 2013-11-26 16:25:06 -0800 |
commit | 44831bff0feec310ca786e5d101d3b5e42a69f74 (patch) | |
tree | 036a9c542dc8909bcad2a92662fcfed4d984b37d | |
parent | 08d7c08512090acd612635e722abc6e9582b69c8 (diff) | |
parent | 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 (diff) |
Merge branch '9.2-freedreno' of git://github.com/freedreno/mesa into 9.2
This allows the freedreno driver to build with current mesa.
21 files changed, 1340 insertions, 359 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index bee01f1c7e1..35463864b95 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a2xx.xml ( 30127 bytes, from 2013-05-05 18:29:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) +- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) Copyright (C) 2013 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -236,56 +238,6 @@ enum sq_tex_filter { #define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 -#define REG_A2XX_CP_RB_BASE 0x000001c0 - -#define REG_A2XX_CP_RB_CNTL 0x000001c1 - -#define REG_A2XX_CP_RB_RPTR_ADDR 0x000001c3 - -#define REG_A2XX_CP_RB_RPTR 0x000001c4 - -#define REG_A2XX_CP_RB_WPTR 0x000001c5 - -#define REG_A2XX_CP_RB_WPTR_DELAY 0x000001c6 - -#define REG_A2XX_CP_RB_RPTR_WR 0x000001c7 - -#define REG_A2XX_CP_RB_WPTR_BASE 0x000001c8 - -#define REG_A2XX_CP_QUEUE_THRESHOLDS 0x000001d5 - -#define REG_A2XX_SCRATCH_UMSK 0x000001dc - -#define REG_A2XX_SCRATCH_ADDR 0x000001dd - -#define REG_A2XX_CP_STATE_DEBUG_INDEX 0x000001ec - -#define REG_A2XX_CP_STATE_DEBUG_DATA 0x000001ed - -#define REG_A2XX_CP_INT_CNTL 0x000001f2 - -#define REG_A2XX_CP_INT_STATUS 0x000001f3 - -#define REG_A2XX_CP_INT_ACK 0x000001f4 - -#define REG_A2XX_CP_ME_CNTL 0x000001f6 - -#define REG_A2XX_CP_ME_STATUS 0x000001f7 - -#define REG_A2XX_CP_ME_RAM_WADDR 0x000001f8 - -#define REG_A2XX_CP_ME_RAM_RADDR 0x000001f9 - -#define REG_A2XX_CP_ME_RAM_DATA 0x000001fa - -#define REG_A2XX_CP_DEBUG 0x000001fc - -#define REG_A2XX_CP_CSQ_RB_STAT 0x000001fd - -#define REG_A2XX_CP_CSQ_IB1_STAT 0x000001fe - -#define REG_A2XX_CP_CSQ_IB2_STAT 0x000001ff - #define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 #define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 @@ -338,11 +290,32 @@ enum sq_tex_filter { #define REG_A2XX_CP_STAT 0x0000047f -#define REG_A2XX_SCRATCH_REG0 0x00000578 - -#define REG_A2XX_SCRATCH_REG2 0x0000057a - #define REG_A2XX_RBBM_STATUS 0x000005d0 +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 +static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) +{ + return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; +} +#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 +#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 +#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 +#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 +#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 +#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 +#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 +#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 +#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 +#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 +#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 +#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 +#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 +#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 +#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000 +#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 +#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 +#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 +#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 #define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 #define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f @@ -358,13 +331,13 @@ static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; } -#define REG_A2XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) +static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } -#define REG_A2XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) +static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } -#define REG_A2XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } -#define REG_A2XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } #define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c index b03390ec436..35511ba2bd3 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c @@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); OUT_RING(ring, sampler->tex0 | view->tex0); - OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt); + OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0); OUT_RING(ring, view->tex2); OUT_RING(ring, sampler->tex3 | view->tex3); OUT_RING(ring, sampler->tex4); @@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); for (i = 0; i < n; i++) { struct fd_resource *rsc = fd_resource(vbufs[i].prsc); - OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3); + OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0); OUT_RING (ring, vbufs[i].size); } } diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c index e239eedb2e0..89f5a4d5259 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c @@ -70,7 +70,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base, OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ - OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */ OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */ OUT_RING(ring, /* RB_COPY_DEST_INFO */ A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | @@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base, A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | A2XX_SQ_TEX_0_PITCH(rsc->pitch)); OUT_RELOC(ring, rsc->bo, 0, - fd2_pipe2surface(psurf->format) | 0x800); + fd2_pipe2surface(psurf->format) | 0x800, 0); OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); OUT_RING(ring, 0x01000000 | // XXX @@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, y0 = ((float)yoff) / ((float)pfb->height); y1 = ((float)yoff + bin_h) / ((float)pfb->height); OUT_PKT3(ring, CP_MEM_WRITE, 9); - OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x1)); @@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx) struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_gmem_stateobj *gmem = &ctx->gmem; - enum pipe_format format = pfb->cbufs[0]->format; + enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); uint32_t reg; OUT_PKT3(ring, CP_SET_CONSTANT, 4); @@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, { struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pfb->cbufs[0]->format; + enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); @@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, { struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pfb->cbufs[0]->format; + enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index c7f5085d032..d183516067b 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) +- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) Copyright (C) 2013 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -130,6 +132,13 @@ enum a3xx_tex_fmt { TFMT_NORM_USHORT_5551 = 6, TFMT_NORM_USHORT_4444 = 7, TFMT_NORM_UINT_X8Z24 = 10, + TFMT_NORM_UINT_NV12_UV_TILED = 17, + TFMT_NORM_UINT_NV12_Y_TILED = 19, + TFMT_NORM_UINT_NV12_UV = 21, + TFMT_NORM_UINT_NV12_Y = 23, + TFMT_NORM_UINT_I420_Y = 24, + TFMT_NORM_UINT_I420_U = 26, + TFMT_NORM_UINT_I420_V = 27, TFMT_NORM_UINT_2_10_10_10 = 41, TFMT_NORM_UINT_A8 = 44, TFMT_NORM_UINT_L8_A8 = 47, @@ -207,6 +216,37 @@ enum a3xx_tex_swiz { A3XX_TEX_ONE = 5, }; +enum a3xx_tex_type { + A3XX_TEX_1D = 0, + A3XX_TEX_2D = 1, + A3XX_TEX_CUBE = 2, + A3XX_TEX_3D = 3, +}; + +#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 +#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 +#define A3XX_INT0_VFD_ERROR 0x00000040 +#define A3XX_INT0_CP_SW_INT 0x00000080 +#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 +#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 +#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 +#define A3XX_INT0_CP_HW_FAULT 0x00000800 +#define A3XX_INT0_CP_DMA 0x00001000 +#define A3XX_INT0_CP_IB2_INT 0x00002000 +#define A3XX_INT0_CP_IB1_INT 0x00004000 +#define A3XX_INT0_CP_RB_INT 0x00008000 +#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 +#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 +#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 +#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 +#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000 +#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 +#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 #define REG_A3XX_RBBM_HW_VERSION 0x00000000 #define REG_A3XX_RBBM_HW_RELEASE 0x00000001 @@ -230,6 +270,27 @@ enum a3xx_tex_swiz { #define REG_A3XX_RBBM_GPR0_CTL 0x0000002e #define REG_A3XX_RBBM_STATUS 0x00000030 +#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 +#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 +#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 +#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 +#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 +#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 +#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 +#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 +#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 +#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 +#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 +#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 +#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 +#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 +#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 +#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 #define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 @@ -251,20 +312,202 @@ enum a3xx_tex_swiz { #define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 + +#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 + +#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 + #define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 +#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 + +#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 + +#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a + +#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b + +#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c + +#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 + +#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 + +#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 + +#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca + +#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb + +#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc + +#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd + +#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce + +#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf + +#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 + +#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 + +#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da + +#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db + +#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc + +#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd + +#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de + +#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df + #define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 #define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 +#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 + +#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb + #define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec #define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed #define REG_A3XX_RBBM_RBBM_CTL 0x00000100 -#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 - #define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 #define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 @@ -287,22 +530,20 @@ enum a3xx_tex_swiz { #define REG_A3XX_CP_MEQ_DATA 0x000001db +#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 + #define REG_A3XX_CP_HW_FAULT 0x0000045c #define REG_A3XX_CP_PROTECT_CTRL 0x0000045e #define REG_A3XX_CP_PROTECT_STATUS 0x0000045f -#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0)) +static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } -#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0)) +static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } #define REG_A3XX_CP_AHB_FAULT 0x0000054d -#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a - -#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b - #define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 #define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 #define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 @@ -528,9 +769,9 @@ static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) #define REG_A3XX_UNKNOWN_20C3 0x000020c3 -#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0)) +static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } -#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0)) +static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } #define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 #define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 #define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 @@ -553,7 +794,7 @@ static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; } -#define REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0)) +static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) @@ -579,7 +820,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; } -#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0)) +static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) @@ -587,7 +828,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; } -#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0)) +static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) @@ -627,12 +868,60 @@ static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r #define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 #define REG_A3XX_RB_BLEND_RED 0x000020e4 +#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; +} +#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; +} #define REG_A3XX_RB_BLEND_GREEN 0x000020e5 +#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; +} #define REG_A3XX_RB_BLEND_BLUE 0x000020e6 +#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; +} #define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 +#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; +} #define REG_A3XX_UNKNOWN_20E8 0x000020e8 @@ -1063,9 +1352,9 @@ static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) #define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 -#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0)) +static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } -#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0)) +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) @@ -1092,11 +1381,11 @@ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; } -#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0)) +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } -#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } -#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } #define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f #define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) @@ -1173,13 +1462,13 @@ static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; } -#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } -#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } -#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } -#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0)) +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } #define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a @@ -1293,9 +1582,9 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; } -#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } #define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) @@ -1321,9 +1610,9 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; } -#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } -#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) @@ -1480,9 +1769,9 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec -#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } -#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } #define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff #define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) @@ -1491,9 +1780,9 @@ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) } #define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 -#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } -#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0)) +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) @@ -1607,9 +1896,9 @@ static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) #define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 -#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) +static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } -#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) +static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } #define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff #define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) @@ -1635,26 +1924,46 @@ static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; } -#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } -#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } #define REG_A3XX_UNKNOWN_0C3D 0x00000c3d +#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 + +#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 + +#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a + +#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b + #define REG_A3XX_UNKNOWN_0C81 0x00000c81 -#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0)) +#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 + +#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 -#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0)) +#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a -#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0)) +#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b -#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0)) +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } -#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0)) +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } #define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 +#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 + +#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 + #define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0 #define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff #define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0 @@ -1669,18 +1978,46 @@ static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val) return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK; } -#define REG_A3XX_UNKNOWN_0E00 0x00000e00 +#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 + +#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 + +#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 + +#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 + +#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 + +#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 #define REG_A3XX_UNKNOWN_0E43 0x00000e43 #define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 +#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 + #define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 #define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 +#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 + +#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 + #define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 +#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 + +#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 + +#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 + +#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 + +#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 + +#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 + #define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 @@ -1724,6 +2061,18 @@ static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_op #define REG_A3XX_UNKNOWN_0F03 0x00000f03 +#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 + +#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 + +#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 + +#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 + +#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 + +#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 + #define REG_A3XX_TEX_SAMP_0 0x00000000 #define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 @@ -1791,6 +2140,12 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) { return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; } +#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 +#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) +{ + return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; +} #define REG_A3XX_TEX_CONST_1 0x00000001 #define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index eabe21cb7e9..51154113de6 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg) return num; } -static void regmask_set(regmask_t regmask, struct ir3_register *reg) +static void regmask_set(regmask_t regmask, struct ir3_register *reg, + unsigned wrmask) { - unsigned idx = regmask_idx(reg); - regmask[idx / 8] |= 1 << (idx % 8); + unsigned i; + for (i = 0; i < 4; i++) { + if (wrmask & (1 << i)) { + unsigned idx = regmask_idx(reg) + i; + regmask[idx / 8] |= 1 << (idx % 8); + } + } } static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) @@ -91,6 +97,7 @@ struct fd3_compile_context { unsigned next_inloc; unsigned num_internal_temps; + struct tgsi_src_register internal_temps[6]; /* track registers which need to synchronize w/ "complex alu" cat3 * instruction pipeline: @@ -128,9 +135,16 @@ struct fd3_compile_context { * up the vector operation */ struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; }; + +static void vectorize(struct fd3_compile_context *ctx, + struct ir3_instruction *instr, struct tgsi_dst_register *dst, + int nsrcs, ...); +static void create_mov(struct fd3_compile_context *ctx, + struct tgsi_dst_register *dst, struct tgsi_src_register *src); + static unsigned compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, const struct tgsi_token *tokens) @@ -154,19 +168,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, /* Immediates go after constants: */ ctx->base_reg[TGSI_FILE_CONSTANT] = 0; ctx->base_reg[TGSI_FILE_IMMEDIATE] = - ctx->info.file_count[TGSI_FILE_CONSTANT]; + ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; /* Temporaries after outputs after inputs: */ ctx->base_reg[TGSI_FILE_INPUT] = 0; ctx->base_reg[TGSI_FILE_OUTPUT] = - ctx->info.file_count[TGSI_FILE_INPUT]; + ctx->info.file_max[TGSI_FILE_INPUT] + 1; ctx->base_reg[TGSI_FILE_TEMPORARY] = - ctx->info.file_count[TGSI_FILE_INPUT] + - ctx->info.file_count[TGSI_FILE_OUTPUT]; + ctx->info.file_max[TGSI_FILE_INPUT] + 1 + + ctx->info.file_max[TGSI_FILE_OUTPUT] + 1; so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; - ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] + - ctx->info.file_count[TGSI_FILE_IMMEDIATE]); + ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 + + ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); ret = tgsi_parse_init(&ctx->parser, tokens); if (ret != TGSI_PARSE_OK) @@ -178,6 +192,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, } static void +compile_error(struct fd3_compile_context *ctx, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); + tgsi_dump(ctx->tokens, 0); + assert(0); +} + +#define compile_assert(ctx, cond) do { \ + if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ + } while (0) + +static void compile_free(struct fd3_compile_context *ctx) { tgsi_parse_free(&ctx->parser); @@ -193,6 +222,24 @@ struct instr_translater { unsigned arg; }; +static unsigned +src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) +{ + unsigned flags = 0; + + if (regmask_get(ctx->needs_ss, reg)) { + flags |= IR3_INSTR_SS; + memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); + } + + if (regmask_get(ctx->needs_sy, reg)) { + flags |= IR3_INSTR_SY; + memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); + } + + return flags; +} + static struct ir3_register * add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) @@ -205,9 +252,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, num = dst->Index + ctx->base_reg[dst->File]; break; default: - DBG("unsupported dst register file: %s", + compile_error(ctx, "unsupported dst register file: %s\n", tgsi_file_name(dst->File)); - assert(0); break; } @@ -234,14 +280,17 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, flags |= IR3_REG_CONST; num = src->Index + ctx->base_reg[src->File]; break; + case TGSI_FILE_OUTPUT: + /* NOTE: we should only end up w/ OUTPUT file for things like + * clamp()'ing saturated dst instructions + */ case TGSI_FILE_INPUT: case TGSI_FILE_TEMPORARY: num = src->Index + ctx->base_reg[src->File]; break; default: - DBG("unsupported src register file: %s", + compile_error(ctx, "unsupported src register file: %s\n", tgsi_file_name(src->File)); - assert(0); break; } @@ -254,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, reg = ir3_reg_create(instr, regid(num, chan), flags); - if (regmask_get(ctx->needs_ss, reg)) { - instr->flags |= IR3_INSTR_SS; - memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); - } - - if (regmask_get(ctx->needs_sy, reg)) { - instr->flags |= IR3_INSTR_SY; - memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); - } + instr->flags |= src_flags(ctx, reg); return reg; } @@ -285,11 +326,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) /* Get internal-temp src/dst to use for a sequence of instructions * generated by a single TGSI op. */ -static void +static struct tgsi_src_register * get_internal_temp(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) + struct tgsi_dst_register *tmp_dst) { + struct tgsi_src_register *tmp_src; int n; tmp_dst->File = TGSI_FILE_TEMPORARY; @@ -299,23 +340,78 @@ get_internal_temp(struct fd3_compile_context *ctx, /* assign next temporary: */ n = ctx->num_internal_temps++; + compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); + tmp_src = &ctx->internal_temps[n]; - tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n; + tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; src_from_dst(tmp_src, tmp_dst); + + return tmp_src; } /* same as get_internal_temp, but w/ src.xxxx (for instructions that * replicate their results) */ -static void +static struct tgsi_src_register * get_internal_temp_repl(struct fd3_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) + struct tgsi_dst_register *tmp_dst) { - get_internal_temp(ctx, tmp_dst, tmp_src); + struct tgsi_src_register *tmp_src = + get_internal_temp(ctx, tmp_dst); tmp_src->SwizzleX = tmp_src->SwizzleY = tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; + return tmp_src; +} + +static inline bool +is_const(struct tgsi_src_register *src) +{ + return (src->File == TGSI_FILE_CONSTANT) || + (src->File == TGSI_FILE_IMMEDIATE); +} + +static type_t +get_ftype(struct fd3_compile_context *ctx) +{ + return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; +} + +static type_t +get_utype(struct fd3_compile_context *ctx) +{ + return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; +} + +static unsigned +src_swiz(struct tgsi_src_register *src, int chan) +{ + switch (chan) { + case 0: return src->SwizzleX; + case 1: return src->SwizzleY; + case 2: return src->SwizzleZ; + case 3: return src->SwizzleW; + } + assert(0); + return 0; +} + +/* for instructions that cannot take a const register as src, if needed + * generate a move to temporary gpr: + */ +static struct tgsi_src_register * +get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) +{ + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + + compile_assert(ctx, is_const(src)); + + tmp_src = get_internal_temp(ctx, &tmp_dst); + + create_mov(ctx, &tmp_dst, src); + + return tmp_src; } static void @@ -365,30 +461,11 @@ get_immediate(struct fd3_compile_context *ctx, reg->SwizzleW = swiz2tgsi[swiz]; } -static type_t -get_type(struct fd3_compile_context *ctx) -{ - return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; -} - -static unsigned -src_swiz(struct tgsi_src_register *src, int chan) -{ - switch (chan) { - case 0: return src->SwizzleX; - case 1: return src->SwizzleY; - case 2: return src->SwizzleZ; - case 3: return src->SwizzleW; - } - assert(0); - return 0; -} - static void create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src) { - type_t type_mov = get_type(ctx); + type_t type_mov = get_ftype(ctx); unsigned i; for (i = 0; i < 4; i++) { @@ -404,7 +481,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, ir3_instr_create(ctx->ir, 0, OPC_NOP); } } +} +static void +create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + struct tgsi_src_register *minval, struct tgsi_src_register *maxval) +{ + struct ir3_instruction *instr; + struct tgsi_src_register src; + + src_from_dst(&src, dst); + + instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + vectorize(ctx, instr, dst, 2, &src, 0, minval, 0); + + instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); + vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0); +} + +static void +create_clamp_imm(struct fd3_compile_context *ctx, + struct tgsi_dst_register *dst, + uint32_t minval, uint32_t maxval) +{ + struct tgsi_src_register minconst, maxconst; + + get_immediate(ctx, &minconst, minval); + get_immediate(ctx, &maxconst, maxval); + + create_clamp(ctx, dst, &minconst, &maxconst); } static struct tgsi_dst_register * @@ -415,7 +520,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_src_register *src = &inst->Src[i].Register; if ((src->File == dst->File) && (src->Index == dst->Index)) { - get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); + ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); ctx->tmp_dst.WriteMask = dst->WriteMask; dst = &ctx->tmp_dst; break; @@ -430,7 +535,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, { /* if necessary, add mov back into original dst: */ if (dst != &inst->Dst[0].Register) { - create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); + create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); } } @@ -478,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, cur->regs[j+1]->num = regid(cur->regs[j+1]->num >> 2, src_swiz(src, i)); + cur->flags |= src_flags(ctx, cur->regs[j+1]); } va_end(ap); } @@ -496,6 +602,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, * native instructions: */ +static inline void +get_swiz(unsigned *swiz, struct tgsi_src_register *src) +{ + swiz[0] = src->SwizzleX; + swiz[1] = src->SwizzleY; + swiz[2] = src->SwizzleZ; + swiz[3] = src->SwizzleW; +} + static void trans_dotp(const struct instr_translater *t, struct fd3_compile_context *ctx, @@ -503,39 +618,35 @@ trans_dotp(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; struct tgsi_dst_register *dst = &inst->Dst[0].Register; struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; - unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW }; - unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW }; + unsigned swiz0[4]; + unsigned swiz1[4]; opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; unsigned n = t->arg; /* number of components */ - unsigned i; + unsigned i, swapped = 0; - get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); - /* Blob compiler never seems to use a const in src1 position for - * mad.*, although there does seem (according to disassembler - * hidden in libllvm-a3xx.so) to be a bit to indicate that src1 - * is a const. Not sure if this is a hw bug, or simply that the - * disassembler lies. + /* in particular, can't handle const for src1 for cat3/mad: */ - if ((src1->File == TGSI_FILE_IMMEDIATE) || - (src1->File == TGSI_FILE_CONSTANT)) { - - /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: - */ - for (i = 0; i < 4; i++) - swiz1[i] = i; - - /* the first mul.f will clobber tmp.x, but that is ok - * because after that point we no longer need tmp.x: - */ - create_mov(ctx, &tmp_dst, src1); - src1 = &tmp_src; + if (is_const(src1)) { + if (!is_const(src0)) { + struct tgsi_src_register *tmp; + tmp = src0; + src0 = src1; + src1 = tmp; + swapped = 1; + } else { + src0 = get_unconst(ctx, src0); + } } + get_swiz(swiz0, src0); + get_swiz(swiz1, src1); + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); add_dst_reg(ctx, instr, &tmp_dst, 0); add_src_reg(ctx, instr, src0, swiz0[0]); @@ -548,29 +659,27 @@ trans_dotp(const struct instr_translater *t, add_dst_reg(ctx, instr, &tmp_dst, 0); add_src_reg(ctx, instr, src0, swiz0[i]); add_src_reg(ctx, instr, src1, swiz1[i]); - add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); } /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ if (t->tgsi_opc == TGSI_OPCODE_DPH) { - ir3_instr_create(ctx->ir, 0, OPC_NOP); + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1; instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src1, swiz1[i]); - add_src_reg(ctx, instr, &tmp_src, 0); + if (swapped) + add_src_reg(ctx, instr, src0, swiz0[i]); + else + add_src_reg(ctx, instr, src1, swiz1[i]); + add_src_reg(ctx, instr, tmp_src, 0); n++; } - ir3_instr_create(ctx->ir, 0, OPC_NOP); - - /* pad out to multiple of 4 scalar instructions: */ - for (i = 2 * n; i % 4; i++) { - ir3_instr_create(ctx->ir, 0, OPC_NOP); - } + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; - create_mov(ctx, dst, &tmp_src); + create_mov(ctx, dst, tmp_src); } /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ @@ -581,37 +690,39 @@ trans_lrp(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register tmp_src1, tmp_src2; + struct tgsi_src_register *tmp_src1, *tmp_src2; struct tgsi_src_register tmp_const; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; - get_internal_temp(ctx, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, &tmp_dst2, &tmp_src2); + if (is_const(src0) && is_const(src1)) + src0 = get_unconst(ctx, src0); + + tmp_src1 = get_internal_temp(ctx, &tmp_dst1); + tmp_src2 = get_internal_temp(ctx, &tmp_dst2); get_immediate(ctx, &tmp_const, fui(1.0)); /* tmp1 = (a * b) */ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); - vectorize(ctx, instr, &tmp_dst1, 2, - &inst->Src[0].Register, 0, - &inst->Src[1].Register, 0); + vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0); /* tmp2 = (1 - a) */ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); - vectorize(ctx, instr, &tmp_dst2, 2, - &tmp_const, 0, - &inst->Src[0].Register, IR3_REG_NEGATE); + vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0, + src0, IR3_REG_NEGATE); /* tmp2 = tmp2 * c */ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); vectorize(ctx, instr, &tmp_dst2, 2, - &tmp_src2, 0, + tmp_src2, 0, &inst->Src[2].Register, 0); /* dst = tmp1 + tmp2 */ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); vectorize(ctx, instr, &inst->Dst[0].Register, 2, - &tmp_src1, 0, - &tmp_src2, 0); + tmp_src1, 0, + tmp_src2, 0); } /* FRC(x) = x - FLOOR(x) */ @@ -622,9 +733,9 @@ trans_frac(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); /* tmp = FLOOR(x) */ instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); @@ -635,7 +746,7 @@ trans_frac(const struct instr_translater *t, instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); vectorize(ctx, instr, &inst->Dst[0].Register, 2, &inst->Src[0].Register, 0, - &tmp_src, IR3_REG_NEGATE); + tmp_src, IR3_REG_NEGATE); } /* POW(a,b) = EXP2(b * LOG2(a)) */ @@ -647,24 +758,24 @@ trans_pow(const struct instr_translater *t, struct ir3_instruction *instr; struct ir3_register *r; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; + struct tgsi_src_register *tmp_src; struct tgsi_dst_register *dst = &inst->Dst[0].Register; struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; - get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); /* log2 Rtmp, Rsrc0 */ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); r = add_dst_reg(ctx, instr, &tmp_dst, 0); add_src_reg(ctx, instr, src0, src0->SwizzleX); - regmask_set(ctx->needs_ss, r); + regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); /* mul.f Rtmp, Rtmp, Rsrc1 */ instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, tmp_src, 0); add_src_reg(ctx, instr, src1, src1->SwizzleX); /* blob compiler seems to ensure there are at least 6 instructions @@ -676,10 +787,10 @@ trans_pow(const struct instr_translater *t, /* exp2 Rdst, Rtmp */ instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); r = add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, &tmp_src, 0); - regmask_set(ctx->needs_ss, r); + add_src_reg(ctx, instr, tmp_src, 0); + regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); - create_mov(ctx, dst, &tmp_src); + create_mov(ctx, dst, tmp_src); } /* texture fetch/sample instructions: */ @@ -690,8 +801,6 @@ trans_samp(const struct instr_translater *t, { struct ir3_register *r; struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; struct tgsi_src_register *coord = &inst->Src[0].Register; struct tgsi_src_register *samp = &inst->Src[1].Register; unsigned tex = inst->Texture.Texture; @@ -711,7 +820,7 @@ trans_samp(const struct instr_translater *t, flags |= IR3_INSTR_P; break; default: - assert(0); + compile_assert(ctx, 0); break; } @@ -726,10 +835,13 @@ trans_samp(const struct instr_translater *t, */ for (i = 1; (i < 4) && (order[i] >= 0); i++) { if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { - type_t type_mov = get_type(ctx); + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + + type_t type_mov = get_ftype(ctx); /* need to move things around: */ - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); for (j = 0; (j < 4) && (order[j] >= 0); j++) { instr = ir3_instr_create(ctx->ir, 1, 0); @@ -740,7 +852,7 @@ trans_samp(const struct instr_translater *t, src_swiz(coord, order[j])); } - coord = &tmp_src; + coord = tmp_src; if (j < 4) ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; @@ -750,7 +862,7 @@ trans_samp(const struct instr_translater *t, } instr = ir3_instr_create(ctx->ir, 5, t->opc); - instr->cat5.type = get_type(ctx); + instr->cat5.type = get_ftype(ctx); instr->cat5.samp = samp->Index; instr->cat5.tex = samp->Index; instr->flags |= flags; @@ -760,10 +872,42 @@ trans_samp(const struct instr_translater *t, add_src_reg(ctx, instr, coord, coord->SwizzleX); - regmask_set(ctx->needs_sy, r); + regmask_set(ctx->needs_sy, r, r->wrmask); } -/* CMP(a,b,c) = (a < 0) ? b : c */ +/* + * SEQ(a,b) = (a == b) ? 1.0 : 0.0 + * cmps.f.eq tmp0, b, a + * cov.u16f16 dst, tmp0 + * + * SNE(a,b) = (a != b) ? 1.0 : 0.0 + * cmps.f.eq tmp0, b, a + * add.s tmp0, tmp0, -1 + * sel.f16 dst, {0.0}, tmp0, {1.0} + * + * SGE(a,b) = (a >= b) ? 1.0 : 0.0 + * cmps.f.ge tmp0, a, b + * cov.u16f16 dst, tmp0 + * + * SLE(a,b) = (a <= b) ? 1.0 : 0.0 + * cmps.f.ge tmp0, b, a + * cov.u16f16 dst, tmp0 + * + * SGT(a,b) = (a > b) ? 1.0 : 0.0 + * cmps.f.ge tmp0, b, a + * add.s tmp0, tmp0, -1 + * sel.f16 dst, {0.0}, tmp0, {1.0} + * + * SLT(a,b) = (a < b) ? 1.0 : 0.0 + * cmps.f.ge tmp0, a, b + * add.s tmp0, tmp0, -1 + * sel.f16 dst, {0.0}, tmp0, {1.0} + * + * CMP(a,b,c) = (a < 0.0) ? b : c + * cmps.f.ge tmp0, a, {0.0} + * add.s tmp0, tmp0, -1 + * sel.f16 dst, c, tmp0, b + */ static void trans_cmp(const struct instr_translater *t, struct fd3_compile_context *ctx, @@ -771,35 +915,94 @@ trans_cmp(const struct instr_translater *t, { struct ir3_instruction *instr; struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register constval; - /* final instruction uses original src1 and src2, so we need get_dst() */ + struct tgsi_src_register *tmp_src; + struct tgsi_src_register constval0, constval1; + /* final instruction for CMP() uses orig src1 and src2: */ struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *a0, *a1; + unsigned condition; - get_internal_temp(ctx, &tmp_dst, &tmp_src); + tmp_src = get_internal_temp(ctx, &tmp_dst); - /* cmps.f.ge tmp, src0, 0.0 */ + switch (t->tgsi_opc) { + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + a0 = &inst->Src[1].Register; /* b */ + a1 = &inst->Src[0].Register; /* a */ + condition = IR3_COND_EQ; + break; + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SLT: + a0 = &inst->Src[0].Register; /* a */ + a1 = &inst->Src[1].Register; /* b */ + condition = IR3_COND_GE; + break; + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGT: + a0 = &inst->Src[1].Register; /* b */ + a1 = &inst->Src[0].Register; /* a */ + condition = IR3_COND_GE; + break; + case TGSI_OPCODE_CMP: + get_immediate(ctx, &constval0, fui(0.0)); + a0 = &inst->Src[0].Register; /* a */ + a1 = &constval0; /* {0.0} */ + condition = IR3_COND_GE; + break; + default: + compile_assert(ctx, 0); + return; + } + + if (is_const(a0) && is_const(a1)) + a0 = get_unconst(ctx, a0); + + /* cmps.f.ge tmp, a0, a1 */ instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); - instr->cat2.condition = IR3_COND_GE; - get_immediate(ctx, &constval, fui(0.0)); - vectorize(ctx, instr, &tmp_dst, 2, - &inst->Src[0].Register, 0, - &constval, 0); + instr->cat2.condition = condition; + vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - /* add.s tmp, tmp, -1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); - instr->repeat = 3; - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, &tmp_src, 0); - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + switch (t->tgsi_opc) { + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SLE: + /* cov.u16f16 dst, tmp0 */ + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = get_utype(ctx); + instr->cat1.dst_type = get_ftype(ctx); + vectorize(ctx, instr, dst, 1, tmp_src, 0); + break; + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_CMP: + /* add.s tmp, tmp, -1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr->repeat = 3; + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + + if (t->tgsi_opc == TGSI_OPCODE_CMP) { + /* sel.{f32,f16} dst, src2, tmp, src1 */ + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); + vectorize(ctx, instr, dst, 3, + &inst->Src[2].Register, 0, + tmp_src, 0, + &inst->Src[1].Register, 0); + } else { + get_immediate(ctx, &constval0, fui(0.0)); + get_immediate(ctx, &constval1, fui(1.0)); + /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); + vectorize(ctx, instr, dst, 3, + &constval0, 0, tmp_src, 0, &constval1, 0); + } - /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? - OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, &inst->Dst[0].Register, 3, - &inst->Src[2].Register, 0, - &tmp_src, 0, - &inst->Src[1].Register, 0); + break; + } put_dst(ctx, inst, dst); } @@ -858,10 +1061,13 @@ trans_if(const struct instr_translater *t, get_immediate(ctx, &constval, fui(0.0)); + if (is_const(src)) + src = get_unconst(ctx, src); + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); ir3_reg_create(instr, regid(REG_P0, 0), 0); - add_src_reg(ctx, instr, &constval, constval.SwizzleX); add_src_reg(ctx, instr, src, src->SwizzleX); + add_src_reg(ctx, instr, &constval, constval.SwizzleX); instr->cat2.condition = IR3_COND_EQ; instr = ir3_instr_create(ctx->ir, 0, OPC_BR); @@ -939,16 +1145,12 @@ instr_cat2(const struct instr_translater *t, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; struct ir3_instruction *instr; unsigned src0_flags = 0; - instr = ir3_instr_create(ctx->ir, 2, t->opc); - switch (t->tgsi_opc) { - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - instr->cat2.condition = t->arg; - break; case TGSI_OPCODE_ABS: src0_flags = IR3_REG_ABS; break; @@ -970,48 +1172,65 @@ instr_cat2(const struct instr_translater *t, case OPC_SETRM: case OPC_CBITS_B: /* these only have one src reg */ - vectorize(ctx, instr, dst, 1, - &inst->Src[0].Register, src0_flags); + instr = ir3_instr_create(ctx->ir, 2, t->opc); + vectorize(ctx, instr, dst, 1, src0, src0_flags); break; default: - vectorize(ctx, instr, dst, 2, - &inst->Src[0].Register, src0_flags, - &inst->Src[1].Register, 0); + if (is_const(src0) && is_const(src1)) + src0 = get_unconst(ctx, src0); + + instr = ir3_instr_create(ctx->ir, 2, t->opc); + vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0); break; } put_dst(ctx, inst, dst); } +static bool is_mad(opc_t opc) +{ + switch (opc) { + case OPC_MAD_U16: + case OPC_MADSH_U16: + case OPC_MAD_S16: + case OPC_MADSH_M16: + case OPC_MAD_U24: + case OPC_MAD_S24: + case OPC_MAD_F16: + case OPC_MAD_F32: + return true; + default: + return false; + } +} + static void instr_cat3(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; struct ir3_instruction *instr; - /* Blob compiler never seems to use a const in src1 position.. - * although there does seem (according to disassembler hidden - * in libllvm-a3xx.so) to be a bit to indicate that src1 is a - * const. Not sure if this is a hw bug, or simply that the - * disassembler lies. + /* in particular, can't handle const for src1 for cat3.. + * for mad, we can swap first two src's if needed: */ - if ((src1->File == TGSI_FILE_CONSTANT) || - (src1->File == TGSI_FILE_IMMEDIATE)) { - get_internal_temp(ctx, &tmp_dst, &tmp_src); - create_mov(ctx, &tmp_dst, src1); - src1 = &tmp_src; + if (is_const(src1)) { + if (is_mad(t->opc) && !is_const(src0)) { + struct tgsi_src_register *tmp; + tmp = src0; + src0 = src1; + src1 = tmp; + } else { + src0 = get_unconst(ctx, src0); + } } instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? t->hopc : t->opc); - vectorize(ctx, instr, dst, 3, - &inst->Src[0].Register, 0, - src1, 0, + vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, &inst->Src[2].Register, 0); put_dst(ctx, inst, dst); } @@ -1022,15 +1241,20 @@ instr_cat4(const struct instr_translater *t, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src = &inst->Src[0].Register; struct ir3_instruction *instr; + /* seems like blob compiler avoids const as src.. */ + if (is_const(src)) + src = get_unconst(ctx, src); + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; instr = ir3_instr_create(ctx->ir, 4, t->opc); - vectorize(ctx, instr, dst, 1, - &inst->Src[0].Register, 0); + vectorize(ctx, instr, dst, 1, src, 0); - regmask_set(ctx->needs_ss, instr->regs[0]); + regmask_set(ctx->needs_ss, instr->regs[0], + inst->Dst[0].Register.WriteMask); put_dst(ctx, inst, dst); } @@ -1051,12 +1275,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */ INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), - INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT), - INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE), INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), INSTR(LRP, trans_lrp), INSTR(FRC, trans_frac), INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), + INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F), INSTR(EX2, instr_cat4, .opc = OPC_EXP2), INSTR(LG2, instr_cat4, .opc = OPC_LOG2), INSTR(POW, trans_pow), @@ -1065,6 +1288,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(SIN, instr_cat4, .opc = OPC_COS), INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), + INSTR(SGT, trans_cmp), + INSTR(SLT, trans_cmp), + INSTR(SGE, trans_cmp), + INSTR(SLE, trans_cmp), + INSTR(SNE, trans_cmp), + INSTR(SEQ, trans_cmp), INSTR(CMP, trans_cmp), INSTR(IF, trans_if), INSTR(ELSE, trans_else), @@ -1132,7 +1361,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) unsigned name = decl->Semantic.Name; unsigned i; - assert(decl->Declaration.Semantic); // TODO is this ever not true? + compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX @@ -1152,9 +1381,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->outputs[so->outputs_count++].regid = regid(i + base, 0); break; default: - DBG("unknown VS semantic name: %s", + compile_error(ctx, "unknown VS semantic name: %s\n", tgsi_semantic_names[name]); - assert(0); } } else { switch (name) { @@ -1162,9 +1390,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->color_regid = regid(decl->Range.First + base, 0); break; default: - DBG("unknown VS semantic name: %s", + compile_error(ctx, "unknown VS semantic name: %s\n", tgsi_semantic_names[name]); - assert(0); } } } @@ -1223,10 +1450,19 @@ compile_instructions(struct fd3_compile_context *ctx) t->fxn(t, ctx, inst); ctx->num_internal_temps = 0; } else { - debug_printf("unknown TGSI opc: %s\n", + compile_error(ctx, "unknown TGSI opc: %s\n", tgsi_get_opcode_name(opc)); - tgsi_dump(ctx->tokens, 0); - assert(0); + } + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_ZERO_ONE: + create_clamp_imm(ctx, &inst->Dst[0].Register, + fui(0.0), fui(1.0)); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + create_clamp_imm(ctx, &inst->Dst[0].Register, + fui(-1.0), fui(1.0)); + break; } break; @@ -1253,6 +1489,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so, so->ir = ir3_shader_create(); + assert(so->ir); + so->color_regid = regid(63,0); so->pos_regid = regid(63,0); so->psize_regid = regid(63,0); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 3ae9b2953e4..589aeed4578 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -40,7 +40,18 @@ static void fd3_context_destroy(struct pipe_context *pctx) { + struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx)); + fd3_prog_fini(pctx); + + fd_bo_del(fd3_ctx->vs_pvt_mem); + fd_bo_del(fd3_ctx->fs_pvt_mem); + fd_bo_del(fd3_ctx->vsc_size_mem); + fd_bo_del(fd3_ctx->vsc_pipe_mem); + + pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL); + pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL); + fd_context_destroy(pctx); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index a7a4bf78d62..5e586185ad9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring, if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } else { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); @@ -212,7 +212,7 @@ emit_textures(struct fd_ringbuffer *ring, for (i = 0; i < tex->num_textures; i++) { struct fd3_pipe_sampler_view *view = fd3_pipe_sampler_view(tex->textures[i]); - OUT_RELOC(ring, view->tex_resource->bo, 0, 0); + OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0); /* I think each entry is a ptr to mipmap level.. for now, just * pad w/ null's until I get around to actually implementing * mipmap support.. @@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) | 0x40000000 | // XXX - fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA)); - OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) | + fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); + OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | A3XX_TEX_CONST_1_WIDTH(psurf->width) | A3XX_TEX_CONST_1_HEIGHT(psurf->height)); OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) | @@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf CP_LOAD_STATE_0_NUM_UNIT(1)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RELOC(ring, rsc->bo, 0, 0); + OUT_RELOC(ring, rsc->bo, 0, 0, 0); } void @@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) | A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); - OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0); + OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0); OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1); OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | @@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3); OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */ - OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */ + OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */ OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */ OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3); OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */ - OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */ + OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */ OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */ OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); @@ -536,8 +536,8 @@ fd3_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1); OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */ - OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1); - OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */ + OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */ OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2); OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) | @@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx) OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */ OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); - OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */ + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 1cb170af261..8d2df47c72a 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, if (bin_w || (i >= nr_bufs)) { OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); } else { - OUT_RELOCS(ring, res->bo, 0, 0, -1); + OUT_RELOCW(ring, res->bo, 0, 0, -1); } OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); @@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A3XX_RB_COPY_CONTROL_MODE(mode) | A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); - OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RELOCW(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp)); OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | @@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); @@ -206,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, }, 1); if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - uint32_t base = depth_base(&ctx->gmem) * - fd_resource(pfb->cbufs[0]->texture)->cpp; + uint32_t base = 0; + if (pfb->cbufs[0]) { + struct fd_resource *rsc = + fd_resource(pfb->cbufs[0]->texture); + base = depth_base(&ctx->gmem) * rsc->cpp; + } emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); } @@ -260,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, y1 = ((float)yoff + bin_h) / ((float)pfb->height); OUT_PKT3(ring, CP_MEM_WRITE, 5); - OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0); + OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x1)); @@ -383,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx) A3XX_VSC_PIPE_CONFIG_Y(0) | A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) | A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y)); - OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ + OUT_RELOC(ring, bo, 0, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */ for (i = 1; i < 8; i++) { @@ -402,8 +414,11 @@ static void fd3_emit_sysmem_prep(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture); struct fd_ringbuffer *ring = ctx->ring; + uint32_t pitch = 0; + + if (pfb->cbufs[0]) + pitch = fd_resource(pfb->cbufs[0]->texture)->pitch; fd3_emit_restore(ctx); @@ -414,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); fd3_emit_rbrc_tile_state(ring, - A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch)); + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index b5a027e6503..c6c51b11ee2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -249,7 +249,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, */ for (i = 0; i < 6; i++) { OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1); - OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ + OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER0_SELECT */ OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1); OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ @@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); - OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */ + OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ #endif OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); @@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen)); - OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */ + OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ #endif OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index ae08b8ac90a..e56325ba1f1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx, so->base = *cso; so->texsamp0 = + COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) | A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | @@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx, return so; } +static enum a3xx_tex_type +tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A3XX_TEX_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A3XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A3XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A3XX_TEX_CUBE; + } +} + static struct pipe_sampler_view * fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, so->tex_resource = rsc; so->texconst0 = - 0x40000000 | /* ??? */ + A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c index a08bc2349eb..6537fb77716 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c @@ -306,10 +306,11 @@ fd3_pipe2swap(enum pipe_format format) case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: return WXYZ; + + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return WZYX; - default: return WZYX; } diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h index 2fedc7bee38..61c01a7f528 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h +++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h @@ -166,8 +166,7 @@ struct ir3_instruction { }; }; -/* this is just large to cope w/ the large test *.asm: */ -#define MAX_INSTRS 10240 +#define MAX_INSTRS 1024 struct ir3_shader { unsigned instrs_count; diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index b1198125e97..61979d458ac 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) +- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) Copyright (C) 2013 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -113,5 +115,318 @@ enum adreno_rb_depth_format { DEPTHX_24_8 = 1, }; +enum adreno_mmu_clnt_beh { + BEH_NEVR = 0, + BEH_TRAN_RNG = 1, + BEH_TRAN_FLT = 2, +}; + +#define REG_AXXX_MH_MMU_CONFIG 0x00000040 +#define AXXX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 +#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 +#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 +#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 +static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 +#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 +#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 +#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 +#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 +#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 +#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 +static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 +#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 +static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 +#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 +static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 +#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 +static inline uint32_t AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; +} +#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 +#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 +static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; +} + +#define REG_AXXX_MH_MMU_VA_RANGE 0x00000041 + +#define REG_AXXX_MH_MMU_PT_BASE 0x00000042 + +#define REG_AXXX_MH_MMU_PAGE_FAULT 0x00000043 + +#define REG_AXXX_MH_MMU_TRAN_ERROR 0x00000044 + +#define REG_AXXX_MH_MMU_INVALIDATE 0x00000045 + +#define REG_AXXX_MH_MMU_MPU_BASE 0x00000046 + +#define REG_AXXX_MH_MMU_MPU_END 0x00000047 + +#define REG_AXXX_CP_RB_BASE 0x000001c0 + +#define REG_AXXX_CP_RB_CNTL 0x000001c1 +#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f +#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 +static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 +#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 +static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 +#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 +static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; +} +#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 +#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 +#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 + +#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) +{ + return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; +} +#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc +#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) +{ + return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; +} + +#define REG_AXXX_CP_RB_RPTR 0x000001c4 + +#define REG_AXXX_CP_RB_WPTR 0x000001c5 + +#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 + +#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 + +#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 + +#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; +} + +#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 + +#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 +#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f +#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 +#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 +#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; +} + +#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 +#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f +#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 +static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) +{ + return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; +} + +#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 +#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f +#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 +static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; +} + +#define REG_AXXX_SCRATCH_UMSK 0x000001dc +#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff +#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 +static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; +} +#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 +#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 +static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; +} + +#define REG_AXXX_SCRATCH_ADDR 0x000001dd + +#define REG_AXXX_CP_ME_RDADDR 0x000001ea + +#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec + +#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed + +#define REG_AXXX_CP_INT_CNTL 0x000001f2 + +#define REG_AXXX_CP_INT_STATUS 0x000001f3 + +#define REG_AXXX_CP_INT_ACK 0x000001f4 + +#define REG_AXXX_CP_ME_CNTL 0x000001f6 + +#define REG_AXXX_CP_ME_STATUS 0x000001f7 + +#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 + +#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 + +#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa + +#define REG_AXXX_CP_DEBUG 0x000001fc +#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 +#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 +#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 +#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 +#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 +#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 +#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 +#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 + +#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd +#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe +#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff +#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 + +#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 + +#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a + +#define REG_AXXX_CP_SCRATCH_REG3 0x0000057b + +#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c + +#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d + +#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e + +#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f + +#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a + +#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b + +#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c + +#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d + +#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e + #endif /* ADRENO_COMMON_XML */ diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index d3a7baca0e9..94c13f418e7 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) +- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) +- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) Copyright (C) 2013 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 44d525b25dd..1d03351f041 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx) ctx->gmem_reason = 0; ctx->num_draws = 0; - fd_resource(pfb->cbufs[0]->texture)->dirty = false; + if (pfb->cbufs[0]) + fd_resource(pfb->cbufs[0]->texture)->dirty = false; if (pfb->zsbuf) fd_resource(pfb->zsbuf->texture)->dirty = false; } diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index b02b8b9f9f9..4a98ab40f9e 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info) src_sel, idx_type, IGNORE_VISIBILITY)); OUT_RING(ring, info->count); /* NumIndices */ if (info->indexed) { - OUT_RELOC(ring, idx_bo, idx_offset, 0); + OUT_RELOC(ring, idx_bo, idx_offset, 0, 0); OUT_RING (ring, idx_size); } } @@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, } DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, - util_format_name(pfb->cbufs[0]->format), - pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); + util_format_short_name(pipe_surface_format(pfb->cbufs[0])), + util_format_short_name(pipe_surface_format(pfb->zsbuf))); ctx->clear(ctx, buffers, color, depth, stencil); diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 12633bd5f38..3d959c65dc7 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -71,12 +71,16 @@ calculate_tiles(struct fd_context *ctx) { struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_scissor_state *scissor = &ctx->max_scissor; - uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format); + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; uint32_t gmem_size = ctx->screen->gmemsize_bytes; uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; uint32_t max_width = 992; + uint32_t cpp = 4; + + if (pfb->cbufs[0]) + cpp = util_format_get_blocksize(pfb->cbufs[0]->format); if ((gmem->cpp == cpp) && !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { @@ -84,10 +88,17 @@ calculate_tiles(struct fd_context *ctx) return; } - minx = scissor->minx & ~31; /* round down to multiple of 32 */ - miny = scissor->miny & ~31; - width = scissor->maxx - minx; - height = scissor->maxy - miny; + if (fd_mesa_debug & FD_DBG_DSCIS) { + minx = 0; + miny = 0; + width = pfb->width; + height = pfb->height; + } else { + minx = scissor->minx & ~31; /* round down to multiple of 32 */ + miny = scissor->miny & ~31; + width = scissor->maxx - minx; + height = scissor->maxy - miny; + } // TODO we probably could optimize this a bit if we know that // Z or stencil is not enabled for any of the draw calls.. @@ -132,9 +143,7 @@ static void render_tiles(struct fd_context *ctx) { struct fd_gmem_stateobj *gmem = &ctx->gmem; - uint32_t i, yoff = 0; - - yoff= gmem->miny; + uint32_t i, yoff = gmem->miny; ctx->emit_tile_init(ctx); @@ -143,13 +152,13 @@ render_tiles(struct fd_context *ctx) uint32_t bh = gmem->bin_h; /* clip bin height: */ - bh = MIN2(bh, gmem->height - yoff); + bh = MIN2(bh, gmem->miny + gmem->height - yoff); for (j = 0; j < gmem->nbins_x; j++) { uint32_t bw = gmem->bin_w; /* clip bin width: */ - bw = MIN2(bw, gmem->width - xoff); + bw = MIN2(bw, gmem->minx + gmem->width - xoff); DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", bh, yoff, bw, xoff); @@ -205,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx) if (sysmem) { DBG("rendering sysmem (%s/%s)", - util_format_name(pfb->cbufs[0]->format), - pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); + util_format_short_name(pipe_surface_format(pfb->cbufs[0])), + util_format_short_name(pipe_surface_format(pfb->zsbuf))); render_sysmem(ctx); } else { struct fd_gmem_stateobj *gmem = &ctx->gmem; - DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, - util_format_name(pfb->cbufs[0]->format), - pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); calculate_tiles(ctx); + DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, + util_format_short_name(pipe_surface_format(pfb->cbufs[0])), + util_format_short_name(pipe_surface_format(pfb->zsbuf))); render_tiles(ctx); } @@ -225,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx) /* update timestamps on render targets: */ timestamp = fd_ringbuffer_timestamp(ctx->ring); - fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; + if (pfb->cbufs[0]) + fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; if (pfb->zsbuf) fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 1b1eaa52512..3e051ea9882 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { struct fd_context *ctx = fd_context(pctx); + struct fd_resource *rsc = fd_resource(ptrans->resource); + if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + fd_bo_cpu_fini(rsc->bo); pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&ctx->transfer_pool, ptrans); } @@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx, struct fd_resource *rsc = fd_resource(prsc); struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool); enum pipe_format format = prsc->format; + uint32_t op = 0; char *buf; if (!ptrans) return NULL; - /* util_slap_alloc() doesn't zero: */ + /* util_slab_alloc() doesn't zero: */ memset(ptrans, 0, sizeof(*ptrans)); pipe_resource_reference(&ptrans->resource, prsc); @@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, ptrans->layer_stride = ptrans->stride; /* some state trackers (at least XA) don't do this.. */ - fd_resource_transfer_flush_region(pctx, ptrans, box); + if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + fd_resource_transfer_flush_region(pctx, ptrans, box); buf = fd_bo_map(rsc->bo); if (!buf) { @@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } + if (usage & PIPE_TRANSFER_READ) + op |= DRM_FREEDRENO_PREP_READ; + + if (usage & PIPE_TRANSFER_WRITE) + op |= DRM_FREEDRENO_PREP_WRITE; + + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op); + *pptrans = ptrans; return buf + diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 52d51c22966..36ef8b0bc53 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = { {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, {"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"}, + {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"}, DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 2f5d52c017c..f5290a9af2a 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx, unsigned i; DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, - cso->cbufs[0], cso->zsbuf); + framebuffer->cbufs[0], framebuffer->zsbuf); fd_context_render(pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index f18f0fee989..7bbbe8016b5 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -33,8 +33,10 @@ #include <freedreno_ringbuffer.h> #include "pipe/p_format.h" +#include "pipe/p_state.h" #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_half.h" #include "adreno_common.xml.h" #include "adreno_pm4.xml.h" @@ -47,10 +49,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); enum adreno_stencil_op fd_stencil_op(unsigned op); -#define FD_DBG_MSGS 0x1 -#define FD_DBG_DISASM 0x2 -#define FD_DBG_DCLEAR 0x4 -#define FD_DBG_DGMEM 0x8 +#define FD_DBG_MSGS 0x01 +#define FD_DBG_DISASM 0x02 +#define FD_DBG_DCLEAR 0x04 +#define FD_DBG_DGMEM 0x08 +#define FD_DBG_DSCIS 0x10 extern int fd_mesa_debug; #define DBG(fmt, ...) \ @@ -77,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type, (1 << 14); } + +static inline enum pipe_format +pipe_surface_format(struct pipe_surface *psurf) +{ + if (!psurf) + return PIPE_FORMAT_NONE; + return psurf->format; +} + #define LOG_DWORDS 0 @@ -92,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data) static inline void OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or) + uint32_t offset, uint32_t or, int32_t shift) { if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring, - (uint32_t)(ring->cur - ring->last_start), bo, offset); + DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, + (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } - fd_ringbuffer_emit_reloc(ring, bo, offset, or); + fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ, + .offset = offset, + .or = or, + .shift = shift, + }); } -/* shifted reloc: */ static inline void -OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo, +OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset, uint32_t or, int32_t shift) { if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RELOCS %04x: %p+%u << %d", ring, + DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } - fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, shift); + fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ | FD_RELOC_WRITE, + .offset = offset, + .or = or, + .shift = shift, + }); } static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) @@ -143,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, struct fd_ringmarker *end) { OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); - fd_ringbuffer_emit_reloc_ring(ring, start); + fd_ringbuffer_emit_reloc_ring(ring, start, end); OUT_RING(ring, fd_ringmarker_dwords(start, end)); } |