diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2014-05-23 17:21:59 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2014-05-23 17:21:59 -0700 |
commit | bfaee5277a340c4c4e0068cc4e0b73c818bd5385 (patch) | |
tree | 455574f3b59bea714426815e3fae83697f710bec /src/gallium/drivers/freedreno | |
parent | 9a8f12ae034feefc7ce189485dfc8f387945eee0 (diff) | |
parent | e084f715482c11c7c2b416929ef36f5d81a9f8ff (diff) |
Merge remote-tracking branch 'robclark/freedreno-10.2' into 10.2
Diffstat (limited to 'src/gallium/drivers/freedreno')
29 files changed, 1847 insertions, 229 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 311b0b6a205..0dc7fc08512 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -3,6 +3,8 @@ C_SOURCES := \ freedreno_lowering.c \ freedreno_program.c \ freedreno_query.c \ + freedreno_query_hw.c \ + freedreno_query_sw.c \ freedreno_fence.c \ freedreno_resource.c \ freedreno_surface.c \ @@ -38,6 +40,7 @@ a3xx_SOURCES := \ a3xx/fd3_emit.c \ a3xx/fd3_gmem.c \ a3xx/fd3_program.c \ + a3xx/fd3_query.c \ a3xx/fd3_rasterizer.c \ a3xx/fd3_screen.c \ a3xx/fd3_texture.c \ diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index 0de872db508..5495728c97e 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57) Copyright (C) 2013-2014 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c index d1a586c8c9b..a533c298115 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c @@ -125,7 +125,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, { unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id); static const struct fd2_sampler_stateobj dummy_sampler = {}; - struct fd2_sampler_stateobj *sampler; + const struct fd2_sampler_stateobj *sampler; struct fd2_pipe_sampler_view *view; if (emitted & (1 << const_idx)) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index c61f7aab74f..8934e213b00 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57) Copyright (C) 2013-2014 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -41,31 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -enum a3xx_render_mode { - RB_RENDERING_PASS = 0, - RB_TILING_PASS = 1, - RB_RESOLVE_PASS = 2, -}; - enum a3xx_tile_mode { LINEAR = 0, TILE_32X32 = 2, }; -enum a3xx_threadmode { - MULTI = 0, - SINGLE = 1, -}; - -enum a3xx_instrbuffermode { - BUFFER = 1, -}; - -enum a3xx_threadsize { - TWO_QUADS = 0, - FOUR_QUADS = 1, -}; - enum a3xx_state_block_id { HLSQ_BLOCK_ID_TP_TEX = 2, HLSQ_BLOCK_ID_TP_MIPMAP = 3, @@ -180,12 +160,6 @@ enum a3xx_color_swap { XYZW = 3, }; -enum a3xx_msaa_samples { - MSAA_ONE = 0, - MSAA_TWO = 1, - MSAA_FOUR = 2, -}; - enum a3xx_sp_perfcounter_select { SP_FS_CFLOW_INSTRUCTIONS = 12, SP_FS_FULL_ALU_INSTRUCTIONS = 14, @@ -212,11 +186,6 @@ enum a3xx_rop_code { ROP_SET = 15, }; -enum adreno_rb_copy_control_mode { - RB_COPY_RESOLVE = 1, - RB_COPY_DEPTH_STENCIL = 5, -}; - enum a3xx_tex_filter { A3XX_TEX_NEAREST = 0, A3XX_TEX_LINEAR = 1, @@ -337,6 +306,7 @@ enum a3xx_tex_type { #define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 #define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 +#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001 #define 
REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 @@ -570,6 +540,10 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 #define REG_A3XX_CP_AHB_FAULT 0x0000054d +#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22 + +#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23 + #define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 #define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 #define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 @@ -644,8 +618,26 @@ static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) } #define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} #define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 +#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK; +} #define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff @@ -992,6 +984,12 @@ static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mod { return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; } +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) +{ + return ((val) << 
A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; +} #define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 #define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) @@ -1034,6 +1032,12 @@ static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) { return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK; } +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} #define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 #define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) @@ -1202,6 +1206,8 @@ static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val) } #define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 #define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111 @@ -1366,10 +1372,36 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) } #define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & 
A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK; +} + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; } -#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG 0x0000220b +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; } -#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG 0x0000220c +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; } #define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 @@ -1377,7 +1409,9 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) #define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 -#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x00002215 +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; } #define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216 @@ -1624,6 +1658,7 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) } #define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 #define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000 #define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 #define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 static inline uint32_t 
A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) @@ -1797,6 +1832,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) } #define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 #define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000 #define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 #define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) @@ -1976,6 +2012,42 @@ static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) #define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f +#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070 +#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071 +#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072 + +#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073 + +#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074 + +#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075 + +#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c + #define REG_A3XX_VSC_BIN_SIZE 0x00000c01 #define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f #define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 @@ -2249,6 +2321,12 @@ static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) { 
return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; } +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000 +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20 +static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; +} #define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 #define REG_A3XX_TEX_SAMP_1 0x00000001 @@ -2267,6 +2345,7 @@ static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val) #define REG_A3XX_TEX_CONST_0 0x00000000 #define A3XX_TEX_CONST_0_TILED 0x00000001 +#define A3XX_TEX_CONST_0_SRGB 0x00000004 #define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 #define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val) @@ -2303,6 +2382,7 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) { return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; } +#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000 #define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 #define A3XX_TEX_CONST_0_TYPE__SHIFT 30 static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 4f8dcc5fe61..3159e7adee9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -1074,77 +1074,154 @@ trans_arl(const struct instr_translater *t, add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; } -/* texture fetch/sample instructions: */ -static void -trans_samp(const struct instr_translater *t, - struct fd3_compile_context *ctx, +/* + * texture fetch/sample instructions: + */ + +struct tex_info { + int8_t order[4]; + unsigned src_wrmask, flags; +}; + +static const struct tex_info * +get_tex_info(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - struct ir3_instruction 
*instr; - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct tgsi_src_register *samp = &inst->Src[1].Register; + static const struct tex_info tex1d = { + .order = { 0, -1, -1, -1 }, /* coord.x */ + .src_wrmask = TGSI_WRITEMASK_XY, + .flags = 0, + }; + static const struct tex_info tex1ds = { + .order = { 0, -1, 2, -1 }, /* coord.xz */ + .src_wrmask = TGSI_WRITEMASK_XYZ, + .flags = IR3_INSTR_S, + }; + static const struct tex_info tex2d = { + .order = { 0, 1, -1, -1 }, /* coord.xy */ + .src_wrmask = TGSI_WRITEMASK_XY, + .flags = 0, + }; + static const struct tex_info tex2ds = { + .order = { 0, 1, 2, -1 }, /* coord.xyz */ + .src_wrmask = TGSI_WRITEMASK_XYZ, + .flags = IR3_INSTR_S, + }; + static const struct tex_info tex3d = { + .order = { 0, 1, 2, -1 }, /* coord.xyz */ + .src_wrmask = TGSI_WRITEMASK_XYZ, + .flags = IR3_INSTR_3D, + }; + static const struct tex_info tex3ds = { + .order = { 0, 1, 2, 3 }, /* coord.xyzw */ + .src_wrmask = TGSI_WRITEMASK_XYZW, + .flags = IR3_INSTR_S | IR3_INSTR_3D, + }; + static const struct tex_info txp1d = { + .order = { 0, -1, 3, -1 }, /* coord.xw */ + .src_wrmask = TGSI_WRITEMASK_XYZ, + .flags = IR3_INSTR_P, + }; + static const struct tex_info txp1ds = { + .order = { 0, -1, 2, 3 }, /* coord.xzw */ + .src_wrmask = TGSI_WRITEMASK_XYZW, + .flags = IR3_INSTR_P | IR3_INSTR_S, + }; + static const struct tex_info txp2d = { + .order = { 0, 1, 3, -1 }, /* coord.xyw */ + .src_wrmask = TGSI_WRITEMASK_XYZ, + .flags = IR3_INSTR_P, + }; + static const struct tex_info txp2ds = { + .order = { 0, 1, 2, 3 }, /* coord.xyzw */ + .src_wrmask = TGSI_WRITEMASK_XYZW, + .flags = IR3_INSTR_P | IR3_INSTR_S, + }; + static const struct tex_info txp3d = { + .order = { 0, 1, 2, 3 }, /* coord.xyzw */ + .src_wrmask = TGSI_WRITEMASK_XYZW, + .flags = IR3_INSTR_P | IR3_INSTR_3D, + }; + unsigned tex = inst->Texture.Texture; - int8_t *order; - unsigned i, flags = 0, src_wrmask; - bool needs_mov = false; - switch (t->arg) { + switch (inst->Instruction.Opcode) 
{ case TGSI_OPCODE_TEX: switch (tex) { + case TGSI_TEXTURE_1D: + return &tex1d; + case TGSI_TEXTURE_SHADOW1D: + return &tex1ds; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: - order = (int8_t[4]){ 0, 1, -1, -1 }; - src_wrmask = TGSI_WRITEMASK_XY; - break; + return &tex2d; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return &tex2ds; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - order = (int8_t[4]){ 0, 1, 2, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - flags |= IR3_INSTR_3D; - break; + return &tex3d; + case TGSI_TEXTURE_SHADOWCUBE: + return &tex3ds; default: compile_error(ctx, "unknown texture type: %s\n", tgsi_texture_names[tex]); - break; + return NULL; } break; case TGSI_OPCODE_TXP: switch (tex) { + case TGSI_TEXTURE_1D: + return &txp1d; + case TGSI_TEXTURE_SHADOW1D: + return &txp1ds; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: - order = (int8_t[4]){ 0, 1, 3, -1 }; - src_wrmask = TGSI_WRITEMASK_XYZ; - break; + return &txp2d; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return &txp2ds; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - order = (int8_t[4]){ 0, 1, 2, 3 }; - src_wrmask = TGSI_WRITEMASK_XYZW; - flags |= IR3_INSTR_3D; - break; + return &txp3d; default: compile_error(ctx, "unknown texture type: %s\n", tgsi_texture_names[tex]); break; } - flags |= IR3_INSTR_P; - break; - default: - compile_assert(ctx, 0); break; } + compile_assert(ctx, 0); + return NULL; +} + +static struct tgsi_src_register * +get_tex_coord(struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst, + const struct tex_info *tinf) +{ + struct tgsi_src_register *coord = &inst->Src[0].Register; + struct ir3_instruction *instr; + unsigned tex = inst->Texture.Texture; + bool needs_mov = false; + unsigned i; /* cat5 instruction cannot seem to handle const or relative: */ if (is_rel_or_const(coord)) needs_mov = true; + /* 1D textures we fix up w/ 0.0 as 2nd coord: */ + if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D)) + needs_mov = 
true; + /* The texture sample instructions need to coord in successive * registers/components (ie. src.xy but not src.yx). And TXP * needs the .w component in .z for 2D.. so in some cases we * might need to emit some mov instructions to shuffle things * around: */ - for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++) - if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) + for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++) + if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i])) needs_mov = true; if (needs_mov) { @@ -1157,28 +1234,55 @@ trans_samp(const struct instr_translater *t, /* need to move things around: */ tmp_src = get_internal_temp(ctx, &tmp_dst); - for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = instr_create(ctx, 1, 0); + for (j = 0; j < 4; j++) { + if (tinf->order[j] < 0) + continue; + instr = instr_create(ctx, 1, 0); /* mov */ instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; add_dst_reg(ctx, instr, &tmp_dst, j); add_src_reg(ctx, instr, coord, - src_swiz(coord, order[j])); + src_swiz(coord, tinf->order[j])); + } + + /* fix up .y coord: */ + if ((tex == TGSI_TEXTURE_1D) || + (tex == TGSI_TEXTURE_SHADOW1D)) { + instr = instr_create(ctx, 1, 0); /* mov */ + instr->cat1.src_type = type_mov; + instr->cat1.dst_type = type_mov; + add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */ + ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5; } coord = tmp_src; } + return coord; +} + +static void +trans_samp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *coord; + struct tgsi_src_register *samp = &inst->Src[1].Register; + const struct tex_info *tinf; + + tinf = get_tex_info(ctx, inst); + coord = get_tex_coord(ctx, inst, tinf); + instr = instr_create(ctx, 5, t->opc); instr->cat5.type = get_ftype(ctx); instr->cat5.samp = samp->Index; 
instr->cat5.tex = samp->Index; - instr->flags |= flags; - - add_dst_reg_wrmask(ctx, instr, &inst->Dst[0].Register, 0, - inst->Dst[0].Register.WriteMask); + instr->flags |= tinf->flags; - add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, src_wrmask); + add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); + add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask); } /* @@ -1231,15 +1335,19 @@ trans_cmp(const struct instr_translater *t, switch (t->tgsi_opc) { case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_FSEQ: condition = IR3_COND_EQ; break; case TGSI_OPCODE_SNE: + case TGSI_OPCODE_FSNE: condition = IR3_COND_NE; break; case TGSI_OPCODE_SGE: + case TGSI_OPCODE_FSGE: condition = IR3_COND_GE; break; case TGSI_OPCODE_SLT: + case TGSI_OPCODE_FSLT: condition = IR3_COND_LT; break; case TGSI_OPCODE_SLE: @@ -1269,11 +1377,15 @@ trans_cmp(const struct instr_translater *t, switch (t->tgsi_opc) { case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_FSEQ: case TGSI_OPCODE_SGE: + case TGSI_OPCODE_FSGE: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: + case TGSI_OPCODE_FSNE: case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLT: + case TGSI_OPCODE_FSLT: /* cov.u16f16 dst, tmp0 */ instr = instr_create(ctx, 1, 0); instr->cat1.src_type = get_utype(ctx); @@ -1294,6 +1406,96 @@ trans_cmp(const struct instr_translater *t, } /* + * USNE(a,b) = (a != b) ? 1 : 0 + * cmps.u32.ne dst, a, b + * + * USEQ(a,b) = (a == b) ? 1 : 0 + * cmps.u32.eq dst, a, b + * + * ISGE(a,b) = (a > b) ? 1 : 0 + * cmps.s32.ge dst, a, b + * + * USGE(a,b) = (a > b) ? 1 : 0 + * cmps.u32.ge dst, a, b + * + * ISLT(a,b) = (a < b) ? 1 : 0 + * cmps.s32.lt dst, a, b + * + * USLT(a,b) = (a < b) ? 1 : 0 + * cmps.u32.lt dst, a, b + * + * UCMP(a,b,c) = (a < 0) ? 
b : c + * cmps.u32.lt tmp0, a, {0} + * sel.b16 dst, b, tmp0, c + */ +static void +trans_icmp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register constval0; + struct tgsi_src_register *a0, *a1, *a2; + unsigned condition; + + a0 = &inst->Src[0].Register; /* a */ + a1 = &inst->Src[1].Register; /* b */ + + switch (t->tgsi_opc) { + case TGSI_OPCODE_USNE: + condition = IR3_COND_NE; + break; + case TGSI_OPCODE_USEQ: + condition = IR3_COND_EQ; + break; + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USGE: + condition = IR3_COND_GE; + break; + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_USLT: + condition = IR3_COND_LT; + break; + case TGSI_OPCODE_UCMP: + get_immediate(ctx, &constval0, 0); + a0 = &inst->Src[0].Register; /* a */ + a1 = &constval0; /* {0} */ + condition = IR3_COND_LT; + break; + + default: + compile_assert(ctx, 0); + return; + } + + if (is_const(a0) && is_const(a1)) + a0 = get_unconst(ctx, a0); + + if (t->tgsi_opc == TGSI_OPCODE_UCMP) { + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + tmp_src = get_internal_temp(ctx, &tmp_dst); + /* cmps.u32.lt tmp, a0, a1 */ + instr = instr_create(ctx, 2, t->opc); + instr->cat2.condition = condition; + vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); + + a1 = &inst->Src[1].Register; + a2 = &inst->Src[2].Register; + /* sel.{b32,b16} dst, src2, tmp, src1 */ + instr = instr_create(ctx, 3, OPC_SEL_B32); + vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); + } else { + /* cmps.{u32,s32}.<cond> dst, a0, a1 */ + instr = instr_create(ctx, 2, t->opc); + instr->cat2.condition = condition; + vectorize(ctx, instr, dst, 2, a0, 0, a1, 0); + } + put_dst(ctx, inst, dst); +} + +/* * Conditional / Flow control */ @@ -1533,7 +1735,7 @@ trans_endif(const struct instr_translater *t, } /* - * Kill / Kill-if + * Kill */ static void @@ -1580,6 +1782,76 
@@ trans_kill(const struct instr_translater *t, } /* + * Kill-If + */ + +static void +trans_killif(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_src_register *src = &inst->Src[0].Register; + struct ir3_instruction *instr, *immed, *cond = NULL; + bool inv = false; + + immed = create_immed(ctx, 0.0); + + /* cmps.f.ne p0.x, cond, {0.0} */ + instr = instr_create(ctx, 2, OPC_CMPS_F); + instr->cat2.condition = IR3_COND_NE; + ir3_reg_create(instr, regid(REG_P0, 0), 0); + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; + add_src_reg(ctx, instr, src, src->SwizzleX); + + cond = instr; + + /* kill p0.x */ + instr = instr_create(ctx, 0, OPC_KILL); + instr->cat0.inv = inv; + ir3_reg_create(instr, 0, 0); /* dummy dst */ + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; + + ctx->kill[ctx->kill_count++] = instr; + +} +/* + * I2F / U2F / F2I / F2U + */ + +static void +trans_cov(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src = &inst->Src[0].Register; + + // cov.f32s32 dst, tmp0 / + instr = instr_create(ctx, 1, 0); + switch (t->tgsi_opc) { + case TGSI_OPCODE_U2F: + instr->cat1.src_type = TYPE_U32; + instr->cat1.dst_type = TYPE_F32; + break; + case TGSI_OPCODE_I2F: + instr->cat1.src_type = TYPE_S32; + instr->cat1.dst_type = TYPE_F32; + break; + case TGSI_OPCODE_F2U: + instr->cat1.src_type = TYPE_F32; + instr->cat1.dst_type = TYPE_U32; + break; + case TGSI_OPCODE_F2I: + instr->cat1.src_type = TYPE_F32; + instr->cat1.dst_type = TYPE_S32; + break; + + } + vectorize(ctx, instr, dst, 1, src, 0); +} + +/* * Handlers for TGSI instructions which do have 1:1 mapping to native * instructions: */ @@ -1616,9 +1888,11 @@ instr_cat2(const struct instr_translater *t, switch (t->tgsi_opc) { case TGSI_OPCODE_ABS: + case 
TGSI_OPCODE_IABS: src0_flags = IR3_REG_ABS; break; case TGSI_OPCODE_SUB: + case TGSI_OPCODE_INEG: src1_flags = IR3_REG_NEGATE; break; } @@ -1724,6 +1998,22 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), + INSTR(UADD, instr_cat2, .opc = OPC_ADD_U), + INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S), + INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U), + INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S), + INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U), + INSTR(AND, instr_cat2, .opc = OPC_AND_B), + INSTR(OR, instr_cat2, .opc = OPC_OR_B), + INSTR(NOT, instr_cat2, .opc = OPC_NOT_B), + INSTR(XOR, instr_cat2, .opc = OPC_XOR_B), + INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U), + INSTR(SHL, instr_cat2, .opc = OPC_SHL_B), + INSTR(USHR, instr_cat2, .opc = OPC_SHR_B), + INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B), + INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S), + INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S), + INSTR(AND, instr_cat2, .opc = OPC_AND_B), INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), INSTR(CLAMP, trans_clamp), @@ -1741,16 +2031,33 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), INSTR(SGT, trans_cmp), INSTR(SLT, trans_cmp), + INSTR(FSLT, trans_cmp), INSTR(SGE, trans_cmp), + INSTR(FSGE, trans_cmp), INSTR(SLE, trans_cmp), INSTR(SNE, trans_cmp), + INSTR(FSNE, trans_cmp), INSTR(SEQ, trans_cmp), + INSTR(FSEQ, trans_cmp), INSTR(CMP, trans_cmp), + INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U), + INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U), + INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S), + INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U), + INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S), + INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U), + INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U), INSTR(IF, trans_if), + INSTR(UIF, 
trans_if), INSTR(ELSE, trans_else), INSTR(ENDIF, trans_endif), INSTR(END, instr_cat0, .opc = OPC_END), INSTR(KILL, trans_kill, .opc = OPC_KILL), + INSTR(KILL_IF, trans_killif, .opc = OPC_KILL), + INSTR(I2F, trans_cov), + INSTR(U2F, trans_cov), + INSTR(F2I, trans_cov), + INSTR(F2U, trans_cov), }; static fd3_semantic @@ -1935,6 +2242,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) DBG("decl in -> r%d", i); + compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); + so->inputs[n].semantic = decl_semantic(&decl->Semantic); so->inputs[n].compmask = (1 << ncomp) - 1; so->inputs[n].regid = r; @@ -2024,6 +2333,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) ncomp = 4; + compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); + so->outputs[n].semantic = decl_semantic(&decl->Semantic); so->outputs[n].regid = regid(i, comp); @@ -2147,6 +2458,7 @@ compile_instructions(struct fd3_compile_context *ctx) struct tgsi_full_immediate *imm = &ctx->parser.FullToken.FullImmediate; unsigned n = ctx->so->immediates_count++; + compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates)); memcpy(ctx->so->immediates[n].val, imm->u, 16); break; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index ddb69243c11..0f7044b56f1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -1324,6 +1324,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) DBG("decl in -> r%d", i + base); // XXX + compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); + so->inputs[n].semantic = decl_semantic(&decl->Semantic); so->inputs[n].compmask = (1 << ncomp) - 1; so->inputs[n].ncomp = ncomp; @@ -1410,6 +1412,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) for (i = decl->Range.First; i <= decl->Range.Last; i++) { unsigned n = so->outputs_count++; + 
compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); so->outputs[n].semantic = decl_semantic(&decl->Semantic); so->outputs[n].regid = regid(i + base, comp); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index f36cbd946a0..847414ac082 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -33,6 +33,7 @@ #include "fd3_emit.h" #include "fd3_gmem.h" #include "fd3_program.h" +#include "fd3_query.h" #include "fd3_rasterizer.h" #include "fd3_texture.h" #include "fd3_zsa.h" @@ -134,5 +135,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv) fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx); fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx); + fd3_query_context_init(pctx); + return pctx; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 2e2a66dc616..17f3dcfe04e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -406,7 +406,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4)); - for (i = 0, j = -1; j < (int)fp->inputs_count; i++) { + for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); @@ -428,7 +428,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_RING(ring, reg); } - for (i = 0, j = -1; j < (int)fp->inputs_count; i++) { + for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 0439d39dbff..28ad52ecd7c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -91,7 +91,7 @@ struct fd3_shader_variant { struct { fd3_semantic semantic; uint8_t regid; - } outputs[16]; + } outputs[16 + 2]; /* +POSITION +PSIZE */ bool writes_pos, writes_psize; /* vertices/inputs: */ @@ -104,7 +104,7 @@ struct fd3_shader_variant { /* in theory inloc of fs should match outloc of vs: */ uint8_t inloc; uint8_t bary; - } inputs[16]; + } inputs[16 + 2]; /* +POSITION +FACE */ unsigned total_in; /* sum of inputs (scalar) */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c new file mode 100644 index 00000000000..77ae8b6b1d1 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c @@ -0,0 +1,139 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "freedreno_query_hw.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +#include "fd3_query.h" +#include "fd3_util.h" + + +struct fd_rb_samp_ctrs { + uint64_t ctr[16]; +}; + +/* + * Occlusion Query: + * + * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they + * interpret results + */ + +static struct fd_hw_sample * +occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd_hw_sample *samp = + fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs)); + + /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of + * HW_QUERY_BASE_REG register: + */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000); + OUT_RING(ring, HW_QUERY_BASE_REG); + OUT_RING(ring, samp->offset); + + OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1); + OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, USE_VISIBILITY)); + OUT_RING(ring, 0); /* NumIndices */ + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, ZPASS_DONE); + + OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1); + OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE); + + OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1); + OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 | + A3XX_VBIF_PERF_CNT_EN_CNT1 | + A3XX_VBIF_PERF_CNT_EN_PWRCNT0 | + A3XX_VBIF_PERF_CNT_EN_PWRCNT1 | + A3XX_VBIF_PERF_CNT_EN_PWRCNT2); + + return samp; +} + +static uint64_t +count_samples(const struct fd_rb_samp_ctrs *start, + const struct fd_rb_samp_ctrs *end) +{ + uint64_t n = 
0; + unsigned i; + + /* not quite sure what all of these are, possibly different + * counters for each MRT render target: + */ + for (i = 0; i < 16; i += 4) + n += end->ctr[i] - start->ctr[i]; + + return n; +} + +static void +occlusion_counter_accumulate_result(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result) +{ + uint64_t n = count_samples(start, end); + result->u64 += n; +} + +static void +occlusion_predicate_accumulate_result(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result) +{ + uint64_t n = count_samples(start, end); + result->b |= (n > 0); +} + +static const struct fd_hw_sample_provider occlusion_counter = { + .query_type = PIPE_QUERY_OCCLUSION_COUNTER, + .active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */ + .get_sample = occlusion_get_sample, + .accumulate_result = occlusion_counter_accumulate_result, +}; + +static const struct fd_hw_sample_provider occlusion_predicate = { + .query_type = PIPE_QUERY_OCCLUSION_PREDICATE, + .active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? 
*/ + .get_sample = occlusion_get_sample, + .accumulate_result = occlusion_predicate_accumulate_result, +}; + +void fd3_query_context_init(struct pipe_context *pctx) +{ + fd_hw_query_register_provider(pctx, &occlusion_counter); + fd_hw_query_register_provider(pctx, &occlusion_predicate); +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.h b/src/gallium/drivers/freedreno/a3xx/fd3_query.h new file mode 100644 index 00000000000..842c822aa0f --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FD3_QUERY_H_ +#define FD3_QUERY_H_ + +#include "pipe/p_context.h" + +void fd3_query_context_init(struct pipe_context *pctx); + +#endif /* FD3_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index c2d3249cd91..1a1a7cbd653 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -40,6 +40,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) { struct fd3_rasterizer_stateobj *so; + float psize_min, psize_max; so = CALLOC_STRUCT(fd3_rasterizer_stateobj); if (!so) @@ -47,19 +48,28 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, so->base = *cso; + if (cso->point_size_per_vertex) { + psize_min = util_get_min_point_size(cso); + psize_max = 8192; + } else { + /* Force the point size to be as if the vertex output was disabled. */ + psize_min = cso->point_size; + psize_max = cso->point_size; + } + /* if (cso->line_stipple_enable) { ??? TODO line stipple } TODO cso->half_pixel_center - TODO cso->point_size - TODO psize_min/psize_max if (cso->multisample) TODO */ so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */ - so->gras_su_point_minmax = 0xffc00010; /* ??? */ - so->gras_su_point_size = 0x00000008; /* ??? 
*/ + so->gras_su_point_minmax = + A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) | + A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2); + so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size/2); so->gras_su_poly_offset_scale = A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale); so->gras_su_poly_offset_offset = diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index d15cf379190..2081775083a 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -30,6 +30,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_inlines.h" +#include "util/u_format.h" #include "fd3_texture.h" #include "fd3_util.h" @@ -99,6 +100,9 @@ fd3_sampler_state_create(struct pipe_context *pctx, A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) | A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r)); + if (cso->compare_mode) + so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { so->texsamp1 = A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | @@ -158,6 +162,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A3XX_TEX_CONST_0_MIPLVLS(miplevels) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); + + if (util_format_is_srgb(cso->format)) + so->texconst0 |= A3XX_TEX_CONST_0_SRGB; + so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(prsc->width0) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c index baafc7831dc..682b47d0836 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c @@ -235,6 +235,10 @@ fd3_pipe2tex(enum pipe_format format) case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: + case 
PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_R8G8B8A8_SRGB: + case PIPE_FORMAT_R8G8B8X8_SRGB: return TFMT_NORM_UINT_8_8_8_8; case PIPE_FORMAT_Z24X8_UNORM: @@ -275,6 +279,12 @@ fd3_pipe2fetchsize(enum pipe_format format) case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_R8G8B8A8_SRGB: + case PIPE_FORMAT_R8G8B8X8_SRGB: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: return TFETCH_4_BYTE; @@ -379,14 +389,14 @@ fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, { const struct util_format_description *desc = util_format_description(format); - uint8_t swiz[] = { + unsigned char swiz[4] = { swizzle_r, swizzle_g, swizzle_b, swizzle_a, - PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE, - PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE, - }; - - return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) | - A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) | - A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) | - A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]])); + }, rswiz[4]; + + util_format_compose_swizzles(desc->swizzle, swiz, rswiz); + + return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | + A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | + A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | + A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); } diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index 4d99be17985..3610543e7ef 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 
14:47:15) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57) Copyright (C) 2013-2014 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -116,6 +116,39 @@ enum adreno_rb_depth_format { DEPTHX_24_8 = 1, }; +enum adreno_rb_copy_control_mode { + RB_COPY_RESOLVE = 1, + RB_COPY_CLEAR = 2, + RB_COPY_DEPTH_STENCIL = 5, +}; + +enum a3xx_render_mode { + RB_RENDERING_PASS = 0, + RB_TILING_PASS = 1, + RB_RESOLVE_PASS = 2, + RB_COMPUTE_PASS = 3, +}; + +enum a3xx_msaa_samples { + MSAA_ONE = 0, + MSAA_TWO = 1, + MSAA_FOUR = 2, +}; + +enum a3xx_threadmode { + MULTI = 0, + SINGLE = 1, +}; + +enum a3xx_instrbuffermode { + BUFFER = 1, +}; + +enum a3xx_threadsize { + TWO_QUADS = 0, + FOUR_QUADS = 1, +}; + #define REG_AXXX_CP_RB_BASE 0x000001c0 #define REG_AXXX_CP_RB_CNTL 0x000001c1 diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h 
b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index 68a289398aa..52b454b32b5 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57) Copyright (C) 2013-2014 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -164,6 +164,11 @@ enum adreno_pm4_type3_packets { CP_SET_BIN = 76, CP_TEST_TWO_MEMS = 113, CP_WAIT_FOR_ME = 19, + CP_SET_DRAW_STATE = 67, + CP_DRAW_INDX_OFFSET = 56, + CP_DRAW_INDIRECT = 40, + CP_DRAW_INDX_INDIRECT = 41, + CP_DRAW_AUTO = 36, IN_IB_PREFETCH_END = 23, 
IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -351,6 +356,93 @@ static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; } +#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 +#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000700 +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000800 +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 11 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_NOT_EOP 0x00001000 +#define CP_DRAW_INDX_OFFSET_0_SMALL_INDEX 0x00002000 +#define CP_DRAW_INDX_OFFSET_0_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK 0xffff0000 +#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT 16 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 + 
+#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 +#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_BASE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 +#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_SIZE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK; +} + +#define REG_CP_SET_DRAW_STATE_0 0x00000000 +#define CP_SET_DRAW_STATE_0_COUNT__MASK 0x0000ffff +#define CP_SET_DRAW_STATE_0_COUNT__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE_0_COUNT(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE_0_COUNT__SHIFT) & CP_SET_DRAW_STATE_0_COUNT__MASK; +} +#define CP_SET_DRAW_STATE_0_DIRTY 0x00010000 +#define CP_SET_DRAW_STATE_0_DISABLE 0x00020000 +#define CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS 0x00040000 +#define CP_SET_DRAW_STATE_0_LOAD_IMMED 0x00080000 +#define CP_SET_DRAW_STATE_0_GROUP_ID__MASK 0x1f000000 +#define CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT 24 +static inline uint32_t CP_SET_DRAW_STATE_0_GROUP_ID(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE_0_GROUP_ID__MASK; +} + +#define REG_CP_SET_DRAW_STATE_1 0x00000001 +#define CP_SET_DRAW_STATE_1_ADDR__MASK 0xffffffff +#define CP_SET_DRAW_STATE_1_ADDR__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE_1_ADDR(uint32_t val) +{ + return ((val) 
<< CP_SET_DRAW_STATE_1_ADDR__SHIFT) & CP_SET_DRAW_STATE_1_ADDR__MASK; +} + #define REG_CP_SET_BIN_0 0x00000000 #define REG_CP_SET_BIN_1 0x00000001 diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index a8fe3111c3d..496a4227099 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -34,6 +34,7 @@ #include "freedreno_state.h" #include "freedreno_gmem.h" #include "freedreno_query.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" static struct fd_ringbuffer *next_rb(struct fd_context *ctx) @@ -145,6 +146,7 @@ fd_context_destroy(struct pipe_context *pctx) DBG(""); fd_prog_fini(pctx); + fd_hw_query_fini(pctx); util_slab_destroy(&ctx->transfer_pool); @@ -221,6 +223,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, fd_query_context_init(pctx); fd_texture_init(pctx); fd_state_init(pctx); + fd_hw_query_init(pctx); ctx->blitter = util_blitter_create(pctx); if (!ctx->blitter) diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index a50e6236903..46984823427 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -33,6 +33,7 @@ #include "pipe/p_context.h" #include "indices/u_primconvert.h" #include "util/u_blitter.h" +#include "util/u_double_list.h" #include "util/u_slab.h" #include "util/u_string.h" @@ -82,16 +83,80 @@ struct fd_vertex_stateobj { unsigned num_elements; }; +/* Bitmask of stages in rendering that a particular query query is + * active. Queries will be automatically started/stopped (generating + * additional fd_hw_sample_period's) on entrance/exit from stages that + * are applicable to the query. + * + * NOTE: set the stage to NULL at end of IB to ensure no query is still + * active. 
Things aren't going to work out the way you want if a query + * is active across IB's (or between tile IB and draw IB) + */ +enum fd_render_stage { + FD_STAGE_NULL = 0x00, + FD_STAGE_DRAW = 0x01, + FD_STAGE_CLEAR = 0x02, + /* TODO before queries which include MEM2GMEM or GMEM2MEM will + * work we will need to call fd_hw_query_prepare() from somewhere + * appropriate so that queries in the tiling IB get backed with + * memory to write results to. + */ + FD_STAGE_MEM2GMEM = 0x04, + FD_STAGE_GMEM2MEM = 0x08, + /* used for driver internal draws (ie. util_blitter_blit()): */ + FD_STAGE_BLIT = 0x10, +}; + +#define MAX_HW_SAMPLE_PROVIDERS 4 +struct fd_hw_sample_provider; +struct fd_hw_sample; + struct fd_context { struct pipe_context base; struct fd_device *dev; struct fd_screen *screen; + struct blitter_context *blitter; struct primconvert_context *primconvert; + /* slab for pipe_transfer allocations: */ struct util_slab_mempool transfer_pool; + /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */ + struct util_slab_mempool sample_pool; + struct util_slab_mempool sample_period_pool; + + /* next sample offset.. incremented for each sample in the batch/ + * submit, reset to zero on next submit. 
+ */ + uint32_t next_sample_offset; + + /* sample-providers for hw queries: */ + const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS]; + + /* cached samples (in case multiple queries need to reference + * the same sample snapshot) + */ + struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS]; + + /* tracking for current stage, to know when to start/stop + * any active queries: + */ + enum fd_render_stage stage; + + /* list of active queries: */ + struct list_head active_queries; + + /* list of queries that are not active, but were active in the + * current submit: + */ + struct list_head current_queries; + + /* current query result bo and tile stride: */ + struct fd_bo *query_bo; + uint32_t query_tile_stride; + /* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to * DI_PT_x value to use for draw initiator. There are some * slight differences between generation: diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 11bb8d8333d..e3c8cc8e5a0 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -36,6 +36,7 @@ #include "freedreno_context.h" #include "freedreno_state.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" @@ -70,7 +71,7 @@ fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, idx_bo = fd_resource(idx->buffer)->bo; idx_type = size2indextype(idx->index_size); idx_size = idx->index_size * info->count; - idx_offset = idx->offset; + idx_offset = idx->offset + (info->start * idx->index_size); src_sel = DI_SRC_SEL_DMA; } else { idx_bo = NULL; @@ -156,6 +157,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) /* and any buffers used, need to be resolved: */ ctx->resolve |= buffers; + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW); ctx->draw(ctx, info); } @@ -188,6 +190,8 @@ fd_clear(struct pipe_context *pctx, unsigned 
buffers, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR); + ctx->clear(ctx, buffers, color, depth, stencil); ctx->dirty |= FD_DIRTY_ZSA | diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 2d4de442452..861ebf5675e 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -35,6 +35,7 @@ #include "freedreno_gmem.h" #include "freedreno_context.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" /* @@ -273,17 +274,24 @@ render_tiles(struct fd_context *ctx) ctx->emit_tile_prep(ctx, tile); - if (ctx->restore) + if (ctx->restore) { + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM); ctx->emit_tile_mem2gmem(ctx, tile); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + } ctx->emit_tile_renderprep(ctx, tile); + fd_hw_query_prepare_tile(ctx, i, ctx->ring); + /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); fd_reset_wfi(ctx); /* emit gmem2mem to transfer tile back to system memory: */ + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM); ctx->emit_tile_gmem2mem(ctx, tile); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); } } @@ -292,6 +300,8 @@ render_sysmem(struct fd_context *ctx) { ctx->emit_sysmem_prep(ctx); + fd_hw_query_prepare_tile(ctx, 0, ctx->ring); + /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); fd_reset_wfi(ctx); @@ -314,6 +324,11 @@ fd_gmem_render_tiles(struct pipe_context *pctx) } } + /* close out the draw cmds by making sure any active queries are + * paused: + */ + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); + /* mark the end of the clear/draw cmds before emitting per-tile cmds: */ fd_ringmarker_mark(ctx->draw_end); fd_ringmarker_mark(ctx->binning_end); @@ -326,6 +341,7 @@ 
fd_gmem_render_tiles(struct pipe_context *pctx) DBG("rendering sysmem (%s/%s)", util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_prepare(ctx, 1); render_sysmem(ctx); ctx->stats.batch_sysmem++; } else { @@ -334,6 +350,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx) DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), util_format_short_name(pipe_surface_format(pfb->zsbuf))); + fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y); render_tiles(ctx); ctx->stats.batch_gmem++; } diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index 3913896bf56..8753a4b02c9 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -1,7 +1,7 @@ /* -*- mode: C; c-file-style: "k&r"; ttxab-width 4; indent-tabs-mode: t; -*- */ /* - * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,63 +27,27 @@ */ #include "pipe/p_state.h" -#include "util/u_string.h" #include "util/u_memory.h" -#include "util/u_inlines.h" -#include "os/os_time.h" #include "freedreno_query.h" +#include "freedreno_query_sw.h" +#include "freedreno_query_hw.h" #include "freedreno_context.h" #include "freedreno_util.h" -#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) -#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */ -#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */ -#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */ -#define FD_QUERY_BATCH_RESTORE 
(PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */ - -/* Currently just simple cpu query's supported.. probably need - * to refactor this a bit when I'm eventually ready to add gpu - * queries: +/* + * Pipe Query interface: */ -struct fd_query { - int type; - /* storage for the collected data */ - union pipe_query_result data; - bool active; - uint64_t begin_value, end_value; - uint64_t begin_time, end_time; -}; - -static inline struct fd_query * -fd_query(struct pipe_query *pq) -{ - return (struct fd_query *)pq; -} static struct pipe_query * fd_create_query(struct pipe_context *pctx, unsigned query_type) { + struct fd_context *ctx = fd_context(pctx); struct fd_query *q; - switch (query_type) { - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_PRIMITIVES_EMITTED: - case FD_QUERY_DRAW_CALLS: - case FD_QUERY_BATCH_TOTAL: - case FD_QUERY_BATCH_SYSMEM: - case FD_QUERY_BATCH_GMEM: - case FD_QUERY_BATCH_RESTORE: - break; - default: - return NULL; - } - - q = CALLOC_STRUCT(fd_query); + q = fd_sw_create_query(ctx, query_type); if (!q) - return NULL; - - q->type = query_type; + q = fd_hw_create_query(ctx, query_type); return (struct pipe_query *) q; } @@ -92,64 +56,21 @@ static void fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq) { struct fd_query *q = fd_query(pq); - free(q); -} - -static uint64_t -read_counter(struct pipe_context *pctx, int type) -{ - struct fd_context *ctx = fd_context(pctx); - switch (type) { - case PIPE_QUERY_PRIMITIVES_GENERATED: - /* for now same thing as _PRIMITIVES_EMITTED */ - case PIPE_QUERY_PRIMITIVES_EMITTED: - return ctx->stats.prims_emitted; - case FD_QUERY_DRAW_CALLS: - return ctx->stats.draw_calls; - case FD_QUERY_BATCH_TOTAL: - return ctx->stats.batch_total; - case FD_QUERY_BATCH_SYSMEM: - return ctx->stats.batch_sysmem; - case FD_QUERY_BATCH_GMEM: - return ctx->stats.batch_gmem; - case FD_QUERY_BATCH_RESTORE: - return ctx->stats.batch_restore; - } - return 0; -} - -static bool 
-is_rate_query(struct fd_query *q) -{ - switch (q->type) { - case FD_QUERY_BATCH_TOTAL: - case FD_QUERY_BATCH_SYSMEM: - case FD_QUERY_BATCH_GMEM: - case FD_QUERY_BATCH_RESTORE: - return true; - default: - return false; - } + q->funcs->destroy_query(fd_context(pctx), q); } static void fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq) { struct fd_query *q = fd_query(pq); - q->active = true; - q->begin_value = read_counter(pctx, q->type); - if (is_rate_query(q)) - q->begin_time = os_time_get(); + q->funcs->begin_query(fd_context(pctx), q); } static void fd_end_query(struct pipe_context *pctx, struct pipe_query *pq) { struct fd_query *q = fd_query(pq); - q->active = false; - q->end_value = read_counter(pctx, q->type); - if (is_rate_query(q)) - q->end_time = os_time_get(); + q->funcs->end_query(fd_context(pctx), q); } static boolean @@ -157,21 +78,7 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq, boolean wait, union pipe_query_result *result) { struct fd_query *q = fd_query(pq); - - if (q->active) - return false; - - util_query_clear_result(result, q->type); - - result->u64 = q->end_value - q->begin_value; - - if (is_rate_query(q)) { - double fps = (result->u64 * 1000000) / - (double)(q->end_time - q->begin_time); - result->u64 = (uint64_t)fps; - } - - return true; + return q->funcs->get_query_result(fd_context(pctx), q, wait, result); } static int diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index 8bcbba2fdc9..bc9a7a20559 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -1,7 +1,7 @@ /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ /* - * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> + * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated 
documentation files (the "Software"), @@ -31,6 +31,37 @@ #include "pipe/p_context.h" +struct fd_context; +struct fd_query; + +struct fd_query_funcs { + void (*destroy_query)(struct fd_context *ctx, + struct fd_query *q); + void (*begin_query)(struct fd_context *ctx, struct fd_query *q); + void (*end_query)(struct fd_context *ctx, struct fd_query *q); + boolean (*get_query_result)(struct fd_context *ctx, + struct fd_query *q, boolean wait, + union pipe_query_result *result); +}; + +struct fd_query { + const struct fd_query_funcs *funcs; + bool active; + int type; +}; + +static inline struct fd_query * +fd_query(struct pipe_query *pq) +{ + return (struct fd_query *)pq; +} + +#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0) +#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */ +#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */ +#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */ +#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */ + void fd_query_screen_init(struct pipe_screen *pscreen); void fd_query_context_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c new file mode 100644 index 00000000000..38bd3dedad4 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -0,0 +1,465 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of 
the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "freedreno_query_hw.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +struct fd_hw_sample_period { + struct fd_hw_sample *start, *end; + struct list_head list; +}; + +/* maps query_type to sample provider idx: */ +static int pidx(unsigned query_type) +{ + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + return 0; + case PIPE_QUERY_OCCLUSION_PREDICATE: + return 1; + default: + return -1; + } +} + +static struct fd_hw_sample * +get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring, + unsigned query_type) +{ + struct fd_hw_sample *samp = NULL; + int idx = pidx(query_type); + + if (!ctx->sample_cache[idx]) { + ctx->sample_cache[idx] = + ctx->sample_providers[idx]->get_sample(ctx, ring); + } + + fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]); + + return samp; +} + +static void +clear_sample_cache(struct fd_context *ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++) + fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL); +} + +static bool +is_active(struct 
fd_hw_query *hq, enum fd_render_stage stage) +{ + return !!(hq->provider->active & stage); +} + + +static void +resume_query(struct fd_context *ctx, struct fd_hw_query *hq, + struct fd_ringbuffer *ring) +{ + assert(!hq->period); + hq->period = util_slab_alloc(&ctx->sample_period_pool); + list_inithead(&hq->period->list); + hq->period->start = get_sample(ctx, ring, hq->base.type); + /* NOTE: util_slab_alloc() does not zero out the buffer: */ + hq->period->end = NULL; +} + +static void +pause_query(struct fd_context *ctx, struct fd_hw_query *hq, + struct fd_ringbuffer *ring) +{ + assert(hq->period && !hq->period->end); + hq->period->end = get_sample(ctx, ring, hq->base.type); + list_addtail(&hq->period->list, &hq->current_periods); + hq->period = NULL; +} + +static void +destroy_periods(struct fd_context *ctx, struct list_head *list) +{ + struct fd_hw_sample_period *period, *s; + LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) { + fd_hw_sample_reference(ctx, &period->start, NULL); + fd_hw_sample_reference(ctx, &period->end, NULL); + list_del(&period->list); + util_slab_free(&ctx->sample_period_pool, period); + } +} + +static void +fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + + destroy_periods(ctx, &hq->periods); + destroy_periods(ctx, &hq->current_periods); + list_del(&hq->list); + + free(hq); +} + +static void +fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + if (q->active) + return; + + /* begin_query() should clear previous results: */ + destroy_periods(ctx, &hq->periods); + + if (is_active(hq, ctx->stage)) + resume_query(ctx, hq, ctx->ring); + + q->active = true; + + /* add to active list: */ + list_del(&hq->list); + list_addtail(&hq->list, &ctx->active_queries); +} + +static void +fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) +{ + struct fd_hw_query *hq = fd_hw_query(q); + if (!q->active) + return; + if (is_active(hq, 
ctx->stage)) + pause_query(ctx, hq, ctx->ring); + q->active = false; + /* move to current list: */ + list_del(&hq->list); + list_addtail(&hq->list, &ctx->current_queries); +} + +/* helper to get ptr to specified sample: */ +static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr) +{ + return ((char *)ptr) + (samp->tile_stride * n) + samp->offset; +} + +static boolean +fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, + boolean wait, union pipe_query_result *result) +{ + struct fd_hw_query *hq = fd_hw_query(q); + const struct fd_hw_sample_provider *p = hq->provider; + struct fd_hw_sample_period *period; + + if (q->active) + return false; + + /* if the app tries to read back the query result before the + * batch is submitted, that forces us to flush so that there + * are actually results to wait for: + */ + if (!LIST_IS_EMPTY(&hq->list)) { + DBG("reading query result forces flush!"); + ctx->needs_flush = true; + fd_context_render(&ctx->base); + } + + util_query_clear_result(result, q->type); + + if (LIST_IS_EMPTY(&hq->periods)) + return true; + + assert(LIST_IS_EMPTY(&hq->list)); + assert(LIST_IS_EMPTY(&hq->current_periods)); + assert(!hq->period); + + if (LIST_IS_EMPTY(&hq->periods)) + return true; + + /* if !wait, then check the last sample (the one most likely to + * not be ready yet) and bail if it is not ready: + */ + if (!wait) { + int ret; + + period = LIST_ENTRY(struct fd_hw_sample_period, + hq->periods.prev, list); + + ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe, + DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC); + if (ret) + return false; + + fd_bo_cpu_fini(period->end->bo); + } + + /* sum the result across all sample periods: */ + LIST_FOR_EACH_ENTRY(period, &hq->periods, list) { + struct fd_hw_sample *start = period->start; + struct fd_hw_sample *end = period->end; + unsigned i; + + /* start and end samples should be from same batch: */ + assert(start->bo == end->bo); + assert(start->num_tiles ==
end->num_tiles); + + for (i = 0; i < start->num_tiles; i++) { + void *ptr; + + fd_bo_cpu_prep(start->bo, ctx->screen->pipe, + DRM_FREEDRENO_PREP_READ); + + ptr = fd_bo_map(start->bo); + + p->accumulate_result(ctx, sampptr(period->start, i, ptr), + sampptr(period->end, i, ptr), result); + + fd_bo_cpu_fini(start->bo); + } + } + + return true; +} + +static const struct fd_query_funcs hw_query_funcs = { + .destroy_query = fd_hw_destroy_query, + .begin_query = fd_hw_begin_query, + .end_query = fd_hw_end_query, + .get_query_result = fd_hw_get_query_result, +}; + +struct fd_query * +fd_hw_create_query(struct fd_context *ctx, unsigned query_type) +{ + struct fd_hw_query *hq; + struct fd_query *q; + int idx = pidx(query_type); + + if ((idx < 0) || !ctx->sample_providers[idx]) + return NULL; + + hq = CALLOC_STRUCT(fd_hw_query); + if (!hq) + return NULL; + + hq->provider = ctx->sample_providers[idx]; + + list_inithead(&hq->periods); + list_inithead(&hq->current_periods); + list_inithead(&hq->list); + + q = &hq->base; + q->funcs = &hw_query_funcs; + q->type = query_type; + + return q; +} + +struct fd_hw_sample * +fd_hw_sample_init(struct fd_context *ctx, uint32_t size) +{ + struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool); + pipe_reference_init(&samp->reference, 1); + samp->size = size; + samp->offset = ctx->next_sample_offset; + /* NOTE: util_slab_alloc() does not zero out the buffer: */ + samp->bo = NULL; + samp->num_tiles = 0; + samp->tile_stride = 0; + ctx->next_sample_offset += size; + return samp; +} + +void +__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp) +{ + if (samp->bo) + fd_bo_del(samp->bo); + util_slab_free(&ctx->sample_pool, samp); +} + +static void +prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride) +{ + if (samp->bo) { + assert(samp->bo == bo); + assert(samp->num_tiles == num_tiles); + assert(samp->tile_stride == tile_stride); + return; + } + samp->bo = bo; + 
samp->num_tiles = num_tiles; + samp->tile_stride = tile_stride; +} + +static void +prepare_query(struct fd_hw_query *hq, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride) +{ + struct fd_hw_sample_period *period, *s; + + /* prepare all the samples in the query: */ + LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) { + prepare_sample(period->start, bo, num_tiles, tile_stride); + prepare_sample(period->end, bo, num_tiles, tile_stride); + + /* move from current_periods list to periods list: */ + list_del(&period->list); + list_addtail(&period->list, &hq->periods); + } +} + +static void +prepare_queries(struct fd_context *ctx, struct fd_bo *bo, + uint32_t num_tiles, uint32_t tile_stride, + struct list_head *list, bool remove) +{ + struct fd_hw_query *hq, *s; + LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) { + prepare_query(hq, bo, num_tiles, tile_stride); + if (remove) + list_delinit(&hq->list); + } +} + +/* called from gmem code once total storage requirements are known (ie. 
+ * number of samples times number of tiles) + */ +void +fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles) +{ + uint32_t tile_stride = ctx->next_sample_offset; + struct fd_bo *bo; + + if (ctx->query_bo) + fd_bo_del(ctx->query_bo); + + if (tile_stride > 0) { + bo = fd_bo_new(ctx->dev, tile_stride * num_tiles, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM); + } else { + bo = NULL; + } + + ctx->query_bo = bo; + ctx->query_tile_stride = tile_stride; + + prepare_queries(ctx, bo, num_tiles, tile_stride, + &ctx->active_queries, false); + prepare_queries(ctx, bo, num_tiles, tile_stride, + &ctx->current_queries, true); + + /* reset things for next batch: */ + ctx->next_sample_offset = 0; +} + +void +fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, + struct fd_ringbuffer *ring) +{ + uint32_t tile_stride = ctx->query_tile_stride; + uint32_t offset = tile_stride * n; + + /* bail if no queries: */ + if (tile_stride == 0) + return; + + fd_wfi(ctx, ring); + OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1); + OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0); +} + +void +fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum fd_render_stage stage) +{ + /* special case: internal blits (like mipmap level generation) + * go through normal draw path (via util_blitter_blit()).. 
but + * we need to ignore the FD_STAGE_DRAW which will be set, so we + * don't enable queries which should be paused during internal + * blits: + */ + if ((ctx->stage == FD_STAGE_BLIT) && + (stage != FD_STAGE_NULL)) + return; + + if (stage != ctx->stage) { + struct fd_hw_query *hq; + LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) { + bool was_active = is_active(hq, ctx->stage); + bool now_active = is_active(hq, stage); + + if (now_active && !was_active) + resume_query(ctx, hq, ring); + else if (was_active && !now_active) + pause_query(ctx, hq, ring); + } + } + clear_sample_cache(ctx); + ctx->stage = stage; +} + +void +fd_hw_query_register_provider(struct pipe_context *pctx, + const struct fd_hw_sample_provider *provider) +{ + struct fd_context *ctx = fd_context(pctx); + int idx = pidx(provider->query_type); + + assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS)); + assert(!ctx->sample_providers[idx]); + + ctx->sample_providers[idx] = provider; +} + +void +fd_hw_query_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample), + 16, UTIL_SLAB_SINGLETHREADED); + util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period), + 16, UTIL_SLAB_SINGLETHREADED); + list_inithead(&ctx->active_queries); + list_inithead(&ctx->current_queries); +} + +void +fd_hw_query_fini(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + util_slab_destroy(&ctx->sample_pool); + util_slab_destroy(&ctx->sample_period_pool); +} diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h new file mode 100644 index 00000000000..62baa3ac5b5 --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h @@ -0,0 +1,164 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#ifndef FREEDRENO_QUERY_HW_H_ +#define FREEDRENO_QUERY_HW_H_ + +#include "util/u_double_list.h" + +#include "freedreno_query.h" +#include "freedreno_context.h" + + +/* + * HW Queries: + * + * See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries + * + * Hardware queries will be specific to gpu generation, but they need + * some common infrastructure for triggering start/stop samples at + * various points (for example, to exclude mem2gmem/gmem2mem or clear) + * as well as per tile tracking. + * + * NOTE: in at least some cases hw writes sample values to memory addr + * specified in some register. So we don't really have the option to + * just sample the same counter multiple times for multiple different + * queries with the same query_type. 
So we cache per sample provider + * the most recent sample since the last draw. This way multiple + * sample periods for multiple queries can reference the same sample. + * + * fd_hw_sample_provider: + * - one per query type, registered/implemented by gpu generation + * specific code + * - can construct fd_hw_samples on demand + * - most recent sample (since last draw) cached so multiple + * different queries can ref the same sample + * + * fd_hw_sample: + * - abstracts one snapshot of counter value(s) across N tiles + * - backing object not allocated until submit time when number + * of samples and number of tiles is known + * + * fd_hw_sample_period: + * - consists of start and stop sample + * - a query accumulates a list of sample periods + * - the query result is the sum of the sample periods + */ + +struct fd_hw_sample_provider { + unsigned query_type; + + /* stages applicable to the query type: */ + enum fd_render_stage active; + + /* when a new sample is required, emit appropriate cmdstream + * and return a sample object: + */ + struct fd_hw_sample *(*get_sample)(struct fd_context *ctx, + struct fd_ringbuffer *ring); + + /* accumulate the results from specified sample period: */ + void (*accumulate_result)(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result); +}; + +struct fd_hw_sample { + struct pipe_reference reference; /* keep this first */ + + /* offset and size of the sample are known at the time the + * sample is constructed. + */ + uint32_t size; + uint32_t offset; + + /* backing object, offset/stride/etc are determined not when + * the sample is constructed, but when the batch is submitted. + * This way we can defer allocation until total # of requested + * samples, and total # of tiles, is known.
+ */ + struct fd_bo *bo; + uint32_t num_tiles; + uint32_t tile_stride; +}; + +struct fd_hw_sample_period; + +struct fd_hw_query { + struct fd_query base; + + const struct fd_hw_sample_provider *provider; + + /* list of fd_hw_sample_period in previous submits: */ + struct list_head periods; + + /* list of fd_hw_sample_period's in current submit: */ + struct list_head current_periods; + + /* if active and not paused, the current sample period (not + * yet added to current_periods): + */ + struct fd_hw_sample_period *period; + + struct list_head list; /* list-node in ctx->active_queries */ +}; + +static inline struct fd_hw_query * +fd_hw_query(struct fd_query *q) +{ + return (struct fd_hw_query *)q; +} + +struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type); +/* helper for sample providers: */ +struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size); +/* don't call directly, use fd_hw_sample_reference() */ +void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp); +void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles); +void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, + struct fd_ringbuffer *ring); +void fd_hw_query_set_stage(struct fd_context *ctx, + struct fd_ringbuffer *ring, enum fd_render_stage stage); +void fd_hw_query_register_provider(struct pipe_context *pctx, + const struct fd_hw_sample_provider *provider); +void fd_hw_query_init(struct pipe_context *pctx); +void fd_hw_query_fini(struct pipe_context *pctx); + +static inline void +fd_hw_sample_reference(struct fd_context *ctx, + struct fd_hw_sample **ptr, struct fd_hw_sample *samp) +{ + struct fd_hw_sample *old_samp = *ptr; + + if (pipe_reference(&(*ptr)->reference, &samp->reference)) + __fd_hw_sample_destroy(ctx, old_samp); + if (ptr) + *ptr = samp; +} + +#endif /* FREEDRENO_QUERY_HW_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.c b/src/gallium/drivers/freedreno/freedreno_query_sw.c 
new file mode 100644 index 00000000000..8d81698f31d --- /dev/null +++ b/src/gallium/drivers/freedreno/freedreno_query_sw.c @@ -0,0 +1,165 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "os/os_time.h"
+
+#include "freedreno_query_sw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/*
+ * SW Queries:
+ *
+ * In the core, we have some support for basic sw counters
+ */
+
+/*
+ * Release a sw query object.  ctx is unused; it is only part of the
+ * signature expected by struct fd_query_funcs.
+ */
+static void
+fd_sw_destroy_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_sw_query *sq = fd_sw_query(q);
+	/* allocated with CALLOC_STRUCT(), so release with the matching FREE() */
+	FREE(sq);
+}
+
+/*
+ * Return the current value of the ctx->stats counter that backs the given
+ * query type, or 0 for a type that has no sw counter.
+ */
+static uint64_t
+read_counter(struct fd_context *ctx, int type)
+{
+	switch (type) {
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+		/* for now same thing as _PRIMITIVES_EMITTED */
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+		return ctx->stats.prims_emitted;
+	case FD_QUERY_DRAW_CALLS:
+		return ctx->stats.draw_calls;
+	case FD_QUERY_BATCH_TOTAL:
+		return ctx->stats.batch_total;
+	case FD_QUERY_BATCH_SYSMEM:
+		return ctx->stats.batch_sysmem;
+	case FD_QUERY_BATCH_GMEM:
+		return ctx->stats.batch_gmem;
+	case FD_QUERY_BATCH_RESTORE:
+		return ctx->stats.batch_restore;
+	}
+	return 0;
+}
+
+/*
+ * The FD_QUERY_BATCH_* queries are reported as a rate (counter delta scaled
+ * by the time between begin and end) rather than as a raw delta.
+ */
+static bool
+is_rate_query(struct fd_query *q)
+{
+	switch (q->type) {
+	case FD_QUERY_BATCH_TOTAL:
+	case FD_QUERY_BATCH_SYSMEM:
+	case FD_QUERY_BATCH_GMEM:
+	case FD_QUERY_BATCH_RESTORE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Mark the query active and snapshot the counter; rate queries additionally
+ * record the begin timestamp.
+ */
+static void
+fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_sw_query *sq = fd_sw_query(q);
+	q->active = true;
+	sq->begin_value = read_counter(ctx, q->type);
+	if (is_rate_query(q))
+		sq->begin_time = os_time_get();
+}
+
+/*
+ * Mark the query inactive and snapshot the counter again; rate queries
+ * additionally record the end timestamp.
+ */
+static void
+fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
+{
+	struct fd_sw_query *sq = fd_sw_query(q);
+	q->active = false;
+	sq->end_value = read_counter(ctx, q->type);
+	if (is_rate_query(q))
+		sq->end_time = os_time_get();
+}
+
+/*
+ * Store the counter delta between begin and end in result->u64.  For rate
+ * queries the delta is multiplied by 1000000 and divided by the elapsed
+ * time (presumably os_time_get() reports microseconds, making this a
+ * per-second rate -- confirm against os_time.h).
+ *
+ * Returns false, leaving result untouched, while the query is still active;
+ * true otherwise.  The wait argument is not used: sw counters are always
+ * immediately available.
+ */
+static boolean
+fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
+		boolean wait, union pipe_query_result *result)
+{
+	struct fd_sw_query *sq = fd_sw_query(q);
+
+	if (q->active)
+		return false;
+
+	util_query_clear_result(result, q->type);
+
+	result->u64 = sq->end_value - sq->begin_value;
+
+	if (is_rate_query(q)) {
+		uint64_t elapsed = sq->end_time - sq->begin_time;
+
+		if (elapsed == 0) {
+			/*
+			 * Begin and end carry the same timestamp (or the query was
+			 * never begun/ended).  Dividing would give inf or NaN, and
+			 * converting either to uint64_t is undefined behavior, so
+			 * report a rate of zero instead.
+			 */
+			result->u64 = 0;
+		} else {
+			double fps = (result->u64 * 1000000) / (double)elapsed;
+			result->u64 = (uint64_t)fps;
+		}
+	}
+
+	return true;
+}
+
+/* vtable hooking the sw implementation into the generic fd_query code */
+static const struct fd_query_funcs sw_query_funcs = {
+		.destroy_query = fd_sw_destroy_query,
+		.begin_query = fd_sw_begin_query,
+		.end_query = fd_sw_end_query,
+		.get_query_result = fd_sw_get_query_result,
+};
+
+/*
+ * Create a sw query for one of the counter-backed query types.  Returns
+ * NULL if query_type is not one handled here, or if allocation fails.
+ */
+struct fd_query *
+fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
+{
+	struct fd_sw_query *sq;
+	struct fd_query *q;
+
+	switch (query_type) {
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case FD_QUERY_DRAW_CALLS:
+	case FD_QUERY_BATCH_TOTAL:
+	case FD_QUERY_BATCH_SYSMEM:
+	case FD_QUERY_BATCH_GMEM:
+	case FD_QUERY_BATCH_RESTORE:
+		break;
+	default:
+		return NULL;
+	}
+
+	sq = CALLOC_STRUCT(fd_sw_query);
+	if (!sq)
+		return NULL;
+
+	q = &sq->base;
+	q->funcs = &sw_query_funcs;
+	q->type = query_type;
+
+	return q;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.h b/src/gallium/drivers/freedreno/freedreno_query_sw.h
new file mode 100644
index 00000000000..3446474d0bd
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_sw.h
@@ -0,0 +1,55 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_QUERY_SW_H_
+#define FREEDRENO_QUERY_SW_H_
+
+#include "freedreno_query.h"
+
+/*
+ * SW Queries:
+ *
+ * In the core, we have some support for basic sw counters
+ */
+
+struct fd_sw_query {
+	/* generic query state; first member so fd_sw_query() can downcast */
+	struct fd_query base;
+	/* counter snapshots taken when the query is begun / ended */
+	uint64_t begin_value, end_value;
+	/* os_time_get() timestamps at begin / end; only set for rate queries */
+	uint64_t begin_time, end_time;
+};
+
+/* Downcast a generic query to the sw query that embeds it as 'base'. */
+static inline struct fd_sw_query *
+fd_sw_query(struct fd_query *q)
+{
+	return (struct fd_sw_query *)q;
+}
+
+struct fd_query * fd_sw_create_query(struct fd_context *ctx,
+		unsigned query_type);
+
+#endif /* FREEDRENO_QUERY_SW_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index bd8c6cb9907..289f3653e12 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -36,6 +36,7 @@ #include "freedreno_screen.h" #include "freedreno_surface.h" #include "freedreno_context.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" #include <errno.h> @@ -401,7 +402,9 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info) util_blitter_save_fragment_sampler_views(ctx->blitter, ctx->fragtex.num_textures, ctx->fragtex.textures); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT); 
util_blitter_blit(ctx->blitter, info); + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); return true; } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index d62d4b61b6f..1c106894a61 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -143,6 +143,8 @@ tables for things that differ if the delta is not too much.. static int fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { + struct fd_screen *screen = fd_screen(pscreen); + /* this is probably not totally correct.. but it's a start: */ switch (param) { /* Supported features (boolean caps). */ @@ -161,8 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_SM3: case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: @@ -180,6 +180,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_PRIMITIVE_RESTART: return 0; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -229,17 +231,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return MAX_MIP_LEVELS; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return 9192; + return 0; /* TODO: a3xx+ should support (required in gles3) */ /* Render targets. */ case PIPE_CAP_MAX_RENDER_TARGETS: return 1; - /* Timer queries. */ + /* Queries. */ case PIPE_CAP_QUERY_TIME_ELAPSED: - case PIPE_CAP_OCCLUSION_QUERY: case PIPE_CAP_QUERY_TIMESTAMP: return 0; + case PIPE_CAP_OCCLUSION_QUERY: + return (screen->gpu_id >= 300) ? 
1: 0; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -252,7 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; - case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return 64; default: @@ -315,7 +318,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 8; /* XXX */ case PIPE_SHADER_CAP_MAX_INPUTS: - return 32; + return 16; case PIPE_SHADER_CAP_MAX_TEMPS: return 64; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_ADDRS: diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 356f4165792..b57702c54c8 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -223,11 +223,18 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, emit_marker(ring, 6); } +/* CP_SCRATCH_REG4 is used to hold base address for query results: */ +#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 + static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx) { extern unsigned marker_cnt; - OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG0 + scratch_idx, 1); + unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; + assert(reg != HW_QUERY_BASE_REG); + if (reg == HW_QUERY_BASE_REG) + return; + OUT_PKT0(ring, reg, 1); OUT_RING(ring, ++marker_cnt); } |