summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/freedreno/Makefile.sources3
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h10
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_emit.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h158
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c394
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.c3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.h4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_query.c139
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_query.h36
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c18
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c8
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_util.c28
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h43
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h102
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h65
-rw-r--r--src/gallium/drivers/freedreno/freedreno_draw.c6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_gmem.c19
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.c117
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.h33
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.c465
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.h164
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_sw.c165
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_sw.h55
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c17
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h9
29 files changed, 1847 insertions, 229 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 311b0b6a205..0dc7fc08512 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -3,6 +3,8 @@ C_SOURCES := \
freedreno_lowering.c \
freedreno_program.c \
freedreno_query.c \
+ freedreno_query_hw.c \
+ freedreno_query_sw.c \
freedreno_fence.c \
freedreno_resource.c \
freedreno_surface.c \
@@ -38,6 +40,7 @@ a3xx_SOURCES := \
a3xx/fd3_emit.c \
a3xx/fd3_gmem.c \
a3xx/fd3_program.c \
+ a3xx/fd3_query.c \
a3xx/fd3_rasterizer.c \
a3xx/fd3_screen.c \
a3xx/fd3_texture.c \
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index 0de872db508..5495728c97e 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index d1a586c8c9b..a533c298115 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -125,7 +125,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
{
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
static const struct fd2_sampler_stateobj dummy_sampler = {};
- struct fd2_sampler_stateobj *sampler;
+ const struct fd2_sampler_stateobj *sampler;
struct fd2_pipe_sampler_view *view;
if (emitted & (1 << const_idx))
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index c61f7aab74f..8934e213b00 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -41,31 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-enum a3xx_render_mode {
- RB_RENDERING_PASS = 0,
- RB_TILING_PASS = 1,
- RB_RESOLVE_PASS = 2,
-};
-
enum a3xx_tile_mode {
LINEAR = 0,
TILE_32X32 = 2,
};
-enum a3xx_threadmode {
- MULTI = 0,
- SINGLE = 1,
-};
-
-enum a3xx_instrbuffermode {
- BUFFER = 1,
-};
-
-enum a3xx_threadsize {
- TWO_QUADS = 0,
- FOUR_QUADS = 1,
-};
-
enum a3xx_state_block_id {
HLSQ_BLOCK_ID_TP_TEX = 2,
HLSQ_BLOCK_ID_TP_MIPMAP = 3,
@@ -180,12 +160,6 @@ enum a3xx_color_swap {
XYZW = 3,
};
-enum a3xx_msaa_samples {
- MSAA_ONE = 0,
- MSAA_TWO = 1,
- MSAA_FOUR = 2,
-};
-
enum a3xx_sp_perfcounter_select {
SP_FS_CFLOW_INSTRUCTIONS = 12,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
@@ -212,11 +186,6 @@ enum a3xx_rop_code {
ROP_SET = 15,
};
-enum adreno_rb_copy_control_mode {
- RB_COPY_RESOLVE = 1,
- RB_COPY_DEPTH_STENCIL = 5,
-};
-
enum a3xx_tex_filter {
A3XX_TEX_NEAREST = 0,
A3XX_TEX_LINEAR = 1,
@@ -337,6 +306,7 @@ enum a3xx_tex_type {
#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064
#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080
+#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001
#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081
@@ -570,6 +540,10 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
#define REG_A3XX_CP_AHB_FAULT 0x0000054d
+#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22
+
+#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23
+
#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040
#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000
#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
@@ -644,8 +618,26 @@ static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
}
#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068
+#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff
+#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0
+static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val)
+{
+ return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
+}
+#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000
+#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16
+static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val)
+{
+ return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
+}
#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069
+#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff
+#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0
+static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
+{
+ return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK;
+}
#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff
@@ -992,6 +984,12 @@ static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mod
{
return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
}
+#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00
+#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8
+static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val)
+{
+ return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK;
+}
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14
static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
@@ -1034,6 +1032,12 @@ static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
}
+#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00
+#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
+{
+ return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
+}
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14
static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
@@ -1202,6 +1206,8 @@ static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val)
}
#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110
+#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001
+#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002
#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111
@@ -1366,10 +1372,36 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
}
#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0
+static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK;
+}
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2
+static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK;
+}
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12
+static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK;
+}
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000
+#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22
+static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK;
+}
+
+static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; }
-#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG 0x0000220b
+static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; }
-#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG 0x0000220c
+static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; }
#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211
@@ -1377,7 +1409,9 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214
-#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x00002215
+static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; }
#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216
@@ -1624,6 +1658,7 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
}
#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
+#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
@@ -1797,6 +1832,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
}
#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000
+#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000
#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000
#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24
static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
@@ -1976,6 +2012,42 @@ static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f
+#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070
+#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001
+#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002
+#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004
+#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008
+#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010
+
+#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071
+#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001
+#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002
+#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004
+#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008
+#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010
+
+#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072
+
+#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073
+
+#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074
+
+#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075
+
+#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b
+
+#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c
+
#define REG_A3XX_VSC_BIN_SIZE 0x00000c01
#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0
@@ -2249,6 +2321,12 @@ static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
{
return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK;
}
+#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000
+#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20
+static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val)
+{
+ return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK;
+}
#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
#define REG_A3XX_TEX_SAMP_1 0x00000001
@@ -2267,6 +2345,7 @@ static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val)
#define REG_A3XX_TEX_CONST_0 0x00000000
#define A3XX_TEX_CONST_0_TILED 0x00000001
+#define A3XX_TEX_CONST_0_SRGB 0x00000004
#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070
#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
@@ -2303,6 +2382,7 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
{
return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
}
+#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000
#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000
#define A3XX_TEX_CONST_0_TYPE__SHIFT 30
static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 4f8dcc5fe61..3159e7adee9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -1074,77 +1074,154 @@ trans_arl(const struct instr_translater *t,
add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
}
-/* texture fetch/sample instructions: */
-static void
-trans_samp(const struct instr_translater *t,
- struct fd3_compile_context *ctx,
+/*
+ * texture fetch/sample instructions:
+ */
+
+/* Describes how a texture sample instruction is built for one
+ * opcode / texture-target combination; the table entries are the
+ * static consts in get_tex_info().
+ */
+struct tex_info {
+ /* order[j] is the coord component that gets moved into component j
+ * of the temporary coord register; a negative value means component
+ * j is not written from coord.
+ */
+ int8_t order[4];
+ /* src_wrmask: writemask passed along with the coord source register.
+ * flags: IR3_INSTR_* bits OR'ed into the sample instruction's flags.
+ */
+ unsigned src_wrmask, flags;
+};
+
+static const struct tex_info *
+get_tex_info(struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct ir3_instruction *instr;
- struct tgsi_src_register *coord = &inst->Src[0].Register;
- struct tgsi_src_register *samp = &inst->Src[1].Register;
+ static const struct tex_info tex1d = {
+ .order = { 0, -1, -1, -1 }, /* coord.x */
+ .src_wrmask = TGSI_WRITEMASK_XY,
+ .flags = 0,
+ };
+ static const struct tex_info tex1ds = {
+ .order = { 0, -1, 2, -1 }, /* coord.xz */
+ .src_wrmask = TGSI_WRITEMASK_XYZ,
+ .flags = IR3_INSTR_S,
+ };
+ static const struct tex_info tex2d = {
+ .order = { 0, 1, -1, -1 }, /* coord.xy */
+ .src_wrmask = TGSI_WRITEMASK_XY,
+ .flags = 0,
+ };
+ static const struct tex_info tex2ds = {
+ .order = { 0, 1, 2, -1 }, /* coord.xyz */
+ .src_wrmask = TGSI_WRITEMASK_XYZ,
+ .flags = IR3_INSTR_S,
+ };
+ static const struct tex_info tex3d = {
+ .order = { 0, 1, 2, -1 }, /* coord.xyz */
+ .src_wrmask = TGSI_WRITEMASK_XYZ,
+ .flags = IR3_INSTR_3D,
+ };
+ static const struct tex_info tex3ds = {
+ .order = { 0, 1, 2, 3 }, /* coord.xyzw */
+ .src_wrmask = TGSI_WRITEMASK_XYZW,
+ .flags = IR3_INSTR_S | IR3_INSTR_3D,
+ };
+ static const struct tex_info txp1d = {
+ .order = { 0, -1, 3, -1 }, /* coord.xw */
+ .src_wrmask = TGSI_WRITEMASK_XYZ,
+ .flags = IR3_INSTR_P,
+ };
+ static const struct tex_info txp1ds = {
+ .order = { 0, -1, 2, 3 }, /* coord.xzw */
+ .src_wrmask = TGSI_WRITEMASK_XYZW,
+ .flags = IR3_INSTR_P | IR3_INSTR_S,
+ };
+ static const struct tex_info txp2d = {
+ .order = { 0, 1, 3, -1 }, /* coord.xyw */
+ .src_wrmask = TGSI_WRITEMASK_XYZ,
+ .flags = IR3_INSTR_P,
+ };
+ static const struct tex_info txp2ds = {
+ .order = { 0, 1, 2, 3 }, /* coord.xyzw */
+ .src_wrmask = TGSI_WRITEMASK_XYZW,
+ .flags = IR3_INSTR_P | IR3_INSTR_S,
+ };
+ static const struct tex_info txp3d = {
+ .order = { 0, 1, 2, 3 }, /* coord.xyzw */
+ .src_wrmask = TGSI_WRITEMASK_XYZW,
+ .flags = IR3_INSTR_P | IR3_INSTR_3D,
+ };
+
unsigned tex = inst->Texture.Texture;
- int8_t *order;
- unsigned i, flags = 0, src_wrmask;
- bool needs_mov = false;
- switch (t->arg) {
+ switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_TEX:
switch (tex) {
+ case TGSI_TEXTURE_1D:
+ return &tex1d;
+ case TGSI_TEXTURE_SHADOW1D:
+ return &tex1ds;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- order = (int8_t[4]){ 0, 1, -1, -1 };
- src_wrmask = TGSI_WRITEMASK_XY;
- break;
+ return &tex2d;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return &tex2ds;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- order = (int8_t[4]){ 0, 1, 2, -1 };
- src_wrmask = TGSI_WRITEMASK_XYZ;
- flags |= IR3_INSTR_3D;
- break;
+ return &tex3d;
+ case TGSI_TEXTURE_SHADOWCUBE:
+ return &tex3ds;
default:
compile_error(ctx, "unknown texture type: %s\n",
tgsi_texture_names[tex]);
- break;
+ return NULL;
}
break;
case TGSI_OPCODE_TXP:
switch (tex) {
+ case TGSI_TEXTURE_1D:
+ return &txp1d;
+ case TGSI_TEXTURE_SHADOW1D:
+ return &txp1ds;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- order = (int8_t[4]){ 0, 1, 3, -1 };
- src_wrmask = TGSI_WRITEMASK_XYZ;
- break;
+ return &txp2d;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return &txp2ds;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- order = (int8_t[4]){ 0, 1, 2, 3 };
- src_wrmask = TGSI_WRITEMASK_XYZW;
- flags |= IR3_INSTR_3D;
- break;
+ return &txp3d;
default:
compile_error(ctx, "unknown texture type: %s\n",
tgsi_texture_names[tex]);
break;
}
- flags |= IR3_INSTR_P;
- break;
- default:
- compile_assert(ctx, 0);
break;
}
+ compile_assert(ctx, 0);
+ return NULL;
+}
+
+static struct tgsi_src_register *
+get_tex_coord(struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst,
+ const struct tex_info *tinf)
+{
+ struct tgsi_src_register *coord = &inst->Src[0].Register;
+ struct ir3_instruction *instr;
+ unsigned tex = inst->Texture.Texture;
+ bool needs_mov = false;
+ unsigned i;
/* cat5 instruction cannot seem to handle const or relative: */
if (is_rel_or_const(coord))
needs_mov = true;
+ /* 1D textures we fix up w/ 0.5 as 2nd coord: */
+ if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D))
+ needs_mov = true;
+
/* The texture sample instructions need to coord in successive
* registers/components (ie. src.xy but not src.yx). And TXP
* needs the .w component in .z for 2D.. so in some cases we
* might need to emit some mov instructions to shuffle things
* around:
*/
- for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
- if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
+ for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++)
+ if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i]))
needs_mov = true;
if (needs_mov) {
@@ -1157,28 +1234,55 @@ trans_samp(const struct instr_translater *t,
/* need to move things around: */
tmp_src = get_internal_temp(ctx, &tmp_dst);
- for (j = 0; (j < 4) && (order[j] >= 0); j++) {
- instr = instr_create(ctx, 1, 0);
+ for (j = 0; j < 4; j++) {
+ if (tinf->order[j] < 0)
+ continue;
+ instr = instr_create(ctx, 1, 0); /* mov */
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, j);
add_src_reg(ctx, instr, coord,
- src_swiz(coord, order[j]));
+ src_swiz(coord, tinf->order[j]));
+ }
+
+ /* fix up .y coord: */
+ if ((tex == TGSI_TEXTURE_1D) ||
+ (tex == TGSI_TEXTURE_SHADOW1D)) {
+ instr = instr_create(ctx, 1, 0); /* mov */
+ instr->cat1.src_type = type_mov;
+ instr->cat1.dst_type = type_mov;
+ add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5;
}
coord = tmp_src;
}
+ return coord;
+}
+
+/* Translate a TGSI texture sample (TEX/TXP) into a cat5 instruction:
+ * look up the per-target tex_info, get the coord into a form the
+ * instruction can consume, then emit the sample with the flags and
+ * source writemask from the tex_info.
+ */
+static void
+trans_samp(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+	struct tgsi_src_register *coord;
+	struct tgsi_src_register *samp = &inst->Src[1].Register;
+	const struct tex_info *tinf;
+
+	/* get_tex_info() returns NULL for an unhandled opcode/texture
+	 * type (after flagging the error); both get_tex_coord() and the
+	 * code below dereference tinf, so bail out instead:
+	 */
+	tinf = get_tex_info(ctx, inst);
+	if (!tinf)
+		return;
+
+	coord = get_tex_coord(ctx, inst, tinf);
+
instr = instr_create(ctx, 5, t->opc);
instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
- instr->flags |= flags;
-
- add_dst_reg_wrmask(ctx, instr, &inst->Dst[0].Register, 0,
- inst->Dst[0].Register.WriteMask);
+	instr->flags |= tinf->flags;
- add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, src_wrmask);
+	add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
+	add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask);
}
/*
@@ -1231,15 +1335,19 @@ trans_cmp(const struct instr_translater *t,
switch (t->tgsi_opc) {
case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_FSEQ:
condition = IR3_COND_EQ;
break;
case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_FSNE:
condition = IR3_COND_NE;
break;
case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_FSGE:
condition = IR3_COND_GE;
break;
case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_FSLT:
condition = IR3_COND_LT;
break;
case TGSI_OPCODE_SLE:
@@ -1269,11 +1377,15 @@ trans_cmp(const struct instr_translater *t,
switch (t->tgsi_opc) {
case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_FSLT:
/* cov.u16f16 dst, tmp0 */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
@@ -1294,6 +1406,96 @@ trans_cmp(const struct instr_translater *t,
}
/*
+ * Integer compare / select.  Every opcode except UCMP becomes a single
+ * cat2 cmps instruction (t->opc selects the signed or unsigned form);
+ * UCMP is a compare against zero followed by a select:
+ *
+ * USNE(a,b) = (a != b) ? 1 : 0
+ *   cmps.u32.ne dst, a, b
+ *
+ * USEQ(a,b) = (a == b) ? 1 : 0
+ *   cmps.u32.eq dst, a, b
+ *
+ * ISGE(a,b) = (a >= b) ? 1 : 0
+ *   cmps.s32.ge dst, a, b
+ *
+ * USGE(a,b) = (a >= b) ? 1 : 0
+ *   cmps.u32.ge dst, a, b
+ *
+ * ISLT(a,b) = (a < b) ? 1 : 0
+ *   cmps.s32.lt dst, a, b
+ *
+ * USLT(a,b) = (a < b) ? 1 : 0
+ *   cmps.u32.lt dst, a, b
+ *
+ * UCMP(a,b,c) = (a != 0) ? b : c
+ *   cmps.u32.ne tmp0, a, {0}
+ *   sel.b32 dst, b, tmp0, c
+ */
+static void
+trans_icmp(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+	struct tgsi_src_register constval0;
+	struct tgsi_src_register *a0, *a1, *a2;
+	unsigned condition;
+
+	a0 = &inst->Src[0].Register;  /* a */
+	a1 = &inst->Src[1].Register;  /* b */
+
+	switch (t->tgsi_opc) {
+	case TGSI_OPCODE_USNE:
+		condition = IR3_COND_NE;
+		break;
+	case TGSI_OPCODE_USEQ:
+		condition = IR3_COND_EQ;
+		break;
+	case TGSI_OPCODE_ISGE:
+	case TGSI_OPCODE_USGE:
+		condition = IR3_COND_GE;
+		break;
+	case TGSI_OPCODE_ISLT:
+	case TGSI_OPCODE_USLT:
+		condition = IR3_COND_LT;
+		break;
+	case TGSI_OPCODE_UCMP:
+		/* The select condition is "a is non-zero".  An unsigned
+		 * less-than-zero compare can never be true, which would make
+		 * UCMP unconditionally return c, so compare with NE:
+		 */
+		get_immediate(ctx, &constval0, 0);
+		a1 = &constval0;              /* {0} */
+		condition = IR3_COND_NE;
+		break;
+
+	default:
+		compile_assert(ctx, 0);
+		return;
+	}
+
+	/* only unconst a0 when both operands are const: */
+	if (is_const(a0) && is_const(a1))
+		a0 = get_unconst(ctx, a0);
+
+	if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
+		struct tgsi_dst_register tmp_dst;
+		struct tgsi_src_register *tmp_src;
+		tmp_src = get_internal_temp(ctx, &tmp_dst);
+		/* cmps.u32.ne tmp, a0, a1 */
+		instr = instr_create(ctx, 2, t->opc);
+		instr->cat2.condition = condition;
+		vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
+
+		a1 = &inst->Src[1].Register;  /* b */
+		a2 = &inst->Src[2].Register;  /* c */
+		/* sel.b32 dst, src1, tmp, src2 */
+		instr = instr_create(ctx, 3, OPC_SEL_B32);
+		vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
+	} else {
+		/* cmps.{u32,s32}.<cond> dst, a0, a1 */
+		instr = instr_create(ctx, 2, t->opc);
+		instr->cat2.condition = condition;
+		vectorize(ctx, instr, dst, 2, a0, 0, a1, 0);
+	}
+	put_dst(ctx, inst, dst);
+}
+
+/*
* Conditional / Flow control
*/
@@ -1533,7 +1735,7 @@ trans_endif(const struct instr_translater *t,
}
/*
- * Kill / Kill-if
+ * Kill
*/
static void
@@ -1580,6 +1782,76 @@ trans_kill(const struct instr_translater *t,
}
/*
+ * Kill-If
+ */
+
+/* Emit a conditional kill: compare the .x component of the source
+ * against an immediate 0.0 into p0.x, then a kill that consumes the
+ * compare result.  The kill instruction is recorded in ctx->kill[].
+ */
+static void
+trans_killif(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_src_register *src = &inst->Src[0].Register;
+	struct ir3_instruction *zero, *cmp, *kill_instr;
+
+	zero = create_immed(ctx, 0.0);
+
+	/* cmps.f.ne p0.x, {0.0}, src.x */
+	cmp = instr_create(ctx, 2, OPC_CMPS_F);
+	cmp->cat2.condition = IR3_COND_NE;
+	ir3_reg_create(cmp, regid(REG_P0, 0), 0);
+	ir3_reg_create(cmp, 0, IR3_REG_SSA)->instr = zero;
+	add_src_reg(ctx, cmp, src, src->SwizzleX);
+
+	/* kill p0.x (never inverted) */
+	kill_instr = instr_create(ctx, 0, OPC_KILL);
+	kill_instr->cat0.inv = false;
+	ir3_reg_create(kill_instr, 0, 0);  /* dummy dst */
+	ir3_reg_create(kill_instr, 0, IR3_REG_SSA)->instr = cmp;
+
+	ctx->kill[ctx->kill_count++] = kill_instr;
+}
+/*
+ * I2F / U2F / F2I / F2U
+ */
+
+static void
+trans_cov(const struct instr_translater *t,
+		struct fd3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	/* Int <-> float conversion: a single cat1 instruction whose
+	 * source and destination types are picked from the TGSI opcode.
+	 */
+	struct ir3_instruction *instr;
+	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+	struct tgsi_src_register *src = &inst->Src[0].Register;
+
+	/* cov.{u32f32,s32f32,f32u32,f32s32} dst, src */
+	instr = instr_create(ctx, 1, 0);
+	switch (t->tgsi_opc) {
+	case TGSI_OPCODE_U2F:
+		instr->cat1.src_type = TYPE_U32;
+		instr->cat1.dst_type = TYPE_F32;
+		break;
+	case TGSI_OPCODE_I2F:
+		instr->cat1.src_type = TYPE_S32;
+		instr->cat1.dst_type = TYPE_F32;
+		break;
+	case TGSI_OPCODE_F2U:
+		instr->cat1.src_type = TYPE_F32;
+		instr->cat1.dst_type = TYPE_U32;
+		break;
+	case TGSI_OPCODE_F2I:
+		instr->cat1.src_type = TYPE_F32;
+		instr->cat1.dst_type = TYPE_S32;
+		break;
+	default:
+		/* only the four opcodes above are routed here */
+		compile_assert(ctx, 0);
+		break;
+	}
+	vectorize(ctx, instr, dst, 1, src, 0);
+	/* pair get_dst() with put_dst(), as trans_icmp does */
+	put_dst(ctx, inst, dst);
+}
+
+/*
* Handlers for TGSI instructions which do have 1:1 mapping to native
* instructions:
*/
@@ -1616,9 +1888,11 @@ instr_cat2(const struct instr_translater *t,
switch (t->tgsi_opc) {
case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_IABS:
src0_flags = IR3_REG_ABS;
break;
case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_INEG:
src1_flags = IR3_REG_NEGATE;
break;
}
@@ -1724,6 +1998,22 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
+ INSTR(UADD, instr_cat2, .opc = OPC_ADD_U),
+ INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S),
+ INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U),
+ INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S),
+ INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U),
+ INSTR(AND, instr_cat2, .opc = OPC_AND_B),
+ INSTR(OR, instr_cat2, .opc = OPC_OR_B),
+ INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
+ INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
+ INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U),
+ INSTR(SHL, instr_cat2, .opc = OPC_SHL_B),
+ INSTR(USHR, instr_cat2, .opc = OPC_SHR_B),
+ INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B),
+ INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S),
+ INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S),
+ INSTR(AND, instr_cat2, .opc = OPC_AND_B),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
INSTR(CLAMP, trans_clamp),
@@ -1741,16 +2031,33 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
INSTR(SGT, trans_cmp),
INSTR(SLT, trans_cmp),
+ INSTR(FSLT, trans_cmp),
INSTR(SGE, trans_cmp),
+ INSTR(FSGE, trans_cmp),
INSTR(SLE, trans_cmp),
INSTR(SNE, trans_cmp),
+ INSTR(FSNE, trans_cmp),
INSTR(SEQ, trans_cmp),
+ INSTR(FSEQ, trans_cmp),
INSTR(CMP, trans_cmp),
+ INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U),
+ INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U),
+ INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S),
+ INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
+ INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
+ INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
+ INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U),
INSTR(IF, trans_if),
+ INSTR(UIF, trans_if),
INSTR(ELSE, trans_else),
INSTR(ENDIF, trans_endif),
INSTR(END, instr_cat0, .opc = OPC_END),
INSTR(KILL, trans_kill, .opc = OPC_KILL),
+ INSTR(KILL_IF, trans_killif, .opc = OPC_KILL),
+ INSTR(I2F, trans_cov),
+ INSTR(U2F, trans_cov),
+ INSTR(F2I, trans_cov),
+ INSTR(F2U, trans_cov),
};
static fd3_semantic
@@ -1935,6 +2242,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
DBG("decl in -> r%d", i);
+ compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
+
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].regid = r;
@@ -2024,6 +2333,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
ncomp = 4;
+ compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
+
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
so->outputs[n].regid = regid(i, comp);
@@ -2147,6 +2458,7 @@ compile_instructions(struct fd3_compile_context *ctx)
struct tgsi_full_immediate *imm =
&ctx->parser.FullToken.FullImmediate;
unsigned n = ctx->so->immediates_count++;
+ compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
memcpy(ctx->so->immediates[n].val, imm->u, 16);
break;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
index ddb69243c11..0f7044b56f1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
@@ -1324,6 +1324,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
DBG("decl in -> r%d", i + base); // XXX
+ compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
+
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].ncomp = ncomp;
@@ -1410,6 +1412,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->outputs_count++;
+ compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
so->outputs[n].regid = regid(i + base, comp);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index f36cbd946a0..847414ac082 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -33,6 +33,7 @@
#include "fd3_emit.h"
#include "fd3_gmem.h"
#include "fd3_program.h"
+#include "fd3_query.h"
#include "fd3_rasterizer.h"
#include "fd3_texture.h"
#include "fd3_zsa.h"
@@ -134,5 +135,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+ fd3_query_context_init(pctx);
+
return pctx;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 2e2a66dc616..17f3dcfe04e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -406,7 +406,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
- for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
+ for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
@@ -428,7 +428,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_RING(ring, reg);
}
- for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
+ for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
uint32_t reg = 0;
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 0439d39dbff..28ad52ecd7c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -91,7 +91,7 @@ struct fd3_shader_variant {
struct {
fd3_semantic semantic;
uint8_t regid;
- } outputs[16];
+ } outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
/* vertices/inputs: */
@@ -104,7 +104,7 @@ struct fd3_shader_variant {
/* in theory inloc of fs should match outloc of vs: */
uint8_t inloc;
uint8_t bary;
- } inputs[16];
+ } inputs[16 + 2]; /* +POSITION +FACE */
unsigned total_in; /* sum of inputs (scalar) */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
new file mode 100644
index 00000000000..77ae8b6b1d1
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
@@ -0,0 +1,139 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_query_hw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+#include "fd3_query.h"
+#include "fd3_util.h"
+
+
+struct fd_rb_samp_ctrs {
+ uint64_t ctr[16];
+};
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+static struct fd_hw_sample *
+occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ struct fd_hw_sample *samp =
+ fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+
+ /* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
+ * HW_QUERY_BASE_REG register:
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
+ OUT_RING(ring, HW_QUERY_BASE_REG);
+ OUT_RING(ring, samp->offset);
+
+ OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
+ OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
+
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, USE_VISIBILITY));
+ OUT_RING(ring, 0); /* NumIndices */
+
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, ZPASS_DONE);
+
+ OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
+ OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
+
+ OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
+ OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 |
+ A3XX_VBIF_PERF_CNT_EN_CNT1 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
+ A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
+
+ return samp;
+}
+
+static uint64_t
+count_samples(const struct fd_rb_samp_ctrs *start,
+ const struct fd_rb_samp_ctrs *end)
+{
+ uint64_t n = 0;
+ unsigned i;
+
+ /* not quite sure what all of these are, possibly different
+ * counters for each MRT render target:
+ */
+ for (i = 0; i < 16; i += 4)
+ n += end->ctr[i] - start->ctr[i];
+
+ return n;
+}
+
+static void
+occlusion_counter_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = count_samples(start, end);
+ result->u64 += n;
+}
+
+static void
+occlusion_predicate_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = count_samples(start, end);
+ result->b |= (n > 0);
+}
+
+static const struct fd_hw_sample_provider occlusion_counter = {
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_counter_accumulate_result,
+};
+
+static const struct fd_hw_sample_provider occlusion_predicate = {
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
+};
+
+void fd3_query_context_init(struct pipe_context *pctx)
+{
+ fd_hw_query_register_provider(pctx, &occlusion_counter);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate);
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.h b/src/gallium/drivers/freedreno/a3xx/fd3_query.h
new file mode 100644
index 00000000000..842c822aa0f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.h
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_QUERY_H_
+#define FD3_QUERY_H_
+
+#include "pipe/p_context.h"
+
+void fd3_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD3_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
index c2d3249cd91..1a1a7cbd653 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -40,6 +40,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd3_rasterizer_stateobj *so;
+ float psize_min, psize_max;
so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
if (!so)
@@ -47,19 +48,28 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
so->base = *cso;
+ if (cso->point_size_per_vertex) {
+ psize_min = util_get_min_point_size(cso);
+ psize_max = 8192;
+ } else {
+ /* Force the point size to be as if the vertex output was disabled. */
+ psize_min = cso->point_size;
+ psize_max = cso->point_size;
+ }
+
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
- TODO cso->point_size
- TODO psize_min/psize_max
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
- so->gras_su_point_minmax = 0xffc00010; /* ??? */
- so->gras_su_point_size = 0x00000008; /* ??? */
+ so->gras_su_point_minmax =
+ A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
+ A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
+ so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
so->gras_su_poly_offset_scale =
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
so->gras_su_poly_offset_offset =
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index d15cf379190..2081775083a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -30,6 +30,7 @@
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
+#include "util/u_format.h"
#include "fd3_texture.h"
#include "fd3_util.h"
@@ -99,6 +100,9 @@ fd3_sampler_state_create(struct pipe_context *pctx,
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+ if (cso->compare_mode)
+ so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 =
A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
@@ -158,6 +162,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
+
+ if (util_format_is_srgb(cso->format))
+ so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
+
so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(prsc->width0) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
index baafc7831dc..682b47d0836 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
@@ -235,6 +235,10 @@ fd3_pipe2tex(enum pipe_format format)
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ case PIPE_FORMAT_R8G8B8X8_SRGB:
return TFMT_NORM_UINT_8_8_8_8;
case PIPE_FORMAT_Z24X8_UNORM:
@@ -275,6 +279,12 @@ fd3_pipe2fetchsize(enum pipe_format format)
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ case PIPE_FORMAT_R8G8B8X8_SRGB:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return TFETCH_4_BYTE;
@@ -379,14 +389,14 @@ fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
{
const struct util_format_description *desc =
util_format_description(format);
- uint8_t swiz[] = {
+ unsigned char swiz[4] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
- PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
- PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
- };
-
- return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) |
- A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) |
- A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) |
- A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]]));
+ }, rswiz[4];
+
+ util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
+
+ return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
+ A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
+ A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
+ A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 4d99be17985..3610543e7ef 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -116,6 +116,39 @@ enum adreno_rb_depth_format {
DEPTHX_24_8 = 1,
};
+enum adreno_rb_copy_control_mode {
+ RB_COPY_RESOLVE = 1,
+ RB_COPY_CLEAR = 2,
+ RB_COPY_DEPTH_STENCIL = 5,
+};
+
+enum a3xx_render_mode {
+ RB_RENDERING_PASS = 0,
+ RB_TILING_PASS = 1,
+ RB_RESOLVE_PASS = 2,
+ RB_COMPUTE_PASS = 3,
+};
+
+enum a3xx_msaa_samples {
+ MSAA_ONE = 0,
+ MSAA_TWO = 1,
+ MSAA_FOUR = 2,
+};
+
+enum a3xx_threadmode {
+ MULTI = 0,
+ SINGLE = 1,
+};
+
+enum a3xx_instrbuffermode {
+ BUFFER = 1,
+};
+
+enum a3xx_threadsize {
+ TWO_QUADS = 0,
+ FOUR_QUADS = 1,
+};
+
#define REG_AXXX_CP_RB_BASE 0x000001c0
#define REG_AXXX_CP_RB_CNTL 0x000001c1
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 68a289398aa..52b454b32b5 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -164,6 +164,11 @@ enum adreno_pm4_type3_packets {
CP_SET_BIN = 76,
CP_TEST_TWO_MEMS = 113,
CP_WAIT_FOR_ME = 19,
+ CP_SET_DRAW_STATE = 67,
+ CP_DRAW_INDX_OFFSET = 56,
+ CP_DRAW_INDIRECT = 40,
+ CP_DRAW_INDX_INDIRECT = 41,
+ CP_DRAW_AUTO = 36,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
@@ -351,6 +356,93 @@ static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val)
return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK;
}
+#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000
+#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f
+#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0
+static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK;
+}
+#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0
+#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6
+static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK;
+}
+#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000700
+#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8
+static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK;
+}
+#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000800
+#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 11
+static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum pc_di_index_size val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK;
+}
+#define CP_DRAW_INDX_OFFSET_0_NOT_EOP 0x00001000
+#define CP_DRAW_INDX_OFFSET_0_SMALL_INDEX 0x00002000
+#define CP_DRAW_INDX_OFFSET_0_PRE_DRAW_INITIATOR_ENABLE 0x00004000
+#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK 0xffff0000
+#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT 16
+static inline uint32_t CP_DRAW_INDX_OFFSET_0_NUM_INDICES(uint32_t val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK;
+}
+
+#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001
+
+#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
+#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff
+#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0
+static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK;
+}
+
+#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
+#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK 0xffffffff
+#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT 0
+static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_BASE(uint32_t val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK;
+}
+
+#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
+#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK 0xffffffff
+#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT 0
+static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_SIZE(uint32_t val)
+{
+ return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK;
+}
+
+#define REG_CP_SET_DRAW_STATE_0 0x00000000
+#define CP_SET_DRAW_STATE_0_COUNT__MASK 0x0000ffff
+#define CP_SET_DRAW_STATE_0_COUNT__SHIFT 0
+static inline uint32_t CP_SET_DRAW_STATE_0_COUNT(uint32_t val)
+{
+ return ((val) << CP_SET_DRAW_STATE_0_COUNT__SHIFT) & CP_SET_DRAW_STATE_0_COUNT__MASK;
+}
+#define CP_SET_DRAW_STATE_0_DIRTY 0x00010000
+#define CP_SET_DRAW_STATE_0_DISABLE 0x00020000
+#define CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS 0x00040000
+#define CP_SET_DRAW_STATE_0_LOAD_IMMED 0x00080000
+#define CP_SET_DRAW_STATE_0_GROUP_ID__MASK 0x1f000000
+#define CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT 24
+static inline uint32_t CP_SET_DRAW_STATE_0_GROUP_ID(uint32_t val)
+{
+ return ((val) << CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE_0_GROUP_ID__MASK;
+}
+
+#define REG_CP_SET_DRAW_STATE_1 0x00000001
+#define CP_SET_DRAW_STATE_1_ADDR__MASK 0xffffffff
+#define CP_SET_DRAW_STATE_1_ADDR__SHIFT 0
+static inline uint32_t CP_SET_DRAW_STATE_1_ADDR(uint32_t val)
+{
+ return ((val) << CP_SET_DRAW_STATE_1_ADDR__SHIFT) & CP_SET_DRAW_STATE_1_ADDR__MASK;
+}
+
#define REG_CP_SET_BIN_0 0x00000000
#define REG_CP_SET_BIN_1 0x00000001
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index a8fe3111c3d..496a4227099 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,6 +34,7 @@
#include "freedreno_state.h"
#include "freedreno_gmem.h"
#include "freedreno_query.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
@@ -145,6 +146,7 @@ fd_context_destroy(struct pipe_context *pctx)
DBG("");
fd_prog_fini(pctx);
+ fd_hw_query_fini(pctx);
util_slab_destroy(&ctx->transfer_pool);
@@ -221,6 +223,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
fd_query_context_init(pctx);
fd_texture_init(pctx);
fd_state_init(pctx);
+ fd_hw_query_init(pctx);
ctx->blitter = util_blitter_create(pctx);
if (!ctx->blitter)
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index a50e6236903..46984823427 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -33,6 +33,7 @@
#include "pipe/p_context.h"
#include "indices/u_primconvert.h"
#include "util/u_blitter.h"
+#include "util/u_double_list.h"
#include "util/u_slab.h"
#include "util/u_string.h"
@@ -82,16 +83,80 @@ struct fd_vertex_stateobj {
unsigned num_elements;
};
+/* Bitmask of stages in rendering during which a particular query is
+ * active. Queries will be automatically started/stopped (generating
+ * additional fd_hw_sample_period's) on entrance/exit from stages that
+ * are applicable to the query.
+ *
+ * NOTE: set the stage to NULL at end of IB to ensure no query is still
+ * active. Things aren't going to work out the way you want if a query
+ * is active across IB's (or between tile IB and draw IB)
+ */
+enum fd_render_stage {
+ FD_STAGE_NULL = 0x00,
+ FD_STAGE_DRAW = 0x01,
+ FD_STAGE_CLEAR = 0x02,
+ /* TODO before queries which include MEM2GMEM or GMEM2MEM will
+ * work we will need to call fd_hw_query_prepare() from somewhere
+ * appropriate so that queries in the tiling IB get backed with
+ * memory to write results to.
+ */
+ FD_STAGE_MEM2GMEM = 0x04,
+ FD_STAGE_GMEM2MEM = 0x08,
+ /* used for driver internal draws (ie. util_blitter_blit()): */
+ FD_STAGE_BLIT = 0x10,
+};
+
+#define MAX_HW_SAMPLE_PROVIDERS 4
+struct fd_hw_sample_provider;
+struct fd_hw_sample;
+
struct fd_context {
struct pipe_context base;
struct fd_device *dev;
struct fd_screen *screen;
+
struct blitter_context *blitter;
struct primconvert_context *primconvert;
+ /* slab for pipe_transfer allocations: */
struct util_slab_mempool transfer_pool;
+ /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
+ struct util_slab_mempool sample_pool;
+ struct util_slab_mempool sample_period_pool;
+
+ /* next sample offset.. incremented for each sample in the batch/
+ * submit, reset to zero on next submit.
+ */
+ uint32_t next_sample_offset;
+
+ /* sample-providers for hw queries: */
+ const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
+
+ /* cached samples (in case multiple queries need to reference
+ * the same sample snapshot)
+ */
+ struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+
+ /* tracking for current stage, to know when to start/stop
+ * any active queries:
+ */
+ enum fd_render_stage stage;
+
+ /* list of active queries: */
+ struct list_head active_queries;
+
+ /* list of queries that are not active, but were active in the
+ * current submit:
+ */
+ struct list_head current_queries;
+
+ /* current query result bo and tile stride: */
+ struct fd_bo *query_bo;
+ uint32_t query_tile_stride;
+
/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
* DI_PT_x value to use for draw initiator. There are some
* slight differences between generation:
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 11bb8d8333d..e3c8cc8e5a0 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -36,6 +36,7 @@
#include "freedreno_context.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
@@ -70,7 +71,7 @@ fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
idx_bo = fd_resource(idx->buffer)->bo;
idx_type = size2indextype(idx->index_size);
idx_size = idx->index_size * info->count;
- idx_offset = idx->offset;
+ idx_offset = idx->offset + (info->start * idx->index_size);
src_sel = DI_SRC_SEL_DMA;
} else {
idx_bo = NULL;
@@ -156,6 +157,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* and any buffers used, need to be resolved: */
ctx->resolve |= buffers;
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
ctx->draw(ctx, info);
}
@@ -188,6 +190,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR);
+
ctx->clear(ctx, buffers, color, depth, stencil);
ctx->dirty |= FD_DIRTY_ZSA |
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 2d4de442452..861ebf5675e 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -35,6 +35,7 @@
#include "freedreno_gmem.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
/*
@@ -273,17 +274,24 @@ render_tiles(struct fd_context *ctx)
ctx->emit_tile_prep(ctx, tile);
- if (ctx->restore)
+ if (ctx->restore) {
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
ctx->emit_tile_mem2gmem(ctx, tile);
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+ }
ctx->emit_tile_renderprep(ctx, tile);
+ fd_hw_query_prepare_tile(ctx, i, ctx->ring);
+
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
/* emit gmem2mem to transfer tile back to system memory: */
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
ctx->emit_tile_gmem2mem(ctx, tile);
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
}
}
@@ -292,6 +300,8 @@ render_sysmem(struct fd_context *ctx)
{
ctx->emit_sysmem_prep(ctx);
+ fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
+
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
@@ -314,6 +324,11 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
}
}
+ /* close out the draw cmds by making sure any active queries are
+ * paused:
+ */
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+
/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
fd_ringmarker_mark(ctx->draw_end);
fd_ringmarker_mark(ctx->binning_end);
@@ -326,6 +341,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
DBG("rendering sysmem (%s/%s)",
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ fd_hw_query_prepare(ctx, 1);
render_sysmem(ctx);
ctx->stats.batch_sysmem++;
} else {
@@ -334,6 +350,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
render_tiles(ctx);
ctx->stats.batch_gmem++;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c
index 3913896bf56..8753a4b02c9 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.c
+++ b/src/gallium/drivers/freedreno/freedreno_query.c
@@ -1,7 +1,7 @@
/* -*- mode: C; c-file-style: "k&r"; ttxab-width 4; indent-tabs-mode: t; -*- */
/*
- * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,63 +27,27 @@
*/
#include "pipe/p_state.h"
-#include "util/u_string.h"
#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "os/os_time.h"
#include "freedreno_query.h"
+#include "freedreno_query_sw.h"
+#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
-#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
-#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
-#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
-#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
-
-/* Currently just simple cpu query's supported.. probably need
- * to refactor this a bit when I'm eventually ready to add gpu
- * queries:
+/*
+ * Pipe Query interface:
*/
-struct fd_query {
- int type;
- /* storage for the collected data */
- union pipe_query_result data;
- bool active;
- uint64_t begin_value, end_value;
- uint64_t begin_time, end_time;
-};
-
-static inline struct fd_query *
-fd_query(struct pipe_query *pq)
-{
- return (struct fd_query *)pq;
-}
static struct pipe_query *
fd_create_query(struct pipe_context *pctx, unsigned query_type)
{
+ struct fd_context *ctx = fd_context(pctx);
struct fd_query *q;
- switch (query_type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case FD_QUERY_DRAW_CALLS:
- case FD_QUERY_BATCH_TOTAL:
- case FD_QUERY_BATCH_SYSMEM:
- case FD_QUERY_BATCH_GMEM:
- case FD_QUERY_BATCH_RESTORE:
- break;
- default:
- return NULL;
- }
-
- q = CALLOC_STRUCT(fd_query);
+ q = fd_sw_create_query(ctx, query_type);
if (!q)
- return NULL;
-
- q->type = query_type;
+ q = fd_hw_create_query(ctx, query_type);
return (struct pipe_query *) q;
}
@@ -92,64 +56,21 @@ static void
fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
- free(q);
-}
-
-static uint64_t
-read_counter(struct pipe_context *pctx, int type)
-{
- struct fd_context *ctx = fd_context(pctx);
- switch (type) {
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- /* for now same thing as _PRIMITIVES_EMITTED */
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- return ctx->stats.prims_emitted;
- case FD_QUERY_DRAW_CALLS:
- return ctx->stats.draw_calls;
- case FD_QUERY_BATCH_TOTAL:
- return ctx->stats.batch_total;
- case FD_QUERY_BATCH_SYSMEM:
- return ctx->stats.batch_sysmem;
- case FD_QUERY_BATCH_GMEM:
- return ctx->stats.batch_gmem;
- case FD_QUERY_BATCH_RESTORE:
- return ctx->stats.batch_restore;
- }
- return 0;
-}
-
-static bool
-is_rate_query(struct fd_query *q)
-{
- switch (q->type) {
- case FD_QUERY_BATCH_TOTAL:
- case FD_QUERY_BATCH_SYSMEM:
- case FD_QUERY_BATCH_GMEM:
- case FD_QUERY_BATCH_RESTORE:
- return true;
- default:
- return false;
- }
+ q->funcs->destroy_query(fd_context(pctx), q);
}
static void
fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
- q->active = true;
- q->begin_value = read_counter(pctx, q->type);
- if (is_rate_query(q))
- q->begin_time = os_time_get();
+ q->funcs->begin_query(fd_context(pctx), q);
}
static void
fd_end_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
- q->active = false;
- q->end_value = read_counter(pctx, q->type);
- if (is_rate_query(q))
- q->end_time = os_time_get();
+ q->funcs->end_query(fd_context(pctx), q);
}
static boolean
@@ -157,21 +78,7 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
boolean wait, union pipe_query_result *result)
{
struct fd_query *q = fd_query(pq);
-
- if (q->active)
- return false;
-
- util_query_clear_result(result, q->type);
-
- result->u64 = q->end_value - q->begin_value;
-
- if (is_rate_query(q)) {
- double fps = (result->u64 * 1000000) /
- (double)(q->end_time - q->begin_time);
- result->u64 = (uint64_t)fps;
- }
-
- return true;
+ return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
static int
diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h
index 8bcbba2fdc9..bc9a7a20559 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.h
+++ b/src/gallium/drivers/freedreno/freedreno_query.h
@@ -1,7 +1,7 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
- * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,6 +31,37 @@
#include "pipe/p_context.h"
+struct fd_context;
+struct fd_query;
+
+struct fd_query_funcs {
+ void (*destroy_query)(struct fd_context *ctx,
+ struct fd_query *q);
+ void (*begin_query)(struct fd_context *ctx, struct fd_query *q);
+ void (*end_query)(struct fd_context *ctx, struct fd_query *q);
+ boolean (*get_query_result)(struct fd_context *ctx,
+ struct fd_query *q, boolean wait,
+ union pipe_query_result *result);
+};
+
+struct fd_query {
+ const struct fd_query_funcs *funcs;
+ bool active;
+ int type;
+};
+
+static inline struct fd_query *
+fd_query(struct pipe_query *pq)
+{
+ return (struct fd_query *)pq;
+}
+
+#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
+#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
+#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
+#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
+
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
new file mode 100644
index 00000000000..38bd3dedad4
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -0,0 +1,465 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_query_hw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+struct fd_hw_sample_period {
+ struct fd_hw_sample *start, *end;
+ struct list_head list;
+};
+
+/* maps query_type to sample provider idx: */
+static int pidx(unsigned query_type)
+{
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ return 0;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ return 1;
+ default:
+ return -1;
+ }
+}
+
+static struct fd_hw_sample *
+get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ unsigned query_type)
+{
+ struct fd_hw_sample *samp = NULL;
+ int idx = pidx(query_type);
+
+ if (!ctx->sample_cache[idx]) {
+ ctx->sample_cache[idx] =
+ ctx->sample_providers[idx]->get_sample(ctx, ring);
+ }
+
+ fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
+
+ return samp;
+}
+
+static void
+clear_sample_cache(struct fd_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
+ fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
+}
+
+static bool
+is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
+{
+ return !!(hq->provider->active & stage);
+}
+
+
+static void
+resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
+ struct fd_ringbuffer *ring)
+{
+ assert(!hq->period);
+ hq->period = util_slab_alloc(&ctx->sample_period_pool);
+ list_inithead(&hq->period->list);
+ hq->period->start = get_sample(ctx, ring, hq->base.type);
+ /* NOTE: util_slab_alloc() does not zero out the buffer: */
+ hq->period->end = NULL;
+}
+
+static void
+pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
+ struct fd_ringbuffer *ring)
+{
+ assert(hq->period && !hq->period->end);
+ hq->period->end = get_sample(ctx, ring, hq->base.type);
+ list_addtail(&hq->period->list, &hq->current_periods);
+ hq->period = NULL;
+}
+
+static void
+destroy_periods(struct fd_context *ctx, struct list_head *list)
+{
+ struct fd_hw_sample_period *period, *s;
+ LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
+ fd_hw_sample_reference(ctx, &period->start, NULL);
+ fd_hw_sample_reference(ctx, &period->end, NULL);
+ list_del(&period->list);
+ util_slab_free(&ctx->sample_period_pool, period);
+ }
+}
+
+static void
+fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_hw_query *hq = fd_hw_query(q);
+
+ destroy_periods(ctx, &hq->periods);
+ destroy_periods(ctx, &hq->current_periods);
+ list_del(&hq->list);
+
+ free(hq);
+}
+
+static void
+fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_hw_query *hq = fd_hw_query(q);
+ if (q->active)
+ return;
+
+ /* begin_query() should clear previous results: */
+ destroy_periods(ctx, &hq->periods);
+
+ if (is_active(hq, ctx->stage))
+ resume_query(ctx, hq, ctx->ring);
+
+ q->active = true;
+
+ /* add to active list: */
+ list_del(&hq->list);
+ list_addtail(&hq->list, &ctx->active_queries);
+}
+
+static void
+fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_hw_query *hq = fd_hw_query(q);
+ if (!q->active)
+ return;
+ if (is_active(hq, ctx->stage))
+ pause_query(ctx, hq, ctx->ring);
+ q->active = false;
+ /* move to current list: */
+ list_del(&hq->list);
+ list_addtail(&hq->list, &ctx->current_queries);
+}
+
+/* helper to get ptr to specified sample: */
+static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
+{
+ return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
+}
+
+/* Read back the result of a hw query by summing all of its completed
+ * sample periods, across every tile of each period.
+ *
+ * Returns false if the query is still active, or if !wait and the
+ * most recent sample is not ready yet. Returns true otherwise, with
+ * the accumulated value in *result (left cleared if the query has no
+ * sample periods).
+ */
+static boolean
+fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct fd_hw_query *hq = fd_hw_query(q);
+ const struct fd_hw_sample_provider *p = hq->provider;
+ struct fd_hw_sample_period *period;
+
+ if (q->active)
+ return false;
+
+ /* if the app tries to read back the query result before the
+ * batch is submitted, that forces us to flush so that there
+ * are actually results to wait for:
+ */
+ if (!LIST_IS_EMPTY(&hq->list)) {
+ DBG("reading query result forces flush!");
+ ctx->needs_flush = true;
+ fd_context_render(&ctx->base);
+ }
+
+ util_query_clear_result(result, q->type);
+
+ /* nothing was sampled, so the cleared result is the answer: */
+ if (LIST_IS_EMPTY(&hq->periods))
+ return true;
+
+ assert(LIST_IS_EMPTY(&hq->list));
+ assert(LIST_IS_EMPTY(&hq->current_periods));
+ assert(!hq->period);
+
+ /* if !wait, then check the last sample (the one most likely to
+ * not be ready yet) and bail if it is not ready:
+ */
+ if (!wait) {
+ int ret;
+
+ period = LIST_ENTRY(struct fd_hw_sample_period,
+ hq->periods.prev, list);
+
+ ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
+ DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
+ if (ret)
+ return false;
+
+ fd_bo_cpu_fini(period->end->bo);
+ }
+
+ /* sum the result across all sample periods: */
+ LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
+ struct fd_hw_sample *start = period->start;
+ struct fd_hw_sample *end = period->end;
+ unsigned i;
+ void *ptr;
+
+ /* start and end samples should be from same batch: */
+ assert(start->bo == end->bo);
+ assert(start->num_tiles == end->num_tiles);
+
+ /* the buffer is the same for every tile of the period, so
+ * prepare and map it once rather than once per tile:
+ */
+ fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
+ DRM_FREEDRENO_PREP_READ);
+
+ ptr = fd_bo_map(start->bo);
+
+ for (i = 0; i < start->num_tiles; i++) {
+ p->accumulate_result(ctx, sampptr(start, i, ptr),
+ sampptr(end, i, ptr), result);
+ }
+
+ fd_bo_cpu_fini(start->bo);
+ }
+
+ return true;
+}
+
+static const struct fd_query_funcs hw_query_funcs = {
+ .destroy_query = fd_hw_destroy_query,
+ .begin_query = fd_hw_begin_query,
+ .end_query = fd_hw_end_query,
+ .get_query_result = fd_hw_get_query_result,
+};
+
+struct fd_query *
+fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
+{
+ struct fd_hw_query *hq;
+ struct fd_query *q;
+ int idx = pidx(query_type);
+
+ if ((idx < 0) || !ctx->sample_providers[idx])
+ return NULL;
+
+ hq = CALLOC_STRUCT(fd_hw_query);
+ if (!hq)
+ return NULL;
+
+ hq->provider = ctx->sample_providers[idx];
+
+ list_inithead(&hq->periods);
+ list_inithead(&hq->current_periods);
+ list_inithead(&hq->list);
+
+ q = &hq->base;
+ q->funcs = &hw_query_funcs;
+ q->type = query_type;
+
+ return q;
+}
+
+struct fd_hw_sample *
+fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
+{
+ struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
+ pipe_reference_init(&samp->reference, 1);
+ samp->size = size;
+ samp->offset = ctx->next_sample_offset;
+ /* NOTE: util_slab_alloc() does not zero out the buffer: */
+ samp->bo = NULL;
+ samp->num_tiles = 0;
+ samp->tile_stride = 0;
+ ctx->next_sample_offset += size;
+ return samp;
+}
+
+void
+__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
+{
+ if (samp->bo)
+ fd_bo_del(samp->bo);
+ util_slab_free(&ctx->sample_pool, samp);
+}
+
+/* Attach the batch's backing buffer and per-tile layout to a sample.
+ *
+ * A sample can be shared by several sample periods, so it may be
+ * visited more than once; later visits only check that the values
+ * match what was already recorded.
+ */
+static void
+prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
+ uint32_t num_tiles, uint32_t tile_stride)
+{
+ if (samp->bo) {
+ assert(samp->bo == bo);
+ assert(samp->num_tiles == num_tiles);
+ assert(samp->tile_stride == tile_stride);
+ return;
+ }
+ /* __fd_hw_sample_destroy() does fd_bo_del(samp->bo), and the
+ * context separately deletes ctx->query_bo, so the sample must
+ * hold its own reference on the shared buffer:
+ */
+ samp->bo = fd_bo_ref(bo);
+ samp->num_tiles = num_tiles;
+ samp->tile_stride = tile_stride;
+}
+
+static void
+prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
+ uint32_t num_tiles, uint32_t tile_stride)
+{
+ struct fd_hw_sample_period *period, *s;
+
+ /* prepare all the samples in the query: */
+ LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
+ prepare_sample(period->start, bo, num_tiles, tile_stride);
+ prepare_sample(period->end, bo, num_tiles, tile_stride);
+
+ /* move from current_periods list to periods list: */
+ list_del(&period->list);
+ list_addtail(&period->list, &hq->periods);
+ }
+}
+
+static void
+prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
+ uint32_t num_tiles, uint32_t tile_stride,
+ struct list_head *list, bool remove)
+{
+ struct fd_hw_query *hq, *s;
+ LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
+ prepare_query(hq, bo, num_tiles, tile_stride);
+ if (remove)
+ list_delinit(&hq->list);
+ }
+}
+
+/* called from gmem code once total storage requirements are known (ie.
+ * number of samples times number of tiles)
+ *
+ * Allocates one buffer of tile_stride * num_tiles bytes and passes it,
+ * together with num_tiles and tile_stride, to every query on
+ * ctx->active_queries and ctx->current_queries.
+ */
+void
+fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
+{
+ /* fd_hw_sample_init() advances next_sample_offset by the size of
+ * each sample it creates, so the value reached here is the space
+ * needed per tile:
+ */
+ uint32_t tile_stride = ctx->next_sample_offset;
+ struct fd_bo *bo;
+
+ /* release the buffer recorded by the previous call, if any: */
+ if (ctx->query_bo)
+ fd_bo_del(ctx->query_bo);
+
+ if (tile_stride > 0) {
+ bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+ DRM_FREEDRENO_GEM_TYPE_KMEM);
+ } else {
+ /* no sample space was requested since the last reset: */
+ bo = NULL;
+ }
+
+ ctx->query_bo = bo;
+ ctx->query_tile_stride = tile_stride;
+
+ /* queries on ctx->active_queries stay linked (remove == false);
+ * queries on ctx->current_queries are unlinked (remove == true):
+ */
+ prepare_queries(ctx, bo, num_tiles, tile_stride,
+ &ctx->active_queries, false);
+ prepare_queries(ctx, bo, num_tiles, tile_stride,
+ &ctx->current_queries, true);
+
+ /* reset things for next batch: */
+ ctx->next_sample_offset = 0;
+}
+
+void
+fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+ struct fd_ringbuffer *ring)
+{
+ uint32_t tile_stride = ctx->query_tile_stride;
+ uint32_t offset = tile_stride * n;
+
+ /* bail if no queries: */
+ if (tile_stride == 0)
+ return;
+
+ fd_wfi(ctx, ring);
+ OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
+ OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
+}
+
+void
+fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ enum fd_render_stage stage)
+{
+ /* special case: internal blits (like mipmap level generation)
+ * go through normal draw path (via util_blitter_blit()).. but
+ * we need to ignore the FD_STAGE_DRAW which will be set, so we
+ * don't enable queries which should be paused during internal
+ * blits:
+ */
+ if ((ctx->stage == FD_STAGE_BLIT) &&
+ (stage != FD_STAGE_NULL))
+ return;
+
+ if (stage != ctx->stage) {
+ struct fd_hw_query *hq;
+ LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
+ bool was_active = is_active(hq, ctx->stage);
+ bool now_active = is_active(hq, stage);
+
+ if (now_active && !was_active)
+ resume_query(ctx, hq, ring);
+ else if (was_active && !now_active)
+ pause_query(ctx, hq, ring);
+ }
+ }
+ clear_sample_cache(ctx);
+ ctx->stage = stage;
+}
+
+void
+fd_hw_query_register_provider(struct pipe_context *pctx,
+ const struct fd_hw_sample_provider *provider)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ int idx = pidx(provider->query_type);
+
+ assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
+ assert(!ctx->sample_providers[idx]);
+
+ ctx->sample_providers[idx] = provider;
+}
+
+void
+fd_hw_query_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
+ 16, UTIL_SLAB_SINGLETHREADED);
+ util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
+ 16, UTIL_SLAB_SINGLETHREADED);
+ list_inithead(&ctx->active_queries);
+ list_inithead(&ctx->current_queries);
+}
+
+void
+fd_hw_query_fini(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+
+ util_slab_destroy(&ctx->sample_pool);
+ util_slab_destroy(&ctx->sample_period_pool);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
new file mode 100644
index 00000000000..62baa3ac5b5
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -0,0 +1,164 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_QUERY_HW_H_
+#define FREEDRENO_QUERY_HW_H_
+
+#include "util/u_double_list.h"
+
+#include "freedreno_query.h"
+#include "freedreno_context.h"
+
+
+/*
+ * HW Queries:
+ *
+ * See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries
+ *
+ * Hardware queries will be specific to gpu generation, but they need
+ * some common infrastructure for triggering start/stop samples at
+ * various points (for example, to exclude mem2gmem/gmem2mem or clear)
+ * as well as per tile tracking.
+ *
+ * NOTE: in at least some cases hw writes sample values to memory addr
+ * specified in some register. So we don't really have the option to
+ * just sample the same counter multiple times for multiple different
+ * queries with the same query_type. So we cache per sample provider
+ * the most recent sample since the last draw. This way multiple
+ * sample periods for multiple queries can reference the same sample.
+ *
+ * fd_hw_sample_provider:
+ * - one per query type, registered/implemented by gpu generation
+ * specific code
+ * - can construct fd_hw_samples on demand
+ * - most recent sample (since last draw) cached so multiple
+ * different queries can ref the same sample
+ *
+ * fd_hw_sample:
+ * - abstracts one snapshot of counter value(s) across N tiles
+ * - backing object not allocated until submit time when number
+ * of samples and number of tiles is known
+ *
+ * fd_hw_sample_period:
+ * - consists of start and stop sample
+ * - a query accumulates a list of sample periods
+ * - the query result is the sum of the sample periods
+ */
+
+struct fd_hw_sample_provider {
+ unsigned query_type;
+
+ /* stages applicable to the query type: */
+ enum fd_render_stage active;
+
+ /* when a new sample is required, emit appropriate cmdstream
+ * and return a sample object:
+ */
+ struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
+ struct fd_ringbuffer *ring);
+
+ /* accumulate the results from specified sample period: */
+ void (*accumulate_result)(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result);
+};
+
+struct fd_hw_sample {
+ struct pipe_reference reference; /* keep this first */
+
+ /* offset and size of the sample are known at the time the
+ * sample is constructed.
+ */
+ uint32_t size;
+ uint32_t offset;
+
+ /* backing object, offset/stride/etc are determined not when
+ * the sample is constructed, but when the batch is submitted.
+ * This way we can defer allocation until total # of requested
+ * samples, and total # of tiles, is known.
+ */
+ struct fd_bo *bo;
+ uint32_t num_tiles;
+ uint32_t tile_stride;
+};
+
+struct fd_hw_sample_period;
+
+/* A hardware query: the generic fd_query plus the sample periods it
+ * has accumulated.
+ */
+struct fd_hw_query {
+ struct fd_query base;
+
+ /* sample provider for this query's type, set at creation: */
+ const struct fd_hw_sample_provider *provider;
+
+ /* list of fd_hw_sample_period in previous submits: */
+ struct list_head periods;
+
+ /* list of fd_hw_sample_period's in current submit: */
+ struct list_head current_periods;
+
+ /* if active and not paused, the current sample period (not
+ * yet added to current_periods):
+ */
+ struct fd_hw_sample_period *period;
+
+ /* list-node in ctx->active_queries while the query is active, or
+ * in ctx->current_queries after it has been ended:
+ */
+ struct list_head list;
+};
+
+static inline struct fd_hw_query *
+fd_hw_query(struct fd_query *q)
+{
+ return (struct fd_hw_query *)q;
+}
+
+struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
+/* helper for sample providers: */
+struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
+/* don't call directly, use fd_hw_sample_reference() */
+void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
+void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
+void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+ struct fd_ringbuffer *ring);
+void fd_hw_query_set_stage(struct fd_context *ctx,
+ struct fd_ringbuffer *ring, enum fd_render_stage stage);
+void fd_hw_query_register_provider(struct pipe_context *pctx,
+ const struct fd_hw_sample_provider *provider);
+void fd_hw_query_init(struct pipe_context *pctx);
+void fd_hw_query_fini(struct pipe_context *pctx);
+
+/* Make *ptr refer to samp, handing both reference counts to
+ * pipe_reference() and destroying the previously referenced sample
+ * when pipe_reference() says so.
+ *
+ * ptr itself must not be NULL (it is dereferenced unconditionally).
+ * Callers in this driver pass NULL for samp to drop a reference, and
+ * a NULL *ptr to take the first one.
+ */
+static inline void
+fd_hw_sample_reference(struct fd_context *ctx,
+ struct fd_hw_sample **ptr, struct fd_hw_sample *samp)
+{
+ struct fd_hw_sample *old_samp = *ptr;
+
+ if (pipe_reference(&old_samp->reference, &samp->reference))
+ __fd_hw_sample_destroy(ctx, old_samp);
+ *ptr = samp;
+}
+
+#endif /* FREEDRENO_QUERY_HW_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.c b/src/gallium/drivers/freedreno/freedreno_query_sw.c
new file mode 100644
index 00000000000..8d81698f31d
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_sw.c
@@ -0,0 +1,165 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "os/os_time.h"
+
+#include "freedreno_query_sw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/*
+ * SW Queries:
+ *
+ * In the core, we have some support for basic sw counters
+ */
+
+static void
+fd_sw_destroy_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_sw_query *sq = fd_sw_query(q);
+ free(sq);
+}
+
+static uint64_t
+read_counter(struct fd_context *ctx, int type)
+{
+ switch (type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ /* for now same thing as _PRIMITIVES_EMITTED */
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ return ctx->stats.prims_emitted;
+ case FD_QUERY_DRAW_CALLS:
+ return ctx->stats.draw_calls;
+ case FD_QUERY_BATCH_TOTAL:
+ return ctx->stats.batch_total;
+ case FD_QUERY_BATCH_SYSMEM:
+ return ctx->stats.batch_sysmem;
+ case FD_QUERY_BATCH_GMEM:
+ return ctx->stats.batch_gmem;
+ case FD_QUERY_BATCH_RESTORE:
+ return ctx->stats.batch_restore;
+ }
+ return 0;
+}
+
+static bool
+is_rate_query(struct fd_query *q)
+{
+ switch (q->type) {
+ case FD_QUERY_BATCH_TOTAL:
+ case FD_QUERY_BATCH_SYSMEM:
+ case FD_QUERY_BATCH_GMEM:
+ case FD_QUERY_BATCH_RESTORE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void
+fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_sw_query *sq = fd_sw_query(q);
+ q->active = true;
+ sq->begin_value = read_counter(ctx, q->type);
+ if (is_rate_query(q))
+ sq->begin_time = os_time_get();
+}
+
+static void
+fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
+{
+ struct fd_sw_query *sq = fd_sw_query(q);
+ q->active = false;
+ sq->end_value = read_counter(ctx, q->type);
+ if (is_rate_query(q))
+ sq->end_time = os_time_get();
+}
+
+/* Report the result of a sw counter query.
+ *
+ * Returns false while the query is still active. Otherwise stores
+ * the counter delta between begin and end in result->u64; for rate
+ * queries the delta is scaled by 1000000 and divided by the elapsed
+ * time between begin and end.
+ */
+static boolean
+fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct fd_sw_query *sq = fd_sw_query(q);
+
+ if (q->active)
+ return false;
+
+ util_query_clear_result(result, q->type);
+
+ result->u64 = sq->end_value - sq->begin_value;
+
+ if (is_rate_query(q)) {
+ uint64_t elapsed = sq->end_time - sq->begin_time;
+
+ if (elapsed == 0) {
+ /* begin and end carry the same timestamp: no rate can be
+ * computed, and converting the inf/NaN from a division by
+ * zero to an integer would be undefined, so report zero:
+ */
+ result->u64 = 0;
+ } else {
+ double fps = (result->u64 * 1000000) / (double)elapsed;
+ result->u64 = (uint64_t)fps;
+ }
+ }
+
+ return true;
+}
+
+static const struct fd_query_funcs sw_query_funcs = {
+ .destroy_query = fd_sw_destroy_query,
+ .begin_query = fd_sw_begin_query,
+ .end_query = fd_sw_end_query,
+ .get_query_result = fd_sw_get_query_result,
+};
+
+struct fd_query *
+fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
+{
+ struct fd_sw_query *sq;
+ struct fd_query *q;
+
+ switch (query_type) {
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case FD_QUERY_DRAW_CALLS:
+ case FD_QUERY_BATCH_TOTAL:
+ case FD_QUERY_BATCH_SYSMEM:
+ case FD_QUERY_BATCH_GMEM:
+ case FD_QUERY_BATCH_RESTORE:
+ break;
+ default:
+ return NULL;
+ }
+
+ sq = CALLOC_STRUCT(fd_sw_query);
+ if (!sq)
+ return NULL;
+
+ q = &sq->base;
+ q->funcs = &sw_query_funcs;
+ q->type = query_type;
+
+ return q;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.h b/src/gallium/drivers/freedreno/freedreno_query_sw.h
new file mode 100644
index 00000000000..3446474d0bd
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_query_sw.h
@@ -0,0 +1,55 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_QUERY_SW_H_
+#define FREEDRENO_QUERY_SW_H_
+
+#include "freedreno_query.h"
+
+/*
+ * SW Queries:
+ *
+ * In the core, we have some support for basic sw counters
+ */
+
+struct fd_sw_query {
+ struct fd_query base;
+ uint64_t begin_value, end_value;
+ uint64_t begin_time, end_time;
+};
+
+static inline struct fd_sw_query *
+fd_sw_query(struct fd_query *q)
+{
+ return (struct fd_sw_query *)q;
+}
+
+struct fd_query * fd_sw_create_query(struct fd_context *ctx,
+ unsigned query_type);
+
+#endif /* FREEDRENO_QUERY_SW_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index bd8c6cb9907..289f3653e12 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -36,6 +36,7 @@
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
#include <errno.h>
@@ -401,7 +402,9 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
util_blitter_save_fragment_sampler_views(ctx->blitter,
ctx->fragtex.num_textures, ctx->fragtex.textures);
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
util_blitter_blit(ctx->blitter, info);
+ fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
return true;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index d62d4b61b6f..1c106894a61 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -143,6 +143,8 @@ tables for things that differ if the delta is not too much..
static int
fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
+ struct fd_screen *screen = fd_screen(pscreen);
+
/* this is probably not totally correct.. but it's a start: */
switch (param) {
/* Supported features (boolean caps). */
@@ -161,8 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_SM3:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
@@ -180,6 +180,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_PRIMITIVE_RESTART:
return 0;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -229,17 +231,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return MAX_MIP_LEVELS;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return 9192;
+ return 0; /* TODO: a3xx+ should support (required in gles3) */
/* Render targets. */
case PIPE_CAP_MAX_RENDER_TARGETS:
return 1;
- /* Timer queries. */
+ /* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
- case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_QUERY_TIMESTAMP:
return 0;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return (screen->gpu_id >= 300) ? 1: 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -252,7 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
default:
@@ -315,7 +318,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return 8; /* XXX */
case PIPE_SHADER_CAP_MAX_INPUTS:
- return 32;
+ return 16;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 64; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_ADDRS:
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 356f4165792..b57702c54c8 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -223,11 +223,18 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
emit_marker(ring, 6);
}
+/* CP_SCRATCH_REG4 is used to hold base address for query results: */
+#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
+
static inline void
emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
{
extern unsigned marker_cnt;
- OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG0 + scratch_idx, 1);
+ unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; /* scratch register this marker would be written to */
+ assert(reg != HW_QUERY_BASE_REG); /* a marker must never overwrite the scratch reg that holds the query-result base address */
+ if (reg == HW_QUERY_BASE_REG) /* same guard at runtime, for builds where assert() is compiled out */
+ return; /* drop the marker without emitting a packet or bumping marker_cnt */
+ OUT_PKT0(ring, reg, 1);
OUT_RING(ring, ++marker_cnt);
}