diff options
author | Rob Clark <robdclark@chromium.org> | 2021-04-16 10:54:12 -0700 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-04-17 15:38:56 +0000 |
commit | 3894bc966424cad8dd393287137e5df028776414 (patch) | |
tree | ca9e2793296627d26aad25dc9d323723a4bbbe21 | |
parent | ccd68b672aa91104875c4c7eba16ef83efcf7b3a (diff) |
freedreno/computerator: Re-indent
clang-format -fallback-style=none --style=file -i src/freedreno/computerator/*.[ch]
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10293>
-rw-r--r-- | src/freedreno/computerator/a6xx.c | 743 | ||||
-rw-r--r-- | src/freedreno/computerator/ir3_asm.c | 42 | ||||
-rw-r--r-- | src/freedreno/computerator/ir3_asm.h | 12 | ||||
-rw-r--r-- | src/freedreno/computerator/main.c | 438 | ||||
-rw-r--r-- | src/freedreno/computerator/main.h | 59 |
5 files changed, 644 insertions, 650 deletions
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 5679f212111..ffd7f74917b 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -29,21 +29,21 @@ #include "adreno_common.xml.h" #include "a6xx.xml.h" -#include "main.h" #include "ir3_asm.h" +#include "main.h" struct a6xx_backend { - struct backend base; + struct backend base; - struct ir3_compiler *compiler; - struct fd_device *dev; + struct ir3_compiler *compiler; + struct fd_device *dev; - unsigned seqno; - struct fd_bo *control_mem; + unsigned seqno; + struct fd_bo *control_mem; - struct fd_bo *query_mem; - const struct perfcntr *perfcntrs; - unsigned num_perfcntrs; + struct fd_bo *query_mem; + const struct perfcntr *perfcntrs; + unsigned num_perfcntrs; }; define_cast(backend, a6xx_backend); @@ -53,41 +53,38 @@ define_cast(backend, a6xx_backend); /* This struct defines the layout of the fd6_context::control buffer: */ struct fd6_control { - uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */ - uint32_t _pad0; - volatile uint32_t vsc_overflow; - uint32_t _pad1; - /* flag set from cmdstream when VSC overflow detected: */ - uint32_t vsc_scratch; - uint32_t _pad2; - uint32_t _pad3; - uint32_t _pad4; - - /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ - struct { - uint32_t offset; - uint32_t pad[7]; - } flush_base[4]; + uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */ + uint32_t _pad0; + volatile uint32_t vsc_overflow; + uint32_t _pad1; + /* flag set from cmdstream when VSC overflow detected: */ + uint32_t vsc_scratch; + uint32_t _pad2; + uint32_t _pad3; + uint32_t _pad4; + + /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ + struct { + uint32_t offset; + uint32_t pad[7]; + } flush_base[4]; }; -#define control_ptr(a6xx_backend, member) \ - (a6xx_backend)->control_mem, offsetof(struct fd6_control, member), 0, 0 - +#define control_ptr(a6xx_backend, member) \ + (a6xx_backend)->control_mem, offsetof(struct fd6_control, member), 0, 0 struct PACKED fd6_query_sample { - uint64_t start; - uint64_t result; - uint64_t stop; + uint64_t start; + uint64_t result; + uint64_t stop; }; - /* offset of a single field of an array of fd6_query_sample: */ -#define query_sample_idx(a6xx_backend, idx, field) \ - (a6xx_backend)->query_mem, \ - (idx * sizeof(struct fd6_query_sample)) + \ - offsetof(struct fd6_query_sample, field), \ - 0, 0 - +#define query_sample_idx(a6xx_backend, idx, field) \ + (a6xx_backend)->query_mem, \ + (idx * sizeof(struct fd6_query_sample)) + \ + offsetof(struct fd6_query_sample, field), \ + 0, 0 /* * Backend implementation: @@ -96,405 +93,403 @@ struct PACKED fd6_query_sample { static struct kernel * a6xx_assemble(struct backend *b, FILE *in) { - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - struct ir3_kernel *ir3_kernel = - ir3_asm_assemble(a6xx_backend->compiler, in); - ir3_kernel->backend = b; - return &ir3_kernel->base; + struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); + struct ir3_kernel *ir3_kernel = ir3_asm_assemble(a6xx_backend->compiler, in); + ir3_kernel->backend = b; + return &ir3_kernel->base; } static void a6xx_disassemble(struct kernel *kernel, FILE *out) { - ir3_asm_disassemble(to_ir3_kernel(kernel), out); + ir3_asm_disassemble(to_ir3_kernel(kernel), out); } static void cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct ir3_shader_variant *v = ir3_kernel->v; - const struct ir3_info *i = &v->info; - enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64; - - OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1); - OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4); - - OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1); - OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS); - - OUT_PKT4(ring, REG_A6XX_SP_FLOAT_CNTL, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1); - OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_HS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_DS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_GS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_FS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_CS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_CS_IBO | - A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO); - - unsigned constlen = align(v->constlen, 4); - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(constlen) | - A6XX_HLSQ_CS_CNTL_ENABLED); - - OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); - OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(kernel->num_bufs) | - A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | - A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ - OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ - - OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1); - OUT_RING(ring, A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | - A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | - A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | - COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); - - OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); - OUT_RING(ring, 0x41); - - uint32_t local_invocation_id, work_group_id; - local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); - work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID); - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); - - OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); - OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ - - OUT_PKT4(ring, REG_A6XX_SP_CS_INSTRLEN, 1); - OUT_RING(ring, v->instrlen); - - OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); - OUT_RELOC(ring, v->bo, 0, 0, 0); - - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen)); - OUT_RELOC(ring, v->bo, 0, 0, 0); + struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); + struct ir3_shader_variant *v = ir3_kernel->v; + const struct ir3_info *i = &v->info; + enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64; + + OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1); + OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4); + + OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1); + OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS); + + OUT_PKT4(ring, REG_A6XX_SP_FLOAT_CNTL, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1); + OUT_RING( + ring, + A6XX_HLSQ_INVALIDATE_CMD_VS_STATE | A6XX_HLSQ_INVALIDATE_CMD_HS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_DS_STATE | A6XX_HLSQ_INVALIDATE_CMD_GS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_FS_STATE | A6XX_HLSQ_INVALIDATE_CMD_CS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_CS_IBO | A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO); + + unsigned constlen = align(v->constlen, 4); + OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); + OUT_RING(ring, + A6XX_HLSQ_CS_CNTL_CONSTLEN(constlen) | A6XX_HLSQ_CS_CNTL_ENABLED); + + OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); + OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | + A6XX_SP_CS_CONFIG_NIBO(kernel->num_bufs) | + A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | + A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ + OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ + + OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1); + OUT_RING(ring, + A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | + A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | + A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | + COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | + A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack)); + + OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); + OUT_RING(ring, 0x41); + + uint32_t local_invocation_id, work_group_id; + local_invocation_id = + ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); + work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID); + + OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2); + OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | + A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); + OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | + A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); + + OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); + OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ + + OUT_PKT4(ring, REG_A6XX_SP_CS_INSTRLEN, 1); + OUT_RING(ring, v->instrlen); + + OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); + OUT_RELOC(ring, v->bo, 0, 0, 0); + + OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | + CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen)); + OUT_RELOC(ring, v->bo, 0, 0, 0); } static void -emit_const(struct fd_ringbuffer *ring, uint32_t regid, - uint32_t sizedwords, const uint32_t *dwords) +emit_const(struct fd_ringbuffer *ring, uint32_t regid, uint32_t sizedwords, + const uint32_t *dwords) { - uint32_t align_sz; + uint32_t align_sz; - debug_assert((regid % 4) == 0); + debug_assert((regid % 4) == 0); - align_sz = align(sizedwords, 4); + align_sz = align(sizedwords, 4); - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3 + align_sz); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3 + align_sz); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | + CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); + OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (uint32_t i = 0; i < sizedwords; i++) { - OUT_RING(ring, dwords[i]); - } + for (uint32_t i = 0; i < sizedwords; i++) { + OUT_RING(ring, dwords[i]); + } - /* Zero-pad to multiple of 4 dwords */ - for (uint32_t i = sizedwords; i < align_sz; i++) { - OUT_RING(ring, 0); - } + /* Zero-pad to multiple of 4 dwords */ + for (uint32_t i = sizedwords; i < align_sz; i++) { + OUT_RING(ring, 0); + } } - static void -cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3]) +cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, + uint32_t grid[3]) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct ir3_shader_variant *v = ir3_kernel->v; - - const struct ir3_const_state *const_state = ir3_const_state(v); - uint32_t base = const_state->offsets.immediate; - int size = DIV_ROUND_UP(const_state->immediates_count, 4); - - if (ir3_kernel->info.numwg != INVALID_REG) { - assert((ir3_kernel->info.numwg & 0x3) == 0); - int idx = ir3_kernel->info.numwg >> 2; - const_state->immediates[idx * 4 + 0] = grid[0]; - const_state->immediates[idx * 4 + 1] = grid[1]; - const_state->immediates[idx * 4 + 2] = grid[2]; - } - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, v->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - emit_const(ring, base, size, const_state->immediates); - } + struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); + struct ir3_shader_variant *v = ir3_kernel->v; + + const struct ir3_const_state *const_state = ir3_const_state(v); + uint32_t base = const_state->offsets.immediate; + int size = DIV_ROUND_UP(const_state->immediates_count, 4); + + if (ir3_kernel->info.numwg != INVALID_REG) { + assert((ir3_kernel->info.numwg & 0x3) == 0); + int idx = ir3_kernel->info.numwg >> 2; + const_state->immediates[idx * 4 + 0] = grid[0]; + const_state->immediates[idx * 4 + 1] = grid[1]; + const_state->immediates[idx * 4 + 2] = grid[2]; + } + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, v->constlen) - base; + + /* convert out of vec4: */ + base *= 4; + size *= 4; + + if (size > 0) { + emit_const(ring, base, size, const_state->immediates); + } } static void cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit, - struct kernel *kernel) + struct kernel *kernel) { - struct fd_ringbuffer *state = - fd_submit_new_ringbuffer(submit, - kernel->num_bufs * 16 * 4, - FD_RINGBUFFER_STREAMING); - - for (unsigned i = 0; i < kernel->num_bufs; i++) { - /* size is encoded with low 15b in WIDTH and high bits in HEIGHT, - * in units of elements: - */ - unsigned sz = kernel->buf_sizes[i]; - unsigned width = sz & MASK(15); - unsigned height = sz >> 15; - - OUT_RING(state, A6XX_IBO_0_FMT(FMT6_32_UINT) | - A6XX_IBO_0_TILE_MODE(0)); - OUT_RING(state, A6XX_IBO_1_WIDTH(width) | - A6XX_IBO_1_HEIGHT(height)); - OUT_RING(state, A6XX_IBO_2_PITCH(0) | - A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31 | - A6XX_IBO_2_TYPE(A6XX_TEX_1D)); - OUT_RING(state, A6XX_IBO_3_ARRAY_PITCH(0)); - OUT_RELOC(state, kernel->bufs[i], 0, 0, 0); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - } - - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(kernel->num_bufs)); - OUT_RB(ring, state); - - OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2); - OUT_RB(ring, state); - - OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); - OUT_RING(ring, kernel->num_bufs); - - fd_ringbuffer_del(state); + struct fd_ringbuffer *state = fd_submit_new_ringbuffer( + submit, kernel->num_bufs * 16 * 4, FD_RINGBUFFER_STREAMING); + + for (unsigned i = 0; i < kernel->num_bufs; i++) { + /* size is encoded with low 15b in WIDTH and high bits in HEIGHT, + * in units of elements: + */ + unsigned sz = kernel->buf_sizes[i]; + unsigned width = sz & MASK(15); + unsigned height = sz >> 15; + + OUT_RING(state, A6XX_IBO_0_FMT(FMT6_32_UINT) | A6XX_IBO_0_TILE_MODE(0)); + OUT_RING(state, A6XX_IBO_1_WIDTH(width) | A6XX_IBO_1_HEIGHT(height)); + OUT_RING(state, A6XX_IBO_2_PITCH(0) | A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31 | + A6XX_IBO_2_TYPE(A6XX_TEX_1D)); + OUT_RING(state, A6XX_IBO_3_ARRAY_PITCH(0)); + OUT_RELOC(state, kernel->bufs[i], 0, 0, 0); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + OUT_RING(state, 0x00000000); + } + + OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); + OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | + CP_LOAD_STATE6_0_NUM_UNIT(kernel->num_bufs)); + OUT_RB(ring, state); + + OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2); + OUT_RB(ring, state); + + OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); + OUT_RING(ring, kernel->num_bufs); + + fd_ringbuffer_del(state); } static inline unsigned event_write(struct fd_ringbuffer *ring, struct kernel *kernel, - enum vgt_event_type evt, bool timestamp) + enum vgt_event_type evt, bool timestamp) { - unsigned seqno = 0; - - OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1); - OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt)); - if (timestamp) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - seqno = ++a6xx_backend->seqno; - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); /* ADDR_LO/HI */ - OUT_RING(ring, seqno); - } - - return seqno; + unsigned seqno = 0; + + OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt)); + if (timestamp) { + struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); + struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); + seqno = ++a6xx_backend->seqno; + OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); /* ADDR_LO/HI */ + OUT_RING(ring, seqno); + } + + return seqno; } static inline void cache_flush(struct fd_ringbuffer *ring, struct kernel *kernel) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - unsigned seqno; - - seqno = event_write(ring, kernel, RB_DONE_TS, true); - - OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); - OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | - CP_WAIT_REG_MEM_0_POLL_MEMORY); - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0)); - OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); - - seqno = event_write(ring, kernel, CACHE_FLUSH_TS, true); - - OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4); - OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0)); - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); - OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno)); + struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); + struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); + unsigned seqno; + + seqno = event_write(ring, kernel, RB_DONE_TS, true); + + OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); + OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | + CP_WAIT_REG_MEM_0_POLL_MEMORY); + OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); + OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno)); + OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0)); + OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); + + seqno = event_write(ring, kernel, CACHE_FLUSH_TS, true); + + OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4); + OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0)); + OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); + OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno)); } static void -a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit) +a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], + struct fd_submit *submit) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0, - FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); - - cs_program_emit(ring, kernel); - cs_const_emit(ring, kernel, grid); - cs_ibo_emit(ring, submit, kernel); - - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE)); - - const unsigned *local_size = kernel->local_size; - const unsigned *num_groups = grid; - - unsigned work_dim = 0; - for (int i = 0; i < 3; i++) { - if (!grid[i]) - break; - work_dim++; - } - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7); - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3); - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ - - if (a6xx_backend->num_perfcntrs > 0) { - a6xx_backend->query_mem = fd_bo_new(a6xx_backend->dev, - a6xx_backend->num_perfcntrs * sizeof(struct fd6_query_sample), - DRM_FREEDRENO_GEM_TYPE_KMEM, "query"); - - /* configure the performance counters to count the requested - * countables: - */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT4(ring, counter->select_reg, 1); - OUT_RING(ring, counter->selector); - } - - OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); - - /* and snapshot the start values: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, CP_REG_TO_MEM_0_64B | - CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); - } - } - - OUT_PKT7(ring, CP_EXEC_CS, 4); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0])); - OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1])); - OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2])); - - OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); - - if (a6xx_backend->num_perfcntrs > 0) { - /* snapshot the end values: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, CP_REG_TO_MEM_0_64B | - CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); - } - - /* and compute the result: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - /* result += stop - start: */ - OUT_PKT7(ring, CP_MEM_TO_MEM, 9); - OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | - CP_MEM_TO_MEM_0_NEG_C); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* dst */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* srcA */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); /* srcB */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); /* srcC */ - } - } - - cache_flush(ring, kernel); + struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); + struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( + submit, 0, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); + + cs_program_emit(ring, kernel); + cs_const_emit(ring, kernel, grid); + cs_ibo_emit(ring, submit, kernel); + + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE)); + + const unsigned *local_size = kernel->local_size; + const unsigned *num_groups = grid; + + unsigned work_dim = 0; + for (int i = 0; i < 3; i++) { + if (!grid[i]) + break; + work_dim++; + } + + OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7); + OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | + A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | + A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | + A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); + OUT_RING(ring, + A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); + OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ + OUT_RING(ring, + A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); + OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ + OUT_RING(ring, + A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); + OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ + + OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3); + OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ + OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ + OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ + + if (a6xx_backend->num_perfcntrs > 0) { + a6xx_backend->query_mem = fd_bo_new( + a6xx_backend->dev, + a6xx_backend->num_perfcntrs * sizeof(struct fd6_query_sample), + DRM_FREEDRENO_GEM_TYPE_KMEM, "query"); + + /* configure the performance counters to count the requested + * countables: + */ + for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { + const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; + + OUT_PKT4(ring, counter->select_reg, 1); + OUT_RING(ring, counter->selector); + } + + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); + + /* and snapshot the start values: */ + for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { + const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; + + OUT_PKT7(ring, CP_REG_TO_MEM, 3); + OUT_RING(ring, CP_REG_TO_MEM_0_64B | + CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); + } + } + + OUT_PKT7(ring, CP_EXEC_CS, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0])); + OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1])); + OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2])); + + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); + + if (a6xx_backend->num_perfcntrs > 0) { + /* snapshot the end values: */ + for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { + const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; + + OUT_PKT7(ring, CP_REG_TO_MEM, 3); + OUT_RING(ring, CP_REG_TO_MEM_0_64B | + CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); + } + + /* and compute the result: */ + for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { + /* result += stop - start: */ + OUT_PKT7(ring, CP_MEM_TO_MEM, 9); + OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C); + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* dst */ + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* srcA */ + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); /* srcB */ + OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); /* srcC */ + } + } + + cache_flush(ring, kernel); } static void a6xx_set_perfcntrs(struct backend *b, const struct perfcntr *perfcntrs, - unsigned num_perfcntrs) + unsigned num_perfcntrs) { - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); + struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - a6xx_backend->perfcntrs = perfcntrs; - a6xx_backend->num_perfcntrs = num_perfcntrs; + a6xx_backend->perfcntrs = perfcntrs; + a6xx_backend->num_perfcntrs = num_perfcntrs; } static void a6xx_read_perfcntrs(struct backend *b, uint64_t *results) { - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); + struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - fd_bo_cpu_prep(a6xx_backend->query_mem, NULL, DRM_FREEDRENO_PREP_READ); - struct fd6_query_sample *samples = fd_bo_map(a6xx_backend->query_mem); + fd_bo_cpu_prep(a6xx_backend->query_mem, NULL, DRM_FREEDRENO_PREP_READ); + struct fd6_query_sample *samples = fd_bo_map(a6xx_backend->query_mem); - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - results[i] = samples[i].result; - } + for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { + results[i] = samples[i].result; + } } struct backend * a6xx_init(struct fd_device *dev, uint32_t gpu_id) { - struct a6xx_backend *a6xx_backend = calloc(1, sizeof(*a6xx_backend)); + struct a6xx_backend *a6xx_backend = calloc(1, sizeof(*a6xx_backend)); - a6xx_backend->base = (struct backend) { - .assemble = a6xx_assemble, - .disassemble = a6xx_disassemble, - .emit_grid = a6xx_emit_grid, - .set_perfcntrs = a6xx_set_perfcntrs, - .read_perfcntrs = a6xx_read_perfcntrs, - }; + a6xx_backend->base = (struct backend){ + .assemble = a6xx_assemble, + .disassemble = a6xx_disassemble, + .emit_grid = a6xx_emit_grid, + .set_perfcntrs = a6xx_set_perfcntrs, + .read_perfcntrs = a6xx_read_perfcntrs, + }; - a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id, false); - a6xx_backend->dev = dev; + a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id, false); + a6xx_backend->dev = dev; - a6xx_backend->control_mem = fd_bo_new(dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM, "control"); + a6xx_backend->control_mem = + fd_bo_new(dev, 0x1000, DRM_FREEDRENO_GEM_TYPE_KMEM, "control"); - return &a6xx_backend->base; + return &a6xx_backend->base; } diff --git a/src/freedreno/computerator/ir3_asm.c b/src/freedreno/computerator/ir3_asm.c index 9c5e8d82a46..0a57c37ca05 100644 --- a/src/freedreno/computerator/ir3_asm.c +++ b/src/freedreno/computerator/ir3_asm.c @@ -29,37 +29,37 @@ struct ir3_kernel * ir3_asm_assemble(struct ir3_compiler *c, FILE *in) { - struct ir3_kernel *kernel = calloc(1, sizeof(*kernel)); - struct ir3_shader *shader = ir3_parse_asm(c, &kernel->info, in); - if (!shader) - errx(-1, "assembler failed"); - struct ir3_shader_variant *v = shader->variants; + struct ir3_kernel *kernel = calloc(1, sizeof(*kernel)); + struct ir3_shader *shader = ir3_parse_asm(c, &kernel->info, in); + if (!shader) + errx(-1, "assembler failed"); + struct ir3_shader_variant *v = shader->variants; - v->mergedregs = true; + v->mergedregs = true; - kernel->v = v; - kernel->bin = v->bin; + kernel->v = v; + kernel->bin = v->bin; - kernel->base.local_size[0] = v->local_size[0]; - kernel->base.local_size[1] = v->local_size[0]; - kernel->base.local_size[2] = v->local_size[0]; - kernel->base.num_bufs = kernel->info.num_bufs; - memcpy(kernel->base.buf_sizes, kernel->info.buf_sizes, sizeof(kernel->base.buf_sizes)); + kernel->base.local_size[0] = v->local_size[0]; + kernel->base.local_size[1] = v->local_size[0]; + kernel->base.local_size[2] = v->local_size[0]; + kernel->base.num_bufs = kernel->info.num_bufs; + memcpy(kernel->base.buf_sizes, kernel->info.buf_sizes, + sizeof(kernel->base.buf_sizes)); - unsigned sz = v->info.size; + unsigned sz = v->info.size; - v->bo = fd_bo_new(c->dev, sz, - DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM, - "%s", ir3_shader_stage(v)); + v->bo = fd_bo_new(c->dev, sz, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | DRM_FREEDRENO_GEM_TYPE_KMEM, + "%s", ir3_shader_stage(v)); - memcpy(fd_bo_map(v->bo), kernel->bin, sz); + memcpy(fd_bo_map(v->bo), kernel->bin, sz); - return kernel; + return kernel; } void ir3_asm_disassemble(struct ir3_kernel *k, FILE *out) { - ir3_shader_disasm(k->v, k->bin, out); + ir3_shader_disasm(k->v, k->bin, out); } diff --git a/src/freedreno/computerator/ir3_asm.h b/src/freedreno/computerator/ir3_asm.h index 1a03eb2548d..50bbae775a4 100644 --- a/src/freedreno/computerator/ir3_asm.h +++ b/src/freedreno/computerator/ir3_asm.h @@ -26,15 +26,15 @@ #include "main.h" -#include "ir3/ir3_shader.h" #include "ir3/ir3_parser.h" +#include "ir3/ir3_shader.h" struct ir3_kernel { - struct kernel base; - struct ir3_kernel_info info; - struct backend *backend; - struct ir3_shader_variant *v; - void *bin; + struct kernel base; + struct ir3_kernel_info info; + struct backend *backend; + struct ir3_shader_variant *v; + void *bin; }; define_cast(kernel, ir3_kernel); diff --git a/src/freedreno/computerator/main.c b/src/freedreno/computerator/main.c index 8e60c17279d..626139a751e 100644 --- a/src/freedreno/computerator/main.c +++ b/src/freedreno/computerator/main.c @@ -24,8 +24,8 @@ #include <getopt.h> #include <inttypes.h> #include <locale.h> -#include <xf86drm.h> #include <stdlib.h> +#include <xf86drm.h> #include "util/u_math.h" @@ -33,94 +33,90 @@ #include "main.h" - static void dump_float(void *buf, int sz) { - uint8_t *ptr = (uint8_t *)buf; - uint8_t *end = ptr + sz - 3; - int i = 0; + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz - 3; + int i = 0; - while (ptr < end) { - uint32_t d = 0; + while (ptr < end) { + uint32_t d = 0; - printf((i % 8) ? " " : "\t"); + printf((i % 8) ? " " : "\t"); - d |= *(ptr++) << 0; - d |= *(ptr++) << 8; - d |= *(ptr++) << 16; - d |= *(ptr++) << 24; + d |= *(ptr++) << 0; + d |= *(ptr++) << 8; + d |= *(ptr++) << 16; + d |= *(ptr++) << 24; - printf("%8f", uif(d)); + printf("%8f", uif(d)); - if ((i % 8) == 7) { - printf("\n"); - } + if ((i % 8) == 7) { + printf("\n"); + } - i++; - } + i++; + } - if (i % 8) { - printf("\n"); - } + if (i % 8) { + printf("\n"); + } } static void dump_hex(void *buf, int sz) { - uint8_t *ptr = (uint8_t *)buf; - uint8_t *end = ptr + sz; - int i = 0; + uint8_t *ptr = (uint8_t *)buf; + uint8_t *end = ptr + sz; + int i = 0; - while (ptr < end) { - uint32_t d = 0; + while (ptr < end) { + uint32_t d = 0; - printf((i % 8) ? " " : "\t"); + printf((i % 8) ? " " : "\t"); - d |= *(ptr++) << 0; - d |= *(ptr++) << 8; - d |= *(ptr++) << 16; - d |= *(ptr++) << 24; + d |= *(ptr++) << 0; + d |= *(ptr++) << 8; + d |= *(ptr++) << 16; + d |= *(ptr++) << 24; - printf("%08x", d); + printf("%08x", d); - if ((i % 8) == 7) { - printf("\n"); - } + if ((i % 8) == 7) { + printf("\n"); + } - i++; - } + i++; + } - if (i % 8) { - printf("\n"); - } + if (i % 8) { + printf("\n"); + } } static const char *shortopts = "df:g:hp:"; static const struct option longopts[] = { - {"disasm", no_argument, 0, 'd'}, - {"file", required_argument, 0, 'f'}, - {"groups", required_argument, 0, 'g'}, - {"help", no_argument, 0, 'h'}, - {"perfcntr", required_argument, 0, 'p'}, - {0, 0, 0, 0} -}; + {"disasm", no_argument, 0, 'd'}, {"file", required_argument, 0, 'f'}, + {"groups", required_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, + {"perfcntr", required_argument, 0, 'p'}, {0, 0, 0, 0}}; static void usage(const char *name) { - printf("Usage: %s [-dfgh]\n" - "\n" - "options:\n" - " -d, --disasm print disassembled shader\n" - " -f, --file=FILE read shader from file (instead of stdin)\n" - " -g, --groups=X,Y,Z use specified group size\n" - " -h, --help show this message\n" - " -p, --perfcntr=LIST sample specified performance counters (comma\n" - " separated list)\n" - , - name); + printf( + "Usage: %s [-dfgh]\n" + "\n" + "options:\n" + " -d, --disasm print disassembled shader\n" + " -f, --file=FILE read shader from file (instead of stdin)\n" + " -g, --groups=X,Y,Z use specified group size\n" + " -h, --help show this message\n" + " -p, --perfcntr=LIST sample specified performance counters " + "(comma\n" + " separated list)\n", + name); } /* performance counter description: */ @@ -133,183 +129,185 @@ static unsigned *enabled_counters; static void setup_counter(const char *name, struct perfcntr *c) { - for (int i = 0; i < num_groups; i++) { - const struct fd_perfcntr_group *group = &groups[i]; - - for (int j = 0; j < group->num_countables; j++) { - const struct fd_perfcntr_countable *countable = &group->countables[j]; - - if (strcmp(name, countable->name) != 0) - continue; - - /* - * Allocate a counter to use to monitor the requested countable: - */ - if (enabled_counters[i] >= group->num_counters) { - errx(-1, "Too many counters selected in group: %s", group->name); - } - - unsigned idx = enabled_counters[i]++; - const struct fd_perfcntr_counter *counter = &group->counters[idx]; - - /* - * And initialize the perfcntr struct, pulling together the info - * about selected counter and countable, to simplify life for the - * backend: - */ - c->name = name; - c->select_reg = counter->select_reg; - c->counter_reg_lo = counter->counter_reg_lo; - c->counter_reg_hi = counter->counter_reg_hi; - c->selector = countable->selector; - - return; - } - } - - errx(-1, "could not find countable: %s", name); + for (int i = 0; i < num_groups; i++) { + const struct fd_perfcntr_group *group = &groups[i]; + + for (int j = 0; j < group->num_countables; j++) { + const struct fd_perfcntr_countable *countable = &group->countables[j]; + + if (strcmp(name, countable->name) != 0) + continue; + + /* + * Allocate a counter to use to monitor the requested countable: + */ + if (enabled_counters[i] >= group->num_counters) { + errx(-1, "Too many counters selected in group: %s", group->name); + } + + unsigned idx = enabled_counters[i]++; + const struct fd_perfcntr_counter *counter = &group->counters[idx]; + + /* + * And initialize the perfcntr struct, pulling together the info + * about selected counter and countable, to simplify life for the + * backend: + */ + c->name = name; + c->select_reg = counter->select_reg; + c->counter_reg_lo = counter->counter_reg_lo; + c->counter_reg_hi = counter->counter_reg_hi; + c->selector = countable->selector; + + return; + } + } + + errx(-1, "could not find countable: %s", name); } static struct perfcntr * -parse_perfcntrs(uint32_t gpu_id, const char *perfcntrstr, unsigned *num_perfcntrs) +parse_perfcntrs(uint32_t gpu_id, const char *perfcntrstr, + unsigned *num_perfcntrs) { - struct perfcntr *counters = NULL; - char *cnames, *s; - unsigned cnt = 0; + struct perfcntr *counters = NULL; + char *cnames, *s; + unsigned cnt = 0; - groups = fd_perfcntrs(gpu_id, &num_groups); - enabled_counters = calloc(num_groups, sizeof(enabled_counters[0])); + groups = fd_perfcntrs(gpu_id, &num_groups); + enabled_counters = calloc(num_groups, sizeof(enabled_counters[0])); - cnames = strdup(perfcntrstr); - while ((s = strstr(cnames, ","))) { - char *name = cnames; - s[0] = '\0'; - cnames = &s[1]; + cnames = strdup(perfcntrstr); + while ((s = strstr(cnames, ","))) { + char *name = cnames; + s[0] = '\0'; + cnames = &s[1]; - counters = realloc(counters, ++cnt * sizeof(counters[0])); - setup_counter(name, &counters[cnt-1]); - } + counters = realloc(counters, ++cnt * sizeof(counters[0])); + setup_counter(name, &counters[cnt - 1]); + } - char * name = cnames; - counters = realloc(counters, ++cnt * sizeof(counters[0])); - setup_counter(name, &counters[cnt-1]); + char *name = cnames; + counters = realloc(counters, ++cnt * sizeof(counters[0])); + setup_counter(name, &counters[cnt - 1]); - *num_perfcntrs = cnt; + *num_perfcntrs = cnt; - return counters; + return counters; } int main(int argc, char **argv) { - FILE *in = stdin; - const char *perfcntrstr = NULL; - struct perfcntr *perfcntrs = NULL; - unsigned num_perfcntrs = 0; - bool disasm = false; - uint32_t grid[3] = {0}; - int opt, ret; - - setlocale(LC_NUMERIC, "en_US.UTF-8"); - - while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) { - switch (opt) { - case 'd': - disasm = true; - break; - case 'f': - in = fopen(optarg, "r"); - if (!in) - err(1, "could not open '%s'", optarg); - break; - case 'g': - ret = sscanf(optarg, "%u,%u,%u", &grid[0], &grid[1], &grid[2]); - if (ret != 3) - goto usage; - break; - case 'h': - goto usage; - case 'p': - perfcntrstr = optarg; - break; - default: - printf("unrecognized arg: %c\n", opt); - goto usage; - } - } - - int fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER); - if (fd < 0) - err(1, "could not open drm device"); - - struct fd_device *dev = fd_device_new(fd); - struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); - - uint64_t val; - fd_pipe_get_param(pipe, FD_GPU_ID, &val); - uint32_t gpu_id = val; - - printf("got gpu_id: %u\n", gpu_id); - - struct backend *backend; - switch (gpu_id) { - case 600 ... 699: - backend = a6xx_init(dev, gpu_id); - break; - default: - err(1, "unsupported gpu: a%u", gpu_id); - } - - struct kernel *kernel = backend->assemble(backend, in); - printf("localsize: %dx%dx%d\n", kernel->local_size[0], - kernel->local_size[1], kernel->local_size[2]); - for (int i = 0; i < kernel->num_bufs; i++) { - printf("buf[%d]: size=%u\n", i, kernel->buf_sizes[i]); - kernel->bufs[i] = fd_bo_new(dev, kernel->buf_sizes[i] * 4, - DRM_FREEDRENO_GEM_TYPE_KMEM, "buf[%d]", i); - } - - if (disasm) - backend->disassemble(kernel, stdout); - - if (grid[0] == 0) - return 0; - - struct fd_submit *submit = fd_submit_new(pipe); - - if (perfcntrstr) { - if (!backend->set_perfcntrs) { - err(1, "performance counters not supported"); - } - perfcntrs = parse_perfcntrs(gpu_id, perfcntrstr, &num_perfcntrs); - backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs); - } - - backend->emit_grid(kernel, grid, submit); - - fd_submit_flush(submit, -1, NULL, NULL); - - for (int i = 0; i < kernel->num_bufs; i++) { - fd_bo_cpu_prep(kernel->bufs[i], pipe, DRM_FREEDRENO_PREP_READ); - void *map = fd_bo_map(kernel->bufs[i]); - - printf("buf[%d]:\n", i); - dump_hex(map, kernel->buf_sizes[i] * 4); - dump_float(map, kernel->buf_sizes[i] * 4); - } - - if (perfcntrstr) { - uint64_t results[num_perfcntrs]; - backend->read_perfcntrs(backend, results); - - for (unsigned i = 0; i < num_perfcntrs; i++) { - printf("%s:\t%'"PRIu64"\n", perfcntrs[i].name, results[i]); - } - } - - return 0; + FILE *in = stdin; + const char *perfcntrstr = NULL; + struct perfcntr *perfcntrs = NULL; + unsigned num_perfcntrs = 0; + bool disasm = false; + uint32_t grid[3] = {0}; + int opt, ret; + + setlocale(LC_NUMERIC, "en_US.UTF-8"); + + while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != + -1) { + switch (opt) { + case 'd': + disasm = true; + break; + case 'f': + in = fopen(optarg, "r"); + if (!in) + err(1, "could not open '%s'", optarg); + break; + case 'g': + ret = sscanf(optarg, "%u,%u,%u", &grid[0], &grid[1], &grid[2]); + if (ret != 3) + goto usage; + break; + case 'h': + goto usage; + case 'p': + perfcntrstr = optarg; + break; + default: + printf("unrecognized arg: %c\n", opt); + goto usage; + } + } + + int fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER); + if (fd < 0) + err(1, "could not open drm device"); + + struct fd_device *dev = fd_device_new(fd); + struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); + + uint64_t val; + fd_pipe_get_param(pipe, FD_GPU_ID, &val); + uint32_t gpu_id = val; + + printf("got gpu_id: %u\n", gpu_id); + + struct backend *backend; + switch (gpu_id) { + case 600 ... 699: + backend = a6xx_init(dev, gpu_id); + break; + default: + err(1, "unsupported gpu: a%u", gpu_id); + } + + struct kernel *kernel = backend->assemble(backend, in); + printf("localsize: %dx%dx%d\n", kernel->local_size[0], kernel->local_size[1], + kernel->local_size[2]); + for (int i = 0; i < kernel->num_bufs; i++) { + printf("buf[%d]: size=%u\n", i, kernel->buf_sizes[i]); + kernel->bufs[i] = fd_bo_new(dev, kernel->buf_sizes[i] * 4, + DRM_FREEDRENO_GEM_TYPE_KMEM, "buf[%d]", i); + } + + if (disasm) + backend->disassemble(kernel, stdout); + + if (grid[0] == 0) + return 0; + + struct fd_submit *submit = fd_submit_new(pipe); + + if (perfcntrstr) { + if (!backend->set_perfcntrs) { + err(1, "performance counters not supported"); + } + perfcntrs = parse_perfcntrs(gpu_id, perfcntrstr, &num_perfcntrs); + backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs); + } + + backend->emit_grid(kernel, grid, submit); + + fd_submit_flush(submit, -1, NULL, NULL); + + for (int i = 0; i < kernel->num_bufs; i++) { + fd_bo_cpu_prep(kernel->bufs[i], pipe, DRM_FREEDRENO_PREP_READ); + void *map = fd_bo_map(kernel->bufs[i]); + + printf("buf[%d]:\n", i); + dump_hex(map, kernel->buf_sizes[i] * 4); + dump_float(map, kernel->buf_sizes[i] * 4); + } + + if (perfcntrstr) { + uint64_t results[num_perfcntrs]; + backend->read_perfcntrs(backend, results); + + for (unsigned i = 0; i < num_perfcntrs; i++) { + printf("%s:\t%'" PRIu64 "\n", perfcntrs[i].name, results[i]); + } + } + + return 0; usage: - usage(argv[0]); - return -1; + usage(argv[0]); + return -1; } diff --git a/src/freedreno/computerator/main.h b/src/freedreno/computerator/main.h index 8d310f76969..b8984a5d414 100644 --- a/src/freedreno/computerator/main.h +++ b/src/freedreno/computerator/main.h @@ -31,52 +31,53 @@ #include "drm/freedreno_drmif.h" #include "drm/freedreno_ringbuffer.h" -#include "adreno_pm4.xml.h" #include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" #define MAX_BUFS 4 struct kernel { - /* filled in by backend when shader is assembled: */ - uint32_t local_size[3]; - uint32_t num_bufs; - uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */ + /* filled in by backend when shader is assembled: */ + uint32_t local_size[3]; + uint32_t num_bufs; + uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */ - /* filled in by frontend before launching grid: */ - struct fd_bo *bufs[MAX_BUFS]; + /* filled in by frontend before launching grid: */ + struct fd_bo *bufs[MAX_BUFS]; }; struct perfcntr { - const char *name; + const char *name; - /* for backend to configure/read the counter, describes - * the selected counter: - */ - unsigned select_reg; - unsigned counter_reg_lo; - unsigned counter_reg_hi; - /* and selected countable: - */ - unsigned selector; + /* for backend to configure/read the counter, describes + * the selected counter: + */ + unsigned select_reg; + unsigned counter_reg_lo; + unsigned counter_reg_hi; + /* and selected countable: + */ + unsigned selector; }; /* per-generation entry-points: */ struct backend { - struct kernel *(*assemble)(struct backend *b, FILE *in); - void (*disassemble)(struct kernel *kernel, FILE *out); - void (*emit_grid)(struct kernel *kernel, uint32_t grid[3], - struct fd_submit *submit); + struct kernel *(*assemble)(struct backend *b, FILE *in); + void (*disassemble)(struct kernel *kernel, FILE *out); + void (*emit_grid)(struct kernel *kernel, uint32_t grid[3], + struct fd_submit *submit); - /* performance-counter API: */ - void (*set_perfcntrs)(struct backend *b, const struct perfcntr *perfcntrs, - unsigned num_perfcntrs); - void (*read_perfcntrs)(struct backend *b, uint64_t *results); + /* performance-counter API: */ + void (*set_perfcntrs)(struct backend *b, const struct perfcntr *perfcntrs, + unsigned num_perfcntrs); + void (*read_perfcntrs)(struct backend *b, uint64_t *results); }; -#define define_cast(_from, _to) \ -static inline struct _to * \ -to_ ## _to(struct _from *f) \ -{ return (struct _to *)f; } +#define define_cast(_from, _to) \ + static inline struct _to *to_##_to(struct _from *f) \ + { \ + return (struct _to *)f; \ + } struct backend *a6xx_init(struct fd_device *dev, uint32_t gpu_id); |