summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorVadim Girlin <vadimgirlin@gmail.com>2011-11-13 22:08:33 +0400
committerAlex Deucher <alexander.deucher@amd.com>2011-11-13 17:10:02 -0500
commit8e366dc365d01213b71b87ace47d30938db74845 (patch)
tree14ceb822a3738e91691b2f8f7e87cef0fd79dd05 /src
parent88a140cd199d585d15ce9b0b3ca27815bedf0d63 (diff)
r600g: lazy load for AR register
Emit MOVA* instruction only when AR is used. Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/r600/r600_asm.c35
-rw-r--r--src/gallium/drivers/r600/r600_asm.h2
-rw-r--r--src/gallium/drivers/r600/r600_shader.c70
3 files changed, 61 insertions, 46 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 75dfdb45f20..0a1717c6168 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -255,6 +255,7 @@ static int r600_bytecode_add_cf(struct r600_bytecode *bc)
bc->ncf++;
bc->ndw += 2;
bc->force_add_cf = 0;
+ bc->ar_loaded = 0;
return 0;
}
@@ -1203,6 +1204,32 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60
return 0;
}
+/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
+static int load_ar(struct r600_bytecode *bc)
+{
+ struct r600_bytecode_alu alu;
+ int r;
+
+ if (bc->ar_loaded)
+ return 0;
+
+ /* hack to avoid making MOVA the last instruction in the clause */
+ if ((bc->cf_last->ndw>>1) >= 110)
+ bc->force_add_cf = 1;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ alu.src[0].sel = bc->ar_reg;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+
+ bc->cf_last->r6xx_uses_waterfall = 1;
+ bc->ar_loaded = 1;
+ return 0;
+}
+
int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type)
{
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
@@ -1237,6 +1264,14 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytec
}
bc->cf_last->inst = (type << 3);
+ /* Check AR usage and load it if required */
+ for (i = 0; i < 3; i++)
+ if (nalu->src[i].rel && !bc->ar_loaded)
+ load_ar(bc);
+
+ if (nalu->dst.rel && !bc->ar_loaded)
+ load_ar(bc);
+
/* Setup the kcache for this ALU instruction. This will start a new
* ALU clause if needed. */
if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 61caa4b915e..f4a6cfd33d6 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -184,6 +184,8 @@ struct r600_bytecode {
struct r600_cf_stack_entry fc_stack[32];
unsigned call_sp;
struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
+ unsigned ar_loaded;
+ unsigned ar_reg;
};
/* eg_asm.c */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5ce464d1c54..48a2f7b7329 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -166,7 +166,6 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
- unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bytecode *bc;
struct r600_shader *shader;
@@ -553,7 +552,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
memset(&alu, 0, sizeof(alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
- alu.src[0].sel = ctx->ar_reg;
+ alu.src[0].sel = ctx->bc->ar_reg;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = offset;
@@ -567,7 +566,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
ar_reg = dst_reg;
} else {
- ar_reg = ctx->ar_reg;
+ ar_reg = ctx->bc->ar_reg;
}
memset(&vtx, 0, sizeof(vtx));
@@ -750,9 +749,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
- ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx.temp_reg = ctx.ar_reg + 1;
+ ctx.temp_reg = ctx.bc->ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
@@ -2942,45 +2941,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
break;
case TGSI_OPCODE_UARL:
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
break;
default:
assert(0);
return -1;
}
- if (alu.inst) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.last = 1;
- alu.dst.sel = ctx->ar_reg;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- /* TODO: Note that the MOVA can be avoided if we never use AR for
- * indexing non-CB registers in the current ALU clause. Similarly, we
- * need to load AR from ar_reg again if we started a new clause
- * between ARL and AR usage. The easy way to do that is to remove
- * the MOVA here, and load it for the first AR access after ar_reg
- * has been modified in each clause. */
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- else {
- alu.src[0].sel = ctx->ar_reg;
- alu.src[0].chan = 0;
- }
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ ctx->bc->ar_loaded = 0;
return 0;
}
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
- /* TODO from r600c, ar values don't persist between clauses */
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
@@ -2990,7 +2970,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@@ -2999,8 +2979,8 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
- alu.src[0].sel = ctx->ar_reg;
- alu.dst.sel = ctx->ar_reg;
+ alu.src[0].sel = ctx->bc->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@@ -3011,7 +2991,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@@ -3019,24 +2999,22 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
return r;
break;
case TGSI_OPCODE_UARL:
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
- memset(&alu, 0, sizeof(alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- else
- alu.src[0].sel = ctx->ar_reg;
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- ctx->bc->cf_last->r6xx_uses_waterfall = 1;
+ ctx->bc->ar_loaded = 0;
return 0;
}