summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2012-09-17 14:33:56 +0000
committerAndreas Boll <andreas.boll.dev@gmail.com>2013-01-20 15:08:29 +0100
commit4cf2be408eeabd7ae8b0d3c3c81a6dc17e98e323 (patch)
tree83e2b02c12e4842dee4fc2aa9d13f4de279ba475
parent7045f222cdd3a50f03e66ae2c184af17d9c32364 (diff)
r600g: Use LOOP_START_DX10 for loops
LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not limited to 4096 iterations like the other LOOP_* instructions. Compute shaders need to use this instruction, and since we aren't optimizing loops with the LOOP_CONFIG* registers for pixel and vertex shaders, it seems like we should just use it for everything. Reviewed-by: Marek Olšák <maraeo@gmail.com> (cherry picked from commit 810345492eca34c2ad12728b5491a4691cc62ec2) Fixes a hang on the following piglit test on my rv770 ./bin/ext_timer_query-time-elapsed -auto -fbo
-rw-r--r--src/gallium/drivers/r600/eg_asm.c1
-rw-r--r--src/gallium/drivers/r600/r600_asm.c8
-rw-r--r--src/gallium/drivers/r600/r600_shader.c4
3 files changed, 11 insertions, 2 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index d2c1679796a..00ac4a8c25b 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -121,6 +121,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 48a2a8ac3a7..83529d8d596 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1878,6 +1878,7 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -1952,6 +1953,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -1986,7 +1988,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
- case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2089,6 +2091,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
+ case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
@@ -2172,6 +2175,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2360,6 +2364,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2454,6 +2459,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
case V_SQ_CF_WORD1_SQ_CF_INST_POP:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 76e26e8464c..75144a6725c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5114,7 +5114,9 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
- r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
+ /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
+ * limited to 4096 iterations, like the other LOOP_* instructions. */
+ r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
fc_pushlevel(ctx, FC_LOOP);