summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2009-11-18 18:56:21 +0100
committer: David Schleef <ds@schleef.org> 2009-11-18 18:56:21 +0100
commit: 318ec3e449322956d34c72a3f4b4d89d5ba1f36a (patch)
tree: 90dbbb74f6d2b8c03a3785e43f28784c982ed02b
parent: 1be462eb75d7b4307a153610e94125bdc2a19188 (diff)
sse: load/store at an offset for constant width
-rw-r--r-- orc/orcprogram-c.c | 41
-rw-r--r-- orc/orcprogram-sse.c | 47
-rw-r--r-- orc/orcprogram.c | 3
3 files changed, 47 insertions, 44 deletions
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index 04ab39d..68d88fc 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -120,6 +120,22 @@ orc_compiler_c_assemble (OrcCompiler *compiler)
if (compiler->program->is_2d) {
ORC_ASM_CODE(compiler," int j;\n");
}
+ if (compiler->program->constant_n == 0) {
+ if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) {
+ ORC_ASM_CODE(compiler," int n = ex->n;\n");
+ }
+ } else {
+ ORC_ASM_CODE(compiler," int n = %d;\n", compiler->program->constant_n);
+ }
+ if (compiler->program->is_2d) {
+ if (compiler->program->constant_m == 0) {
+ if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) {
+ ORC_ASM_CODE(compiler," int m = ex->params[ORC_VAR_A1];\n");
+ }
+ } else {
+ ORC_ASM_CODE(compiler," int m = %d;\n", compiler->program->constant_m);
+ }
+ }
for(i=0;i<ORC_N_VARIABLES;i++){
OrcVariable *var = compiler->vars + i;
@@ -173,16 +189,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler)
ORC_ASM_CODE(compiler,"\n");
if (compiler->program->is_2d) {
- if (compiler->program->constant_m == 0) {
- if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) {
- ORC_ASM_CODE(compiler," for (j = 0; j < ex->params[ORC_VAR_A1]; j++) {\n");
- } else {
- ORC_ASM_CODE(compiler," for (j = 0; j < m; j++) {\n");
- }
- } else {
- ORC_ASM_CODE(compiler," for (j = 0; j < %d; j++) {\n",
- compiler->program->constant_m);
- }
+ ORC_ASM_CODE(compiler," for (j = 0; j < m; j++) {\n");
prefix = 2;
for(i=0;i<ORC_N_VARIABLES;i++){
@@ -251,17 +258,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler)
}
ORC_ASM_CODE(compiler,"\n");
- if (compiler->program->constant_n == 0) {
- if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) {
- ORC_ASM_CODE(compiler,"%*s for (i = 0; i < ex->n; i++) {\n", prefix, "");
- } else {
- ORC_ASM_CODE(compiler,"%*s for (i = 0; i < n; i++) {\n", prefix, "");
- }
- } else {
- ORC_ASM_CODE(compiler,"%*s for (i = 0; i < %d; i++) {\n",
- prefix, "",
- compiler->program->constant_n);
- }
+ ORC_ASM_CODE(compiler,"%*s for (i = 0; i < n; i++) {\n", prefix, "");
/* Load from source (and maybe destination) arrays */
for(i=0;i<ORC_N_VARIABLES;i++){
@@ -273,7 +270,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler)
ORC_ASM_CODE (compiler, "%*s %s = *ptr%d;\n", prefix, "", s, i);
ORC_ASM_CODE (compiler, "%*s ptr%d++;\n", prefix, "", i);
}
- if (var->vartype == ORC_VAR_TYPE_SRC && var->load_dest) {
+ if (var->vartype == ORC_VAR_TYPE_DEST && var->load_dest) {
ORC_ASM_CODE (compiler, "%*s %s = *ptr%d;\n", prefix, "", s, i);
}
}
diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c
index 306aa29..c6c0dc9 100644
--- a/orc/orcprogram-sse.c
+++ b/orc/orcprogram-sse.c
@@ -18,7 +18,7 @@
#define ORC_SSE_ALIGNED_DEST_CUTOFF 64
-void orc_sse_emit_loop (OrcCompiler *compiler, int update);
+void orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update);
void orc_compiler_sse_init (OrcCompiler *compiler);
unsigned int orc_compiler_sse_get_default_flags (void);
@@ -332,7 +332,7 @@ sse_add_strides (OrcCompiler *compiler)
}
void
-orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
+orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var, int offset)
{
int ptr_reg;
if (var->ptr_register == 0) {
@@ -347,23 +347,23 @@ orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
}
switch (var->size << compiler->loop_shift) {
case 1:
- orc_x86_emit_mov_memoffset_reg (compiler, 1, 0, ptr_reg, compiler->gp_tmpreg);
+ orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, var->alloc);
break;
case 2:
- orc_x86_emit_mov_memoffset_reg (compiler, 2, 0, ptr_reg, compiler->gp_tmpreg);
+ orc_x86_emit_mov_memoffset_reg (compiler, 2, offset, ptr_reg, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, var->alloc);
break;
case 4:
- orc_x86_emit_mov_memoffset_sse (compiler, 4, 0, ptr_reg, var->alloc,
+ orc_x86_emit_mov_memoffset_sse (compiler, 4, offset, ptr_reg, var->alloc,
var->is_aligned);
break;
case 8:
- orc_x86_emit_mov_memoffset_sse (compiler, 8, 0, ptr_reg, var->alloc,
+ orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, ptr_reg, var->alloc,
var->is_aligned);
break;
case 16:
- orc_x86_emit_mov_memoffset_sse (compiler, 16, 0, ptr_reg, var->alloc,
+ orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, ptr_reg, var->alloc,
var->is_aligned);
break;
default:
@@ -374,7 +374,7 @@ orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var)
}
void
-orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var)
+orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var, int offset)
{
int ptr_reg;
if (var->ptr_register == 0) {
@@ -391,7 +391,7 @@ orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var)
ORC_COMPILER_ERROR(compiler,"unimplemented");
}
orc_x86_emit_mov_sse_reg (compiler, var->alloc, compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, 0, ptr_reg);
+ orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, offset, ptr_reg);
break;
case 2:
/* FIXME we might be using ecx twice here */
@@ -399,18 +399,18 @@ orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var)
ORC_COMPILER_ERROR(compiler,"unimplemented");
}
orc_x86_emit_mov_sse_reg (compiler, var->alloc, compiler->gp_tmpreg);
- orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, 0, ptr_reg);
+ orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, offset, ptr_reg);
break;
case 4:
- orc_x86_emit_mov_sse_memoffset (compiler, 4, var->alloc, 0, ptr_reg,
+ orc_x86_emit_mov_sse_memoffset (compiler, 4, var->alloc, offset, ptr_reg,
var->is_aligned, var->is_uncached);
break;
case 8:
- orc_x86_emit_mov_sse_memoffset (compiler, 8, var->alloc, 0, ptr_reg,
+ orc_x86_emit_mov_sse_memoffset (compiler, 8, var->alloc, offset, ptr_reg,
var->is_aligned, var->is_uncached);
break;
case 16:
- orc_x86_emit_mov_sse_memoffset (compiler, 16, var->alloc, 0, ptr_reg,
+ orc_x86_emit_mov_sse_memoffset (compiler, 16, var->alloc, offset, ptr_reg,
var->is_aligned, var->is_uncached);
break;
default:
@@ -569,18 +569,23 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
int n_left = compiler->program->constant_n;
int save_loop_shift;
int loop_shift;
+ int offset = 0;
save_loop_shift = compiler->loop_shift;
while (n_left >= (1<<compiler->loop_shift)) {
- orc_sse_emit_loop (compiler, (n_left != (1<<compiler->loop_shift)));
+ ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift);
+ orc_sse_emit_loop (compiler, offset, FALSE);
n_left -= 1<<compiler->loop_shift;
+ offset += 1<<compiler->loop_shift;
}
for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) {
if (n_left >= (1<<loop_shift)) {
compiler->loop_shift = loop_shift;
- orc_sse_emit_loop (compiler, (n_left >= (1<<loop_shift)));
+ ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", loop_shift);
+ orc_sse_emit_loop (compiler, offset, FALSE);
n_left -= 1<<loop_shift;
+ offset += 1<<loop_shift;
}
}
compiler->loop_shift = save_loop_shift;
@@ -599,7 +604,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg);
orc_x86_emit_je (compiler, 12 + compiler->loop_shift);
- orc_sse_emit_loop (compiler, TRUE);
+ orc_sse_emit_loop (compiler, 0, TRUE);
orc_x86_emit_label (compiler, 12 + compiler->loop_shift);
}
@@ -616,7 +621,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift);
orc_x86_emit_align (compiler);
orc_x86_emit_label (compiler, 2);
- orc_sse_emit_loop (compiler, TRUE);
+ orc_sse_emit_loop (compiler, 0, TRUE);
orc_x86_emit_dec_memoffset (compiler, 4,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2),
compiler->exec_reg);
@@ -637,7 +642,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg);
orc_x86_emit_je (compiler, 8 + compiler->loop_shift);
- orc_sse_emit_loop (compiler, TRUE);
+ orc_sse_emit_loop (compiler, 0, TRUE);
orc_x86_emit_label (compiler, 8 + compiler->loop_shift);
}
@@ -663,7 +668,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
}
void
-orc_sse_emit_loop (OrcCompiler *compiler, int update)
+orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update)
{
int j;
int k;
@@ -697,7 +702,7 @@ orc_sse_emit_loop (OrcCompiler *compiler, int update)
switch (var->vartype) {
case ORC_VAR_TYPE_SRC:
case ORC_VAR_TYPE_DEST:
- orc_sse_emit_load_src (compiler, var);
+ orc_sse_emit_load_src (compiler, var, offset);
break;
case ORC_VAR_TYPE_CONST:
break;
@@ -731,7 +736,7 @@ orc_sse_emit_loop (OrcCompiler *compiler, int update)
switch (var->vartype) {
case ORC_VAR_TYPE_DEST:
- orc_sse_emit_store_dest (compiler, var);
+ orc_sse_emit_store_dest (compiler, var, offset);
break;
case ORC_VAR_TYPE_TEMP:
break;
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index 6feba5b..12ccdba 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -579,7 +579,8 @@ orc_program_get_max_var_size (OrcProgram *program)
max = 0;
for(i=0;i<ORC_N_VARIABLES;i++){
- if (program->vars[i].size) {
+ if (program->vars[i].size &&
+ program->vars[i].vartype != ORC_VAR_TYPE_ACCUMULATOR) {
max = MAX(max, program->vars[i].size);
}
}