diff options
| | | |
|---|---|---|
| author | David Schleef <ds@schleef.org> | 2009-11-18 18:56:21 +0100 |
| committer | David Schleef <ds@schleef.org> | 2009-11-18 18:56:21 +0100 |
| commit | 318ec3e449322956d34c72a3f4b4d89d5ba1f36a (patch) | |
| tree | 90dbbb74f6d2b8c03a3785e43f28784c982ed02b | |
| parent | 1be462eb75d7b4307a153610e94125bdc2a19188 (diff) | |
sse: load/store at an offset for constant width
-rw-r--r-- | orc/orcprogram-c.c | 41 |
-rw-r--r-- | orc/orcprogram-sse.c | 47 |
-rw-r--r-- | orc/orcprogram.c | 3 |
3 files changed, 47 insertions, 44 deletions
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index 04ab39d..68d88fc 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -120,6 +120,22 @@ orc_compiler_c_assemble (OrcCompiler *compiler) if (compiler->program->is_2d) { ORC_ASM_CODE(compiler," int j;\n"); } + if (compiler->program->constant_n == 0) { + if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) { + ORC_ASM_CODE(compiler," int n = ex->n;\n"); + } + } else { + ORC_ASM_CODE(compiler," int n = %d;\n", compiler->program->constant_n); + } + if (compiler->program->is_2d) { + if (compiler->program->constant_m == 0) { + if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) { + ORC_ASM_CODE(compiler," int m = ex->params[ORC_VAR_A1];\n"); + } + } else { + ORC_ASM_CODE(compiler," int m = %d;\n", compiler->program->constant_m); + } + } for(i=0;i<ORC_N_VARIABLES;i++){ OrcVariable *var = compiler->vars + i; @@ -173,16 +189,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler) ORC_ASM_CODE(compiler,"\n"); if (compiler->program->is_2d) { - if (compiler->program->constant_m == 0) { - if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) { - ORC_ASM_CODE(compiler," for (j = 0; j < ex->params[ORC_VAR_A1]; j++) {\n"); - } else { - ORC_ASM_CODE(compiler," for (j = 0; j < m; j++) {\n"); - } - } else { - ORC_ASM_CODE(compiler," for (j = 0; j < %d; j++) {\n", - compiler->program->constant_m); - } + ORC_ASM_CODE(compiler," for (j = 0; j < m; j++) {\n"); prefix = 2; for(i=0;i<ORC_N_VARIABLES;i++){ @@ -251,17 +258,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler) } ORC_ASM_CODE(compiler,"\n"); - if (compiler->program->constant_n == 0) { - if (!(compiler->target_flags & ORC_TARGET_C_NOEXEC)) { - ORC_ASM_CODE(compiler,"%*s for (i = 0; i < ex->n; i++) {\n", prefix, ""); - } else { - ORC_ASM_CODE(compiler,"%*s for (i = 0; i < n; i++) {\n", prefix, ""); - } - } else { - ORC_ASM_CODE(compiler,"%*s for (i = 0; i < %d; i++) {\n", - prefix, "", - compiler->program->constant_n); - } + ORC_ASM_CODE(compiler,"%*s for (i = 0; i < 
n; i++) {\n", prefix, ""); /* Load from source (and maybe destination) arrays */ for(i=0;i<ORC_N_VARIABLES;i++){ @@ -273,7 +270,7 @@ orc_compiler_c_assemble (OrcCompiler *compiler) ORC_ASM_CODE (compiler, "%*s %s = *ptr%d;\n", prefix, "", s, i); ORC_ASM_CODE (compiler, "%*s ptr%d++;\n", prefix, "", i); } - if (var->vartype == ORC_VAR_TYPE_SRC && var->load_dest) { + if (var->vartype == ORC_VAR_TYPE_DEST && var->load_dest) { ORC_ASM_CODE (compiler, "%*s %s = *ptr%d;\n", prefix, "", s, i); } } diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 306aa29..c6c0dc9 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -18,7 +18,7 @@ #define ORC_SSE_ALIGNED_DEST_CUTOFF 64 -void orc_sse_emit_loop (OrcCompiler *compiler, int update); +void orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update); void orc_compiler_sse_init (OrcCompiler *compiler); unsigned int orc_compiler_sse_get_default_flags (void); @@ -332,7 +332,7 @@ sse_add_strides (OrcCompiler *compiler) } void -orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var) +orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var, int offset) { int ptr_reg; if (var->ptr_register == 0) { @@ -347,23 +347,23 @@ orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var) } switch (var->size << compiler->loop_shift) { case 1: - orc_x86_emit_mov_memoffset_reg (compiler, 1, 0, ptr_reg, compiler->gp_tmpreg); + orc_x86_emit_mov_memoffset_reg (compiler, 1, offset, ptr_reg, compiler->gp_tmpreg); orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, var->alloc); break; case 2: - orc_x86_emit_mov_memoffset_reg (compiler, 2, 0, ptr_reg, compiler->gp_tmpreg); + orc_x86_emit_mov_memoffset_reg (compiler, 2, offset, ptr_reg, compiler->gp_tmpreg); orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, var->alloc); break; case 4: - orc_x86_emit_mov_memoffset_sse (compiler, 4, 0, ptr_reg, var->alloc, + orc_x86_emit_mov_memoffset_sse (compiler, 4, offset, ptr_reg, var->alloc, var->is_aligned); 
break; case 8: - orc_x86_emit_mov_memoffset_sse (compiler, 8, 0, ptr_reg, var->alloc, + orc_x86_emit_mov_memoffset_sse (compiler, 8, offset, ptr_reg, var->alloc, var->is_aligned); break; case 16: - orc_x86_emit_mov_memoffset_sse (compiler, 16, 0, ptr_reg, var->alloc, + orc_x86_emit_mov_memoffset_sse (compiler, 16, offset, ptr_reg, var->alloc, var->is_aligned); break; default: @@ -374,7 +374,7 @@ orc_sse_emit_load_src (OrcCompiler *compiler, OrcVariable *var) } void -orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) +orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var, int offset) { int ptr_reg; if (var->ptr_register == 0) { @@ -391,7 +391,7 @@ orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) ORC_COMPILER_ERROR(compiler,"unimplemented"); } orc_x86_emit_mov_sse_reg (compiler, var->alloc, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, 0, ptr_reg); + orc_x86_emit_mov_reg_memoffset (compiler, 1, compiler->gp_tmpreg, offset, ptr_reg); break; case 2: /* FIXME we might be using ecx twice here */ @@ -399,18 +399,18 @@ orc_sse_emit_store_dest (OrcCompiler *compiler, OrcVariable *var) ORC_COMPILER_ERROR(compiler,"unimplemented"); } orc_x86_emit_mov_sse_reg (compiler, var->alloc, compiler->gp_tmpreg); - orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, 0, ptr_reg); + orc_x86_emit_mov_reg_memoffset (compiler, 2, compiler->gp_tmpreg, offset, ptr_reg); break; case 4: - orc_x86_emit_mov_sse_memoffset (compiler, 4, var->alloc, 0, ptr_reg, + orc_x86_emit_mov_sse_memoffset (compiler, 4, var->alloc, offset, ptr_reg, var->is_aligned, var->is_uncached); break; case 8: - orc_x86_emit_mov_sse_memoffset (compiler, 8, var->alloc, 0, ptr_reg, + orc_x86_emit_mov_sse_memoffset (compiler, 8, var->alloc, offset, ptr_reg, var->is_aligned, var->is_uncached); break; case 16: - orc_x86_emit_mov_sse_memoffset (compiler, 16, var->alloc, 0, ptr_reg, + orc_x86_emit_mov_sse_memoffset (compiler, 16, 
var->alloc, offset, ptr_reg, var->is_aligned, var->is_uncached); break; default: @@ -569,18 +569,23 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) int n_left = compiler->program->constant_n; int save_loop_shift; int loop_shift; + int offset = 0; save_loop_shift = compiler->loop_shift; while (n_left >= (1<<compiler->loop_shift)) { - orc_sse_emit_loop (compiler, (n_left != (1<<compiler->loop_shift))); + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); + orc_sse_emit_loop (compiler, offset, FALSE); n_left -= 1<<compiler->loop_shift; + offset += 1<<compiler->loop_shift; } for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) { if (n_left >= (1<<loop_shift)) { compiler->loop_shift = loop_shift; - orc_sse_emit_loop (compiler, (n_left >= (1<<loop_shift))); + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", loop_shift); + orc_sse_emit_loop (compiler, offset, FALSE); n_left -= 1<<loop_shift; + offset += 1<<loop_shift; } } compiler->loop_shift = save_loop_shift; @@ -599,7 +604,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); orc_x86_emit_je (compiler, 12 + compiler->loop_shift); - orc_sse_emit_loop (compiler, TRUE); + orc_sse_emit_loop (compiler, 0, TRUE); orc_x86_emit_label (compiler, 12 + compiler->loop_shift); } @@ -616,7 +621,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); orc_x86_emit_align (compiler); orc_x86_emit_label (compiler, 2); - orc_sse_emit_loop (compiler, TRUE); + orc_sse_emit_loop (compiler, 0, TRUE); orc_x86_emit_dec_memoffset (compiler, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); @@ -637,7 +642,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), 
compiler->exec_reg); orc_x86_emit_je (compiler, 8 + compiler->loop_shift); - orc_sse_emit_loop (compiler, TRUE); + orc_sse_emit_loop (compiler, 0, TRUE); orc_x86_emit_label (compiler, 8 + compiler->loop_shift); } @@ -663,7 +668,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) } void -orc_sse_emit_loop (OrcCompiler *compiler, int update) +orc_sse_emit_loop (OrcCompiler *compiler, int offset, int update) { int j; int k; @@ -697,7 +702,7 @@ orc_sse_emit_loop (OrcCompiler *compiler, int update) switch (var->vartype) { case ORC_VAR_TYPE_SRC: case ORC_VAR_TYPE_DEST: - orc_sse_emit_load_src (compiler, var); + orc_sse_emit_load_src (compiler, var, offset); break; case ORC_VAR_TYPE_CONST: break; @@ -731,7 +736,7 @@ orc_sse_emit_loop (OrcCompiler *compiler, int update) switch (var->vartype) { case ORC_VAR_TYPE_DEST: - orc_sse_emit_store_dest (compiler, var); + orc_sse_emit_store_dest (compiler, var, offset); break; case ORC_VAR_TYPE_TEMP: break; diff --git a/orc/orcprogram.c b/orc/orcprogram.c index 6feba5b..12ccdba 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -579,7 +579,8 @@ orc_program_get_max_var_size (OrcProgram *program) max = 0; for(i=0;i<ORC_N_VARIABLES;i++){ - if (program->vars[i].size) { + if (program->vars[i].size && + program->vars[i].vartype != ORC_VAR_TYPE_ACCUMULATOR) { max = MAX(max, program->vars[i].size); } } |