summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Vasut <marex@denx.de>2020-06-26 16:31:02 +0200
committerMarek Vasut <marex@denx.de>2020-10-25 20:44:06 +0100
commit5cf9420f3712940a61d816f19271f1aeb288a588 (patch)
tree014d11eb5e557cbabd83cf14887f0e298d545a99
parentbf33f04178ae9001f179a20acee3881b63950e74 (diff)
aarch64: Implement loadupdb instruction
Fill in the aarch64 opcodes for the loadupdb instruction, which is used by various color space conversion programs. So far this is only available on aarch64, but an arm32 port should be easy.
-rw-r--r--orc/orcarm.h2
-rw-r--r--orc/orcprogram-neon.c61
-rw-r--r--orc/orcrules-neon.c121
3 files changed, 176 insertions, 8 deletions
diff --git a/orc/orcarm.h b/orc/orcarm.h
index a04bcd8..84b18ca 100644
--- a/orc/orcarm.h
+++ b/orc/orcarm.h
@@ -440,6 +440,8 @@ ORC_API void orc_arm64_emit_ret (OrcCompiler *p, int Rn);
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,0,Rd,Rn,Rm,0)
#define orc_arm64_emit_add_lsl(p,bits,Rd,Rn,Rm,val) \
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_LSL,Rd,Rn,Rm,val)
+#define orc_arm64_emit_add_lsr(p,bits,Rd,Rn,Rm,val) \
+ orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_LSR,Rd,Rn,Rm,val) /* register-operand ADD via orc_arm64_emit_am with ORC_ARM_LSR and shift amount val; presumably Rd = Rn + (Rm >> val), mirroring the _lsl/_asr/_ror siblings */
#define orc_arm64_emit_add_asr(p,bits,Rd,Rn,Rm,val) \
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_ASR,Rd,Rn,Rm,val)
#define orc_arm64_emit_add_ror(p,bits,Rd,Rn,Rm,val) \
diff --git a/orc/orcprogram-neon.c b/orc/orcprogram-neon.c
index 8d55411..1e0ab2c 100644
--- a/orc/orcprogram-neon.c
+++ b/orc/orcprogram-neon.c
@@ -311,6 +311,17 @@ orc_compiler_neon_init (OrcCompiler *compiler)
compiler->unroll_shift = 0;
}
+ if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
+ for(i=0;i<compiler->n_insns;i++){
+ OrcInstruction *insn = compiler->insns + i;
+ OrcStaticOpcode *opcode = insn->opcode;
+
+ if (strcmp (opcode->name, "loadupdb") == 0) {
+ compiler->vars[insn->src_args[0]].need_offset_reg = TRUE;
+ }
+ }
+ }
+
if (0) {
compiler->need_mask_regs = TRUE;
}
@@ -343,6 +354,34 @@ orc_neon_load_constants_outer (OrcCompiler *compiler)
}
orc_compiler_emit_invariants (compiler);
+
+ if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
+ for(i=0;i<compiler->n_insns;i++){
+ OrcInstruction *insn = compiler->insns + i;
+ OrcStaticOpcode *opcode = insn->opcode;
+
+ if (strcmp (opcode->name, "loadupdb") == 0) {
+ if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
+ orc_arm64_emit_load_reg (compiler, 64,
+ compiler->vars[insn->src_args[0]].ptr_offset,
+ compiler->exec_reg,
+ ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
+ } else {
+ if (!compiler->vars[insn->src_args[0]].ptr_offset)
+ continue;
+ if (!compiler->vars[insn->src_args[1]].value.i)
+ orc_arm64_emit_eor(compiler, 64,
+ compiler->vars[insn->src_args[0]].ptr_offset,
+ compiler->vars[insn->src_args[0]].ptr_offset,
+ compiler->vars[insn->src_args[0]].ptr_offset);
+ else
+ orc_arm64_emit_load_imm(compiler, 64,
+ compiler->vars[insn->src_args[0]].ptr_offset,
+ compiler->vars[insn->src_args[1]].value.i);
+ }
+ }
+ }
+ }
}
static void
@@ -363,6 +402,11 @@ orc_neon_load_constants_inner (OrcCompiler *compiler)
orc_arm64_emit_load_reg (compiler, 64,
compiler->vars[i].ptr_register,
compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
+ if (compiler->vars[i].ptr_offset)
+ orc_arm64_emit_eor(compiler, 64,
+ compiler->vars[i].ptr_offset,
+ compiler->vars[i].ptr_offset,
+ compiler->vars[i].ptr_offset);
} else {
orc_arm_emit_load_reg (compiler,
compiler->vars[i].ptr_register,
@@ -1125,22 +1169,23 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
if (compiler->vars[k].name == NULL) continue;
if (compiler->vars[k].vartype == ORC_VAR_TYPE_SRC ||
compiler->vars[k].vartype == ORC_VAR_TYPE_DEST) {
- if (compiler->vars[k].ptr_register) {
- if (compiler->is_64bit)
+ if (compiler->is_64bit) {
+ if (compiler->vars[k].ptr_offset) {
+ orc_arm64_emit_add_imm (compiler, 64,
+ compiler->vars[k].ptr_offset,
+ compiler->vars[k].ptr_offset,
+ compiler->vars[k].size << compiler->loop_shift);
+ } else if (compiler->vars[k].ptr_register) {
orc_arm64_emit_add_imm (compiler, 64,
compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register,
compiler->vars[k].size << compiler->loop_shift);
- else
+ }
+ } else {
orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register,
compiler->vars[k].size << compiler->loop_shift);
- } else {
- /* arm_emit_add_imm_memoffset (compiler, arm_ptr_size, */
- /* compiler->vars[k].size << compiler->loop_shift, */
- /* (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[k]), */
- /* p->exec_reg); */
}
}
}
diff --git a/orc/orcrules-neon.c b/orc/orcrules-neon.c
index 7e8e3db..2764e9e 100644
--- a/orc/orcrules-neon.c
+++ b/orc/orcrules-neon.c
@@ -1077,6 +1077,126 @@ orc_neon_storeq (OrcCompiler *compiler, int dest, int update, int src1, int is_a
#endif
static void
+neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
+{ /* Rule for "loadupdb": emits an ld1 from the source array into compiler->tmpreg, then a zip1 of tmpreg with itself into the destination; "user" is not used. */
+ OrcVariable *src = compiler->vars + insn->src_args[0]; /* source variable being loaded from */
+ unsigned int code = 0; /* instruction word assembled below and passed to orc_arm_emit() */
+ int size = src->size << compiler->insn_shift; /* element size scaled by 2^insn_shift; selects the load form below */
+ ORC_ASSERT(src->ptr_register); /* can ptr_register be 0 ? (note: asserted before the is_64bit check) */
+ int ptr_reg; /* register holding the address actually used by the load */
+
+ if (!compiler->is_64bit) { /* only the aarch64 encoding is emitted by this rule */
+ ORC_COMPILER_ERROR(compiler, "loadupdb is implemented only on aarch64");
+ return;
+ }
+
+ /* FIXME this should be fixed at a higher level */
+ if (src->vartype != ORC_VAR_TYPE_SRC && src->vartype != ORC_VAR_TYPE_DEST) {
+ ORC_COMPILER_ERROR(compiler, "loadX used with non src/dest"); /* NOTE(review): message says "loadX" although this is the loadupdb rule */
+ return;
+ }
+
+ if (src->ptr_offset) { /* an offset register is in use for this variable */
+ ptr_reg = compiler->gp_tmpreg;
+ orc_arm64_emit_add_lsr(compiler, 64, ptr_reg, src->ptr_register, src->ptr_offset, 1); /* presumably ptr_reg = ptr_register + (ptr_offset >> 1), so the source advances at half the offset's rate - confirm */
+ } else {
+ ptr_reg = src->ptr_register; /* no offset register: load straight from the array pointer */
+ }
+
+ int opcode, flag; /* encoding fields: opcode goes to bit 12 or 13 upward, flag to bits 10..11 */
+
+ if (size > 16) { /* NOTE(review): with this test the "size == 16" arm below can never be taken; size 16 reaches the else branch and reports "bad unaligned load size" - was ">= 16" or "> 8" intended? */
+ /** load multiple single-element structures to one, two, three, or four registers */
+ char vt_str[64]; /* register-list text used only for the assembly listing */
+
+ memset(vt_str, '\x00', 64);
+
+ if (size == 64) { /* four consecutive registers starting at tmpreg */
+ snprintf(vt_str, 64, "%s, %s, %s, %s",
+ orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
+ orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1),
+ orc_neon64_reg_name_vector (compiler->tmpreg + 2, 1, 1),
+ orc_neon64_reg_name_vector (compiler->tmpreg + 3, 1, 1));
+ opcode = 0x2;
+ } else if (size == 32) { /* two consecutive registers starting at tmpreg */
+ snprintf(vt_str, 64, "%s, %s",
+ orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
+ orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1));
+ opcode = 0xa;
+ } else if (size == 16) { /* single register; unreachable under the enclosing "size > 16" test */
+ snprintf(vt_str, 64, "%s",
+ orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1));
+ opcode = 0x7;
+ } else {
+ ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
+ src->size << compiler->insn_shift);
+ return;
+ }
+ flag = 0; /* Bytes */
+
+ ORC_ASM_CODE(compiler," ld1 { %s }, [%s]\n",
+ vt_str, orc_arm64_reg_name (ptr_reg, 64));
+ code = 0x0c400000; /* base word for the multi-register ld1 form */
+ code |= 0 << 30; /* Q-bit; NOTE(review): left at 0 - confirm this matches the register arrangement printed in the asm text above */
+ code |= (flag&0x3) << 10;
+ code |= (opcode&0xf) << 12;
+ } else {
+ /** load one single-element structure to one lane of one register */
+ flag = 0;
+ if (size == 8) {
+ opcode = 4; /* same opcode as the 4-byte case; distinguished by flag */
+ flag = 1; /* size==01 */
+ } else if (size == 4) {
+ opcode = 4;
+ } else if (size == 2) {
+ opcode = 2;
+ } else if (size == 1) {
+ opcode = 0;
+ } else {
+ ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
+ src->size << compiler->insn_shift);
+ return;
+ }
+ ORC_ASM_CODE(compiler," ld1 { %s }[0], [%s]\n",
+ orc_neon64_reg_name_vector_single (compiler->tmpreg, size),
+ orc_arm64_reg_name (ptr_reg, 64));
+ code = 0x0d400000; /* base word for the single-lane ld1 form (lane 0 per the asm text) */
+ code |= (opcode&0x7) << 13;
+ code |= (flag&0x3) << 10;
+ }
+
+ code |= (ptr_reg&0x1f) << 5; /* address register number in bits 5..9 */
+ code |= (compiler->tmpreg&0x1f); /* target vector register number in bits 0..4 */
+
+ orc_arm_emit (compiler, code);
+
+ OrcVariable tmpreg = { .alloc = compiler->tmpreg, .size = compiler->vars[insn->src_args[0]].size }; /* stand-in variable so the just-loaded tmpreg can be passed as both zip1 operands */
+
+ switch (src->size) { /* pick the zip1 opcode by element size; NOTE(review): no default, so sizes other than 1, 2 and 4 emit no zip1 */
+ case 1:
+ orc_neon64_emit_binary (compiler, "zip1", 0x0e003800,
+ compiler->vars[insn->dest_args[0]],
+ tmpreg,
+ tmpreg, compiler->insn_shift - 1); /* zip1 dest, tmp, tmp: presumably duplicates each loaded element into two adjacent lanes - confirm */
+ break;
+ case 2:
+ orc_neon64_emit_binary (compiler, "zip1", 0x0e403800,
+ compiler->vars[insn->dest_args[0]],
+ tmpreg,
+ tmpreg, compiler->insn_shift - 1);
+ break;
+ case 4:
+ orc_neon64_emit_binary (compiler, "zip1", 0x0e803800,
+ compiler->vars[insn->dest_args[0]],
+ tmpreg,
+ tmpreg, compiler->insn_shift - 1);
+ break;
+ }
+
+ src->update_type = 1; /* NOTE(review): meaning of update_type value 1 is not visible here - presumably marks the source as advancing at half rate; confirm against the loop code */
+}
+
+static void
neon_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
OrcVariable *src = compiler->vars + insn->src_args[0];
@@ -4388,6 +4508,7 @@ orc_compiler_neon_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "loadpw", neon_rule_loadpX, (void *)2);
orc_rule_register (rule_set, "loadpl", neon_rule_loadpX, (void *)4);
orc_rule_register (rule_set, "loadpq", neon_rule_loadpX, (void *)8);
+ orc_rule_register (rule_set, "loadupdb", neon_rule_loadupdb, (void *)0);
orc_rule_register (rule_set, "loadb", neon_rule_loadX, (void *)0);
orc_rule_register (rule_set, "loadw", neon_rule_loadX, (void *)0);
orc_rule_register (rule_set, "loadl", neon_rule_loadX, (void *)0);