summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuo Xionghu <xionghu.luo@intel.com>2015-09-22 14:51:09 +0800
committerYang Rong <rong.r.yang@intel.com>2015-09-22 16:15:28 +0800
commit18a52ffc966027a3004b85c7c03c9416e1a84c3a (patch)
treed69872e876e76af5d8629809acc31a79426dea46
parentf5c7e2e45eb20773d1e631dcd1ba142d28a7f208 (diff)
add bswap64 for gen7/gen75 and gen8 separately.
As the long type data layout is not contiguous on the gen7/gen75 platforms, the indirect address access pattern is a bit different from gen8. Signed-off-by: Luo Xionghu <xionghu.luo@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r--backend/src/backend/gen8_context.cpp64
-rw-r--r--backend/src/backend/gen_context.cpp110
2 files changed, 174 insertions, 0 deletions
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index dd5b4ca0..5eb78665 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -246,6 +246,70 @@ namespace gbe
p->pop();
p->MOV(dst, tmp);
+ }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+ bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+ GBE_ASSERT(uniform_src || src.subnr == 0);
+ GBE_ASSERT(dst.subnr == 0);
+ GBE_ASSERT(tmp.subnr == 0);
+ GBE_ASSERT(start_addr >= 0);
+ new_a0[0] = start_addr + 7;
+ new_a0[1] = start_addr + 6;
+ new_a0[2] = start_addr + 5;
+ new_a0[3] = start_addr + 4;
+ new_a0[4] = start_addr + 3;
+ new_a0[5] = start_addr + 2;
+ new_a0[6] = start_addr + 1;
+ new_a0[7] = start_addr;
+ if(!uniform_src) {
+ new_a0[8] = start_addr + 15;
+ new_a0[9] = start_addr + 14;
+ new_a0[10] = start_addr + 13;
+ new_a0[11] = start_addr + 12;
+ new_a0[12] = start_addr + 11;
+ new_a0[13] = start_addr + 10;
+ new_a0[14] = start_addr + 9;
+ new_a0[15] = start_addr + 8;
+ } else {
+ new_a0[8] = start_addr + 7;
+ new_a0[9] = start_addr + 6;
+ new_a0[10] = start_addr + 5;
+ new_a0[11] = start_addr + 4;
+ new_a0[12] = start_addr + 3;
+ new_a0[13] = start_addr + 2;
+ new_a0[14] = start_addr + 1;
+ new_a0[15] = start_addr;
+ }
+ this->setA0Content(new_a0, 56);
+
+ p->push();
+ p->curr.execWidth = 16;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
+ p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
+ if(!uniform_src)
+ ind_src.addr_imm += 16;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 16), ind_src);
+ for (int i = 0; i < 2; i++) {
+ if(!uniform_src)
+ ind_src.addr_imm += 16;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 16*i), ind_src);
+ }
+ if (simd == 16) {
+ for (int i = 0; i < 2; i++) {
+ if(!uniform_src)
+ ind_src.addr_imm += 16;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 2, 16*i), ind_src);
+ }
+ for (int i = 0; i < 2; i++) {
+ if(!uniform_src)
+ ind_src.addr_imm += 16;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 3, 16*i), ind_src);
+ }
+ }
+ p->pop();
+
+ p->MOV(dst, tmp);
} else {
GBE_ASSERT(0);
}
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index a5dc0894..32d00e23 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -472,6 +472,116 @@ namespace gbe
p->pop();
p->MOV(dst, tmp);
+ }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+ bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+ GBE_ASSERT(uniform_src || src.subnr == 0);
+ GBE_ASSERT(dst.subnr == 0);
+ GBE_ASSERT(tmp.subnr == 0);
+ GBE_ASSERT(start_addr >= 0);
+ if (!uniform_src) {
+ new_a0[0] = start_addr + 3;
+ new_a0[1] = start_addr + 2;
+ new_a0[2] = start_addr + 1;
+ new_a0[3] = start_addr;
+ new_a0[4] = start_addr + 7;
+ new_a0[5] = start_addr + 6;
+ new_a0[6] = start_addr + 5;
+ new_a0[7] = start_addr + 4;
+ } else {
+ new_a0[0] = start_addr + 7;
+ new_a0[1] = start_addr + 6;
+ new_a0[2] = start_addr + 5;
+ new_a0[3] = start_addr + 4;
+ new_a0[4] = start_addr + 3;
+ new_a0[5] = start_addr + 2;
+ new_a0[6] = start_addr + 1;
+ new_a0[7] = start_addr;
+ }
+ this->setA0Content(new_a0, 56);
+
+ if (!uniform_src) {
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
+ p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
+ for (int i = 1; i < 4; i++) {
+ if (!uniform_src)
+ ind_src.addr_imm += 8;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src);
+ }
+ for (int i = 0; i < 4; i++) {
+ if (!uniform_src)
+ ind_src.addr_imm += 8;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 8*i), ind_src);
+ }
+ if (simd == 16) {
+ for (int i = 0; i < 4; i++) {
+ if (!uniform_src)
+ ind_src.addr_imm += 8;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 2, 8*i), ind_src);
+ }
+ for (int i = 0; i < 4; i++) {
+ if (!uniform_src)
+ ind_src.addr_imm += 8;
+ p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 3, 8*i), ind_src);
+ }
+ }
+ p->pop();
+
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ if (simd == 8) {
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 1, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 0, 0));
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 0, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 1, 0));
+ }else if(simd == 16) {
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 2, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 0, 0));
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 3, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 1, 0));
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 0, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 2, 0));
+ p->MOV(GenRegister::offset(GenRegister::retype(dst, GEN_TYPE_D), 1, 0),
+ GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_D), 3, 0));
+ }
+ p->pop();
+ } else {
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 0);
+ p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
+ p->pop();
+
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ GenRegister x = GenRegister::ud1grf(tmp.nr, 0);
+ GenRegister y = GenRegister::ud1grf(tmp.nr, 1);
+ GenRegister dst_ = dst;
+ dst_.type = GEN_TYPE_UD;
+ dst_.hstride = GEN_HORIZONTAL_STRIDE_1;
+ dst_.width = GEN_WIDTH_8;
+ dst_.vstride = GEN_VERTICAL_STRIDE_8;
+
+ if (simd == 8) {
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 0, 0), x);
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 1, 0), y);
+ }else if(simd == 16) {
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 0, 0), x);
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 1, 0), x);
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 2, 0), y);
+ p->MOV(GenRegister::offset(GenRegister::retype(dst_, GEN_TYPE_D), 3, 0), y);
+ }
+ p->pop();
+ }
} else {
GBE_ASSERT(0);
}