summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuo Yejun <yejun.guo@intel.com>2015-12-04 03:22:20 +0800
committerYang Rong <rong.r.yang@intel.com>2015-12-14 12:10:20 +0800
commit1236620cc6b5ad5687132e22ff888070e75332eb (patch)
tree077590709f295c8e1fdd4a04d83a78910b0b485f
parentb51dd736527ee769a9665d83a1b4e7dc6438e67c (diff)
add Broxton support
special versions of linux kernel and libdrm are needed. utest and conformance test PASSED. Signed-off-by: Guo Yejun <yejun.guo@intel.com> Reviewed-by: Junyan He <junyan.he@linux.intel.com>
-rwxr-xr-xGetGenID.sh2
-rw-r--r--backend/src/backend/gen8_context.cpp2
-rw-r--r--backend/src/backend/gen8_context.hpp2
-rw-r--r--backend/src/backend/gen9_context.cpp110
-rw-r--r--backend/src/backend/gen9_context.hpp22
-rw-r--r--backend/src/backend/gen_insn_selection.cpp11
-rw-r--r--backend/src/backend/gen_insn_selection.hpp7
-rw-r--r--backend/src/backend/gen_program.cpp17
-rw-r--r--backend/src/gbe_bin_generater.cpp4
-rw-r--r--src/cl_device_data.h9
-rw-r--r--src/cl_device_id.c34
-rw-r--r--src/intel/intel_gpgpu.c5
12 files changed, 213 insertions, 12 deletions
diff --git a/GetGenID.sh b/GetGenID.sh
index 7acf9bda..30296da7 100755
--- a/GetGenID.sh
+++ b/GetGenID.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-genpciid=(0152 0162 0156 0166 015a 016a 0f31 0402 0412 0422 040a 041a 042a 0406 0416 0426 0c02 0c12 0c22 0c0a 0c1a 0c2a 0c06 0c16 0c26 0a02 0a12 0a22 0a0a 0a1a 0a2a 0a06 0a16 0a26 0d02 0d12 0d22 0d0a 0d1a 0d2a 0d06 0d16 0d26)
+genpciid=(0152 0162 0156 0166 015a 016a 0f31 0402 0412 0422 040a 041a 042a 0406 0416 0426 0c02 0c12 0c22 0c0a 0c1a 0c2a 0c06 0c16 0c26 0a02 0a12 0a22 0a0a 0a1a 0a2a 0a06 0a16 0a26 0d02 0d12 0d22 0d0a 0d1a 0d2a 0d06 0d16 0d26 5a84)
pciid=($(lspci -nn | grep "\[8086:.*\]" -o | awk -F : '{print $2}' | awk -F ] '{print $1}'))
n=${#pciid[*]}
i=0
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 71d900f3..7455bfcf 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -417,7 +417,7 @@ namespace gbe
GBE_ASSERT(0);
}
- static GenRegister unpacked_ud(GenRegister reg, uint32_t offset = 0)
+ GenRegister Gen8Context::unpacked_ud(GenRegister reg, uint32_t offset)
{
if(reg.hstride == GEN_HORIZONTAL_STRIDE_0) {
if(offset == 0)
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 537aef57..cc415c63 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -76,6 +76,8 @@ namespace gbe
virtual void emitF64DIVInstruction(const SelectionInstruction &insn);
+ static GenRegister unpacked_ud(GenRegister reg, uint32_t offset = 0);
+
protected:
virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0);
virtual void subTimestamps(GenRegister& t0, GenRegister& t1, GenRegister& tmp);
diff --git a/backend/src/backend/gen9_context.cpp b/backend/src/backend/gen9_context.cpp
index c35293a9..47b1496a 100644
--- a/backend/src/backend/gen9_context.cpp
+++ b/backend/src/backend/gen9_context.cpp
@@ -55,4 +55,114 @@ namespace gbe
p->WAIT();
p->pop();
}
+ /*! Create the instruction selector for this context: a SelectionBxt bound to *this, stored in sel. */
+ void BxtContext::newSelection(void) {
+ this->sel = GBE_NEW(SelectionBxt, *this);
+ }
+ /*! Emit the instruction sequence for a full unsigned 64-bit x 64-bit multiply, built from
+  *  four 32-bit x 32-bit partial products and without using the acc register.
+  *  \param src0, src1  the operands; retyped to UD here so their dwords can be addressed
+  *  \param dst_h       accumulates the upper part of the product
+  *  \param dst_l       receives the lower part of the product; first borrowed as storage for s1h
+  *  \param s0l_s1h     holds the low(src0) x high(src1) product; also borrowed for s0h and later temporaries
+  *  \param s0h_s1l     holds the high(src0) x low(src1) product and then the running middle sum
+  *  All registers are taken by value, so the retyping below only changes the local copies. */
+ void BxtContext::calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l)
+ {
+ src0.type = src1.type = GEN_TYPE_UD;
+ dst_h.type = dst_l.type = GEN_TYPE_UL;
+ s0l_s1h.type = s0h_s1l.type = GEN_TYPE_UL;
+
+ //GenRegister tmp;
+
+ GenRegister s0l = unpacked_ud(src0);
+ GenRegister s1l = unpacked_ud(src1);
+ GenRegister s0h = unpacked_ud(s0l_s1h); // s0h is only read before s0l_s1h is written, so it borrows s0l_s1h
+ GenRegister s1h = unpacked_ud(dst_l); // s1h is only read before dst_l is written, so it borrows dst_l
+
+ p->MOV(s0h, GenRegister::offset(s0l, 0, 4)); // presumably the high dword (s0l advanced by 4 bytes) - TODO confirm
+ p->MOV(s1h, GenRegister::offset(s1l, 0, 4));
+
+ /* High 32 bits X High 32 bits. */
+ p->MUL(dst_h, s0h, s1h);
+ /* High 32 bits X low 32 bits. */
+ p->MUL(s0h_s1l, s0h, s1l);
+ /* Low 32 bits X high 32 bits. */
+ p->MUL(s0l_s1h, s0l, s1h);
+ /* Low 32 bits X low 32 bits. */
+ p->MUL(dst_l, s0l, s1l);
+
+ /* The largest possible 32x32 product is (2^N - 1) * (2^N - 1) = 2^2N + 1 - 2^(N+1), here N = 32.
+ The largest value obtained by adding two 32-bit integers to it is
+ 2^2N + 1 - 2^(N+1) + 2*(2^N - 1) = 2^2N - 1,
+ which means that adding dst_l's high 32 bits and then s0l_s1h's low 32 bits to the product s0h_s1l
+ cannot overflow and produces no carry.
+ This way we can avoid using the acc register, which has a lot of restrictions. */
+
+ GenRegister s0l_s1h_l = unpacked_ud(s0l_s1h);
+ p->ADD(s0h_s1l, s0h_s1l, s0l_s1h_l);
+
+ p->SHR(s0l_s1h, s0l_s1h, GenRegister::immud(32));
+ GenRegister s0l_s1h_h = unpacked_ud(s0l_s1h);
+ p->ADD(dst_h, dst_h, s0l_s1h_h);
+
+ GenRegister dst_l_h = unpacked_ud(s0l_s1h); // s0l_s1h has been folded into dst_h above; reuse it as scratch
+ p->MOV(dst_l_h, unpacked_ud(dst_l, 1));
+ p->ADD(s0h_s1l, s0h_s1l, dst_l_h);
+
+ // s0l_s1h is no longer needed, so it serves as tmp from here on
+ GenRegister tmp = s0l_s1h;
+
+ p->SHL(tmp, s0h_s1l, GenRegister::immud(32)); // move the low 32 bits of the middle sum up
+ GenRegister tmp_unpacked = unpacked_ud(tmp, 1);
+ p->MOV(unpacked_ud(dst_l, 1), tmp_unpacked);
+
+ p->SHR(tmp, s0h_s1l, GenRegister::immud(32)); // the upper 32 bits of the middle sum go into dst_h
+ p->ADD(dst_h, dst_h, tmp);
+ }
+ /*! Emit a 64-bit integer multiply from 32-bit pieces:
+  *  dst = s0l*s1l + ((s0l*s1h) << 32) + ((s0h*s1l) << 32); no high x high term is computed.
+  *  insn.dst(0) receives the result; insn.dst(1) (res) is used as a temporary for the cross terms. */
+ void BxtContext::emitI64MULInstruction(const SelectionInstruction &insn)
+ {
+ GenRegister src0 = ra->genReg(insn.src(0));
+ GenRegister src1 = ra->genReg(insn.src(1));
+ GenRegister dst = ra->genReg(insn.dst(0));
+ GenRegister res = ra->genReg(insn.dst(1));
+
+ src0.type = src1.type = GEN_TYPE_UD;
+ dst.type = GEN_TYPE_UL;
+ res.type = GEN_TYPE_UL;
+
+ /* Low 32 bits X low 32 bits. */
+ GenRegister s0l = unpacked_ud(src0);
+ GenRegister s1l = unpacked_ud(src1);
+ p->MUL(dst, s0l, s1l);
+
+ /* Low 32 bits X high 32 bits. */
+ GenRegister s1h = unpacked_ud(res); // res doubles as storage for s1h until the MUL below overwrites it
+ p->MOV(s1h, unpacked_ud(src1, 1));
+
+ p->MUL(res, s0l, s1h);
+ p->SHL(res, res, GenRegister::immud(32));
+ p->ADD(dst, dst, res);
+
+ /* High 32 bits X low 32 bits. */
+ GenRegister s0h = unpacked_ud(res); // res is reused again, this time as storage for s0h
+ p->MOV(s0h, unpacked_ud(src0, 1));
+
+ p->MUL(res, s0h, s1l);
+ p->SHL(res, res, GenRegister::immud(32));
+ p->ADD(dst, dst, res);
+ }
+
+ /*! Write the given 16-bit values into the registers returned by GenRegister::addr1().
+  *  \param new_a0      values to write; each consecutive pair is packed into one 32-bit move
+  *  \param max_offset  not used by this implementation
+  *  \param sz          number of entries to write; 0 selects all 16, and it must be a multiple of 4 */
+ void BxtContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int sz) {
+   if (sz == 0)
+     sz = 16;
+   GBE_ASSERT(sz%4 == 0);
+   // new_a0 is unsigned, so only the upper bound needs checking.
+   GBE_ASSERT(new_a0[0] < 4096);
+
+   p->push();
+   p->curr.execWidth = 1;
+   p->curr.predicate = GEN_PREDICATE_NONE;
+   p->curr.noMask = 1;
+   for (int i = 0; i < sz/2; i++) {
+     // Widen before shifting: a uint16_t is otherwise promoted to signed int, and a
+     // value >= 0x8000 shifted left by 16 would land in the sign bit.
+     const uint32_t packed = (static_cast<uint32_t>(new_a0[i*2 + 1]) << 16) | new_a0[i*2];
+     p->MOV(GenRegister::retype(GenRegister::addr1(i*2), GEN_TYPE_UD),
+            GenRegister::immud(packed));
+   }
+   p->pop();
+ }
+
}
diff --git a/backend/src/backend/gen9_context.hpp b/backend/src/backend/gen9_context.hpp
index 8acad8cc..a2931cca 100644
--- a/backend/src/backend/gen9_context.hpp
+++ b/backend/src/backend/gen9_context.hpp
@@ -46,5 +46,27 @@ namespace gbe
private:
virtual void newSelection(void);
};
+
+ // Most of BxtContext's code is copied from ChvContext, which leaves two physical copies of the same code.
+ // There are two possible ways to avoid that: 1) virtual inheritance, or 2) a class template.
+ // Either way would tie BxtContext and ChvContext closely together, which might limit the flexibility of future changes,
+ // so the approach with two physical copies was chosen.
+ class BxtContext : public Gen9Context
+ {
+ public:
+ virtual ~BxtContext(void) { }
+ BxtContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false)
+ : Gen9Context(unit, name, deviceID, relaxMath) { // forwards all arguments to Gen9Context
+ };
+ virtual void emitI64MULInstruction(const SelectionInstruction &insn); // 64-bit multiply built from 32-bit multiplies
+
+ protected:
+ virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0); // sz == 0 means all 16 entries
+
+ private:
+ virtual void newSelection(void); // creates a SelectionBxt
+ virtual void calculateFullU64MUL(GenRegister src0, GenRegister src1, GenRegister dst_h,
+ GenRegister dst_l, GenRegister s0l_s1h, GenRegister s0h_s1l); // full 64x64 multiply into dst_h/dst_l
+ };
}
#endif /* __GBE_GEN9_CONTEXT_HPP__ */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index edd50456..ce95b1f5 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2268,6 +2268,17 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
opt_features = SIOF_LOGICAL_SRCMOD;
}
+ SelectionBxt::SelectionBxt(GenContext &ctx) : Selection(ctx) { // selector configuration used for Broxton
+ this->opaque->setHas32X32Mul(true);
+ this->opaque->setHasLongType(true);
+ this->opaque->setLongRegRestrict(true);
+ this->opaque->setHasDoubleType(true);
+ this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL); // uses the SKL load-message order constant
+ this->opaque->setSlowByteGather(true);
+ this->opaque->setHasHalfType(true);
+ opt_features = SIOF_LOGICAL_SRCMOD | SIOF_OP_MOV_LONG_REG_RESTRICT;
+ }
+
void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t msgNum,
uint32_t bti, bool is3D) {
uint32_t elemID = 0;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index f292566e..83d64bec 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -333,6 +333,13 @@ namespace gbe
Selection9(GenContext &ctx);
};
+ class SelectionBxt: public Selection // the selector that BxtContext::newSelection instantiates
+ {
+ public:
+ /*! Initialize internal structures used for the selection */
+ SelectionBxt(GenContext &ctx);
+ };
+
} /* namespace gbe */
#endif /* __GEN_INSN_SELECTION_HPP__ */
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 5149d498..1427c25a 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -171,6 +171,8 @@ namespace gbe {
ctx = GBE_NEW(ChvContext, unit, name, deviceID, relaxMath);
} else if (IS_SKYLAKE(deviceID)) {
ctx = GBE_NEW(Gen9Context, unit, name, deviceID, relaxMath);
+ } else if (IS_BROXTON(deviceID)) {
+ ctx = GBE_NEW(BxtContext, unit, name, deviceID, relaxMath);
}
GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
@@ -219,7 +221,8 @@ namespace gbe {
GBHI_HSW = 2,
GBHI_CHV = 3,
GBHI_BDW = 4,
- GBHI_SKL = 5,//remember update GBHI_MAX if add option.
+ GBHI_SKL = 5,
+ GBHI_BXT = 6,
GBHI_MAX,
};
@@ -229,7 +232,9 @@ namespace gbe {
{0, 'G','E', 'N', 'C', 'H', 'S', 'W'},
{0, 'G','E', 'N', 'C', 'C', 'H', 'V'},
{0, 'G','E', 'N', 'C', 'B', 'D', 'W'},
- {0, 'G','E', 'N', 'C', 'S', 'K', 'L'}};
+ {0, 'G','E', 'N', 'C', 'S', 'K', 'L'},
+ {0, 'G','E', 'N', 'C', 'B', 'X', 'T'}
+ };
#define FILL_GEN_HEADER(binary, index) do {int i = 0; do {*(binary+i) = gen_binary_header[index][i]; i++; }while(i < GEN_BINARY_HEADER_LENGTH);}while(0)
#define FILL_BYT_HEADER(binary) FILL_GEN_HEADER(binary, GBHI_BYT)
@@ -238,6 +243,7 @@ namespace gbe {
#define FILL_CHV_HEADER(binary) FILL_GEN_HEADER(binary, GBHI_CHV)
#define FILL_BDW_HEADER(binary) FILL_GEN_HEADER(binary, GBHI_BDW)
#define FILL_SKL_HEADER(binary) FILL_GEN_HEADER(binary, GBHI_SKL)
+#define FILL_BXT_HEADER(binary) FILL_GEN_HEADER(binary, GBHI_BXT)
static bool genHeaderCompare(const unsigned char *BufPtr, GEN_BINARY_HEADER_INDEX index)
{
@@ -255,6 +261,7 @@ namespace gbe {
#define MATCH_CHV_HEADER(binary) genHeaderCompare(binary, GBHI_CHV)
#define MATCH_BDW_HEADER(binary) genHeaderCompare(binary, GBHI_BDW)
#define MATCH_SKL_HEADER(binary) genHeaderCompare(binary, GBHI_SKL)
+#define MATCH_BXT_HEADER(binary) genHeaderCompare(binary, GBHI_BXT)
#define MATCH_DEVICE(deviceID, binary) ((IS_IVYBRIDGE(deviceID) && MATCH_IVB_HEADER(binary)) || \
(IS_IVYBRIDGE(deviceID) && MATCH_IVB_HEADER(binary)) || \
@@ -262,7 +269,9 @@ namespace gbe {
(IS_HASWELL(deviceID) && MATCH_HSW_HEADER(binary)) || \
(IS_BROADWELL(deviceID) && MATCH_BDW_HEADER(binary)) || \
(IS_CHERRYVIEW(deviceID) && MATCH_CHV_HEADER(binary)) || \
- (IS_SKYLAKE(deviceID) && MATCH_SKL_HEADER(binary)) )
+ (IS_SKYLAKE(deviceID) && MATCH_SKL_HEADER(binary)) || \
+ (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) \
+ )
static gbe_program genProgramNewFromBinary(uint32_t deviceID, const char *binary, size_t size) {
using namespace gbe;
@@ -359,6 +368,8 @@ namespace gbe {
FILL_CHV_HEADER(*binary);
}else if(IS_SKYLAKE(prog->deviceID)){
FILL_SKL_HEADER(*binary);
+ }else if(IS_BROXTON(prog->deviceID)){
+ FILL_BXT_HEADER(*binary);
}else {
free(*binary);
*binary = NULL;
diff --git a/backend/src/gbe_bin_generater.cpp b/backend/src/gbe_bin_generater.cpp
index 86197e19..8225d4a3 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -186,6 +186,10 @@ void program_build_instance::serialize_program(void) throw(int)
src_hw_info[0]='S';
src_hw_info[1]='K';
src_hw_info[2]='L';
+ }else if(IS_BROXTON(gen_pci_id)){
+ src_hw_info[0]='B';
+ src_hw_info[1]='X';
+ src_hw_info[2]='T';
}
if (str_fmt_out) {
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index 3552a166..63e078fd 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -287,7 +287,14 @@
devid == PCI_CHIP_SKYLAKE_SRV_GT4)
#define IS_SKYLAKE(devid) (IS_SKL_GT1(devid) || IS_SKL_GT2(devid) || IS_SKL_GT3(devid) || IS_SKL_GT4(devid))
-#define IS_GEN9(devid) IS_SKYLAKE(devid)
+
+/* BXT (Broxton).
+ * NOTE(review): like the SKL macros above, IS_BROXTON does not parenthesize devid,
+ * so callers should pass a plain identifier or an already parenthesized expression. */
+#define PCI_CHIP_BROXTON_P 0x5A84 /* Intel(R) BXT-P for mobile desktop */
+
+#define IS_BROXTON(devid) \
+ (devid == PCI_CHIP_BROXTON_P)
+
+#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid)) /* Gen9 now covers both Skylake and Broxton ids */
#endif /* __CL_DEVICE_DATA_H__ */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 5debf06a..a98523f8 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -198,6 +198,17 @@ static struct _cl_device_id intel_skl_gt4_device = {
#include "cl_gen9_device.h"
};
+static struct _cl_device_id intel_bxt_device = { /* Broxton (BXT) device description. The initializers that
+   are not listed here are pulled in from cl_gen9_device.h, the include also used by intel_skl_gt4_device. */
+ INIT_ICD(dispatch)
+ .max_compute_unit = 18,
+ .max_thread_per_unit = 6,
+ .sub_slice_count = 3,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
LOCAL cl_device_id
cl_get_gt_device(void)
{
@@ -529,6 +540,16 @@ skl_gt4_break:
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
break;
+ case PCI_CHIP_BROXTON_P:
+ DECL_INFO_STRING(bxt_break, intel_bxt_device, name, "Intel(R) HD Graphics Broxton-P");
+bxt_break:
+ intel_bxt_device.device_id = device_id;
+ intel_bxt_device.platform = cl_get_platform_default();
+ ret = &intel_bxt_device;
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
case PCI_CHIP_SANDYBRIDGE_BRIDGE:
case PCI_CHIP_SANDYBRIDGE_GT1:
case PCI_CHIP_SANDYBRIDGE_GT2:
@@ -754,7 +775,8 @@ cl_get_device_info(cl_device_id device,
device != &intel_skl_gt1_device &&
device != &intel_skl_gt2_device &&
device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device
+ device != &intel_skl_gt4_device &&
+ device != &intel_bxt_device
))
return CL_INVALID_DEVICE;
@@ -868,7 +890,9 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
device != &intel_skl_gt1_device &&
device != &intel_skl_gt2_device &&
device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device))
+ device != &intel_skl_gt4_device &&
+ device != &intel_bxt_device
+ ))
return CL_INVALID_DEVICE;
if (ver == NULL)
return CL_SUCCESS;
@@ -883,7 +907,8 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
|| device == &intel_brw_gt3_device || device == &intel_chv_device) {
*ver = 8;
} else if (device == &intel_skl_gt1_device || device == &intel_skl_gt2_device
- || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device) {
+ || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
+ || device == &intel_bxt_device) {
*ver = 9;
} else
return CL_INVALID_VALUE;
@@ -971,7 +996,8 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
device != &intel_skl_gt1_device &&
device != &intel_skl_gt2_device &&
device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device))
+ device != &intel_skl_gt4_device &&
+ device != &intel_bxt_device))
return CL_INVALID_DEVICE;
CHECK_KERNEL(kernel);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 53a6dfc2..d1038e14 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1208,7 +1208,8 @@ static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_
IS_HASWELL(gpgpu->drv->device_id) ||
IS_BROADWELL(gpgpu->drv->device_id) ||
IS_CHERRYVIEW(gpgpu->drv->device_id) ||
- IS_SKYLAKE(gpgpu->drv->device_id))) &&
+ IS_SKYLAKE(gpgpu->drv->device_id) ||
+ IS_BROXTON(gpgpu->drv->device_id))) &&
index >= BTI_WORKAROUND_IMAGE_OFFSET + BTI_RESERVED_NUM &&
type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
surface_type = I965_SURFACE_2D;
@@ -2488,7 +2489,7 @@ intel_set_gpgpu_callbacks(int device_id)
intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
return;
}
- if (IS_SKYLAKE(device_id)) {
+ if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id)) {
cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;