summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Android.mk4
-rw-r--r--configs/autoconf.in6
-rw-r--r--configure.ac17
-rw-r--r--include/pci_ids/pci_id_driver_map.h7
-rw-r--r--include/pci_ids/radeonsi_pci_ids.h40
-rw-r--r--src/egl/main/Android.mk7
-rw-r--r--src/gallium/Android.mk7
-rw-r--r--src/gallium/SConscript2
-rw-r--r--src/gallium/drivers/Makefile.am13
-rw-r--r--src/gallium/drivers/radeon/AMDGPU.h47
-rw-r--r--src/gallium/drivers/radeon/AMDGPUConstants.pm44
-rw-r--r--src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp65
-rw-r--r--src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl126
-rw-r--r--src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl30
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.cpp31
-rw-r--r--src/gallium/drivers/radeon/AMDGPUISelLowering.h35
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp116
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstrInfo.h59
-rw-r--r--src/gallium/drivers/radeon/AMDGPUInstructions.td90
-rw-r--r--src/gallium/drivers/radeon/AMDGPUIntrinsics.td56
-rw-r--r--src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp38
-rw-r--r--src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h40
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp24
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.h38
-rw-r--r--src/gallium/drivers/radeon/AMDGPURegisterInfo.td22
-rw-r--r--src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp66
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp180
-rw-r--r--src/gallium/drivers/radeon/AMDGPUTargetMachine.h62
-rw-r--r--src/gallium/drivers/radeon/AMDGPUUtil.cpp127
-rw-r--r--src/gallium/drivers/radeon/AMDGPUUtil.h49
-rw-r--r--src/gallium/drivers/radeon/AMDIL.h292
-rw-r--r--src/gallium/drivers/radeon/AMDIL.td19
-rw-r--r--src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp723
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.cpp157
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.h77
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp548
-rw-r--r--src/gallium/drivers/radeon/AMDILAlgorithms.tpp93
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmBackend.cpp82
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmBackend.h49
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmPrinter7XX.cpp149
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmPrinterEG.cpp162
-rw-r--r--src/gallium/drivers/radeon/AMDILBarrierDetect.cpp254
-rw-r--r--src/gallium/drivers/radeon/AMDILBase.td104
-rw-r--r--src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp3257
-rw-r--r--src/gallium/drivers/radeon/AMDILCallingConv.td75
-rw-r--r--src/gallium/drivers/radeon/AMDILCodeEmitter.h46
-rw-r--r--src/gallium/drivers/radeon/AMDILCompilerErrors.h75
-rw-r--r--src/gallium/drivers/radeon/AMDILCompilerWarnings.h31
-rw-r--r--src/gallium/drivers/radeon/AMDILConversions.td1022
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.cpp137
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.h132
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.cpp87
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.h89
-rw-r--r--src/gallium/drivers/radeon/AMDILDevices.h19
-rw-r--r--src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp1093
-rw-r--r--src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp71
-rw-r--r--src/gallium/drivers/radeon/AMDILELFWriterInfo.h54
-rw-r--r--src/gallium/drivers/radeon/AMDILEnumeratedTypes.td522
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp211
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.h93
-rw-r--r--src/gallium/drivers/radeon/AMDILFormats.td450
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.cpp53
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.h46
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.cpp1353
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.h256
-rw-r--r--src/gallium/drivers/radeon/AMDILIOExpansion.cpp1160
-rw-r--r--src/gallium/drivers/radeon/AMDILIOExpansion.h320
-rw-r--r--src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp457
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp5612
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.h527
-rw-r--r--src/gallium/drivers/radeon/AMDILImageExpansion.cpp171
-rw-r--r--src/gallium/drivers/radeon/AMDILInliner.cpp271
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.cpp709
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.h175
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.td115
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrPatterns.td66
-rw-r--r--src/gallium/drivers/radeon/AMDILInstructions.td2436
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp190
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.h49
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsics.td705
-rw-r--r--src/gallium/drivers/radeon/AMDILKernel.h84
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.cpp1356
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.h177
-rw-r--r--src/gallium/drivers/radeon/AMDILLiteralManager.cpp128
-rw-r--r--src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp158
-rw-r--r--src/gallium/drivers/radeon/AMDILMachineFunctionInfo.cpp597
-rw-r--r--src/gallium/drivers/radeon/AMDILMachineFunctionInfo.h422
-rw-r--r--src/gallium/drivers/radeon/AMDILMachinePeephole.cpp173
-rw-r--r--src/gallium/drivers/radeon/AMDILModuleInfo.cpp1266
-rw-r--r--src/gallium/drivers/radeon/AMDILModuleInfo.h159
-rw-r--r--src/gallium/drivers/radeon/AMDILMultiClass.td1440
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.cpp71
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.h59
-rw-r--r--src/gallium/drivers/radeon/AMDILNodes.td325
-rw-r--r--src/gallium/drivers/radeon/AMDILOperands.td37
-rw-r--r--src/gallium/drivers/radeon/AMDILPatterns.td504
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp1211
-rw-r--r--src/gallium/drivers/radeon/AMDILPointerManager.cpp2551
-rw-r--r--src/gallium/drivers/radeon/AMDILPointerManager.h209
-rw-r--r--src/gallium/drivers/radeon/AMDILPrintfConvert.cpp293
-rw-r--r--src/gallium/drivers/radeon/AMDILProfiles.td174
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.cpp200
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.h91
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.td964
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.cpp49
-rw-r--r--src/gallium/drivers/radeon/AMDILSIDevice.h45
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.cpp179
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.h75
-rw-r--r--src/gallium/drivers/radeon/AMDILTargetMachine.cpp195
-rw-r--r--src/gallium/drivers/radeon/AMDILTargetMachine.h75
-rw-r--r--src/gallium/drivers/radeon/AMDILTokenDesc.td120
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp683
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.h362
-rw-r--r--src/gallium/drivers/radeon/AMDILVersion.td75
-rw-r--r--src/gallium/drivers/radeon/LICENSE.TXT43
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDILMCAsmInfo.cpp107
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDILMCAsmInfo.h30
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.cpp66
-rw-r--r--src/gallium/drivers/radeon/MCTargetDesc/AMDILMCTargetDesc.h36
-rw-r--r--src/gallium/drivers/radeon/Makefile77
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources86
-rw-r--r--src/gallium/drivers/radeon/Processors.td28
-rw-r--r--src/gallium/drivers/radeon/R600CodeEmitter.cpp776
-rw-r--r--src/gallium/drivers/radeon/R600GenRegisterInfo.pl171
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.cpp102
-rw-r--r--src/gallium/drivers/radeon/R600ISelLowering.h40
-rw-r--r--src/gallium/drivers/radeon/R600InstrFormats.td16
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.cpp109
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.h74
-rw-r--r--src/gallium/drivers/radeon/R600Instructions.td931
-rw-r--r--src/gallium/drivers/radeon/R600Intrinsics.td40
-rw-r--r--src/gallium/drivers/radeon/R600KernelParameters.cpp503
-rw-r--r--src/gallium/drivers/radeon/R600KernelParameters.h28
-rw-r--r--src/gallium/drivers/radeon/R600LowerInstructions.cpp546
-rw-r--r--src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp143
-rw-r--r--src/gallium/drivers/radeon/R600OpenCLUtils.h49
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.cpp102
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.h44
-rw-r--r--src/gallium/drivers/radeon/R600Schedule.td34
-rw-r--r--src/gallium/drivers/radeon/SIAssignInterpRegs.cpp110
-rw-r--r--src/gallium/drivers/radeon/SICodeEmitter.cpp274
-rw-r--r--src/gallium/drivers/radeon/SIConvertToISA.cpp89
-rw-r--r--src/gallium/drivers/radeon/SIGenRegisterInfo.pl278
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.cpp151
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.h44
-rw-r--r--src/gallium/drivers/radeon/SIInstrFormats.td128
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.cpp173
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.h95
-rw-r--r--src/gallium/drivers/radeon/SIInstrInfo.td472
-rw-r--r--src/gallium/drivers/radeon/SIInstructions.td962
-rw-r--r--src/gallium/drivers/radeon/SIIntrinsics.td34
-rw-r--r--src/gallium/drivers/radeon/SILowerShaderInstructions.cpp90
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp62
-rw-r--r--src/gallium/drivers/radeon/SIMachineFunctionInfo.h36
-rw-r--r--src/gallium/drivers/radeon/SIPropagateImmReads.cpp71
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.cpp66
-rw-r--r--src/gallium/drivers/radeon/SIRegisterInfo.h46
-rw-r--r--src/gallium/drivers/radeon/SISchedule.td15
-rw-r--r--src/gallium/drivers/radeon/TargetInfo/AMDILTargetInfo.cpp32
-rw-r--r--src/gallium/drivers/radeon/loader.cpp34
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h136
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.cpp145
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c660
-rw-r--r--src/gallium/drivers/radeonsi/Android.mk38
-rw-r--r--src/gallium/drivers/radeonsi/Makefile24
-rw-r--r--src/gallium/drivers/radeonsi/Makefile.sources13
-rw-r--r--src/gallium/drivers/radeonsi/SConscript17
-rw-r--r--src/gallium/drivers/radeonsi/evergreen_hw_context.c561
-rw-r--r--src/gallium/drivers/radeonsi/evergreen_state.c2169
-rw-r--r--src/gallium/drivers/radeonsi/r600.h245
-rw-r--r--src/gallium/drivers/radeonsi/r600_blit.c379
-rw-r--r--src/gallium/drivers/radeonsi/r600_buffer.c282
-rw-r--r--src/gallium/drivers/radeonsi/r600_hw_context.c1151
-rw-r--r--src/gallium/drivers/radeonsi/r600_hw_context_priv.h76
-rw-r--r--src/gallium/drivers/radeonsi/r600_query.c130
-rw-r--r--src/gallium/drivers/radeonsi/r600_resource.c64
-rw-r--r--src/gallium/drivers/radeonsi/r600_resource.h105
-rw-r--r--src/gallium/drivers/radeonsi/r600_state_common.c899
-rw-r--r--src/gallium/drivers/radeonsi/r600_texture.c825
-rw-r--r--src/gallium/drivers/radeonsi/r600_translate.c54
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_pipe.c731
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_pipe.h490
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_public.h30
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_shader.c565
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_shader.h4
-rw-r--r--src/gallium/drivers/radeonsi/sid.h7668
-rw-r--r--src/gallium/targets/dri-radeonsi/Makefile26
-rw-r--r--src/gallium/targets/dri-radeonsi/SConscript25
-rw-r--r--src/gallium/targets/dri-radeonsi/target.c40
-rw-r--r--src/gallium/targets/egl-static/Android.mk3
-rw-r--r--src/gallium/targets/egl-static/Makefile11
-rw-r--r--src/gallium/targets/egl-static/SConscript3
-rw-r--r--src/gallium/targets/egl-static/egl_pipe.c27
-rw-r--r--src/gallium/targets/gbm/Makefile13
-rw-r--r--src/gallium/targets/gbm/pipe_radeonsi.c26
-rw-r--r--src/gallium/targets/xorg-radeonsi/Makefile24
-rw-r--r--src/gallium/targets/xorg-radeonsi/target.c26
-rw-r--r--src/gallium/targets/xorg-radeonsi/xorg.c148
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c8
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.h3
200 files changed, 66076 insertions, 10 deletions
diff --git a/Android.mk b/Android.mk
index 0d5917ce5f2..6a3014c81bb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -24,7 +24,7 @@
# BOARD_GPU_DRIVERS should be defined. The valid values are
#
# classic drivers: i915 i965
-# gallium drivers: swrast i915g nouveau r300g r600g vmwgfx
+# gallium drivers: swrast i915g nouveau r300g r600g radeonsi vmwgfx
#
# The main target is libGLES_mesa. For each classic driver enabled, a DRI
# module will also be built. DRI modules will be loaded by libGLES_mesa.
@@ -37,7 +37,7 @@ DRM_TOP := external/drm
DRM_GRALLOC_TOP := hardware/drm_gralloc
classic_drivers := i915 i965
-gallium_drivers := swrast i915g nouveau r300g r600g vmwgfx
+gallium_drivers := swrast i915g nouveau r300g r600g radeonsi vmwgfx
MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS))
diff --git a/configs/autoconf.in b/configs/autoconf.in
index 95cca6f239e..ec3f3194e2d 100644
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@@ -32,9 +32,12 @@ INTEL_LIBS = @INTEL_LIBS@
INTEL_CFLAGS = @INTEL_CFLAGS@
X11_LIBS = @X11_LIBS@
X11_CFLAGS = @X11_CFLAGS@
+LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBS = @LLVM_LIBS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
GLW_CFLAGS = @GLW_CFLAGS@
GLX_TLS = @GLX_TLS@
DRI_CFLAGS = @DRI_CFLAGS@
@@ -58,6 +61,9 @@ AWK = @AWK@
GREP = @GREP@
NM = @NM@
+# Perl
+PERL = @PERL@
+
# Python and flags (generally only needed by the developers)
PYTHON2 = @PYTHON2@
PYTHON_FLAGS = -t -O -O
diff --git a/configure.ac b/configure.ac
index 65d358e0a8d..17564f12885 100644
--- a/configure.ac
+++ b/configure.ac
@@ -67,6 +67,8 @@ if test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.y"; then
fi
AC_PROG_LEX
+AC_PATH_PROG([PERL], [perl])
+
dnl Our fallback install-sh is a symlink to minstall. Use the existing
dnl configuration in that case.
AC_PROG_INSTALL
@@ -1647,9 +1649,12 @@ if test "x$with_gallium_drivers" != x; then
SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets"
fi
+AC_SUBST([LLVM_BINDIR])
AC_SUBST([LLVM_CFLAGS])
+AC_SUBST([LLVM_CXXFLAGS])
AC_SUBST([LLVM_LIBS])
AC_SUBST([LLVM_LDFLAGS])
+AC_SUBST([LLVM_INCLUDEDIR])
AC_SUBST([LLVM_VERSION])
case "x$enable_opengl$enable_gles1$enable_gles2" in
@@ -1795,6 +1800,9 @@ if test "x$enable_gallium_llvm" = xyes; then
LLVM_LIBS="`$LLVM_CONFIG --libs engine bitwriter`"
fi
LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
+ LLVM_BINDIR=`$LLVM_CONFIG --bindir`
+ LLVM_CXXFLAGS=`$LLVM_CONFIG --cxxflags`
+ LLVM_INCLUDEDIR=`$LLVM_CONFIG --includedir`
DEFINES="$DEFINES -D__STDC_CONSTANT_MACROS"
MESA_LLVM=1
else
@@ -1898,6 +1906,14 @@ if test "x$with_gallium_drivers" != x; then
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
;;
+ xradeonsi)
+ GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS radeonsi"
+ if test "x$LLVM_VERSION" != "x3.1"; then
+ AC_MSG_ERROR([LLVM 3.1 is required to build the radeonsi driver.])
+ fi
+ NEED_RADEON_GALLIUM=yes;
+ gallium_check_st "radeon/drm" "dri-radeonsi" "xorg-radeonsi"
+ ;;
xnouveau)
PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED])
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0"
@@ -1957,6 +1973,7 @@ done
AM_CONDITIONAL(HAVE_GALAHAD_GALLIUM, test x$HAVE_GALAHAD_GALLIUM = xyes)
AM_CONDITIONAL(HAVE_IDENTITY_GALLIUM, test x$HAVE_IDENTITY_GALLIUM = xyes)
AM_CONDITIONAL(HAVE_NOOP_GALLIUM, test x$HAVE_NOOP_GALLIUM = xyes)
+AM_CONDITIONAL(NEED_RADEON_GALLIUM, test x$NEED_RADEON_GALLIUM = xyes)
AC_SUBST([GALLIUM_MAKE_DIRS])
dnl prepend CORE_DIRS to SRC_DIRS
diff --git a/include/pci_ids/pci_id_driver_map.h b/include/pci_ids/pci_id_driver_map.h
index 232359f6e0d..fce38af0fe0 100644
--- a/include/pci_ids/pci_id_driver_map.h
+++ b/include/pci_ids/pci_id_driver_map.h
@@ -45,6 +45,12 @@ static const int r600_chip_ids[] = {
#undef CHIPSET
};
+static const int radeonsi_chip_ids[] = {
+#define CHIPSET(chip, name, family) chip,
+#include "pci_ids/radeonsi_pci_ids.h"
+#undef CHIPSET
+};
+
static const int vmwgfx_chip_ids[] = {
#define CHIPSET(chip, name, family) chip,
#include "pci_ids/vmwgfx_pci_ids.h"
@@ -65,6 +71,7 @@ static const struct {
#endif
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },
{ 0x1002, "r600", r600_chip_ids, ARRAY_SIZE(r600_chip_ids) },
+ { 0x1002, "radeonsi", radeonsi_chip_ids, ARRAY_SIZE(radeonsi_chip_ids) },
{ 0x10de, "nouveau", NULL, -1 },
{ 0x15ad, "vmwgfx", vmwgfx_chip_ids, ARRAY_SIZE(vmwgfx_chip_ids) },
{ 0x0000, NULL, NULL, 0 },
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
new file mode 100644
index 00000000000..55ade1247d7
--- /dev/null
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -0,0 +1,40 @@
+CHIPSET(0x6780, TAHITI_6780, TAHITI)
+CHIPSET(0x6784, TAHITI_6784, TAHITI)
+CHIPSET(0x6788, TAHITI_678A, TAHITI)
+CHIPSET(0x678A, TAHITI_678A, TAHITI)
+CHIPSET(0x6790, TAHITI_6790, TAHITI)
+CHIPSET(0x6798, TAHITI_6798, TAHITI)
+CHIPSET(0x6799, TAHITI_6799, TAHITI)
+CHIPSET(0x679A, TAHITI_679E, TAHITI)
+CHIPSET(0x679E, TAHITI_679E, TAHITI)
+CHIPSET(0x679F, TAHITI_679F, TAHITI)
+
+CHIPSET(0x6800, PITCAIRN_6800, PITCAIRN)
+CHIPSET(0x6801, PITCAIRN_6801, PITCAIRN)
+CHIPSET(0x6802, PITCAIRN_6802, PITCAIRN)
+CHIPSET(0x6808, PITCAIRN_6808, PITCAIRN)
+CHIPSET(0x6809, PITCAIRN_6809, PITCAIRN)
+CHIPSET(0x6810, PITCAIRN_6810, PITCAIRN)
+CHIPSET(0x6818, PITCAIRN_6818, PITCAIRN)
+CHIPSET(0x6819, PITCAIRN_6819, PITCAIRN)
+CHIPSET(0x684C, PITCAIRN_684C, PITCAIRN)
+
+CHIPSET(0x6820, VERDE_6820, VERDE)
+CHIPSET(0x6821, VERDE_6821, VERDE)
+CHIPSET(0x6823, VERDE_6824, VERDE)
+CHIPSET(0x6824, VERDE_6824, VERDE)
+CHIPSET(0x6825, VERDE_6825, VERDE)
+CHIPSET(0x6826, VERDE_6825, VERDE)
+CHIPSET(0x6827, VERDE_6827, VERDE)
+CHIPSET(0x6828, VERDE_6828, VERDE)
+CHIPSET(0x6829, VERDE_6829, VERDE)
+CHIPSET(0x682D, VERDE_682D, VERDE)
+CHIPSET(0x682F, VERDE_682F, VERDE)
+CHIPSET(0x6830, VERDE_6830, VERDE)
+CHIPSET(0x6831, VERDE_6831, VERDE)
+CHIPSET(0x6837, VERDE_6831, VERDE)
+CHIPSET(0x6838, VERDE_6838, VERDE)
+CHIPSET(0x6839, VERDE_6839, VERDE)
+CHIPSET(0x683B, VERDE_683B, VERDE)
+CHIPSET(0x683D, VERDE_683D, VERDE)
+CHIPSET(0x683F, VERDE_683F, VERDE)
diff --git a/src/egl/main/Android.mk b/src/egl/main/Android.mk
index d96da228aa7..a4a00f3bb35 100644
--- a/src/egl/main/Android.mk
+++ b/src/egl/main/Android.mk
@@ -107,8 +107,8 @@ gallium_DRIVERS += \
LOCAL_SHARED_LIBRARIES += libdrm_nouveau
endif
-# r300g/r600g
-ifneq ($(filter r300g r600g, $(MESA_GPU_DRIVERS)),)
+# r300g/r600g/radeonsi
+ifneq ($(filter r300g r600g radeonsi, $(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_winsys_radeon
ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_pipe_r300
@@ -116,6 +116,9 @@ endif
ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_pipe_r600
endif
+ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_pipe_radeonsi
+endif
endif
# vmwgfx
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 41c59b13c6f..1d002d05374 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -49,8 +49,8 @@ SUBDIRS += \
drivers/nvc0
endif
-# r300g/r600g
-ifneq ($(filter r300g r600g, $(MESA_GPU_DRIVERS)),)
+# r300g/r600g/radeonsi
+ifneq ($(filter r300g r600g radeonsi, $(MESA_GPU_DRIVERS)),)
SUBDIRS += winsys/radeon/drm
ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),)
SUBDIRS += drivers/r300
@@ -58,6 +58,9 @@ endif
ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
SUBDIRS += drivers/r600
endif
+ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
+SUBDIRS += drivers/radeonsi
+endif
endif
# vmwgfx
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 4413bc8742b..da2e4dd5ded 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -33,6 +33,7 @@ if env['drm']:
SConscript([
'drivers/r300/SConscript',
'drivers/r600/SConscript',
+ 'drivers/radeonsi/SConscript',
])
# XXX: nouveau drivers have a tight dependency on libdrm, so to enable
# we need some version logic before we enable them. Also, ATM there is
@@ -152,6 +153,7 @@ if not env['embedded']:
SConscript([
'targets/dri-r300/SConscript',
'targets/dri-r600/SConscript',
+ 'targets/dri-radeonsi/SConscript',
])
if env['xorg'] and env['drm']:
diff --git a/src/gallium/drivers/Makefile.am b/src/gallium/drivers/Makefile.am
index 0aa2653a0f1..97c5695fa15 100644
--- a/src/gallium/drivers/Makefile.am
+++ b/src/gallium/drivers/Makefile.am
@@ -10,6 +10,8 @@ AM_CPPFLAGS = \
noinst_LIBRARIES =
+SUBDIRS =
+
################################################################################
if HAVE_GALAHAD_GALLIUM
@@ -52,7 +54,16 @@ noop_libnoop_a_SOURCES = \
endif
################################################################################
-SUBDIRS = $(GALLIUM_MAKE_DIRS)
+
+if NEED_RADEON_GALLIUM
+
+SUBDIRS+= radeon
+
+endif
+
+################################################################################
+
+SUBDIRS+= $(GALLIUM_MAKE_DIRS)
# FIXME: Remove when the rest of Gallium is converted to automake.
default: all
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h
new file mode 100644
index 00000000000..5613dab4b35
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPU.h
@@ -0,0 +1,47 @@
+//===-- AMDGPU.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class AMDGPUTargetMachine;
+
+ FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+ FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm);
+ FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+ FunctionPass *createSIConvertToISAPass(TargetMachine &tm);
+ FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm);
+ FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm);
+ FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
+ FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+
+ FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+
+ FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm);
+
+} /* End namespace llvm */
+#endif /* AMDGPU_H */
diff --git a/src/gallium/drivers/radeon/AMDGPUConstants.pm b/src/gallium/drivers/radeon/AMDGPUConstants.pm
new file mode 100644
index 00000000000..b64ff49c187
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUConstants.pm
@@ -0,0 +1,44 @@
+#===-- AMDGPUConstants.pm - TODO: Add brief description -------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===#
+#
+# TODO: Add full description
+#
+#===----------------------------------------------------------------------===#
+
+package AMDGPUConstants;
+
+use base 'Exporter';
+
+use constant CONST_REG_COUNT => 256;
+use constant TEMP_REG_COUNT => 128;
+
+our @EXPORT = ('TEMP_REG_COUNT', 'CONST_REG_COUNT', 'get_hw_index', 'get_chan_str');
+
+sub get_hw_index {
+ my ($index) = @_;
+ return int($index / 4);
+}
+
+sub get_chan_str {
+ my ($index) = @_;
+ my $chan = $index % 4;
+ if ($chan == 0 ) {
+ return 'X';
+ } elsif ($chan == 1) {
+ return 'Y';
+ } elsif ($chan == 2) {
+ return 'Z';
+ } elsif ($chan == 3) {
+ return 'W';
+ } else {
+ die("Unknown chan value: $chan");
+ }
+}
+
+1;
diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
new file mode 100644
index 00000000000..ce947f8ff78
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
@@ -0,0 +1,65 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers AMDIL machine instructions to the appropriate hardware
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ void lowerFLT(MachineInstr &MI);
+
+ public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ };
+} /* End anonymous namespace */
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+ MachineInstr * newInstr = TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ if (!newInstr) {
+ continue;
+ }
+ MBB.insert(I, newInstr);
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
new file mode 100644
index 00000000000..1fd4fb04b3e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
@@ -0,0 +1,126 @@
+#===-- AMDGPUGenInstrEnums.pl - TODO: Add brief description -------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===#
+#
+# TODO: Add full description
+#
+#===----------------------------------------------------------------------===#
+
+use warnings;
+use strict;
+
+my @F32_MULTICLASSES = qw {
+ UnaryIntrinsicFloat
+ UnaryIntrinsicFloatScalar
+ BinaryIntrinsicFloat
+ TernaryIntrinsicFloat
+ BinaryOpMCFloat
+};
+
+my @I32_MULTICLASSES = qw {
+ BinaryOpMCInt
+ BinaryOpMCi32
+ BinaryOpMCi32Const
+};
+
+my @GENERATION_ENUM = qw {
+ R600_CAYMAN
+ R600
+ EG
+ EG_CAYMAN
+ CAYMAN
+ SI
+};
+
+my $FILE_TYPE = $ARGV[0];
+
+open AMDIL, '<', 'AMDILInstructions.td';
+
+my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ');
+
+while (<AMDIL>) {
+ if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) {
+ if (grep {$_ eq $2} @F32_MULTICLASSES) {
+ push @INST_ENUMS, "$1\_f32";
+
+ } elsif (grep {$_ eq $2} @I32_MULTICLASSES) {
+ push @INST_ENUMS, "$1\_i32";
+ }
+ } elsif ($_ =~ /def\s+([A-Z_]+)(_[fi]32)/) {
+ push @INST_ENUMS, "$1$2";
+ }
+}
+
+if ($FILE_TYPE eq 'td') {
+
+ print_td_enum('AMDILInst', 'AMDILInstEnums', 'field bits<16>', @INST_ENUMS);
+
+ print_td_enum('AMDGPUGen', 'AMDGPUGenEnums', 'field bits<3>', @GENERATION_ENUM);
+
+ my %constants = (
+ 'PI' => '0x40490fdb',
+ 'TWO_PI' => '0x40c90fdb',
+ 'TWO_PI_INV' => '0x3e22f983'
+ );
+
+ print "class Constants {\n";
+ foreach (keys(%constants)) {
+ print "int $_ = $constants{$_};\n";
+ }
+ print "}\n";
+ print "def CONST : Constants;\n";
+
+} elsif ($FILE_TYPE eq 'h') {
+
+ print "unsigned GetRealAMDILOpcode(unsigned internalOpcode) const;\n";
+
+ print_h_enum('AMDILTblgenOpcode', @INST_ENUMS);
+
+ print_h_enum('AMDGPUGen', @GENERATION_ENUM);
+
+} elsif ($FILE_TYPE eq 'inc') {
+ print "unsigned AMDGPUInstrInfo::GetRealAMDILOpcode(unsigned internalOpcode) const\n{\n";
+ print " switch(internalOpcode) {\n";
+ #Start at 1 so we skip NONE
+ for (my $i = 1; $i < scalar(@INST_ENUMS); $i++) {
+ my $inst = $INST_ENUMS[$i];
+ print " case AMDGPUInstrInfo::$inst: return AMDIL::$inst;\n";
+ }
+ print " default: abort();\n";
+ print " }\n}\n";
+}
+
+
+sub print_td_enum {
+ my ($instance, $class, $field, @values) = @_;
+
+ print "class $class {\n";
+
+ for (my $i = 0; $i < scalar(@values); $i++) {
+ print " $field $values[$i] = $i;\n";
+ }
+ print "}\n";
+
+ print "def $instance : $class;\n";
+}
+
+sub print_h_enum {
+
+ my ($enum, @list) = @_;
+ print "enum $enum {\n";
+
+ for (my $i = 0; $i < scalar(@list); $i++) {
+ print " $list[$i] = $i";
+ if ($i != $#list) {
+ print ',';
+ }
+ print "\n";
+ }
+ print "};\n";
+}
+
diff --git a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl
new file mode 100644
index 00000000000..60523a7b48f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl
@@ -0,0 +1,30 @@
+#===-- AMDGPUGenShaderPatterns.pl - TODO: Add brief description -------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===#
+#
+# TODO: Add full description
+#
+#===----------------------------------------------------------------------===#
+
+use strict;
+use warnings;
+
+use AMDGPUConstants;
+
+my $reg_prefix = $ARGV[0];
+
+for (my $i = 0; $i < CONST_REG_COUNT * 4; $i++) {
+ my $index = get_hw_index($i);
+ my $chan = get_chan_str($i);
+print <<STRING;
+def : Pat <
+ (int_AMDGPU_load_const $i),
+ (f32 (MOV (f32 $reg_prefix$index\_$chan)))
+>;
+STRING
+}
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
new file mode 100644
index 00000000000..2c1052fd8ea
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
@@ -0,0 +1,31 @@
+//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPUUtil.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ AMDILTargetLowering(TM)
+{
+}
+
+void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
+ MachineFunction * MF, MachineRegisterInfo & MRI,
+ const struct TargetInstrInfo * TII, unsigned reg) const
+{
+ AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
+}
+
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
new file mode 100644
index 00000000000..3c5beb1cdae
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
@@ -0,0 +1,35 @@
+//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "AMDILISelLowering.h"
+
+namespace llvm {
+
+class AMDGPUTargetLowering : public AMDILTargetLowering
+{
+protected:
+ void addLiveIn(MachineInstr * MI, MachineFunction * MF,
+ MachineRegisterInfo & MRI, const struct TargetInstrInfo * TII,
+ unsigned reg) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+};
+
+} /* End namespace llvm */
+
+#endif /* AMDGPUISELLOWERING_H */
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
new file mode 100644
index 00000000000..4742283f688
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -0,0 +1,116 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the TargetInstrInfo class that is
+// common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDILInstrInfo(tm), TM(tm)
+{
+ const AMDILDevice * dev = TM.getSubtarget<AMDILSubtarget>().device();
+ for (unsigned i = 0; i < AMDIL::INSTRUCTION_LIST_END; i++) {
+ const MCInstrDesc & instDesc = get(i);
+ uint32_t instGen = (instDesc.TSFlags >> 40) & 0x7;
+ uint32_t inst = (instDesc.TSFlags >> 48) & 0xffff;
+ if (inst == 0) {
+ continue;
+ }
+ switch (instGen) {
+ case AMDGPUInstrInfo::R600_CAYMAN:
+ if (dev->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::R600:
+ if (dev->getGeneration() != AMDILDeviceInfo::HD4XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::EG_CAYMAN:
+ if (dev->getGeneration() < AMDILDeviceInfo::HD5XXX
+ || dev->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::CAYMAN:
+ if (dev->getDeviceFlag() != OCL_DEVICE_CAYMAN) {
+ continue;
+ }
+ break;
+ case AMDGPUInstrInfo::SI:
+ if (dev->getGeneration() != AMDILDeviceInfo::HD7XXX) {
+ continue;
+ }
+ break;
+ default:
+ abort();
+ break;
+ }
+
+ unsigned amdilOpcode = GetRealAMDILOpcode(inst);
+ amdilToISA[amdilOpcode] = instDesc.Opcode;
+ }
+}
+
+MachineInstr * AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const
+{
+ MachineInstrBuilder newInstr;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+ unsigned ISAOpcode = getISAOpcode(MI.getOpcode());
+
+ /* Create the new instruction */
+ newInstr = BuildMI(MF, DL, TM.getInstrInfo()->get(ISAOpcode));
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ /* Convert dst regclass to one that is supported by the ISA */
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ /* Add the operand to the new instruction */
+ newInstr.addOperand(MO);
+ }
+
+ return newInstr;
+}
+
+unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const
+{
+ if (amdilToISA.count(opcode) == 0) {
+ return opcode;
+ } else {
+ return amdilToISA.find(opcode)->second;
+ }
+}
+
+bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const
+{
+ return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1;
+}
+
+#include "AMDGPUInstrEnums.include"
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
new file mode 100644
index 00000000000..fa009bc6302
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -0,0 +1,59 @@
+//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H_
+#define AMDGPUINSTRUCTIONINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDILInstrInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class AMDGPUInstrInfo : public AMDILInstrInfo {
+ private:
+ AMDGPUTargetMachine & TM;
+ std::map<unsigned, unsigned> amdilToISA;
+
+ public:
+ explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned getISAOpcode(unsigned AMDILopcode) const;
+
+ virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+ bool isRegPreload(const MachineInstr &MI) const;
+
+ #include "AMDGPUInstrEnums.h.include"
+ };
+
+} // End llvm namespace
+
+/* AMDGPU target flags are stored in bits 32-39 */
+namespace AMDGPU_TFLAG_SHIFTS {
+ enum TFLAGS {
+ PRELOAD_REG = 32
+ };
+}
+
+
+#endif // AMDGPUINSTRINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
new file mode 100644
index 00000000000..10eceb6ce53
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -0,0 +1,90 @@
+//===-- AMDGPUInstructions.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+include "AMDGPUInstrEnums.td"
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bits<16> AMDILOp = 0;
+ field bits<3> Gen = 0;
+ field bit PreloadReg = 0;
+
+ let Namespace = "AMDIL";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let TSFlags{32} = PreloadReg;
+ let TSFlags{42-40} = Gen;
+ let TSFlags{63-48} = AMDILOp;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+let isCodeGenOnly = 1 in {
+
+ def EXPORT_REG : AMDGPUShaderInst <
+ (outs),
+ (ins GPRF32:$src),
+ "EXPORT_REG $src",
+ [(int_AMDGPU_export_reg GPRF32:$src)]
+ >;
+
+ def LOAD_INPUT : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_INPUT $dst, $src",
+ [] >{
+ let PreloadReg = 1;
+ }
+
+ def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins GPRF32:$src),
+ "MASK_WRITE $src",
+ []
+ >;
+
+ def RESERVE_REG : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "RESERVE_REG $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_reserve_reg imm:$src))]> {
+ let PreloadReg = 1;
+ }
+
+ def STORE_OUTPUT: AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins GPRF32:$src0, i32imm:$src1),
+ "STORE_OUTPUT $dst, $src0, $src1",
+ [(set GPRF32:$dst, (int_AMDGPU_store_output GPRF32:$src0, imm:$src1))]
+ >;
+}
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (int_AMDGPU_pow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
new file mode 100644
index 00000000000..d2cda0db936
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
@@ -0,0 +1,56 @@
+//===-- AMDGPUIntrinsics.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_export_reg : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
+ def int_AMDGPU_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>;
+ def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_sge : BinaryIntFloat;
+ def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp
new file mode 100644
index 00000000000..d33055ccb87
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp
@@ -0,0 +1,38 @@
+//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPULowerShaderInstructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF,
+ const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const
+{
+ if (!MRI->isLiveIn(physReg)) {
+ MRI->addLiveIn(physReg, virtReg);
+ MachineBasicBlock &EntryMBB = MF->front();
+ BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ virtReg)
+ .addReg(physReg);
+ } else {
+ /* We can't mark the same register as preloaded twice, but we still must
+ * associate virtReg with the correct preloaded register. */
+ unsigned newReg = MRI->getLiveInVirtReg(physReg);
+ MRI->replaceRegWith(virtReg, newReg);
+ }
+}
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h
new file mode 100644
index 00000000000..5ee77fafe2b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h
@@ -0,0 +1,40 @@
+//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS
+#define AMDGPU_LOWER_SHADER_INSTRUCTIONS
+
+namespace llvm {
+
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+class AMDGPULowerShaderInstructionsPass {
+
+ protected:
+ MachineRegisterInfo * MRI;
+ /**
+ * @param physReg The physical register that will be preloaded.
+ * @param virtReg The virtual register that currently holds the
+ * preloaded value.
+ */
+ void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII,
+ unsigned physReg, unsigned virtReg) const;
+};
+
+} // end namespace llvm
+
+
+#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
new file mode 100644
index 00000000000..162a49116a0
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDILRegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
new file mode 100644
index 00000000000..f4492e9795d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
@@ -0,0 +1,38 @@
+//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H_
+#define AMDGPUREGISTERINFO_H_
+
+#include "AMDILRegisterInfo.h"
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class TargetInstrInfo;
+
+ struct AMDGPURegisterInfo : public AMDILRegisterInfo
+ {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const = 0;
+ };
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
new file mode 100644
index 00000000000..173d6622569
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
@@ -0,0 +1,22 @@
+//===-- AMDGPURegisterInfo.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDIL" in {
+ def sel_x : SubRegIndex;
+ def sel_y : SubRegIndex;
+ def sel_z : SubRegIndex;
+ def sel_w : SubRegIndex;
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp
new file mode 100644
index 00000000000..c923f19c39f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp
@@ -0,0 +1,66 @@
+//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Function.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; }
+ };
+} /* End anonymous namespace */
+
+char AMDGPUReorderPreloadInstructionsPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) {
+ return new AMDGPUReorderPreloadInstructionsPass(tm);
+}
+
+/* This pass moves instructions that represent preloaded registers to the
+ * start of the program. */
+bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next, Next = llvm::next(I) ) {
+ MachineInstr &MI = *I;
+ if (TII->isRegPreload(MI)) {
+ MF.front().insert(MF.front().begin(), MI.removeFromParent());
+ }
+ }
+ }
+ return false;
+}
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
new file mode 100644
index 00000000000..4d6a1bd7e34
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -0,0 +1,180 @@
+//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILTargetMachine.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600KernelParameters.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ mGM(new AMDILGlobalManager(0 /* Debug mode */)),
+ mKM(new AMDILKernelManager(this, mGM)),
+ mDump(false)
+
+{
+ /* XXX: Add these two initializations to fix a segfault, not sure if this
+ * is correct. These are normally initialized in the AsmPrinter, but AMDGPU
+ * does not use the asm printer */
+ Subtarget.setGlobalManager(mGM);
+ Subtarget.setKernelManager(mKM);
+ /* TLInfo uses InstrInfo so it must be initialized after. */
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine()
+{
+ delete mGM;
+ delete mKM;
+}
+
+bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify) {
+ /* XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
+ * only using it to access addPassesToGenerateCode() */
+ bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
+ DisableVerify);
+ assert(fail);
+
+ const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
+ std::string gpu = STM.getDeviceName();
+ if (gpu == "SI") {
+ PM.add(createSICodeEmitterPass(Out));
+ } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600CodeEmitterPass(Out));
+ } else {
+ abort();
+ return true;
+ }
+ PM.add(createGCInfoDeleter());
+
+ return false;
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel()
+{
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+ if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600KernelParametersPass(
+ getAMDGPUTargetMachine().getTargetData()));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ PM.add(createAMDILBarrierDetect(*TM));
+ PM.add(createAMDILPrintfConvert(*TM));
+ PM.add(createAMDILInlinePass(*TM));
+ PM.add(createAMDILPeepholeOpt(*TM));
+ PM.add(createAMDILISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+
+ if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) {
+ PM.add(createSIInitMachineFunctionInfoPass(*TM));
+ }
+
+ PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM));
+ if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600LowerShaderInstructionsPass(*TM));
+ PM.add(createR600LowerInstructionsPass(*TM));
+ } else {
+ PM.add(createSILowerShaderInstructionsPass(*TM));
+ PM.add(createSIAssignInterpRegsPass(*TM));
+ PM.add(createSIConvertToISAPass(*TM));
+ }
+ PM.add(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+ PM.add(createAMDILCFGPreparationPass(*TM));
+ PM.add(createAMDILCFGStructurizerPass(*TM));
+ if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) {
+ PM.add(createSIPropagateImmReadsPass(*TM));
+ }
+
+ PM.add(createAMDILIOExpansion(*TM));
+ return false;
+}
+
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
new file mode 100644
index 00000000000..d4165b09e84
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
@@ -0,0 +1,62 @@
+//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDILTargetMachine.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public AMDILTargetMachine {
+ AMDILSubtarget Subtarget;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ AMDILGlobalManager *mGM;
+ AMDILKernelManager *mKM;
+ bool mDump;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDILSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify);
+public:
+ void dumpCode() { mDump = true; }
+ bool shouldDumpCode() const { return mDump; }
+};
+
+} /* End namespace llvm */
+
+#endif /* AMDGPU_TARGET_MACHINE_H */
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
new file mode 100644
index 00000000000..d24b98070de
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
@@ -0,0 +1,127 @@
+//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUUtil.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDIL.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+/* Some instructions act as place holders to emulate operations that the GPU
+ * hardware does automatically. This function can be used to check if
+ * an opcode falls into this category. */
+bool llvm::isPlaceHolderOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ default: return false;
+ case AMDIL::EXPORT_REG:
+ case AMDIL::RETURN:
+ case AMDIL::LOAD_INPUT:
+ case AMDIL::LAST:
+ case AMDIL::RESERVE_REG:
+ return true;
+ }
+}
+
+bool llvm::isTransOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+
+ case AMDIL::COS_f32:
+ case AMDIL::COS_r600:
+ case AMDIL::COS_eg:
+ case AMDIL::RSQ_f32:
+ case AMDIL::FTOI:
+ case AMDIL::ITOF:
+ case AMDIL::MULLIT:
+ case AMDIL::MUL_LIT_r600:
+ case AMDIL::MUL_LIT_eg:
+ case AMDIL::SHR_i32:
+ case AMDIL::SIN_f32:
+ case AMDIL::EXP_f32:
+ case AMDIL::EXP_IEEE_r600:
+ case AMDIL::EXP_IEEE_eg:
+ case AMDIL::LOG_CLAMPED_r600:
+ case AMDIL::LOG_IEEE_r600:
+ case AMDIL::LOG_CLAMPED_eg:
+ case AMDIL::LOG_IEEE_eg:
+ case AMDIL::LOG_f32:
+ return true;
+ }
+}
+
+bool llvm::isTexOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::TEX_SAMPLE:
+ case AMDIL::TEX_SAMPLE_C:
+ case AMDIL::TEX_SAMPLE_L:
+ case AMDIL::TEX_SAMPLE_C_L:
+ case AMDIL::TEX_SAMPLE_LB:
+ case AMDIL::TEX_SAMPLE_C_LB:
+ case AMDIL::TEX_SAMPLE_G:
+ case AMDIL::TEX_SAMPLE_C_G:
+ return true;
+ }
+}
+
+bool llvm::isReductionOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::DOT4_r600:
+ case AMDIL::DOT4_eg:
+ return true;
+ }
+}
+
+bool llvm::isFCOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDIL::BREAK_LOGICALZ_f32:
+ case AMDIL::BREAK_LOGICALNZ_i32:
+ case AMDIL::BREAK_LOGICALZ_i32:
+ case AMDIL::CONTINUE_LOGICALNZ_f32:
+ case AMDIL::IF_LOGICALNZ_i32:
+ case AMDIL::IF_LOGICALZ_f32:
+ case AMDIL::ELSE:
+ case AMDIL::ENDIF:
+ case AMDIL::ENDLOOP:
+ case AMDIL::IF_LOGICALNZ_f32:
+ case AMDIL::WHILELOOP:
+ return true;
+ }
+}
+
+void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+ const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg)
+{
+ if (!MRI.isLiveIn(physReg)) {
+ MRI.addLiveIn(physReg, virtReg);
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), virtReg)
+ .addReg(physReg);
+ } else {
+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+ }
+}
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h
new file mode 100644
index 00000000000..299146e1ba7
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.h
@@ -0,0 +1,49 @@
+//===-- AMDGPUUtil.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Add full description
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_UTIL_H
+#define AMDGPU_UTIL_H
+
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AMDILMachineFunctionInfo;
+
+class TargetMachine;
+class TargetRegisterInfo;
+
+bool isPlaceHolderOpcode(unsigned opcode);
+
+bool isTransOp(unsigned opcode);
+bool isTexOp(unsigned opcode);
+bool isReductionOp(unsigned opcode);
+bool isFCOp(unsigned opcode);
+
+/* XXX: Move these to AMDGPUInstrInfo.h */
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+
+} /* End namespace llvm */
+
+namespace AMDGPU {
+
+void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI,
+ const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
+
+} // End namespace AMDGPU
+
+#endif /* AMDGPU_UTIL_H */
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
new file mode 100644
index 00000000000..cc6590c82a9
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -0,0 +1,292 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 2
+#define AMDIL_MINOR_VERSION 0
+#define AMDIL_REVISION_NUMBER 74
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+// SC->CAL version matchings.
+#define CAL_VERSION_SC_150 1700
+#define CAL_VERSION_SC_149 1700
+#define CAL_VERSION_SC_148 1525
+#define CAL_VERSION_SC_147 1525
+#define CAL_VERSION_SC_146 1525
+#define CAL_VERSION_SC_145 1451
+#define CAL_VERSION_SC_144 1451
+#define CAL_VERSION_SC_143 1441
+#define CAL_VERSION_SC_142 1441
+#define CAL_VERSION_SC_141 1420
+#define CAL_VERSION_SC_140 1400
+#define CAL_VERSION_SC_139 1387
+#define CAL_VERSION_SC_138 1387
+#define CAL_APPEND_BUFFER_SUPPORT 1340
+#define CAL_VERSION_SC_137 1331
+#define CAL_VERSION_SC_136 982
+#define CAL_VERSION_SC_135 950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+#define AMDIL_OPT_LEVEL_DECL
+#define AMDIL_OPT_LEVEL_VAR
+#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
+
+namespace llvm {
+class AMDILInstrPrinter;
+class AMDILTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+ createAMDILISelDag(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILBarrierDetect(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILPrintfConvert(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILInlinePass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+/// Pre regalloc passes.
+FunctionPass*
+ createAMDILPointerManager(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILMachinePeephole(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+/// Pre emit passes.
+FunctionPass*
+ createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILLiteralManager(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILIOExpansion(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+extern Target TheAMDILTarget;
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a seperate piece of memory that is unique from other
+/// memory locations.
+namespace AMDILAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
+ LAST_ADDRESS = 8
+};
+
+// We are piggybacking on the CommentFlag enum in MachineInstr.h to
+// set bits in AsmPrinterFlags of the MachineInstruction. We will
+// start at bit 16 and allocate down while LLVM will start at bit
+// 1 and allocate up.
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned short isImage : 1; // Reserved for future use/llvm.
+ unsigned short ResourceID : 10; // Flag to specify the resourece ID for
+ // the op.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
+ // conflict.
+ unsigned short ByteStore : 1; // Flag to specify if the op is a byte
+ // store op.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+#else
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ByteStore : 1; // Flag to specify if the op is byte
+ // store op.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
+ // a conflict.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op.
+ unsigned short isImage : 1; // Reserved for future use.
+#endif
+ } bits;
+ unsigned short u16all;
+} InstrResEnc;
+
+} // namespace AMDILAS
+
+// The OpSwizzle encodes a subset of all possible
+// swizzle combinations into a number of bits using
+// only the combinations utilized by the backend.
+// The lower 128 are for source swizzles and the
+// upper 128 or for destination swizzles.
+// The valid mappings can be found in the
+// getSrcSwizzle and getDstSwizzle functions of
+// AMDILUtilityFunctions.cpp.
+typedef union SwizzleRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned char dst : 1;
+ unsigned char swizzle : 7;
+#else
+ unsigned char swizzle : 7;
+ unsigned char dst : 1;
+#endif
+ } bits;
+ unsigned char u8all;
+} OpSwizzle;
+// Enums corresponding to AMDIL condition codes for IL. These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC {
+enum CondCodes {
+ // AMDIL specific condition codes. These correspond to the IL_CC_*
+ // in AMDILInstrInfo.td and must be kept in the same order.
+ IL_CC_D_EQ = 0, // DEQ instruction.
+ IL_CC_D_GE = 1, // DGE instruction.
+ IL_CC_D_LT = 2, // DLT instruction.
+ IL_CC_D_NE = 3, // DNE instruction.
+ IL_CC_F_EQ = 4, // EQ instruction.
+ IL_CC_F_GE = 5, // GE instruction.
+ IL_CC_F_LT = 6, // LT instruction.
+ IL_CC_F_NE = 7, // NE instruction.
+ IL_CC_I_EQ = 8, // IEQ instruction.
+ IL_CC_I_GE = 9, // IGE instruction.
+ IL_CC_I_LT = 10, // ILT instruction.
+ IL_CC_I_NE = 11, // INE instruction.
+ IL_CC_U_GE = 12, // UGE instruction.
+ IL_CC_U_LT = 13, // ULE instruction.
+ // Pseudo IL Comparison instructions here.
+ IL_CC_F_GT = 14, // GT instruction.
+ IL_CC_U_GT = 15,
+ IL_CC_I_GT = 16,
+ IL_CC_D_GT = 17,
+ IL_CC_F_LE = 18, // LE instruction
+ IL_CC_U_LE = 19,
+ IL_CC_I_LE = 20,
+ IL_CC_D_LE = 21,
+ IL_CC_F_UNE = 22,
+ IL_CC_F_UEQ = 23,
+ IL_CC_F_ULT = 24,
+ IL_CC_F_UGT = 25,
+ IL_CC_F_ULE = 26,
+ IL_CC_F_UGE = 27,
+ IL_CC_F_ONE = 28,
+ IL_CC_F_OEQ = 29,
+ IL_CC_F_OLT = 30,
+ IL_CC_F_OGT = 31,
+ IL_CC_F_OLE = 32,
+ IL_CC_F_OGE = 33,
+ IL_CC_D_UNE = 34,
+ IL_CC_D_UEQ = 35,
+ IL_CC_D_ULT = 36,
+ IL_CC_D_UGT = 37,
+ IL_CC_D_ULE = 38,
+ IL_CC_D_UGE = 39,
+ IL_CC_D_ONE = 40,
+ IL_CC_D_OEQ = 41,
+ IL_CC_D_OLT = 42,
+ IL_CC_D_OGT = 43,
+ IL_CC_D_OLE = 44,
+ IL_CC_D_OGE = 45,
+ IL_CC_U_EQ = 46,
+ IL_CC_U_NE = 47,
+ IL_CC_F_O = 48,
+ IL_CC_D_O = 49,
+ IL_CC_F_UO = 50,
+ IL_CC_D_UO = 51,
+ IL_CC_L_LE = 52,
+ IL_CC_L_GE = 53,
+ IL_CC_L_EQ = 54,
+ IL_CC_L_NE = 55,
+ IL_CC_L_LT = 56,
+ IL_CC_L_GT = 57,
+ IL_CC_UL_LE = 58,
+ IL_CC_UL_GE = 59,
+ IL_CC_UL_EQ = 60,
+ IL_CC_UL_NE = 61,
+ IL_CC_UL_LT = 62,
+ IL_CC_UL_GT = 63,
+ COND_ERROR = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_
diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td
new file mode 100644
index 00000000000..9bcccac2411
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL.td
@@ -0,0 +1,19 @@
+//===-- AMDIL.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// This file specifies where the base TD file exists
+// and where the version specific TD file exists.
+include "AMDILBase.td"
+include "AMDILVersion.td"
+
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
diff --git a/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp
new file mode 100644
index 00000000000..cf5afb9d195
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp
@@ -0,0 +1,723 @@
+//===-- AMDIL789IOExpansion.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// @file AMDIL789IOExpansion.cpp
+// @details Implementation of the IO expansion class for 789 devices.
+//
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Value.h"
+
+using namespace llvm;
+AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL)
+: AMDILIOExpansion(tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+AMDIL789IOExpansion::~AMDIL789IOExpansion() {
+}
+
+const char *AMDIL789IOExpansion::getPassName() const
+{
+ return "AMDIL 789 IO Expansion Pass";
+}
+// This code produces the following pseudo-IL:
+// mov r1007, $src.y000
+// cmov_logical r1007.x___, $flag.yyyy, r1007.xxxx, $src.xxxx
+// mov r1006, $src.z000
+// cmov_logical r1007.x___, $flag.zzzz, r1006.xxxx, r1007.xxxx
+// mov r1006, $src.w000
+// cmov_logical $dst.x___, $flag.wwww, r1006.xxxx, r1007.xxxx
+void
+AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
+ unsigned flag, unsigned src, unsigned dst, bool before)
+{
+ MachineBasicBlock::iterator I = *MI;
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(src)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(flag)
+ .addReg(AMDIL::R1007)
+ .addReg(src);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
+ .addReg(src)
+ .addImm(3);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1007)
+ .addReg(flag)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
+ .addReg(src)
+ .addImm(4);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_W_i32), dst)
+ .addReg(flag)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1007);
+
+}
+// We have a 128 bit load but a 8/16/32bit value, so we need to
+// select the correct component and make sure that the correct
+// bits are selected. For the 8 and 16 bit cases we need to
+// extract from the component the correct bits and for 32 bits
+// we just need to select the correct component.
+ void
+AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ DebugLoc DL = MI->getDebugLoc();
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
+ if (getMemorySize(MI) == 1) {
+ // This produces the following pseudo-IL:
+ // iand r1006.x___, r1010.xxxx, l14.xxxx
+ // mov r1006, r1006.xxxx
+ // iadd r1006, r1006, {0, -1, 2, 3}
+ // ieq r1008, r1006, 0
+ // mov r1011, r1011.xxxx
+ // ishr r1011, r1011, {0, 8, 16, 24}
+ // mov r1007, r1011.y000
+ // cmov_logical r1007.x___, r1008.yyyy, r1007.xxxx, r1011.xxxx
+ // mov r1006, r1011.z000
+ // cmov_logical r1007.x___, r1008.zzzz, r1006.xxxx, r1007.xxxx
+ // mov r1006, r1011.w000
+ // cmov_logical r1011.x___, r1008.wwww, r1006.xxxx, r1007.xxxx
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1006)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1006)
+ .addReg(AMDIL::R1006);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
+ .addReg(AMDIL::R1006)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1006)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
+ } else if (getMemorySize(MI) == 2) {
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, 1
+ // iand r1008.x___, r1007.xxxx, 1
+ // ishr r1007.x___, r1011.xxxx, 16
+ // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addReg(AMDIL::R1011);
+ }
+}
+// This function does address calculations modifications to load from a vector
+// register type instead of a dword addressed load.
+ void
+AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
+{
+ MachineBasicBlock::iterator I = *MI;
+ DebugLoc DL = MI->getDebugLoc();
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
+ // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
+ // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1));
+ if (needsSelect) {
+ // If the component selection is required, the following
+ // pseudo-IL is produced.
+ // mov r1008, r1008.xxxx
+ // iadd r1008, r1008, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
+ // ieq r1008, r1008, 0
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
+ (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
+ -1ULL));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ }
+}
+// This function emits a switch statement and writes 32bit/64bit
+// value to a 128bit vector register type.
+ void
+AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
+{
+ MachineBasicBlock::iterator I = *MI;
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ // This section generates the following pseudo-IL:
+ // switch r1008.x
+ // default
+ // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
+ // break
+ // case 1
+ // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
+ // break
+ // if is32bit is true, case 2 and 3 are emitted.
+ // case 2
+ // mov x1[r1007.x].__z_, r1011.x
+ // break
+ // case 3
+ // mov x1[r1007.x].___w, r1011.x
+ // break
+ // endswitch
+ DebugLoc DL;
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::SWITCH))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::DEFAULT));
+ BuildMI(*mBB, I, DL,
+ mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_X : AMDIL::SCRATCHSTORE_XY)
+ , AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(1);
+ BuildMI(*mBB, I, DL,
+ mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_Y : AMDIL::SCRATCHSTORE_ZW), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
+ if (is32bit) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(2);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHSTORE_Z), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CASE)).addImm(3);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHSTORE_W), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BREAK));
+ }
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ENDSWITCH));
+
+}
+ void
+AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
+ if (!xID) {
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Since the private register is a 128 bit aligned, we have to align the address
+ // first, since our source address is 32bit aligned and then load the data.
+ // This produces the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov r1011, x1[r1010.x]
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(xID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ emitVectorAddressCalc(MI, true, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ // These instructions go after the current MI.
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ // mov r1007, r1011.zw00
+ // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1007.zw
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ // These instructions go after the current MI.
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ break;
+ }
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)),
+ MI->getOperand(0).getReg())
+ .addReg(AMDIL::R1011);
+}
+
+
+ void
+AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ if (!isHardwareInst(MI) || MI->memoperands_empty()) {
+ return expandGlobalLoad(MI);
+ }
+ uint32_t cID = getPointerID(MI);
+ if (cID < 2) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::CONSTANT_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(cID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ emitVectorAddressCalc(MI, true, true);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(cID);
+ // These instructions go after the current MI.
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, true);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(cID);
+ // These instructions go after the current MI.
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ break;
+ }
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)),
+ MI->getOperand(0).getReg())
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ void
+AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
+{
+ if (!isStaticCPLoad(MI)) {
+ return expandConstantLoad(MI);
+ } else {
+ uint32_t idx = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MI->getParent()->getParent()
+ ->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &consts
+ = MCP->getConstants();
+ const Constant *C = consts[idx].Val.ConstVal;
+ emitCPInst(MI, C, mKM, 0, isExtendLoad(MI));
+ }
+}
+
+ void
+AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalStore(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ if (!xID) {
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // This section generates the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov x1[r1010.x], r1011
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(AMDIL::SCRATCHSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ break;
+ case 1:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
+ // This section generates the following pseudo-IL:
+ // iand r1003.x, r1010.x, 3
+ // mov r1003, r1003.xxxx
+ // iadd r1000, r1003, {0, -1, -2, -3}
+ // ieq r1000, r1000, 0
+ // mov r1002, r1002.xxxx
+ // ishr r1002, r1002, {0, 8, 16, 24}
+ // mov r1011, r1011.xxxx
+ // cmov_logical r1002, r1000, r1011, r1002
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1003)
+ .addReg(AMDIL::R1003);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::R1003)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1002);
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002, r1002, 0xFF
+ // ishl r1002, r1002, {0, 8, 16, 24}
+ // ior r1002.xy, r1002.xy, r1002.zw
+ // ior r1011.x, r1002.x, r1002.y
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1002)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
+ } else {
+ // This section generates the following pseudo-IL:
+ // mov r1001.xy, r1002.yw
+ // mov r1002.xy, r1002.xz
+ // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
+ // mov r1001.x, r1002.y
+ // ubit_insert r1011.x, 16, 16, r1002.y, r1002.x
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1002)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(8))
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ }
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 2:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, I, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
+ // This section generates the following pseudo-IL:
+ // ishr r1003.x, r1010.x, 1
+ // iand r1003.x, r1003.x, 1
+ // ishr r1001.x, r1002.x, 16
+ // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
+ // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1003)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1002)
+ .addReg(AMDIL::R1003)
+ .addReg(AMDIL::R1002)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1003)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1001);
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002.x, r1002.x, 0xFFFF
+ // iand r1001.x, r1001.x, 0xFFFF
+ // ishl r1001.x, r1002.x, 16
+ // ior r1011.x, r1002.x, r1001.x
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1001);
+ } else {
+ // This section generates the following pseudo-IL:
+ // ubit_insert r1011.x, 16, 16, r1001.y, r1002.x
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ }
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 4:
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, false);
+ emitVectorSwitchWrite(MI, false);
+ break;
+ };
+}
+ void
+AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ DebugLoc DL;
+ if (MI->getOperand(0).isUndef()) {
+ BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addReg(MI->getOperand(0).getReg());
+ }
+ expandTruncData(MI);
+ if (MI->getOperand(2).isReg()) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_i32), AMDIL::R1010)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(2).getReg());
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::MOVE_i32), AMDIL::R1010)
+ .addReg(MI->getOperand(1).getReg());
+ }
+ expandAddressCalc(MI);
+ expandPackedData(MI);
+}
+
+
+void
+AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ if (!isPackedData(MI)) {
+ return;
+ }
+ DebugLoc DL;
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addImm(mMFI->addi64Literal(8ULL << 32));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I8:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V2I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL << 32));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL << 32));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case UNPACK_V2I8:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ break;
+ case UNPACK_V4I8:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ }
+ break;
+ case UNPACK_V2I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ }
+ break;
+ case UNPACK_V4I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ }
+ break;
+ };
+}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
new file mode 100644
index 00000000000..df81c44f288
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
@@ -0,0 +1,157 @@
+//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#ifdef UPSTREAM_LLVM
+#include "AMDIL7XXAsmPrinter.h"
+#endif
+#include "AMDILDevice.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+
+using namespace llvm;
+
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ mDeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ mDeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ mDeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDIL7XXDevice::~AMDIL7XXDevice()
+{
+}
+
+void AMDIL7XXDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL7XXDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDIL7XXDevice::getWavefrontSize() const
+{
+ return AMDILDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDIL7XXDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
+{
+ return 1;
+}
+
+FunctionPass*
+AMDIL7XXDevice::getIOExpansion(
+ TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
+{
+ return new AMDIL7XXIOExpansion(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AsmPrinter*
+AMDIL7XXDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const
+{
+#ifdef UPSTREAM_LLVM
+ return new AMDIL7XXAsmPrinter(TM, Streamer);
+#else
+ return NULL;
+#endif
+}
+
+FunctionPass*
+AMDIL7XXDevice::getPointerManager(
+ TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
+{
+ return new AMDILPointerManager(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+{
+ setCaps();
+}
+
+AMDIL770Device::~AMDIL770Device()
+{
+}
+
+void AMDIL770Device::setCaps()
+{
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL770Device::getWavefrontSize() const
+{
+ return AMDILDevice::WavefrontSize;
+}
+
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
+{
+}
+
+AMDIL710Device::~AMDIL710Device()
+{
+}
+
+size_t AMDIL710Device::getWavefrontSize() const
+{
+ return AMDILDevice::QuarterWavefrontSize;
+}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
new file mode 100644
index 00000000000..87238e96006
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
@@ -0,0 +1,77 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDIL7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice {
+public:
+ AMDIL7XXDevice(AMDILSubtarget *ST);
+ virtual ~AMDIL7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+ FunctionPass*
+ getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
+ AsmPrinter*
+ getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const;
+ FunctionPass*
+ getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
+
+protected:
+ virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// The AMDIL770Device class represents the RV770 chip and it's
+// derivative cards. The difference between this device and the base
+// class is this device device adds support for double precision
+// and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice {
+public:
+ AMDIL770Device(AMDILSubtarget *ST);
+ virtual ~AMDIL770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDIL770Device
+
+// The AMDIL710Device class derives from the 7XX base class, but this
+// class is a smaller derivative, so we need to overload some of the
+// functions in order to correctly specify this information.
+class AMDIL710Device : public AMDIL7XXDevice {
+public:
+ AMDIL710Device(AMDILSubtarget *ST);
+ virtual ~AMDIL710Device();
+ virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp
new file mode 100644
index 00000000000..cddde313e2b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp
@@ -0,0 +1,548 @@
+//===-- AMDIL7XXIOExpansion.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDIL7XXIOExpansion.cpp
+// @details Implementation of the IO Printing class for 7XX devices
+//
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Value.h"
+
+using namespace llvm;
+AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) : AMDIL789IOExpansion(tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion() {
+}
+const char *AMDIL7XXIOExpansion::getPassName() const
+{
+ return "AMDIL 7XX IO Expansion Pass";
+}
+
+ void
+AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ switch(getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 1:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ break;
+ case 2:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ break;
+ }
+ // These instructions go after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ void
+AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 1:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // The instruction would normally fit in right here so everything created
+ // after this point needs to go into the afterInst vector.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ break;
+ case 2:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // The instruction would normally fit in right here so everything created
+ // after this point needs to go into the afterInst vector.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ }
+
+ // These instructions go after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+ void
+AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 1:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ break;
+ case 2:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ break;
+ }
+
+ // These instructions go after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ void
+AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 1:
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 2:
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ };
+}
+
+ void
+AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalStore(MI);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 1:
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1006)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 2:
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ };
+}
+
+ void
+AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalStore(MI);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+}
diff --git a/src/gallium/drivers/radeon/AMDILAlgorithms.tpp b/src/gallium/drivers/radeon/AMDILAlgorithms.tpp
new file mode 100644
index 00000000000..058475f0f98
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAlgorithms.tpp
@@ -0,0 +1,93 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides templates algorithms that extend the STL algorithms, but
+// are useful for the AMDIL backend
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each iterator to the function. If the function returns
+// true, then the current iterator is invalidated and it moves back, before
+// moving forward to the next iterator, otherwise it moves forward without
+// issue. This is based on the for_each STL function, but allows a reference to
+// the second argument
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ F(*First, Second);
+ }
+ return F;
+}
+
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ if (F(*First, Second)) {
+ --First;
+ }
+ }
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument. See binaryForEach for further
+// explanation
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ binaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ safeBinaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
+// versions of these functions This allows the function to handle situations
+// such as invalidated iterators
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+ for ( ; First!=Last; ++First ) F(&First)
+ ; // Do nothing.
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F)
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
diff --git a/src/gallium/drivers/radeon/AMDILAsmBackend.cpp b/src/gallium/drivers/radeon/AMDILAsmBackend.cpp
new file mode 100644
index 00000000000..63b688d20fd
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmBackend.cpp
@@ -0,0 +1,82 @@
+//===------ AMDILAsmBackend.cpp - AMDIL Assembly Backend ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//
+#include "AMDILAsmBackend.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+namespace llvm {
+ ASM_BACKEND_CLASS* createAMDILAsmBackend(const ASM_BACKEND_CLASS &T,
+ const std::string &TT)
+ {
+ return new AMDILAsmBackend(T);
+ }
+} // namespace llvm
+
+//===--------------------- Default AMDIL Asm Backend ---------------------===//
+AMDILAsmBackend::AMDILAsmBackend(const ASM_BACKEND_CLASS &T)
+ : ASM_BACKEND_CLASS()
+{
+}
+
+MCObjectWriter *
+AMDILAsmBackend::createObjectWriter(raw_ostream &OS) const
+{
+ return 0;
+}
+
+bool
+AMDILAsmBackend::doesSectionRequireSymbols(const MCSection &Section) const
+{
+ return false;
+}
+
+bool
+AMDILAsmBackend::isSectionAtomizable(const MCSection &Section) const
+{
+ return true;
+}
+
+bool
+AMDILAsmBackend::isVirtualSection(const MCSection &Section) const
+{
+ return false;
+ //const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
+ //return SE.getType() == MCSectionELF::SHT_NOBITS;
+}
+void
+AMDILAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const
+{
+}
+
+bool
+AMDILAsmBackend::MayNeedRelaxation(const MCInst &Inst) const
+{
+ return false;
+}
+
+void
+AMDILAsmBackend::RelaxInstruction(const MCInst &Inst,
+ MCInst &Res) const
+{
+}
+
+bool
+AMDILAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const
+{
+ return false;
+}
+
+unsigned
+AMDILAsmBackend::getNumFixupKinds() const
+{
+ return 0;
+}
diff --git a/src/gallium/drivers/radeon/AMDILAsmBackend.h b/src/gallium/drivers/radeon/AMDILAsmBackend.h
new file mode 100644
index 00000000000..ae027681b6f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmBackend.h
@@ -0,0 +1,49 @@
+//===-- AMDILAsmBackend.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_ASM_BACKEND_H_
+#define _AMDIL_ASM_BACKEND_H_
+#include "AMDIL.h"
+#include "llvm/MC/MCAsmBackend.h"
+
+#define ASM_BACKEND_CLASS MCAsmBackend
+
+using namespace llvm;
+namespace llvm {
+ class AMDILAsmBackend : public ASM_BACKEND_CLASS {
+ public:
+ AMDILAsmBackend(const ASM_BACKEND_CLASS &T);
+ virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const;
+ virtual bool isSectionAtomizable(const MCSection &Section) const;
+ virtual bool isVirtualSection(const MCSection &Section) const;
+ virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool
+ MayNeedRelaxation(const MCInst &Inst
+ ) const;
+ virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ unsigned getNumFixupKinds() const;
+
+ virtual void applyFixup(const MCFixup &Fixup, char * Data, unsigned DataSize,
+ uint64_t value) const { }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool fixupNeedsRelaxation(const MCFixup &fixup, uint64_t value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const
+ { return false; }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const
+ {}
+ virtual bool writeNopData(uint64_t data, llvm::MCObjectWriter * writer) const
+ { return false; }
+
+ }; // class AMDILAsmBackend;
+} // llvm namespace
+
+#endif // _AMDIL_ASM_BACKEND_H_
diff --git a/src/gallium/drivers/radeon/AMDILAsmPrinter7XX.cpp b/src/gallium/drivers/radeon/AMDILAsmPrinter7XX.cpp
new file mode 100644
index 00000000000..1a739294cc1
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmPrinter7XX.cpp
@@ -0,0 +1,149 @@
+//===-- AMDILAsmPrinter7XX.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXAsmPrinter.h"
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+// TODO: Add support for verbose.
+ AMDIL7XXAsmPrinter::AMDIL7XXAsmPrinter(TargetMachine& TM, MCStreamer &Streamer)
+: AMDILAsmPrinter(TM, Streamer)
+{
+}
+
+AMDIL7XXAsmPrinter::~AMDIL7XXAsmPrinter()
+{
+}
+///
+/// @param name
+/// @brief strips KERNEL_PREFIX and KERNEL_SUFFIX from the name
+/// and returns that name if both of the tokens are present.
+///
+ static
+std::string Strip(const std::string &name)
+{
+ size_t start = name.find("__OpenCL_");
+ size_t end = name.find("_kernel");
+ if (start == std::string::npos
+ || end == std::string::npos
+ || (start == end)) {
+ return name;
+ } else {
+ return name.substr(9, name.length()-16);
+ }
+}
+ void
+AMDIL7XXAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+ llvm::raw_ostream &O)
+{
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ const char *name = "unknown";
+ llvm::StringRef nameRef;
+ if (MI->getOperand(0).isGlobal()) {
+ nameRef = MI->getOperand(0).getGlobal()->getName();
+ name = nameRef.data();
+ if (curTarget->device()->usesHardware(
+ AMDILDeviceInfo::DoubleOps)
+ && !::strncmp(name, "__sqrt_f64", 10) ) {
+ name = "__sqrt_f64_7xx";
+ }
+ }
+ emitMCallInst(MI, O, name);
+}
+
+ bool
+AMDIL7XXAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+ this->MF = &lMF;
+ mMeta->setMF(&lMF);
+ mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+ SetupMachineFunction(lMF);
+ std::string kernelName = MF->getFunction()->getName();
+ mName = Strip(kernelName);
+
+ mKernelName = kernelName;
+ EmitFunctionHeader();
+ EmitFunctionBody();
+ return false;
+}
+
+ void
+AMDIL7XXAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr);
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ if (mDebugMode) {
+ O << ";" ;
+ II->print(O);
+ }
+ if (isMacroFunc(II)) {
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) {
+ const char *name;
+ name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<")";
+ if (curTarget->device()->isSupported(
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ }
+
+ // Print the assembly for the instruction.
+ // We want to make sure that we do HW constants
+ // before we do arena segment
+ if (mMeta->useCompilerWrite(II)) {
+ // TODO: This is a hack to get around some
+ // conformance failures.
+ O << "\tif_logicalz cb0[0].x\n";
+ O << "\tuav_raw_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ") ";
+ O << "mem0.x___, cb0[3].x, r0.0\n";
+ O << "\tendif\n";
+ mMFI->addMetadata(";memory:compilerwrite");
+ } else {
+ printInstruction(II, O);
+ }
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
diff --git a/src/gallium/drivers/radeon/AMDILAsmPrinterEG.cpp b/src/gallium/drivers/radeon/AMDILAsmPrinterEG.cpp
new file mode 100644
index 00000000000..4a9732a2b68
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmPrinterEG.cpp
@@ -0,0 +1,162 @@
+//===-- AMDILAsmPrinterEG.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEGAsmPrinter.h"
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+
+// TODO: Add support for verbose.
+AMDILEGAsmPrinter::AMDILEGAsmPrinter(TargetMachine& TM, MCStreamer &Streamer)
+: AMDILAsmPrinter(TM, Streamer)
+{
+}
+
+AMDILEGAsmPrinter::~AMDILEGAsmPrinter()
+{
+}
+//
+// @param name
+// @brief strips KERNEL_PREFIX and KERNEL_SUFFIX from the name
+// and returns that name if both of the tokens are present.
+//
+ static
+std::string Strip(const std::string &name)
+{
+ size_t start = name.find("__OpenCL_");
+ size_t end = name.find("_kernel");
+ if (start == std::string::npos
+ || end == std::string::npos
+ || (start == end)) {
+ return name;
+ } else {
+ return name.substr(9, name.length()-16);
+ }
+}
+void
+AMDILEGAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+ llvm::raw_ostream &O)
+{
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ const char *name = "unknown";
+ llvm::StringRef nameRef;
+ if (MI->getOperand(0).isGlobal()) {
+ nameRef = MI->getOperand(0).getGlobal()->getName();
+ name = nameRef.data();
+ }
+ if (!::strncmp(name, "__fma_f32", 9) && curTarget->device()->usesHardware(
+ AMDILDeviceInfo::FMA)) {
+ name = "__hwfma_f32";
+ }
+ emitMCallInst(MI, O, name);
+}
+
+ bool
+AMDILEGAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+ this->MF = &lMF;
+ mMeta->setMF(&lMF);
+ mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+ SetupMachineFunction(lMF);
+ std::string kernelName = MF->getFunction()->getName();
+ mName = Strip(kernelName);
+
+ mKernelName = kernelName;
+ EmitFunctionHeader();
+ EmitFunctionBody();
+ return false;
+}
+ void
+AMDILEGAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr);
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ if (mDebugMode) {
+ O << ";" ;
+ II->print(O);
+ }
+ if (isMacroFunc(II)) {
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) {
+ const char *name;
+ name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+ if (!::strncmp(name, "__fma_f32", 9)
+ && curTarget->device()->usesHardware(
+ AMDILDeviceInfo::FMA)) {
+ name = "__hwfma_f32";
+ }
+ //assert(0 &&
+ //"Found a macro that is still in use!");
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<")";
+ if (curTarget->device()->isSupported(
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ }
+
+ // Print the assembly for the instruction.
+ // We want to make sure that we do HW constants
+ // before we do arena segment
+ // TODO: This is a hack to get around some
+ // conformance failures.
+ if (mMeta->useCompilerWrite(II)) {
+ O << "\tif_logicalz cb0[0].x\n";
+ if (mMFI->usesMem(AMDILDevice::RAW_UAV_ID)) {
+ O << "\tuav_raw_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ") ";
+ O << "mem0.x___, cb0[3].x, r0.0\n";
+ } else {
+ O << "\tuav_arena_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+ << ")_size(dword) ";
+ O << "cb0[3].x, r0.0\n";
+ }
+ O << "\tendif\n";
+ mMFI->addMetadata(";memory:compilerwrite");
+ } else {
+ printInstruction(II, O);
+ }
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
diff --git a/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp b/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp
new file mode 100644
index 00000000000..1bc9651e7a4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp
@@ -0,0 +1,254 @@
+//===----- AMDILBarrierDetect.cpp - Barrier Detect pass -*- C++ -*- ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "BarrierDetect"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+#include "AMDILAlgorithms.tpp"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+// The barrier detect pass determines if a barrier has been duplicated in the
+// source program which can cause undefined behaviour if more than a single
+// wavefront is executed in a group. This is because LLVM does not have an
+// execution barrier and if this barrier function gets duplicated, undefined
+// behaviour can occur. In order to work around this, we detect the duplicated
+// barrier and then make the work-group execute in a single wavefront mode,
+// essentially making the barrier a no-op.
+
+namespace
+{
+ class LLVM_LIBRARY_VISIBILITY AMDILBarrierDetect : public FunctionPass
+ {
+ TargetMachine &TM;
+ static char ID;
+ public:
+ AMDILBarrierDetect(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+ ~AMDILBarrierDetect();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ private:
+ bool detectBarrier(BasicBlock::iterator *BBI);
+ bool detectMemFence(BasicBlock::iterator *BBI);
+ bool mChanged;
+ SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+ const AMDILSubtarget *mStm;
+
+ // Constants used to define memory type.
+ static const unsigned int LOCAL_MEM_FENCE = 1<<0;
+ static const unsigned int GLOBAL_MEM_FENCE = 1<<1;
+ static const unsigned int REGION_MEM_FENCE = 1<<2;
+ };
+ char AMDILBarrierDetect::ID = 0;
+} // anonymouse namespace
+
+namespace llvm
+{
+ FunctionPass *
+ createAMDILBarrierDetect(TargetMachine &TM AMDIL_OPT_LEVEL_DECL)
+ {
+ return new AMDILBarrierDetect(TM AMDIL_OPT_LEVEL_VAR);
+ }
+} // llvm namespace
+
+AMDILBarrierDetect::AMDILBarrierDetect(TargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL)
+ :
+ FunctionPass(ID),
+ TM(TM)
+{
+}
+
+AMDILBarrierDetect::~AMDILBarrierDetect()
+{
+}
+
+bool AMDILBarrierDetect::detectBarrier(BasicBlock::iterator *BBI)
+{
+ SmallVector<int64_t, DEFAULT_VEC_SLOTS>::iterator bIter;
+ int64_t bID;
+ Instruction *inst = (*BBI);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+
+ if (!CI || !CI->getNumOperands()) {
+ return false;
+ }
+ const Value *funcVal = CI->getOperand(CI->getNumOperands() - 1);
+ if (funcVal && strncmp(funcVal->getName().data(), "__amd_barrier", 13)) {
+ return false;
+ }
+
+ if (inst->getNumOperands() >= 3) {
+ const Value *V = inst->getOperand(0);
+ const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
+ bID = Cint->getSExtValue();
+ bIter = std::find(bVecMap.begin(), bVecMap.end(), bID);
+ if (bIter == bVecMap.end()) {
+ bVecMap.push_back(bID);
+ } else {
+ if (mStm->device()->isSupported(AMDILDeviceInfo::BarrierDetect)) {
+ AMDILMachineFunctionInfo *MFI =
+ getAnalysis<MachineFunctionAnalysis>().getMF()
+ .getInfo<AMDILMachineFunctionInfo>();
+ MFI->addMetadata(";limitgroupsize");
+ MFI->addErrorMsg(amd::CompilerWarningMessage[BAD_BARRIER_OPT]);
+ }
+ }
+ }
+ if (mStm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ AMDILMachineFunctionInfo *MFI =
+ getAnalysis<MachineFunctionAnalysis>().getMF()
+ .getInfo<AMDILMachineFunctionInfo>();
+ MFI->addErrorMsg(amd::CompilerWarningMessage[LIMIT_BARRIER]);
+ MFI->addMetadata(";limitgroupsize");
+ MFI->setUsesLocal();
+ }
+ const Value *V = inst->getOperand(inst->getNumOperands()-2);
+ const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
+ Function *iF = dyn_cast<Function>(inst->getOperand(inst->getNumOperands()-1));
+ Module *M = iF->getParent();
+ bID = Cint->getSExtValue();
+ if (bID > 0) {
+ const char *name = "barrier";
+ if (bID == GLOBAL_MEM_FENCE) {
+ name = "barrierGlobal";
+ } else if (bID == LOCAL_MEM_FENCE
+ && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ name = "barrierLocal";
+ } else if (bID == REGION_MEM_FENCE
+ && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ name = "barrierRegion";
+ }
+ Function *nF =
+ dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+ inst->setOperand(inst->getNumOperands()-1, nF);
+ return false;
+ }
+
+ return false;
+}
+
+bool AMDILBarrierDetect::detectMemFence(BasicBlock::iterator *BBI)
+{
+ int64_t bID;
+ Instruction *inst = (*BBI);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+
+ if (!CI || CI->getNumOperands() != 2) {
+ return false;
+ }
+
+ const Value *V = inst->getOperand(inst->getNumOperands()-2);
+ const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
+ Function *iF = dyn_cast<Function>(inst->getOperand(inst->getNumOperands()-1));
+
+ const char *fence_local_name;
+ const char *fence_global_name;
+ const char *fence_region_name;
+ const char* fence_name = "mem_fence";
+ if (!iF) {
+ return false;
+ }
+
+ if (strncmp(iF->getName().data(), "mem_fence", 9) == 0) {
+ fence_local_name = "mem_fence_local";
+ fence_global_name = "mem_fence_global";
+ fence_region_name = "mem_fence_region";
+ } else if (strncmp(iF->getName().data(), "read_mem_fence", 14) == 0) {
+ fence_local_name = "read_mem_fence_local";
+ fence_global_name = "read_mem_fence_global";
+ fence_region_name = "read_mem_fence_region";
+ } else if (strncmp(iF->getName().data(), "write_mem_fence", 15) == 0) {
+ fence_local_name = "write_mem_fence_local";
+ fence_global_name = "write_mem_fence_global";
+ fence_region_name = "write_mem_fence_region";
+ } else {
+ return false;
+ }
+
+ Module *M = iF->getParent();
+ bID = Cint->getSExtValue();
+ if (bID > 0) {
+ const char *name = fence_name;
+ if (bID == GLOBAL_MEM_FENCE) {
+ name = fence_global_name;
+ } else if (bID == LOCAL_MEM_FENCE
+ && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ name = fence_local_name;
+ } else if (bID == REGION_MEM_FENCE
+ && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ name = fence_region_name;
+ }
+ Function *nF =
+ dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+ inst->setOperand(inst->getNumOperands()-1, nF);
+ return false;
+ }
+
+ return false;
+
+}
+
+bool AMDILBarrierDetect::runOnFunction(Function &MF)
+{
+ mChanged = false;
+ bVecMap.clear();
+ mStm = &TM.getSubtarget<AMDILSubtarget>();
+ Function *F = &MF;
+ safeNestedForEach(F->begin(), F->end(), F->begin()->begin(),
+ std::bind1st(
+ std::mem_fun(
+ &AMDILBarrierDetect::detectBarrier), this));
+ safeNestedForEach(F->begin(), F->end(), F->begin()->begin(),
+ std::bind1st(
+ std::mem_fun(
+ &AMDILBarrierDetect::detectMemFence), this));
+ return mChanged;
+}
+
+const char* AMDILBarrierDetect::getPassName() const
+{
+ return "AMDIL Barrier Detect Pass";
+}
+
+bool AMDILBarrierDetect::doInitialization(Module &M)
+{
+ return false;
+}
+
+bool AMDILBarrierDetect::doFinalization(Module &M)
+{
+ return false;
+}
+
+void AMDILBarrierDetect::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
new file mode 100644
index 00000000000..2706b211f2d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -0,0 +1,104 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDILDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDILDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDILDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDILDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDILDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDILDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDILDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "mIs64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "mIs32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDILDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILCallingConv.td"
+include "AMDILInstrInfo.td"
+
+def AMDILInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+//include "Processors.td"
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDILAsmWriter : AsmWriter {
+ string AsmWriterClassName = "AsmPrinter";
+ int Variant = 0;
+}
+
+def AMDILAsmParser : AsmParser {
+ string AsmParserClassName = "AsmParser";
+ int Variant = 0;
+
+ string CommentDelimiter = ";";
+
+ string RegisterPrefix = "r";
+
+}
+
+
+def AMDIL : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDILInstrInfo;
+ let AssemblyWriters = [AMDILAsmWriter];
+ let AssemblyParsers = [AMDILAsmParser];
+}
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
new file mode 100644
index 00000000000..a7d39466bdf
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3257 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
+/// to compute the a post-dominator tree.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DominatorTreeBase<MachineBasicBlock> *DT;
+ MachinePostDominatorTree() : MachineFunctionPass(ID)
+ {
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
+ // postdominator
+ }
+
+ ~MachinePostDominatorTree();
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ inline const std::vector<MachineBasicBlock *> &getRoots() const {
+ return DT->getRoots();
+ }
+
+ inline MachineDomTreeNode *getRootNode() const {
+ return DT->getRootNode();
+ }
+
+ inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline MachineBasicBlock *
+ findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+ return DT->findNearestCommonDominator(A, B);
+ }
+
+ virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+ DT->print(OS);
+ }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+static RegisterPass<MachinePostDominatorTree>
+machinePostDominatorTreePass("machinepostdomtree",
+ "MachinePostDominator Tree Construction",
+ true, true);
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
+ //WHILELOOP(thisloop) init before entering
+ //thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
+ //WHILELOOP(thisloop) init after entering
+ //thisloop.
+ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
+ //land block, branch cond on this reg.
+ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
+ //endif" after ENDLOOP(thisloop) break
+ //outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
+ //endif" after ENDLOOP(thisloop) continue on
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass);
+
+private:
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass) {
+ passRep = &pass;
+ funcRep = &func;
+
+ bool changed = false;
+ //func.RenumberBlocks();
+
+ //to do, if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::prepare\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //domTree = CFGTraits::getDominatorTree(pass);
+ //if (DEBUGME) {
+ // domTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //postDomTree = CFGTraits::getPostDominatorTree(pass);
+ //if (DEBUGME) {
+ // postDomTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ //assert(curBlk->size() > 0);
+ //removeEmptyBlock(curBlk) ??
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass) {
+ passRep = &pass;
+ funcRep = &func;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::run\n";
+ //errs() << func.getFunction()->getNameStr() << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ //func.dump();
+ }
+
+ if (!finish) {
+ MachineFunction *MF = &func;
+ AMDILMachineFunctionInfo *mMFI =
+ MF->getInfo<AMDILMachineFunctionInfo>();
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[IRREDUCIBLE_CF]);
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ //walk through all the block in func to check for unreachable
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ } //end of for
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ //numMatch += switchPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+ // new BB created for landBlk==NULL may introduce new challenge to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loop outside->inside, so "continue" to a outside loop won't
+ // be mistaken as "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1)
+ {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+#if 1
+
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+#else
+ return -1;
+#endif
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
+ // same loop with LoopLandInfo without explicitly keeping track of
+ // loopContBlks and loopBreakBlks, this is a method to get the information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (//postDomTree->dominates(downBlk, falseBlk) &&
+ singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0); //
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+ // May consider the # landBlk->pred_size() as it represents the number of
+ // assignment initReg = .. needed to insert.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk and falseBlk, migrate the rest that
+ // have more than one predecessors. without doing this, its predecessor
+ // rather than headBlk will have undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the about if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDIL::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+ //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the
+ * BREAK_LOGICALZ_i32 or AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
+ * search for the DebugLoc in the that statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
+ {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
+ * search for the DebugLoc in the continue statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
+ // loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
+ // loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+ : CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false)
+ {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+ /* if we've arrived here then we've already erased the branch instruction
+ * travel back up the basic block to see the last reference of our debug location
+ * we've just inserted that reference here so it should be representative */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// BBs in exitBlkSet are determined as in break-path for loopRep,
+// before we can put code for BBs as inside loop-body for loopRep
+// check whether those BBs are determined as cont-BB for parentLoopRep
+// earlier.
+// If so, generate a new BB newBlk
+// (1) set newBlk common successor of BBs in exitBlkSet
+// (2) change the continue-instr in BBs in exitBlkSet to break-instr
+// (3) generate continue-instr in newBlk
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+// BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDIL::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branch on the loop endBranchInit register to the
+// pathes corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two pathes) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+ .addImm(i - 1); //preVal
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
+// last block in the path Otherwise, return NULL
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDIL branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit change
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and return the newly added dummy exit block
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // I saw two unconditional branch in one basic block in example
+ // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+#if 0
+ if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+ if (DEBUGME) {
+ errs() << "Replace empty block BB" << curBlk->getNumber()
+ << " with dummyExitBlock\n";
+ }
+ BlockT *predb = *curBlk->pred_begin();
+ predb->removeSuccessor(curBlk);
+ curBlk = predb;
+ } //handle empty curBlk
+#endif
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ //int i = srcBlk->succ_size();
+ //int j = srcBlk->pred_size();
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME)
+ {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is work around solution for findNearestCommonDominator not avaiable to
+// post dom a proper fix should go to Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+//private:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+//public:
+// static char ID;
+
+public:
+ AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+ //bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGStructurizer
+
+//char AMDILCFGStructurizer::ID = 0;
+} //end of namespace llvm
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()) {
+}
+
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+const char *AMDILCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+ AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+const char *AMDILCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// this class is tailor to the AMDIL backend
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer>
+{
+ typedef int RegiT;
+
+ static int getBreakNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBreakZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDIL::BRANCH:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isPhimove(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::MOVE);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+ //get DebugLoc from the first MachineBasicBlock instruction with debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ //
+ // BB with backward-edge could have move instructions after the branch
+ // instruction. Such move instruction "belong to" the loop backward-edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!isPhimove(instr)) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDIL::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDIL::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ MachineBasicBlock::iterator res;
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+ false);
+
+ SHOWNEWINSTR(newInstr);
+ //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ DebugLoc());
+ MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ DebugLoc());
+ MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addImm(regVal); //set src value
+
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ } //insertInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+
+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ //newBlk->setNumber(srcBlk->getNumber());
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
+ // does not clone the AsmPrinterFlags.
+ instr->setAsmPrinterFlag(
+ (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+ //the AMDIL instruction is not recognized as terminator fix this and retire
+ //this routine
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getExplicitBranch(branchInstr) == oldBlk) {
+ setExplicitBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDIL::CONTINUE
+ && iter->getOpcode() == AMDIL::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+ // TODO to fix up jump table so later phase won't be confused. if
+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+ // there isn't such an interface yet. alternatively, replace all the other
+ // blocks in the jump table with the entryBlk //}
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDILCFGPreparationPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
+ *this);
+}
+
+// createAMDILCFGStructurizerPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
+ *this);
+}
+
+//end of file newline goes below
+
diff --git a/src/gallium/drivers/radeon/AMDILCallingConv.td b/src/gallium/drivers/radeon/AMDILCallingConv.td
new file mode 100644
index 00000000000..c37ff0a7e7c
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCallingConv.td
@@ -0,0 +1,75 @@
+//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMDIL architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// AMDIL 32-bit C return-value convention.
+def RetCC_AMDIL32 : CallingConv<[
+ // Since IL has no return values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i1, i8, i16, i32, f32, f64, i64], CCAssignToReg<
+ [
+ R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element Short vector types get 16 byte alignment and size of 8 bytes
+ CCIfType<[v2i32, v2f32, v2i8, v4i8, v2i16, v4i16], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 4-element Short vector types get 16 byte alignment and size of 16 bytes
+ CCIfType<[v4i32, v4f32], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element 64-bit vector types get aligned to 16 bytes with a size of 16 bytes
+ CCIfType<[v2f64, v2i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >, CCAssignToStack<16, 16>
+]>;
+
+// AMDIL 32-bit C Calling convention.
+def CC_AMDIL32 : CallingConv<[
+ // Since IL has parameter values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i1, i8, i16, i32, f32, f64, i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element Short vector types get 16 byte alignment and size of 8 bytes
+ CCIfType<[v2i32, v2f32, v2i8, v4i8, v2i16, v4i16], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 4-element Short vector types get 16 byte alignment and size of 16 bytes
+ CCIfType<[v4i32, v4f32], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element 64-bit vector types get aligned to 16 bytes with a size of 16 bytes
+ CCIfType<[v2f64, v2i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >, CCAssignToStack<16, 16>
+]>;
+
diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h
new file mode 100644
index 00000000000..b0ea1455cf9
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h
@@ -0,0 +1,46 @@
+// The LLVM Compiler Infrastructure
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+
+#ifndef AMDILCODEEMITTER_H
+#define AMDILCODEEMITTER_H
+
+namespace llvm {
+
+ /* XXX: Temp HACK to work around tablegen name generation */
+ class AMDILCodeEmitter {
+ public:
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const { return 0; }
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ };
+
+} // End namespace llvm
+
+#endif // AMDILCODEEMITTER_H
diff --git a/src/gallium/drivers/radeon/AMDILCompilerErrors.h b/src/gallium/drivers/radeon/AMDILCompilerErrors.h
new file mode 100644
index 00000000000..7d935f5e782
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCompilerErrors.h
@@ -0,0 +1,75 @@
+//===-- AMDILCompilerErrors.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_COMPILER_ERRORS_H_
+#define _AMDIL_COMPILER_ERRORS_H_
+// Compiler errors generated by the backend that will cause
+// the runtime to abort compilation. These are mainly for
+// device constraint violations or invalid code.
+namespace amd {
+
+#define INVALID_COMPUTE 0
+#define GENERIC_ERROR 1
+#define INTERNAL_ERROR 2
+#define MISSING_FUNCTION_CALL 3
+#define RESERVED_FUNCTION 4
+#define BYTE_STORE_ERROR 5
+#define UNKNOWN_TYPE_NAME 6
+#define NO_IMAGE_SUPPORT 7
+#define NO_ATOMIC_32 8
+#define NO_ATOMIC_64 9
+#define IRREDUCIBLE_CF 10
+#define INSUFFICIENT_RESOURCES 11
+#define INSUFFICIENT_LOCAL_RESOURCES 12
+#define INSUFFICIENT_PRIVATE_RESOURCES 13
+#define INSUFFICIENT_IMAGE_RESOURCES 14
+#define DOUBLE_NOT_SUPPORTED 15
+#define INVALID_CONSTANT_WRITE 16
+#define INSUFFICIENT_CONSTANT_RESOURCES 17
+#define INSUFFICIENT_COUNTER_RESOURCES 18
+#define INSUFFICIENT_REGION_RESOURCES 19
+#define REGION_MEMORY_ERROR 20
+#define MEMOP_NO_ALLOCATION 21
+#define RECURSIVE_FUNCTION 22
+#define INCORRECT_COUNTER_USAGE 23
+#define INVALID_INTRINSIC_USAGE 24
+#define NUM_ERROR_MESSAGES 25
+
+
+ static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] =
+ {
+ "E000:Compute Shader Not Supported! ",
+ "E001:Generic Compiler Error Message! ",
+ "E002:Internal Compiler Error Message!",
+ "E003:Missing Function Call Detected! ",
+ "E004:Reserved Function Call Detected!",
+ "E005:Byte Addressable Stores Invalid!",
+ "E006:Kernel Arg Type Name Is Invalid!",
+ "E007:Image 1.0 Extension Unsupported!",
+ "E008:32bit Atomic Op are Unsupported!",
+ "E009:64bit Atomic Op are Unsupported!",
+ "E010:Irreducible ControlFlow Detected",
+ "E011:Insufficient Resources Detected!",
+ "E012:Insufficient Local Resources! ",
+ "E013:Insufficient Private Resources! ",
+ "E014:Images not currently supported! ",
+ "E015:Double precision not supported! ",
+ "E016:Invalid Constant Memory Write! ",
+ "E017:Max number Constant Ptr reached!",
+ "E018:Max number of Counters reached! ",
+ "E019:Insufficient Region Resources! ",
+ "E020:Region address space invalid! ",
+ "E021:MemOp with no memory allocated! ",
+ "E022:Recursive Function detected! ",
+ "E023:Illegal Inc+Dec to same counter!",
+ "E024:Illegal usage of intrinsic inst!"
+ };
+
+}
+
+#endif // _AMDIL_COMPILER_ERRORS_H_
diff --git a/src/gallium/drivers/radeon/AMDILCompilerWarnings.h b/src/gallium/drivers/radeon/AMDILCompilerWarnings.h
new file mode 100644
index 00000000000..c257980a1e4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCompilerWarnings.h
@@ -0,0 +1,31 @@
+//===-- AMDILCompilerWarnings.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_COMPILER_WARNINGS_H_
+#define _AMDIL_COMPILER_WARNINGS_H_
+/// Compiler backend generated warnings that might cause
+/// issues with compilation. These warnings become errors if
+/// -Werror is specified on the command line.
+namespace amd {
+
+#define LIMIT_BARRIER 0
+#define BAD_BARRIER_OPT 1
+#define RECOVERABLE_ERROR 2
+#define NUM_WARN_MESSAGES 3
+ /// All warnings must be prefixed with the W token or they might be
+ /// treated as errors.
+ static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] =
+ {
+ "W000:Barrier caused limited groupsize",
+ "W001:Dangerous Barrier Opt Detected! ",
+ "W002:Recoverable BE Error Detected! "
+
+ };
+}
+
+#endif // _AMDIL_COMPILER_WARNINGS_H_
diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td
new file mode 100644
index 00000000000..0db66ae8475
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILConversions.td
@@ -0,0 +1,1022 @@
+//===-- AMDILConversions.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+def actos_i16:Pat < (i16 (anyext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctos_i16:Pat < (i16 (zext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctos_i16:Pat < (i16 (sext GPRI8:$src)),
+(IL_ASSHORT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actoi_i32:Pat < (i32 (anyext GPRI8:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctoi_i32:Pat < (i32 (zext GPRI8:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctoi_i32:Pat < (i32 (sext GPRI8:$src)),
+(IL_ASINT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actol_i64:Pat < (i64 (anyext GPRI8:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >;
+
+
+def uctol_i64:Pat < (i64 (zext GPRI8:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >;
+
+
+def sctol_i64:Pat < (i64 (sext GPRI8:$src)),
+(LCREATE
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 31))) >;
+
+
+def astoi_i32:Pat < (i32 (anyext GPRI16:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def ustoi_i32:Pat < (i32 (zext GPRI16:$src)),
+(IL_ASINT_i32
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def sstoi_i32:Pat < (i32 (sext GPRI16:$src)),
+(IL_ASINT_i32
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def astol_i64:Pat < (i64 (anyext GPRI16:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def ustol_i64:Pat < (i64 (zext GPRI16:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >;
+
+
+def sstol_i64:Pat < (i64 (sext GPRI16:$src)),
+(LCREATE
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 31))) >;
+
+
+def aitol_i64:Pat < (i64 (anyext GPRI32:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 0)) >;
+
+
+def uitol_i64:Pat < (i64 (zext GPRI32:$src)),
+(LCREATE
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 0)) >;
+
+
+def sitol_i64:Pat < (i64 (sext GPRI32:$src)),
+(LCREATE
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 0)),
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i32 GPRI32:$src),
+ (LOADCONST_i32 0)),
+ (LOADCONST_i32 31))) >;
+
+
+
+def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)),
+(f32
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)),
+(f32
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def ftosc_i8:Pat < (i8 (fp_to_sint GPRF32:$src)),
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def ftouc_i8:Pat < (i8 (fp_to_uint GPRF32:$src)),
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)),
+(f64 (FTOD
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)),
+(f64 (FTOD
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def dtosc_i8:Pat < (i8 (fp_to_sint GPRF64:$src)),
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def dtouc_i8:Pat < (i8 (fp_to_uint GPRF64:$src)),
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)),
+(f32
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)),
+(f32
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ftoss_i16:Pat < (i16 (fp_to_sint GPRF32:$src)),
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def ftous_i16:Pat < (i16 (fp_to_uint GPRF32:$src)),
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)),
+(f64 (FTOD
+ (ITOF
+ (SHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)),
+(f64 (FTOD
+ (UTOF
+ (USHR_i32
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def dtoss_i16:Pat < (i16 (fp_to_sint GPRF64:$src)),
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOI (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def dtous_i16:Pat < (i16 (fp_to_uint GPRF64:$src)),
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32
+(FTOU (DTOF GPRF64:$src)),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+
+
+
+def stoc_i8:Pat < (i8 (trunc GPRI16:$src)),
+(IL_ASCHAR_i32
+ (IL_ASINT_i16
+(BINARY_AND_i16 GPRI16:$src,
+ (LOADCONST_i16 0x000000FF)))
+ ) >;
+
+
+def itoc_i8:Pat < (i8 (trunc GPRI32:$src)),
+(IL_ASCHAR_i32
+ (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+ (LOADCONST_i32 0x000000FF)))
+ ) >;
+
+
+def itos_i16:Pat < (i16 (trunc GPRI32:$src)),
+(IL_ASSHORT_i32
+ (IL_ASINT_i32
+(BINARY_AND_i32 GPRI32:$src,
+ (LOADCONST_i32 0x0000FFFF)))
+ ) >;
+
+
+def ltoc_i8:Pat < (i8 (trunc GPRI64:$src)),
+(IL_ASCHAR_i32
+ (BINARY_AND_i32
+(LLO GPRI64:$src),
+ (LOADCONST_i32 0x000000FF))
+ ) >;
+
+
+def ltos_i16:Pat < (i16 (trunc GPRI64:$src)),
+(IL_ASSHORT_i32
+ (BINARY_AND_i32
+(LLO GPRI64:$src),
+ (LOADCONST_i32 0x0000FFFF))
+ ) >;
+
+
+def ltoi_i32:Pat < (i32 (trunc GPRI64:$src)),
+(IL_ASINT_i32
+ (BINARY_AND_i32
+(LLO GPRI64:$src),
+ (LOADCONST_i32 0xFFFFFFFF))
+ ) >;
+
+
+def actos_v2i16:Pat < (v2i16 (anyext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v2i16:Pat < (v2i16 (zext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v2i16:Pat < (v2i16 (sext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v2i32:Pat < (v2i32 (anyext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v2i32:Pat < (v2i32 (zext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v2i32:Pat < (v2i32 (sext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def actol_v2i64:Pat < (v2i64 (anyext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uctol_v2i64:Pat < (v2i64 (zext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sctol_v2i64:Pat < (v2i64 (sext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def astoi_v2i32:Pat < (v2i32 (anyext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v2i32:Pat < (v2i32 (zext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v2i32:Pat < (v2i32 (sext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+
+def astol_v2i64:Pat < (v2i64 (anyext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def ustol_v2i64:Pat < (v2i64 (zext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sstol_v2i64:Pat < (v2i64 (sext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+def aitol_v2i64:Pat < (v2i64 (anyext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def uitol_v2i64:Pat < (v2i64 (zext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+
+def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+
+def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+
+def dtosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def dtouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+
+def dtoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def dtous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+def stoc_v2i8:Pat < (v2i8 (trunc GPRV2I16:$src)),
+(IL_ASV2CHAR_v2i32
+ (IL_ASV2INT_v2i16
+(BINARY_AND_v2i16 GPRV2I16:$src,
+ (VCREATE_v2i16 (LOADCONST_i16 0x000000FF))))
+ ) >;
+
+
+def itoc_v2i8:Pat < (v2i8 (trunc GPRV2I32:$src)),
+(IL_ASV2CHAR_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))
+ ) >;
+
+
+def itos_v2i16:Pat < (v2i16 (trunc GPRV2I32:$src)),
+(IL_ASV2SHORT_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))
+ ) >;
+
+
+def ltoc_v2i8:Pat < (v2i8 (trunc GPRV2I64:$src)),
+(IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF)))
+ ) >;
+
+
+def ltos_v2i16:Pat < (v2i16 (trunc GPRV2I64:$src)),
+(IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF)))
+ ) >;
+
+
+def ltoi_v2i32:Pat < (v2i32 (trunc GPRV2I64:$src)),
+(IL_ASV2INT_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0xFFFFFFFF)))
+ ) >;
+
+
+
+
+def actos_v4i16:Pat < (v4i16 (anyext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v4i16:Pat < (v4i16 (zext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctos_v4i16:Pat < (v4i16 (sext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def actoi_v4i32:Pat < (v4i32 (anyext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def uctoi_v4i32:Pat < (v4i32 (zext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def sctoi_v4i32:Pat < (v4i32 (sext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+
+def astoi_v4i32:Pat < (v4i32 (anyext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def ustoi_v4i32:Pat < (v4i32 (zext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+
+def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+
+def ftosc_v4i8:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def ftouc_v4i8:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+
+def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+
+def ftoss_v4i16:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+def ftous_v4i16:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+
+
+
+def stoc_v4i8:Pat < (v4i8 (trunc GPRV4I16:$src)),
+(IL_ASV4CHAR_v4i32
+ (IL_ASV4INT_v4i16
+(BINARY_AND_v4i16 GPRV4I16:$src,
+ (VCREATE_v4i16 (LOADCONST_i16 0x000000FF))))
+ ) >;
+
+
+def itoc_v4i8:Pat < (v4i8 (trunc GPRV4I32:$src)),
+(IL_ASV4CHAR_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))
+ ) >;
+
+
+def itos_v4i16:Pat < (v4i16 (trunc GPRV4I32:$src)),
+(IL_ASV4SHORT_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))
+ ) >;
+
+
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp
new file mode 100644
index 00000000000..aa6d8af7012
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevice.cpp
@@ -0,0 +1,137 @@
+//===-- AMDILDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
+{
+ mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ mDeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDILDevice::~AMDILDevice()
+{
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDILDevice::getMaxGDSSize() const
+{
+ return 0;
+}
+
+uint32_t
+AMDILDevice::getDeviceFlag() const
+{
+ return mDeviceFlag;
+}
+
+size_t AMDILDevice::getMaxNumCBs() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxCBSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxScratchSize() const
+{
+ return 65536;
+}
+
+uint32_t AMDILDevice::getStackAlignment() const
+{
+ return 16;
+}
+
+void AMDILDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::HalfOps);
+ mSWBits.set(AMDILDeviceInfo::ByteOps);
+ mSWBits.set(AMDILDeviceInfo::ShortOps);
+ mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
+ mSWBits.set(AMDILDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDILDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Software;
+ }
+
+ return AMDILDeviceInfo::Unsupported;
+
+}
+
+bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
+}
+
+bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
+}
+
+bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
+}
+
+std::string
+AMDILDevice::getDataLayout() const
+{
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h
new file mode 100644
index 00000000000..338212101b4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevice.h
@@ -0,0 +1,132 @@
+//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+ class AMDILAsmPrinter;
+ class AMDILIOExpansion;
+ class AMDILPointerManager;
+ class AsmPrinter;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDILDevice {
+public:
+ AMDILDevice(AMDILSubtarget *ST);
+ virtual ~AMDILDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+ // Interface to get the IO Expansion pass for each device.
+ virtual FunctionPass*
+ getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const = 0;
+
+ // Interface to get the Asm printer for each device.
+ virtual AsmPrinter*
+ getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const = 0;
+
+ // Interface to get the Pointer manager pass for each device.
+ virtual FunctionPass*
+ getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returned true. If usesHardware returned true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the softare. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it with possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+ bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ llvm::BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDILSubtarget *mSTM;
+ uint32_t mDeviceFlag;
+private:
+ AMDILDeviceInfo::ExecutionMode
+ getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
new file mode 100644
index 00000000000..89b8312c294
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
@@ -0,0 +1,87 @@
+//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+ AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDIL710Device(ptr);
+ case '7':
+ return new AMDIL770Device(ptr);
+ default:
+ return new AMDIL7XXDevice(ptr);
+ };
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILRedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCedarDevice(ptr);
+ } else if (deviceName == "barts"
+ || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDILSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDIL7XXDevice(ptr);
+ }
+}
+}
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
new file mode 100644
index 00000000000..c4acf9145ae
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
@@ -0,0 +1,89 @@
+//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+
+
+#include <string>
+
+namespace llvm
+{
+ class AMDILDevice;
+ class AMDILSubtarget;
+ namespace AMDILDeviceInfo
+ {
+ // Each Capabilities can be executed using a hardware instruction,
+ // emulated with a sequence of software instructions, or not
+ // supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, // Unsupported feature on the card(Default value)
+ Software, // This is the execution mode that is set if the
+ // feature is emulated in software
+ Hardware // This execution mode is set if the feature exists
+ // natively in hardware
+ };
+
+ // Any changes to this needs to have a corresponding update to the
+ // twiki page GPUMetadataABI
+ enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte(char) is support or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, // ReservedFlag
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performned and that all memory access go through a single
+ // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Reserved1 = 0x15, // Reserved flag
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ // If more capabilities are required, then
+ // this number needs to be increased.
+ // All capabilities must come before this
+ // number.
+ MaxNumberCapabilities = 0x20
+ };
+ // These have to be in order with the older generations
+ // having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX,
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ } // namespace AMDILDeviceInfo
+ llvm::AMDILDevice*
+ getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h
new file mode 100644
index 00000000000..3fc5fa05669
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // _AMDIL_DEVICES_H_
diff --git a/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp
new file mode 100644
index 00000000000..185fc70a00b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp
@@ -0,0 +1,1093 @@
+//===-- AMDILEGIOExpansion.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDILEGIOExpansion.cpp
+// @details Implementation of IO expansion class for evergreen and NI devices.
+//
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Value.h"
+
+using namespace llvm;
+AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) : AMDILImageExpansion(tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+AMDILEGIOExpansion::~AMDILEGIOExpansion() {
+}
+const char *AMDILEGIOExpansion::getPassName() const
+{
+ return "AMDIL EG/NI IO Expansion Pass";
+}
+ bool
+AMDILEGIOExpansion::isImageIO(MachineInstr *MI)
+{
+ if (!MI->getOperand(0).isGlobal()) {
+ return false;
+ }
+ const llvm::StringRef& nameRef = MI->getOperand(0).getGlobal()->getName();
+ const char *name = nameRef.data();
+ if (nameRef.size() > 8 && !strncmp(name, "__amdil_", 8)) {
+ name += 8;
+ if (!strncmp(name, "sample_data", 11)
+ || !strncmp(name, "write_image", 11)
+ || !strncmp(name, "get_image2d_params", 18)
+ || !strncmp(name, "get_image3d_params", 18)) {
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+ if (!MI) {
+ return false;
+ }
+ switch (MI->getOpcode()) {
+ default:
+ return AMDILIOExpansion::isIOInstruction(MI);
+ case AMDIL::IMAGE2D_READ:
+ case AMDIL::IMAGE2D_READ_UNNORM:
+ case AMDIL::IMAGE2D_WRITE:
+ case AMDIL::IMAGE2D_INFO0:
+ case AMDIL::IMAGE2D_INFO1:
+ case AMDIL::IMAGE3D_READ:
+ case AMDIL::IMAGE3D_READ_UNNORM:
+ case AMDIL::IMAGE3D_WRITE:
+ case AMDIL::IMAGE3D_INFO0:
+ case AMDIL::IMAGE3D_INFO1:
+ return true;
+ };
+ return false;
+}
+void
+AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+ assert(isIOInstruction(MI) && "Must be an IO instruction to "
+ "be passed to this function!");
+ switch (MI->getOpcode()) {
+ default:
+ AMDILIOExpansion::expandIOInstruction(MI);
+ break;
+ case AMDIL::IMAGE2D_READ:
+ case AMDIL::IMAGE3D_READ:
+ case AMDIL::IMAGE2D_READ_UNNORM:
+ case AMDIL::IMAGE3D_READ_UNNORM:
+ expandImageLoad(mBB, MI);
+ break;
+ case AMDIL::IMAGE2D_WRITE:
+ case AMDIL::IMAGE3D_WRITE:
+ expandImageStore(mBB, MI);
+ break;
+ case AMDIL::IMAGE2D_INFO0:
+ case AMDIL::IMAGE2D_INFO1:
+ case AMDIL::IMAGE3D_INFO0:
+ case AMDIL::IMAGE3D_INFO1:
+ expandImageParam(mBB, MI);
+ break;
+ };
+}
+ bool
+AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI)
+{
+ AMDILAS::InstrResEnc curRes;
+ getAsmPrinterFlags(MI, curRes);
+ // We only support caching on UAV11 - JeffG
+ if (curRes.bits.ResourceID == 11) {
+ return curRes.bits.CacheableRead;
+ } else {
+ return false;
+ }
+}
+ bool
+AMDILEGIOExpansion::isArenaOp(MachineInstr *MI)
+{
+ AMDILAS::InstrResEnc curRes;
+ getAsmPrinterFlags(MI, curRes);
+ return curRes.bits.ResourceID
+ == mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+ || curRes.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS;
+}
+ void
+AMDILEGIOExpansion::expandPackedData(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ if (!isPackedData(MI)) {
+ return;
+ }
+ // There is a bug in the CAL compiler that incorrectly
+ // errors when the UBIT_INSERT instruction is
+ if (mSTM->calVersion() < CAL_VERSION_SC_137) {
+ AMDIL789IOExpansion::expandPackedData(MI);
+ return;
+ }
+ DebugLoc DL;
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I8:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32),
+ AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V2I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I16:
+ {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case UNPACK_V2I8:
+ case UNPACK_V4I8:
+ case UNPACK_V2I16:
+ case UNPACK_V4I16:
+ AMDIL789IOExpansion::expandPackedData(MI);
+ break;
+ };
+}
+
+ void
+AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool usesArena = isArenaOp(MI);
+ bool cacheable = isCacheableOp(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+ && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ expandArenaSetup(MI);
+ DebugLoc DL;
+ if (getMemorySize(MI) == 1) {
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008);
+ if (cacheable) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else if (getMemorySize(MI) == 2) {
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ if (cacheable) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else if (getMemorySize(MI) == 4) {
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ } else if (getMemorySize(MI) == 8) {
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ } else {
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_Z_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_W_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(3);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(4);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1006)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1008)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ }
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ void
+AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ DebugLoc DL;
+ unsigned mulOp = 0;
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 1:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // The instruction would normally fit in right here so everything created
+ // after this point needs to go into the afterInst vector.
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ break;
+ case 2:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ break;
+ case 4:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 8:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ };
+
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+ void
+AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ unsigned mulOp = 0;
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ } else {
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ }
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ } else {
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ }
+ }
+ break;
+ }
+
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, I, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+ void
+AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool usesArena = isArenaOp(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+ && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ DebugLoc DL;
+ // These instructions are expandted before the current MI.
+ expandStoreSetupCode(MI);
+ expandArenaSetup(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_Z_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_W_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(3);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addImm(3);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(ID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(4);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addImm(4);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(ID);
+ }
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ case 1:
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ case 2:
+ if (usesArena) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ case 4:
+ if (usesArena) {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ case 8:
+ if (usesArena) {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(ID);
+ }
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ }
+ break;
+ };
+}
+ void
+AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalStore(MI);
+ }
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ unsigned mulOp = HWRegion ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+ // These instructions are expandted before the current MI.
+ expandStoreSetupCode(MI);
+ expandArenaSetup(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 1:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1006)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 2:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 4:
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 8:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ };
+
+}
+
+ void
+AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalStore(MI);
+ }
+ DebugLoc DL;
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ unsigned mulOp = HWLocal ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+ // These instructions are expandted before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, I, DL, mTII->get(mulOp), AMDIL::R1006)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addImm(lID);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i8), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addImm(lID);
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ } else {
+ BuildMI(*mBB, I, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i16), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ break;
+ }
+}
+
+
+ void
+AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+ AMDIL789IOExpansion::expandStoreSetupCode(MI);
+}
+ void
+AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ if (!isArenaOp(MI)) {
+ return;
+ }
+ const MCInstrDesc &TID = (MI->getDesc());
+ const MCOperandInfo &TOI = TID.OpInfo[0];
+ unsigned short RegClass = TOI.RegClass;
+ DebugLoc DL;
+ switch (RegClass) {
+ case AMDIL::GPRV4I16RegClassID:
+ case AMDIL::GPRI64RegClassID:
+ case AMDIL::GPRF64RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ case AMDIL::GPRV2F32RegClassID:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(4ULL << 32));
+ break;
+ default:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+ break;
+ case AMDIL::GPRI8RegClassID:
+ case AMDIL::GPRV2I8RegClassID:
+ case AMDIL::GPRI16RegClassID:
+ case AMDIL::GPRV2I16RegClassID:
+ case AMDIL::GPRV4I8RegClassID:
+ case AMDIL::GPRI32RegClassID:
+ case AMDIL::GPRF32RegClassID:
+ break;
+ };
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp b/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp
new file mode 100644
index 00000000000..84ae9a33413
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp
@@ -0,0 +1,71 @@
+//===-- AMDILELFWriterInfo.cpp - Elf Writer Info for AMDIL ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILELFWriterInfo.h"
+#include "AMDIL.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the AMDILELFWriterInfo class
+//===----------------------------------------------------------------------===//
+AMDILELFWriterInfo::AMDILELFWriterInfo(bool is64bit, bool endian)
+ : TargetELFWriterInfo(is64bit, endian)
+{
+}
+
+AMDILELFWriterInfo::~AMDILELFWriterInfo() {
+}
+
+unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+bool AMDILELFWriterInfo::hasRelocationAddend() const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+long int AMDILELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+long int AMDILELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
diff --git a/src/gallium/drivers/radeon/AMDILELFWriterInfo.h b/src/gallium/drivers/radeon/AMDILELFWriterInfo.h
new file mode 100644
index 00000000000..0bcffd27f59
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILELFWriterInfo.h
@@ -0,0 +1,54 @@
+//===-- AMDILELFWriterInfo.h - Elf Writer Info for AMDIL ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the AMDIL backend.
+//
+//===---------------------------------------------------------------------===//
+#ifndef _AMDIL_ELF_WRITER_INFO_H_
+#define _AMDIL_ELF_WRITER_INFO_H_
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+ class AMDILELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ AMDILELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~AMDILELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// 'hasRelocationAddend - True if the target uses and addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const;
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatble field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+} // namespace llvm
+#endif // _AMDIL_ELF_WRITER_INFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
new file mode 100644
index 00000000000..445fd608bbb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
@@ -0,0 +1,522 @@
+//===-- AMDILEnumeratedTypes.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// ILEnumreatedTypes.td - The IL Enumerated Types
+//===--------------------------------------------------------------------===//
+
+// Section 5.1 IL Shader
+class ILShader<bits<8> val> {
+ bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+class ILRegType<bits<6> val> {
+ bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP : ILRegType<0>;
+def IL_REGTYPE_WINCOORD : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL : ILRegType<3>;
+def IL_REGTYPE_ITEMP : ILRegType<4>;
+def IL_REGTYPE_GLOBAL : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+class ILComponentSelect<bits<3> val, string text> {
+ bits<3> Value = val;
+ string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+class ILModDstComp<bits<2> val, string text> {
+ bits<2> Value = val;
+ string Text = text;
+}
+// Table 5-4
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0 : ILModDstComp<2, "0">;
+def IL_MODCOMP_1 : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+class ILImportUsage<bits<1> val, string usage> {
+ bits<1> Value = val;
+ string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 Il Shift Scale
+class ILShiftScale<bits<4> val, string scale> {
+ bits<4> Value = val;
+ string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE : ILShiftScale<0, "">;
+def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+class ILDivComp<bits<3> val, string divcomp> {
+ bits<3> Value = val;
+ string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+class ILRelOp<bits<3> val, string op> {
+ bits<3> Value = val;
+ string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+class ILZeroOp<bits<3> val, string behavior> {
+ bits<3> Value = val;
+ string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+class ILCmpValue<bits<3> val, string num> {
+ bits<3> Value = val;
+ string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+class ILAddressing<bits<3> val> {
+ bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE : ILAddressing<0>;
+def IL_ADDR_RELATIVE : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+class ILElementFormat<bits<5> val> {
+ bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+class ILOpCode<bits<16> val = -1, string cmd> {
+ bits<16> Value = val;
+ string Text = cmd;
+}
+
+// Table 5-12
+def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS : ILOpCode<6, "abs">;
+def IL_OP_ADD : ILOpCode<7, "add">;
+def IL_OP_AND : ILOpCode<8, "iand">;
+def IL_OP_BREAK : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC : ILOpCode<12, "breakc">;
+def IL_OP_CALL : ILOpCode<13, "call">;
+def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
+def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
+def IL_OP_CASE : ILOpCode<16, "case">;
+def IL_OP_CLG : ILOpCode<17, "clg">;
+def IL_OP_CMOV : ILOpCode<18, "cmov">;
+def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
+def IL_OP_CMP : ILOpCode<20, "cmp">;
+def IL_OP_CONTINUE : ILOpCode<21, "continue">;
+def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
+def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
+def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
+def IL_OP_COS : ILOpCode<25, "cos">;
+def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
+def IL_OP_D_2_F : ILOpCode<27, "d2f">;
+def IL_OP_D_ADD : ILOpCode<28, "dadd">;
+def IL_OP_D_EQ : ILOpCode<29, "deq">;
+def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
+def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
+def IL_OP_D_GE : ILOpCode<32, "dge">;
+def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
+def IL_OP_D_LT : ILOpCode<34, "dlt">;
+def IL_OP_D_MAD : ILOpCode<35, "dmad">;
+def IL_OP_D_MUL : ILOpCode<36, "dmul">;
+def IL_OP_D_NE : ILOpCode<37, "dne">;
+def IL_OP_DEFAULT : ILOpCode<38, "default">;
+def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
+def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
+def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
+def IL_OP_DP2 : ILOpCode<42, "dp2">;
+def IL_OP_DP3 : ILOpCode<43, "dp3">;
+def IL_OP_DP4 : ILOpCode<44, "dp4">;
+def IL_OP_ELSE : ILOpCode<45, "else">;
+def IL_OP_END : ILOpCode<46, "end">;
+def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
+def IL_OP_ENDIF : ILOpCode<48, "endif">;
+def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
+def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
+def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
+def IL_OP_EQ : ILOpCode<52, "eq">;
+def IL_OP_EXP : ILOpCode<53, "exp">;
+def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
+def IL_OP_F_2_D : ILOpCode<55, "f2d">;
+def IL_OP_FLR : ILOpCode<56, "flr">;
+def IL_OP_FRC : ILOpCode<57, "frc">;
+def IL_OP_FTOI : ILOpCode<58, "ftoi">;
+def IL_OP_FTOU : ILOpCode<59, "ftou">;
+def IL_OP_FUNC : ILOpCode<60, "func">;
+def IL_OP_GE : ILOpCode<61, "ge">;
+def IL_OP_I_ADD : ILOpCode<62, "iadd">;
+def IL_OP_I_EQ : ILOpCode<63, "ieq">;
+def IL_OP_I_GE : ILOpCode<64, "ige">;
+def IL_OP_I_LT : ILOpCode<65, "ilt">;
+def IL_OP_I_MAD : ILOpCode<66, "imad">;
+def IL_OP_I_MAX : ILOpCode<67, "imax">;
+def IL_OP_I_MIN : ILOpCode<68, "imin">;
+def IL_OP_I_MUL : ILOpCode<69, "imul">;
+def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
+def IL_OP_I_NE : ILOpCode<71, "ine">;
+def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
+def IL_OP_I_NOT : ILOpCode<73, "inot">;
+def IL_OP_I_OR : ILOpCode<74, "ior">;
+def IL_OP_I_SHL : ILOpCode<75, "ishl">;
+def IL_OP_I_SHR : ILOpCode<76, "ishr">;
+def IL_OP_I_XOR : ILOpCode<77, "ixor">;
+def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
+def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
+def IL_OP_IFC : ILOpCode<80, "ifc">;
+def IL_OP_ITOF : ILOpCode<81, "itof">;
+def IL_OP_LN : ILOpCode<82, "ln">;
+def IL_OP_LOG : ILOpCode<83, "log">;
+def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
+def IL_OP_LOOP : ILOpCode<85, "loop">;
+def IL_OP_LT : ILOpCode<86, "lt">;
+def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
+def IL_OP_MAX : ILOpCode<88, "max_ieee">;
+def IL_OP_MIN : ILOpCode<89, "min_ieee">;
+def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
+def IL_OP_MOV : ILOpCode<91, "mov">;
+def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
+def IL_OP_NE : ILOpCode<93, "ne">;
+def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
+def IL_OP_POW : ILOpCode<95, "pow">;
+def IL_OP_RCP : ILOpCode<96, "rcp">;
+def IL_OP_RET : ILOpCode<97, "ret">;
+def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
+def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
+def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
+def IL_OP_RND : ILOpCode<101, "rnd">;
+def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
+def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
+def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
+def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
+def IL_OP_RSQ : ILOpCode<106, "rsq">;
+def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
+def IL_OP_SAMPLE : ILOpCode<108, "sample">;
+def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
+def IL_OP_SET : ILOpCode<110, "set">;
+def IL_OP_SGN : ILOpCode<111, "sgn">;
+def IL_OP_SIN : ILOpCode<112, "sin">;
+def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
+def IL_OP_SUB : ILOpCode<114, "sub">;
+def IL_OP_SWITCH : ILOpCode<115, "switch">;
+def IL_OP_TRC : ILOpCode<116, "trc">;
+def IL_OP_U_DIV : ILOpCode<117, "udiv">;
+def IL_OP_U_GE : ILOpCode<118, "uge">;
+def IL_OP_U_LT : ILOpCode<119, "ult">;
+def IL_OP_U_MAD : ILOpCode<120, "umad">;
+def IL_OP_U_MAX : ILOpCode<121, "umax">;
+def IL_OP_U_MIN : ILOpCode<122, "umin">;
+def IL_OP_U_MOD : ILOpCode<123, "umod">;
+def IL_OP_U_MUL : ILOpCode<124, "umul">;
+def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
+def IL_OP_U_SHR : ILOpCode<126, "ushr">;
+def IL_OP_UTOF : ILOpCode<127, "utof">;
+def IL_OP_WHILE : ILOpCode<128, "whileloop">;
+// SC IL instructions that are not in CAL IL
+def IL_OP_ACOS : ILOpCode<129, "acos">;
+def IL_OP_ASIN : ILOpCode<130, "asin">;
+def IL_OP_EXN : ILOpCode<131, "exn">;
+def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
+def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
+def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
+def IL_OP_SQRT : ILOpCode<135, "sqrt">;
+def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
+def IL_OP_ATAN : ILOpCode<137, "atan">;
+def IL_OP_TAN : ILOpCode<137, "tan">;
+def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
+def IL_OP_F_NEG : ILOpCode<139, "mov">;
+def IL_OP_GT : ILOpCode<140, "gt">;
+def IL_OP_LE : ILOpCode<141, "lt">;
+def IL_OP_DIST : ILOpCode<142, "dist">;
+def IL_OP_LEN : ILOpCode<143, "len">;
+def IL_OP_MACRO : ILOpCode<144, "mcall">;
+def IL_OP_INTR : ILOpCode<145, "call">;
+def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
+def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
+def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
+def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
+def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
+def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
+def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
+def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
+def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
+def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
+def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
+def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
+def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
+def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
+def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
+def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
+def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
+def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
+def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
+def IL_OP_SAD : ILOpCode<166, "sad">;
+def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
+def IL_OP_SAD4 : ILOpCode<168, "sad4">;
+def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
+def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
+def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
+def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
+def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
+def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
+def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
+def IL_OP_CLAMP : ILOpCode<176, "clamp">;
+def IL_OP_LERP : ILOpCode<177, "lrp">;
+def IL_OP_FMA : ILOpCode<178, "fma">;
+def IL_OP_D_MIN : ILOpCode<179, "dmin">;
+def IL_OP_D_MAX : ILOpCode<180, "dmax">;
+def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
+def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
+def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
+def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
+def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
+def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
+def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlag">;
+def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
+def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
+def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
+def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
+def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
+def IL_OP_D_MOV : ILOpCode<193, "dmov">;
+def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
+def IL_OP_NOP : ILOpCode<195, "nop">;
+def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
+def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
+def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
+def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
+def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
+def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
+def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
+def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
+def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
+def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
+def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
+def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
+def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
+def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
+def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
+def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
+def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
+def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
+def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
+def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
+def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
+def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
+def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
+def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
+def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
+def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
+def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
+def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
+def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
+def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
+def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
+def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
+def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
+def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
+def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
+def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
+def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
+def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
+def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
+def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
+def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
+def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
+def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
+def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
+def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
+def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
+def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
+def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
+def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
+def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
+def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
+def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
+def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
+def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
+def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
+def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
+def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
+def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
+def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
+def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
+def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
+def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
+def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
+def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
+def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
+def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
+def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
+def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
+def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
+def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
+def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
+def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
+def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
+def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
+def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
+def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
+def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
+def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
+def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
+def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
+def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
+def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
+def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
+def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
+def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
+def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
+def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
+def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
+def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
+def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
+def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
+def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
+def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
+def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
+def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
+def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
+def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
+def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
+def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
+def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
+def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
+def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
+def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
+def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
+def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
+def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
+def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
+def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
+def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
+def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
+def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
+def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
+def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
+def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
+def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
+def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
+def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
+def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
+def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
+def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
+def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
+def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
+def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
+def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
+def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
+def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">;
+def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
+def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
+def IL_OP_MUL : ILOpCode<323, "mul">;
+def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
+def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
+def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
+def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
+def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
+def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
+def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
+def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
+def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
+def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
+def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
+def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
+def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
+def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
+def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
+def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
+def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
+def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
+def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
+def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
+def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
+def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
+def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
+def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
+def IL_OP_BFI : ILOpCode<394, "bfi">;
+def IL_OP_BFM : ILOpCode<395, "bfm">;
+def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
+def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
+def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
new file mode 100644
index 00000000000..1af28063da6
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
@@ -0,0 +1,211 @@
+//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+#ifdef UPSTREAM_LLVM
+#include "AMDILEGAsmPrinter.h"
+#endif
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+
+using namespace llvm;
+
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+: AMDILDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ mDeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ mDeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ mDeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ mDeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDILEvergreenDevice::~AMDILEvergreenDevice() {
+}
+
+size_t AMDILEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDILEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ } else {
+ return DEFAULT_RAW_UAV_ID;
+ }
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDILEvergreenDevice::getWavefrontSize() const {
+ return AMDILDevice::WavefrontSize;
+}
+
+uint32_t AMDILEvergreenDevice::getGeneration() const {
+ return AMDILDeviceInfo::HD5XXX;
+}
+
+void AMDILEvergreenDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+ }
+ mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDILDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+ mSWBits.set(AMDILDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::LocalMem);
+ mHWBits.set(AMDILDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDILDeviceInfo::Images);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDILDeviceInfo::NoAlias);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDILDeviceInfo::MultiUAV);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_136) {
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ } else {
+ mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_137) {
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
+ }
+ mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+FunctionPass*
+AMDILEvergreenDevice::getIOExpansion(
+ TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
+{
+ return new AMDILEGIOExpansion(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AsmPrinter*
+AMDILEvergreenDevice::getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const
+{
+#ifdef UPSTREAM_LLVM
+ return new AMDILEGAsmPrinter(TM, Streamer);
+#else
+ return NULL;
+#endif
+}
+
+FunctionPass*
+AMDILEvergreenDevice::getPointerManager(
+ TargetMachine& TM AMDIL_OPT_LEVEL_DECL) const
+{
+ return new AMDILEGPointerManager(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice() {
+}
+
+void AMDILCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ mHWBits.set(AMDILDeviceInfo::FMA);
+ }
+}
+
+
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice() {
+}
+
+void AMDILCedarDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILCedarDevice::getWavefrontSize() const {
+ return AMDILDevice::QuarterWavefrontSize;
+}
+
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice()
+{
+}
+
+void AMDILRedwoodDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILRedwoodDevice::getWavefrontSize() const {
+ return AMDILDevice::HalfWavefrontSize;
+}
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
new file mode 100644
index 00000000000..726b479c7ea
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
@@ -0,0 +1,93 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// that capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice {
+public:
+ AMDILEvergreenDevice(AMDILSubtarget *ST);
+ virtual ~AMDILEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+ virtual FunctionPass*
+ getIOExpansion(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
+ virtual AsmPrinter*
+ getAsmPrinter(TargetMachine& TM, MCStreamer &Streamer) const;
+ virtual FunctionPass*
+ getPointerManager(TargetMachine& AMDIL_OPT_LEVEL_DECL) const;
+protected:
+ virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similiar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCypressDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCypressDevice();
+private:
+ virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCedarDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class the represents all of the 'Redwood' based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
+public:
+ AMDILRedwoodDevice(AMDILSubtarget *ST);
+ virtual ~AMDILRedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILFormats.td b/src/gallium/drivers/radeon/AMDILFormats.td
new file mode 100644
index 00000000000..99489e7e92c
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFormats.td
@@ -0,0 +1,450 @@
+//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===--------------------------------------------------------------------===//
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits the Instruction class. This
+// class sets the corresponding namespace, the out and input dag lists the
+// pattern to match to and the string to print out for the assembly printer.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDIL";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ ILOpCode operation = op;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// The base class for vector insert instructions. It is a single dest, quad
+// source instruction where the last two source operands must be 32bit
+// immediate values that are encoding the swizzle of the source register
+// The src2 and src3 instructions must also be inversion of each other such
+// that if src2 is 0x1000300(x0z0), src3 must be 0x20004(0y0w). The values
+// are encoded as 32bit integer with each 8 char representing a swizzle value.
+// The encoding is as follows for 32bit register types:
+// 0x00 -> '_'
+// 0x01 -> 'x'
+// 0x02 -> 'y'
+// 0x03 -> 'z'
+// 0x04 -> 'w'
+// 0x05 -> 'x'
+// 0x06 -> 'y'
+// 0x07 -> 'z'
+// 0x08 -> 'w'
+// 0x09 -> '0'
+// The encoding is as follows for 64bit register types:
+// 0x00 -> "__"
+// 0x01 -> "xy"
+// 0x02 -> "zw"
+// 0x03 -> "xy"
+// 0x04 -> "zw"
+// 0x05 -> "00"
+//===--------------------------------------------------------------------===//
+class InsertVectorClass<ILOpCode op, RegisterClass DReg, RegisterClass SReg,
+ SDNode OpNode, string asmstr> :
+ ILFormat<op, (outs DReg:$dst),
+ (ins DReg:$src0, SReg:$src1, i32imm:$src2, i32imm:$src3),
+ !strconcat(asmstr, " $dst, $src0, $src1"),
+ [(set DReg:$dst, (OpNode DReg:$src0, SReg:$src1,
+ timm:$src2, timm:$src3))]>;
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and
+// handles the unary math operators.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input and output register 0.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILDst dst_reg;
+ ILDstMod dst_mod;
+ ILRelAddr dst_rel;
+ ILSrc dst_reg_rel;
+ ILSrcMod dst_reg_rel_mod;
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// A simplified version of OneInOneOut class where the pattern is standard
+// and does not need special cases. This requires that the pattern has
+// a SDNode and takes a source and destination register that is of type
+// RegisterClass. This is the standard unary op class.
+//===--------------------------------------------------------------------===//
+class UnaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dRegs, RegisterClass sRegs>
+ : OneInOneOut<op, (outs dRegs:$dst), (ins sRegs:$src),
+ !strconcat(op.Text, " $dst, $src"),
+ [(set dRegs:$dst, (OpNode sRegs:$src))]>;
+
+//===--------------------------------------------------------------------===//
+// This class is similiar to the UnaryOp class, however, there is no
+// result value to assign.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
+// handles the binary math operators and comparison operations.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 1.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+//===--------------------------------------------------------------------===//
+// A simplification of the TwoInOneOut pattern for Binary Operations.
+// This class is a helper class that assumes the simple pattern of
+// $dst = op $src0 $src1.
+// Other type of matching patterns need to use the TwoInOneOut class.
+//===--------------------------------------------------------------------===//
+class BinaryOp<ILOpCode op, SDNode OpNode, RegisterClass dReg,
+ RegisterClass sReg0, RegisterClass sReg1>
+ : TwoInOneOut<op, (outs dReg:$dst), (ins sReg0:$src0, sReg1:$src1),
+ !strconcat(op.Text, " $dst, $src0, $src1"),
+ [(set dReg:$dst, (OpNode sReg0:$src0, sReg1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector extract instructions. The vector extract
+// instructions take as an input value a source register and a 32bit integer
+// with the same encoding as specified in InsertVectorClass and produces
+// a result with only the swizzled component in the destination register.
+//===--------------------------------------------------------------------===//
+class ExtractVectorClass<RegisterClass DReg, RegisterClass SReg, SDNode OpNode>
+: TwoInOneOut<IL_OP_MOV, (outs DReg:$dst), (ins SReg:$src0, i32imm:$src1),
+ "mov $dst, $src0",
+ [(set DReg:$dst, (OpNode SReg:$src0, timm:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector concatenation. This class creates either a vec2
+// or a vec4 of 32bit data types or a vec2 of 64bit data types. This is done
+// by swizzling either the 'x' or 'xy' components of the source operands
+// into the destination register.
+//===--------------------------------------------------------------------===//
+class VectorConcatClass<RegisterClass Dst, RegisterClass Src, SDNode OpNode>
+ : TwoInOneOut<IL_OP_I_ADD, (outs Dst:$dst), (ins Src:$src0, Src:$src1),
+ "iadd $dst, $src0, $src1",
+ [(set Dst:$dst, (OpNode Src:$src0, Src:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Similiar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on PC platform.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instruction.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src2_reg;
+ ILSrcMod src2_mod;
+ ILRelAddr src2_rel;
+ ILSrc src2_reg_rel;
+ ILSrcMod src2_reg_rel_mod;
+ }
+
+//===--------------------------------------------------------------------===//
+// The g version of the Three Input pattern uses a standard pattern but
+// but allows specification of the register to further generalize the class
+// This class is mainly used in the generic multiclasses in AMDILMultiClass.td
+//===--------------------------------------------------------------------===//
+class TernaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dReg,
+ RegisterClass sReg0,
+ RegisterClass sReg1,
+ RegisterClass sReg2>
+ : ThreeInOneOut<op, (outs dReg:$dst),
+ (ins sReg0:$src0, sReg1:$src1, sReg2:$src2),
+ !strconcat(op.Text, " $dst, $src0, $src1, $src2"),
+ [(set dReg:$dst,
+ (OpNode sReg0:$src0, sReg1:$src1, sReg2:$src2))]>;
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instruction.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class FourInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ThreeInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src3_reg;
+ ILSrcMod src3_mod;
+ ILRelAddr src3_rel;
+ ILSrc src3_reg_rel;
+ ILSrcMod src3_reg_rel_mod;
+ }
+
+
+//===--------------------------------------------------------------------===//
+// The macro class that is an extension of OneInOneOut but is tailored for
+// macros only where all the register types are the same
+//===--------------------------------------------------------------------===//
+class UnaryMacro<RegisterClass Dst, RegisterClass Src0, SDNode OpNode>
+: OneInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0:$src0),
+ "($dst),($src0)",
+ [(set Dst:$dst, (OpNode Src0:$src0))]>;
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of TwoInOneOut but is tailored for
+// macros only where all the register types are the same
+//===--------------------------------------------------------------------===//
+class BinaryMacro<RegisterClass Dst,
+ RegisterClass Src0,
+ RegisterClass Src1,
+ SDNode OpNode>
+ : TwoInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0: $src0, Src1:$src1),
+ "($dst),($src0, $src1)",
+ [(set Dst:$dst, (OpNode Src0:$src0, Src1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 32bit pointers
+//===--------------------------------------------------------------------===//
+class Append<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class AppendNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+class UniAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class UniAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI32:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+class BinAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class BinAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI32:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+class TriAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst...
+class TriAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst...
+class CmpXChgNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions w/ 64bit pointers
+//===--------------------------------------------------------------------===//
+class Append64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class AppendNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+class UniAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class UniAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI64:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+class BinAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst...
+class BinAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI64:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+class TriAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst...
+class TriAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst...
+class CmpXChgNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
new file mode 100644
index 00000000000..87eca87e301
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDILFrameLowering::~AMDILFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+}
+void
+AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+}
+bool
+AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+ return false;
+}
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.h b/src/gallium/drivers/radeon/AMDILFrameLowering.h
new file mode 100644
index 00000000000..b1d919ef524
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFrameLowering.h
@@ -0,0 +1,46 @@
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+
+namespace llvm {
+ class AMDILFrameLowering : public TargetFrameLowering {
+ public:
+ AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1);
+ virtual ~AMDILFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+ }; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
new file mode 100644
index 00000000000..eafd36eaa4e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
@@ -0,0 +1,1353 @@
+//===-- AMDILGlobalManager.cpp - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILGlobalManager.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILSubtarget.h"
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILGlobalManager.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/FormattedStream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+AMDILGlobalManager::AMDILGlobalManager(bool debugMode) {
+ mOffset = 0;
+ mReservedBuffs = 0;
+ symTab = NULL;
+ mCurrentCPOffset = 0;
+ mDebugMode = debugMode;
+}
+
+AMDILGlobalManager::~AMDILGlobalManager() {
+}
+
+void AMDILGlobalManager::print(llvm::raw_ostream &O) {
+ if (!mDebugMode) {
+ return;
+ }
+ O << ";AMDIL Global Manager State Dump:\n";
+ O << ";\tSubtarget: " << mSTM << "\tSymbol Table: " << symTab
+ << "\n";
+ O << ";\tConstant Offset: " << mOffset << "\tCP Offset: "
+ << mCurrentCPOffset << "\tReserved Buffers: " << mReservedBuffs
+ << "\n";
+ if (!mImageNameMap.empty()) {
+ llvm::DenseMap<uint32_t, llvm::StringRef>::iterator imb, ime;
+ O << ";\tGlobal Image Mapping: \n";
+ for (imb = mImageNameMap.begin(), ime = mImageNameMap.end(); imb != ime;
+ ++imb) {
+ O << ";\t\tImage ID: " << imb->first << "\tName: "
+ << imb->second << "\n";
+ }
+ }
+ std::set<llvm::StringRef>::iterator sb, se;
+ if (!mByteStore.empty()) {
+ O << ";Byte Store Kernels: \n";
+ for (sb = mByteStore.begin(), se = mByteStore.end(); sb != se; ++sb) {
+ O << ";\t\t" << *sb << "\n";
+ }
+ }
+ if (!mIgnoreStr.empty()) {
+ O << ";\tIgnored Data Strings: \n";
+ for (sb = mIgnoreStr.begin(), se = mIgnoreStr.end(); sb != se; ++sb) {
+ O << ";\t\t" << *sb << "\n";
+ }
+ }
+}
+
+void AMDILGlobalManager::dump() {
+ print(errs());
+}
+
+static const constPtr *getConstPtr(const kernel &krnl, const std::string &arg) {
+ llvm::SmallVector<constPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
+ for (begin = krnl.constPtr.begin(), end = krnl.constPtr.end();
+ begin != end; ++begin) {
+ if (!strcmp(begin->name.data(),arg.c_str())) {
+ return &(*begin);
+ }
+ }
+ return NULL;
+}
+#if 0
+static bool structContainsSub32bitType(const StructType *ST) {
+ StructType::element_iterator eib, eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ Type *ptr = *eib;
+ uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
+ if (!size) {
+ if (const StructType *ST = dyn_cast<StructType>(ptr)) {
+ if (structContainsSub32bitType(ST)) {
+ return true;
+ }
+ }
+ } else if (size < 32) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+void AMDILGlobalManager::processModule(const Module &M,
+ const AMDILTargetMachine *mTM)
+{
+ Module::const_global_iterator GI;
+ Module::const_global_iterator GE;
+ symTab = "NoSymTab";
+ mSTM = mTM->getSubtargetImpl();
+ for (GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
+ const GlobalValue *GV = GI;
+ if (mDebugMode) {
+ GV->dump();
+ errs() << "\n";
+ }
+ llvm::StringRef GVName = GV->getName();
+ const char *name = GVName.data();
+ if (!strncmp(name, "sgv", 3)) {
+ mKernelArgs[GVName] = parseSGV(GV);
+ } else if (!strncmp(name, "fgv", 3)) {
+ // we can ignore this since we don't care about the filename
+ // string
+ } else if (!strncmp(name, "lvgv", 4)) {
+ mLocalArgs[GVName] = parseLVGV(GV);
+ } else if (!strncmp(name, "llvm.image.annotations", 22)) {
+ if (strstr(name, "__OpenCL")
+ && strstr(name, "_kernel")) {
+ // we only want to parse the image information if the
+ // image is a kernel, we might have to parse out the
+ // information if a function is found that is not
+ // inlined.
+ parseImageAnnotate(GV);
+ }
+ } else if (!strncmp(name, "llvm.global.annotations", 23)) {
+ parseGlobalAnnotate(GV);
+ } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
+ if (strstr(name, "__OpenCL")
+ && strstr(name, "_kernel")) {
+ // we only want to parse constant pointer information
+ // if it is a kernel
+ parseConstantPtrAnnotate(GV);
+ }
+ } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
+ // These are skipped as we handle them later in AMDILPointerManager.cpp
+ } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
+ parseAutoArray(GV, false);
+ } else if (strstr(name, "clregion")) {
+ parseAutoArray(GV, true);
+ } else if (!GV->use_empty()
+ && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
+ parseConstantPtr(GV);
+ }
+ }
+ allocateGlobalCB();
+
+ safeForEach(M.begin(), M.end(),
+ std::bind1st(
+ std::mem_fun(&AMDILGlobalManager::checkConstPtrsUseHW),
+ this));
+}
+
+void AMDILGlobalManager::allocateGlobalCB(void) {
+ uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
+ uint32_t offset = 0;
+ uint32_t curCB = 0;
+ uint32_t swoffset = 0;
+ for (StringMap<constPtr>::iterator cpb = mConstMems.begin(),
+ cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+ bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+ cpb->second.usesHardware = false;
+ if (constHW) {
+ // If we have a limit on the max CB Size, then we need to make sure that
+ // the constant sizes fall within the limits.
+ if (cpb->second.size <= maxCBSize) {
+ if (offset + cpb->second.size > maxCBSize) {
+ offset = 0;
+ curCB++;
+ }
+ if (curCB < mSTM->device()->getMaxNumCBs()) {
+ cpb->second.cbNum = curCB + CB_BASE_OFFSET;
+ cpb->second.offset = offset;
+ offset += (cpb->second.size + 15) & (~15);
+ cpb->second.usesHardware = true;
+ continue;
+ }
+ }
+ }
+ cpb->second.cbNum = 0;
+ cpb->second.offset = swoffset;
+ swoffset += (cpb->second.size + 15) & (~15);
+ }
+ if (!mConstMems.empty()) {
+ mReservedBuffs = curCB + 1;
+ }
+}
+
+bool AMDILGlobalManager::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
+{
+ Function::const_arg_iterator AI, AE;
+ const Function *func = *FCI;
+ std::string name = func->getName();
+ if (!strstr(name.c_str(), "__OpenCL")
+ || !strstr(name.c_str(), "_kernel")) {
+ return false;
+ }
+ kernel &krnl = mKernels[name];
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ for (AI = func->arg_begin(), AE = func->arg_end();
+ AI != AE; ++AI) {
+ const Argument *Arg = &(*AI);
+ const PointerType *P = dyn_cast<PointerType>(Arg->getType());
+ if (!P) {
+ continue;
+ }
+ if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
+ continue;
+ }
+ const constPtr *ptr = getConstPtr(krnl, Arg->getName());
+ if (ptr) {
+ continue;
+ }
+ constPtr constAttr;
+ constAttr.name = Arg->getName();
+ constAttr.size = this->mSTM->device()->getMaxCBSize();
+ constAttr.base = Arg;
+ constAttr.isArgument = true;
+ constAttr.isArray = false;
+ constAttr.offset = 0;
+ constAttr.usesHardware =
+ mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+ if (constAttr.usesHardware) {
+ constAttr.cbNum = krnl.constPtr.size() + 2;
+ } else {
+ constAttr.cbNum = 0;
+ }
+ krnl.constPtr.push_back(constAttr);
+ }
+ }
+ // Now lets make sure that only the N largest buffers
+ // get allocated in hardware if we have too many buffers
+ uint32_t numPtrs = krnl.constPtr.size();
+ if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
+ // TODO: Change this routine so it sorts
+ // constPtr instead of pulling the sizes out
+ // and then grab the N largest and disable the rest
+ llvm::SmallVector<uint32_t, 16> sizes;
+ for (uint32_t x = 0; x < numPtrs; ++x) {
+ sizes.push_back(krnl.constPtr[x].size);
+ }
+ std::sort(sizes.begin(), sizes.end());
+ uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
+ mReservedBuffs);
+ uint32_t safeSize = sizes[numToDisable-1];
+ for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
+ if (krnl.constPtr[x].size <= safeSize) {
+ krnl.constPtr[x].usesHardware = false;
+ --numToDisable;
+ }
+ }
+ }
+ // Renumber all of the valid CB's so that
+ // they are linear increase
+ uint32_t CBid = 2 + mReservedBuffs;
+ for (uint32_t x = 0; x < numPtrs; ++x) {
+ if (krnl.constPtr[x].usesHardware) {
+ krnl.constPtr[x].cbNum = CBid++;
+ }
+ }
+ for (StringMap<constPtr>::iterator cpb = mConstMems.begin(),
+ cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+ if (cpb->second.usesHardware) {
+ krnl.constPtr.push_back(cpb->second);
+ }
+ }
+ for (uint32_t x = 0; x < krnl.constPtr.size(); ++x) {
+ constPtr &c = krnl.constPtr[x];
+ uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
+ if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
+ if ((c.size + c.offset) > krnl.constSizes[cbNum]) {
+ krnl.constSizes[cbNum] =
+ ((c.size + c.offset) + 15) & ~15;
+ }
+ } else {
+ krnl.constPtr[x].usesHardware = false;
+ }
+ }
+ return false;
+}
+
+int32_t AMDILGlobalManager::getArrayOffset(const llvm::StringRef &a) const {
+ StringMap<arraymem>::const_iterator iter = mArrayMems.find(a);
+ if (iter != mArrayMems.end()) {
+ return iter->second.offset;
+ } else {
+ return -1;
+ }
+}
+
+int32_t AMDILGlobalManager::getConstOffset(const llvm::StringRef &a) const {
+ StringMap<constPtr>::const_iterator iter = mConstMems.find(a);
+ if (iter != mConstMems.end()) {
+ return iter->second.offset;
+ } else {
+ return -1;
+ }
+}
+
+bool AMDILGlobalManager::getConstHWBit(const llvm::StringRef &name) const {
+ StringMap<constPtr>::const_iterator iter = mConstMems.find(name);
+ if (iter != mConstMems.end()) {
+ return iter->second.usesHardware;
+ } else {
+ return false;
+ }
+}
+
+// As of right now we only care about the required group size
+// so we can skip the variable encoding
+kernelArg AMDILGlobalManager::parseSGV(const GlobalValue *G) {
+ kernelArg nArg;
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ memset(&nArg, 0, sizeof(nArg));
+ for (int x = 0; x < 3; ++x) {
+ nArg.reqGroupSize[x] = mSTM->getDefaultSize(x);
+ nArg.reqRegionSize[x] = mSTM->getDefaultSize(x);
+ }
+ if (!GV || !GV->hasInitializer()) {
+ return nArg;
+ }
+ const Constant *CV = GV->getInitializer();
+ const ConstantDataArray *CA =dyn_cast_or_null<ConstantDataArray>(CV);
+
+ if (!CA || !CA->isString()) {
+ return nArg;
+ }
+ std::string init = CA->getAsString();
+ size_t pos = init.find("RWG");
+ if (pos != llvm::StringRef::npos) {
+ pos += 3;
+ std::string LWS = init.substr(pos, init.length() - pos);
+ const char *lws = LWS.c_str();
+ sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]),
+ &(nArg.reqGroupSize[1]),
+ &(nArg.reqGroupSize[2]));
+ nArg.mHasRWG = true;
+ }
+ pos = init.find("RWR");
+ if (pos != llvm::StringRef::npos) {
+ pos += 3;
+ std::string LWS = init.substr(pos, init.length() - pos);
+ const char *lws = LWS.c_str();
+ sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]),
+ &(nArg.reqRegionSize[1]),
+ &(nArg.reqRegionSize[2]));
+ nArg.mHasRWR = true;
+ }
+ return nArg;
+}
+
+localArg AMDILGlobalManager::parseLVGV(const GlobalValue *G) {
+ localArg nArg;
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ nArg.name = "";
+ if (!GV || !GV->hasInitializer()) {
+ return nArg;
+ }
+ const ConstantArray *CA =
+ dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+ if (!CA) {
+ return nArg;
+ }
+ for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) {
+ const Value *local = CA->getOperand(x);
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local);
+ if (!CE || !CE->getNumOperands()) {
+ continue;
+ }
+ nArg.name = (*(CE->op_begin()))->getName();
+ if (mArrayMems.find(nArg.name) != mArrayMems.end()) {
+ nArg.local.push_back(&(mArrayMems[nArg.name]));
+ }
+ }
+ return nArg;
+}
+
+void AMDILGlobalManager::parseConstantPtrAnnotate(const GlobalValue *G) {
+ const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+ const ConstantArray *CA =
+ dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+ if (!CA) {
+ return;
+ }
+ uint32_t numOps = CA->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ const Value *V = CA->getOperand(x);
+ const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+ if (!CS) {
+ continue;
+ }
+ assert(CS->getNumOperands() == 2 && "There can only be 2"
+ " fields, a name and size");
+ const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
+ const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
+ assert(nameField && "There must be a constant name field");
+ assert(sizeField && "There must be a constant size field");
+ const GlobalVariable *nameGV =
+ dyn_cast<GlobalVariable>(nameField->getOperand(0));
+ const ConstantDataArray *nameArray =
+ dyn_cast<ConstantDataArray>(nameGV->getInitializer());
+ // Lets add this string to the set of strings we should ignore processing
+ mIgnoreStr.insert(nameGV->getName());
+ if (mConstMems.find(nameGV->getName())
+ != mConstMems.end()) {
+ // If we already processesd this string as a constant, lets remove it from
+ // the list of known constants. This way we don't process unneeded data
+ // and don't generate code/metadata for strings that are never used.
+ mConstMems.erase(mConstMems.find(nameGV->getName()));
+ } else {
+ mIgnoreStr.insert(CS->getOperand(0)->getName());
+ }
+ constPtr constAttr;
+ constAttr.name = nameArray->getAsString();
+ constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
+ constAttr.base = CS;
+ constAttr.isArgument = true;
+ constAttr.isArray = false;
+ constAttr.cbNum = 0;
+ constAttr.offset = 0;
+ constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
+ // Now that we have all our constant information,
+ // lets update the kernel
+ llvm::StringRef kernelName = G->getName().data() + 30;
+ kernel k;
+ if (mKernels.find(kernelName) != mKernels.end()) {
+ k = mKernels[kernelName];
+ } else {
+ k.curSize = 0;
+ k.curRSize = 0;
+ k.curHWSize = 0;
+ k.curHWRSize = 0;
+ k.constSize = 0;
+ k.lvgv = NULL;
+ k.sgv = NULL;
+ memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+ }
+ constAttr.cbNum = k.constPtr.size() + 2;
+ k.constPtr.push_back(constAttr);
+ mKernels[kernelName] = k;
+ }
+}
+
+void AMDILGlobalManager::parseImageAnnotate(const GlobalValue *G) {
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA) {
+ return;
+ }
+ if (isa<GlobalValue>(CA)) {
+ return;
+ }
+ uint32_t e = CA->getNumOperands();
+ if (!e) {
+ return;
+ }
+ kernel k;
+ llvm::StringRef name = G->getName().data() + 23;
+ if (mKernels.find(name) != mKernels.end()) {
+ k = mKernels[name];
+ } else {
+ k.curSize = 0;
+ k.curRSize = 0;
+ k.curHWSize = 0;
+ k.curHWRSize = 0;
+ k.constSize = 0;
+ k.lvgv = NULL;
+ k.sgv = NULL;
+ memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+ }
+ for (uint32_t i = 0; i != e; ++i) {
+ const Value *V = CA->getOperand(i);
+ const Constant *C = dyn_cast<Constant>(V);
+ const ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
+ if (CS && CS->getNumOperands() == 2) {
+ if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
+ mConstMems.end()) {
+ // If we already processesd this string as a constant, lets remove it
+ // from the list of known constants. This way we don't process unneeded
+ // data and don't generate code/metadata for strings that are never
+ // used.
+ mConstMems.erase(
+ mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
+ } else {
+ mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
+ }
+ const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
+ uint32_t val = (uint32_t)CI->getZExtValue();
+ if (val == 1) {
+ k.readOnly.insert(i);
+ } else if (val == 2) {
+ k.writeOnly.insert(i);
+ } else {
+ assert(!"Unknown image type value!");
+ }
+ }
+ }
+ mKernels[name] = k;
+}
+
+void AMDILGlobalManager::parseAutoArray(const GlobalValue *GV, bool isRegion) {
+ const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+ Type *Ty = (G) ? G->getType() : NULL;
+ arraymem tmp;
+ tmp.isHW = true;
+ tmp.offset = 0;
+ tmp.vecSize = getTypeSize(Ty, true);
+ tmp.isRegion = isRegion;
+ mArrayMems[GV->getName()] = tmp;
+}
+
+void AMDILGlobalManager::parseConstantPtr(const GlobalValue *GV) {
+ const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+ Type *Ty = (G) ? G->getType() : NULL;
+ constPtr constAttr;
+ constAttr.name = G->getName();
+ constAttr.size = getTypeSize(Ty, true);
+ constAttr.base = GV;
+ constAttr.isArgument = false;
+ constAttr.isArray = true;
+ constAttr.offset = 0;
+ constAttr.cbNum = 0;
+ constAttr.usesHardware = false;
+ mConstMems[GV->getName()] = constAttr;
+}
+
+void AMDILGlobalManager::parseGlobalAnnotate(const GlobalValue *G) {
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV->hasInitializer()) {
+ return;
+ }
+ const Constant *CT = GV->getInitializer();
+ if (!CT || isa<GlobalValue>(CT)) {
+ return;
+ }
+ const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
+ if (!CA) {
+ return;
+ }
+
+ unsigned int nKernels = CA->getNumOperands();
+ for (unsigned int i = 0, e = nKernels; i != e; ++i) {
+ parseKernelInformation(CA->getOperand(i));
+ }
+}
+
+void AMDILGlobalManager::parseKernelInformation(const Value *V) {
+ if (isa<GlobalValue>(V)) {
+ return;
+ }
+ const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+ if (!CS) {
+ return;
+ }
+ uint32_t N = CS->getNumOperands();
+ if (N != 5) {
+ return;
+ }
+ kernel tmp;
+
+ tmp.curSize = 0;
+ tmp.curRSize = 0;
+ tmp.curHWSize = 0;
+ tmp.curHWRSize = 0;
+ // The first operand is always a pointer to the kernel.
+ const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
+ llvm::StringRef kernelName = "";
+ if (CV->getNumOperands()) {
+ kernelName = (*(CV->op_begin()))->getName();
+ }
+
+ // If we have images, then we have already created the kernel and we just need
+ // to get the kernel information.
+ if (mKernels.find(kernelName) != mKernels.end()) {
+ tmp = mKernels[kernelName];
+ } else {
+ tmp.curSize = 0;
+ tmp.curRSize = 0;
+ tmp.curHWSize = 0;
+ tmp.curHWRSize = 0;
+ tmp.constSize = 0;
+ tmp.lvgv = NULL;
+ tmp.sgv = NULL;
+ memset(tmp.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+ }
+
+
+ // The second operand is SGV, there can only be one so we don't need to worry
+ // about parsing out multiple data points.
+ CV = dyn_cast<Constant>(CS->getOperand(1));
+
+ llvm::StringRef sgvName;
+ if (CV->getNumOperands()) {
+ sgvName = (*(CV->op_begin()))->getName();
+ }
+
+ if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
+ tmp.sgv = &mKernelArgs[sgvName];
+ }
+ // The third operand is FGV, which is skipped
+ // The fourth operand is LVGV
+ // There can be multiple local arrays, so we
+ // need to handle each one seperatly
+ CV = dyn_cast<Constant>(CS->getOperand(3));
+ llvm::StringRef lvgvName = "";
+ if (CV->getNumOperands()) {
+ lvgvName = (*(CV->op_begin()))->getName();
+ }
+ if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
+ localArg *ptr = &mLocalArgs[lvgvName];
+ tmp.lvgv = ptr;
+ llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
+ for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
+ if ((*ib)->isRegion) {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp.curHWRSize;
+ tmp.curHWRSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp.curRSize;
+ tmp.curRSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ } else {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp.curHWSize;
+ tmp.curHWSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp.curSize;
+ tmp.curSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ }
+ }
+ }
+
+ // The fifth operand is NULL
+ mKernels[kernelName] = tmp;
+}
+
+const kernel &AMDILGlobalManager::getKernel(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ assert(isKernel(name) && "Must be a kernel to call getKernel");
+ return iter->second;
+}
+
+bool AMDILGlobalManager::isKernel(const llvm::StringRef &name) const {
+ return (mKernels.find(name) != mKernels.end());
+}
+
+bool AMDILGlobalManager::isWriteOnlyImage(const llvm::StringRef &name,
+ uint32_t iID) const {
+ const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
+ if (kiter == mKernels.end()) {
+ return false;
+ }
+ return kiter->second.writeOnly.count(iID);
+}
+
+uint32_t
+AMDILGlobalManager::getNumWriteImages(const llvm::StringRef &name) const {
+ char *env = NULL;
+ env = getenv("GPU_DISABLE_RAW_UAV");
+ if (env && env[0] == '1') {
+ return 8;
+ }
+ const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
+ if (kiter == mKernels.end()) {
+ return 0;
+ } else {
+ return kiter->second.writeOnly.size();
+ }
+}
+
+bool AMDILGlobalManager::isReadOnlyImage(const llvm::StringRef &name,
+ uint32_t iID) const {
+ const StringMap<kernel>::const_iterator kiter = mKernels.find(name);
+ if (kiter == mKernels.end()) {
+ return false;
+ }
+ return kiter->second.readOnly.count(iID);
+}
+
+bool AMDILGlobalManager::hasRWG(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ kernelArg *ptr = iter->second.sgv;
+ if (ptr) {
+ return ptr->mHasRWG;
+ }
+ }
+ return false;
+}
+
+bool AMDILGlobalManager::hasRWR(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ kernelArg *ptr = iter->second.sgv;
+ if (ptr) {
+ return ptr->mHasRWR;
+ }
+ }
+ return false;
+}
+
+uint32_t
+AMDILGlobalManager::getMaxGroupSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ kernelArg *sgv = iter->second.sgv;
+ if (sgv) {
+ return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+ }
+ }
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+}
+
+uint32_t
+AMDILGlobalManager::getMaxRegionSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ kernelArg *sgv = iter->second.sgv;
+ if (sgv) {
+ return sgv->reqRegionSize[0] *
+ sgv->reqRegionSize[1] *
+ sgv->reqRegionSize[2];
+ }
+ }
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+}
+
+uint32_t AMDILGlobalManager::getRegionSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second.curRSize;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t AMDILGlobalManager::getLocalSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second.curSize;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t AMDILGlobalManager::getConstSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second.constSize;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDILGlobalManager::getHWRegionSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second.curHWRSize;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t AMDILGlobalManager::getHWLocalSize(const llvm::StringRef &name) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second.curHWSize;
+ } else {
+ return 0;
+ }
+}
+
+int32_t AMDILGlobalManager::getArgID(const Argument *arg) {
+ DenseMap<const Argument *, int32_t>::iterator argiter = mArgIDMap.find(arg);
+ if (argiter != mArgIDMap.end()) {
+ return argiter->second;
+ } else {
+ return -1;
+ }
+}
+
+
+uint32_t
+AMDILGlobalManager::getLocal(const llvm::StringRef &name, uint32_t dim) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end() && iter->second.sgv) {
+ kernelArg *sgv = iter->second.sgv;
+ switch (dim) {
+ default: break;
+ case 0:
+ case 1:
+ case 2:
+ return sgv->reqGroupSize[dim];
+ break;
+ case 3:
+ return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+ };
+ }
+ switch (dim) {
+ default:
+ return 1;
+ case 3:
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+ case 2:
+ case 1:
+ case 0:
+ return mSTM->getDefaultSize(dim);
+ break;
+ };
+ return 1;
+}
+
+uint32_t
+AMDILGlobalManager::getRegion(const llvm::StringRef &name, uint32_t dim) const {
+ StringMap<kernel>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end() && iter->second.sgv) {
+ kernelArg *sgv = iter->second.sgv;
+ switch (dim) {
+ default: break;
+ case 0:
+ case 1:
+ case 2:
+ return sgv->reqRegionSize[dim];
+ break;
+ case 3:
+ return sgv->reqRegionSize[0] *
+ sgv->reqRegionSize[1] *
+ sgv->reqRegionSize[2];
+ };
+ }
+ switch (dim) {
+ default:
+ return 1;
+ case 3:
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+ case 2:
+ case 1:
+ case 0:
+ return mSTM->getDefaultSize(dim);
+ break;
+ };
+ return 1;
+}
+
+StringMap<constPtr>::iterator AMDILGlobalManager::consts_begin() {
+ return mConstMems.begin();
+}
+
+
+StringMap<constPtr>::iterator AMDILGlobalManager::consts_end() {
+ return mConstMems.end();
+}
+
+bool AMDILGlobalManager::byteStoreExists(StringRef S) const {
+ return mByteStore.find(S) != mByteStore.end();
+}
+
+bool AMDILGlobalManager::usesHWConstant(const kernel &krnl,
+ const llvm::StringRef &arg) {
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->usesHardware;
+ } else {
+ return false;
+ }
+}
+
+uint32_t AMDILGlobalManager::getConstPtrSize(const kernel &krnl,
+ const llvm::StringRef &arg)
+{
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->size;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t AMDILGlobalManager::getConstPtrOff(const kernel &krnl,
+ const llvm::StringRef &arg)
+{
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->offset;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t AMDILGlobalManager::getConstPtrCB(const kernel &krnl,
+ const llvm::StringRef &arg)
+{
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->cbNum;
+ } else {
+ return 0;
+ }
+}
+
+void AMDILGlobalManager::calculateCPOffsets(const MachineFunction *MF,
+ kernel &krnl)
+{
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ if (!MCP) {
+ return;
+ }
+ const std::vector<MachineConstantPoolEntry> consts = MCP->getConstants();
+ size_t numConsts = consts.size();
+ for (size_t x = 0; x < numConsts; ++x) {
+ krnl.CPOffsets.push_back(
+ std::make_pair<uint32_t, const Constant*>(
+ mCurrentCPOffset, consts[x].Val.ConstVal));
+ size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true);
+ // Align the size to the vector boundary
+ curSize = (curSize + 15) & (~15);
+ mCurrentCPOffset += curSize;
+ }
+}
+
+bool AMDILGlobalManager::isConstPtrArray(const kernel &krnl,
+ const llvm::StringRef &arg) {
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->isArray;
+ } else {
+ return false;
+ }
+}
+
+bool AMDILGlobalManager::isConstPtrArgument(const kernel &krnl,
+ const llvm::StringRef &arg)
+{
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->isArgument;
+ } else {
+ return false;
+ }
+}
+
+const Value *AMDILGlobalManager::getConstPtrValue(const kernel &krnl,
+ const llvm::StringRef &arg) {
+ const constPtr *curConst = getConstPtr(krnl, arg);
+ if (curConst) {
+ return curConst->base;
+ } else {
+ return NULL;
+ }
+}
+
+static void
+dumpZeroElements(const StructType * const T, llvm::raw_ostream &O, bool asBytes);
+static void
+dumpZeroElements(const IntegerType * const T, llvm::raw_ostream &O, bool asBytes);
+static void
+dumpZeroElements(const ArrayType * const T, llvm::raw_ostream &O, bool asBytes);
+static void
+dumpZeroElements(const VectorType * const T, llvm::raw_ostream &O, bool asBytes);
+static void
+dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes);
+
+void dumpZeroElements(const Type * const T, llvm::raw_ostream &O, bool asBytes) {
+ if (!T) {
+ return;
+ }
+ switch(T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::DoubleTyID:
+ if (asBytes) {
+ O << ":0:0:0:0:0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ break;
+ case Type::FloatTyID:
+ case Type::PointerTyID:
+ case Type::FunctionTyID:
+ if (asBytes) {
+ O << ":0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ break;
+ case Type::IntegerTyID:
+ dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
+ break;
+ case Type::StructTyID:
+ {
+ const StructType *ST = cast<StructType>(T);
+ if (!ST->isOpaque()) {
+ dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
+ } else { // A pre-LLVM 3.0 opaque type
+ if (asBytes) {
+ O << ":0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ }
+ }
+ break;
+ case Type::ArrayTyID:
+ dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
+ break;
+ case Type::VectorTyID:
+ dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
+ break;
+ };
+}
+
+void
+dumpZeroElements(const StructType * const ST, llvm::raw_ostream &O, bool asBytes) {
+ if (!ST) {
+ return;
+ }
+ Type *curType;
+ StructType::element_iterator eib = ST->element_begin();
+ StructType::element_iterator eie = ST->element_end();
+ for (;eib != eie; ++eib) {
+ curType = *eib;
+ dumpZeroElements(curType, O, asBytes);
+ }
+}
+
+void
+dumpZeroElements(const IntegerType * const IT, llvm::raw_ostream &O, bool asBytes) {
+ if (asBytes) {
+ unsigned byteWidth = (IT->getBitWidth() >> 3);
+ for (unsigned x = 0; x < byteWidth; ++x) {
+ O << ":0";
+ }
+ }
+}
+
+void
+dumpZeroElements(const ArrayType * const AT, llvm::raw_ostream &O, bool asBytes) {
+ size_t size = AT->getNumElements();
+ for (size_t x = 0; x < size; ++x) {
+ dumpZeroElements(AT->getElementType(), O, asBytes);
+ }
+}
+
+void
+dumpZeroElements(const VectorType * const VT, llvm::raw_ostream &O, bool asBytes) {
+ size_t size = VT->getNumElements();
+ for (size_t x = 0; x < size; ++x) {
+ dumpZeroElements(VT->getElementType(), O, asBytes);
+ }
+}
+
+void AMDILGlobalManager::printConstantValue(const Constant *CAval,
+ llvm::raw_ostream &O, bool asBytes) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ if (isDouble) {
+ double val = CFP->getValueAPF().convertToDouble();
+ union dtol_union {
+ double d;
+ uint64_t l;
+ char c[8];
+ } conv;
+ conv.d = val;
+ if (!asBytes) {
+ O << ":";
+ O.write_hex(conv.l);
+ } else {
+ for (int i = 0; i < 8; ++i) {
+ O << ":";
+ O.write_hex((unsigned)conv.c[i] & 0xFF);
+ }
+ }
+ } else {
+ float val = CFP->getValueAPF().convertToFloat();
+ union ftoi_union {
+ float f;
+ uint32_t u;
+ char c[4];
+ } conv;
+ conv.f = val;
+ if (!asBytes) {
+ O << ":";
+ O.write_hex(conv.u);
+ } else {
+ for (int i = 0; i < 4; ++i) {
+ O << ":";
+ O.write_hex((unsigned)conv.c[i] & 0xFF);
+ }
+ }
+ }
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
+ uint64_t zVal = CI->getValue().getZExtValue();
+ if (!asBytes) {
+ O << ":";
+ O.write_hex(zVal);
+ } else {
+ switch (CI->getBitWidth()) {
+ default:
+ {
+ union ltob_union {
+ uint64_t l;
+ char c[8];
+ } conv;
+ conv.l = zVal;
+ for (int i = 0; i < 8; ++i) {
+ O << ":";
+ O.write_hex((unsigned)conv.c[i] & 0xFF);
+ }
+ }
+ break;
+ case 8:
+ O << ":";
+ O.write_hex(zVal & 0xFF);
+ break;
+ case 16:
+ {
+ union stob_union {
+ uint16_t s;
+ char c[2];
+ } conv;
+ conv.s = (uint16_t)zVal;
+ O << ":";
+ O.write_hex((unsigned)conv.c[0] & 0xFF);
+ O << ":";
+ O.write_hex((unsigned)conv.c[1] & 0xFF);
+ }
+ break;
+ case 32:
+ {
+ union itob_union {
+ uint32_t i;
+ char c[4];
+ } conv;
+ conv.i = (uint32_t)zVal;
+ for (int i = 0; i < 4; ++i) {
+ O << ":";
+ O.write_hex((unsigned)conv.c[i] & 0xFF);
+ }
+ }
+ break;
+ }
+ }
+ } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
+ int y = CV->getNumOperands()-1;
+ int x = 0;
+ for (; x < y; ++x) {
+ printConstantValue(CV->getOperand(x), O, asBytes);
+ }
+ printConstantValue(CV->getOperand(x), O, asBytes);
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
+ int y = CS->getNumOperands();
+ int x = 0;
+ for (; x < y; ++x) {
+ printConstantValue(CS->getOperand(x), O, asBytes);
+ }
+ } else if (const ConstantAggregateZero *CAZ
+ = dyn_cast<ConstantAggregateZero>(CAval)) {
+ int y = CAZ->getNumOperands();
+ if (y > 0) {
+ int x = 0;
+ for (; x < y; ++x) {
+ printConstantValue((llvm::Constant *)CAZ->getOperand(x),
+ O, asBytes);
+ }
+ } else {
+ if (asBytes) {
+ dumpZeroElements(CAval->getType(), O, asBytes);
+ } else {
+ int y = getNumElements(CAval->getType())-1;
+ for (int x = 0; x < y; ++x) {
+ O << ":0";
+ }
+ O << ":0";
+ }
+ }
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
+ int y = CA->getNumOperands();
+ int x = 0;
+ for (; x < y; ++x) {
+ printConstantValue(CA->getOperand(x), O, asBytes);
+ }
+ } else if (dyn_cast<ConstantPointerNull>(CAval)) {
+ O << ":0";
+ //assert(0 && "Hit condition which was not expected");
+ } else if (dyn_cast<ConstantExpr>(CAval)) {
+ O << ":0";
+ //assert(0 && "Hit condition which was not expected");
+ } else if (dyn_cast<UndefValue>(CAval)) {
+ O << ":0";
+ //assert(0 && "Hit condition which was not expected");
+ } else {
+ assert(0 && "Hit condition which was not expected");
+ }
+}
+
+static bool isStruct(Type * const T)
+{
+ if (!T) {
+ return false;
+ }
+ switch (T->getTypeID()) {
+ default:
+ return false;
+ case Type::PointerTyID:
+ return isStruct(T->getContainedType(0));
+ case Type::StructTyID:
+ return true;
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return isStruct(dyn_cast<SequentialType>(T)->getElementType());
+ };
+
+}
+
+void AMDILGlobalManager::dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km,
+ uint32_t id) {
+ uint32_t size = 0;
+ for (StringMap<constPtr>::iterator cmb = consts_begin(),
+ cme = consts_end(); cmb != cme; ++cmb) {
+ if (id == cmb->second.cbNum) {
+ size += (cmb->second.size + 15) & (~15);
+ }
+ }
+ if (id == 0) {
+ O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
+ if (mCurrentCPOffset) {
+ for (StringMap<kernel>::iterator kcpb = mKernels.begin(),
+ kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
+ const kernel& k = kcpb->second;
+ size_t numConsts = k.CPOffsets.size();
+ for (size_t x = 0; x < numConsts; ++x) {
+ size_t offset = k.CPOffsets[x].first;
+ const Constant *C = k.CPOffsets[x].second;
+ Type *Ty = C->getType();
+ size_t size = (isStruct(Ty) ? getTypeSize(Ty, true)
+ : getNumElements(Ty));
+ O << ";#" << km->getTypeName(Ty, symTab) << ":";
+ O << offset << ":" << size ;
+ printConstantValue(C, O, isStruct(Ty));
+ O << "\n";
+ }
+ }
+ }
+ } else {
+ O << ";#DATASTART:" << id << ":" << size << "\n";
+ }
+
+ for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end();
+ cmb != cme; ++cmb) {
+ if (cmb->second.cbNum != id) {
+ continue;
+ }
+ const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
+ Type *Ty = (G) ? G->getType() : NULL;
+ size_t offset = cmb->second.offset;
+ const Constant *C = G->getInitializer();
+ size_t size = (isStruct(Ty)
+ ? getTypeSize(Ty, true)
+ : getNumElements(Ty));
+ O << ";#" << km->getTypeName(Ty, symTab) << ":";
+ if (!id) {
+ O << (offset + mCurrentCPOffset) << ":" << size;
+ } else {
+ O << offset << ":" << size;
+ }
+ if (C) {
+ printConstantValue(C, O, isStruct(Ty));
+ } else {
+ assert(0 && "Cannot have a constant pointer"
+ " without an initializer!");
+ }
+ O <<"\n";
+ }
+ if (id == 0) {
+ O << ";#DATAEND\n";
+ } else {
+ O << ";#DATAEND:" << id << "\n";
+ }
+}
+
+void
+AMDILGlobalManager::dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km) {
+ if (mConstMems.empty() && !mCurrentCPOffset) {
+ return;
+ } else {
+ llvm::DenseSet<uint32_t> const_set;
+ for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end();
+ cmb != cme; ++cmb) {
+ const_set.insert(cmb->second.cbNum);
+ }
+ if (mCurrentCPOffset) {
+ const_set.insert(0);
+ }
+ for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(),
+ sete = const_set.end(); setb != sete; ++setb) {
+ dumpDataToCB(O, km, *setb);
+ }
+ }
+}
+
+/// Create a function ID if it is not known or return the known
+/// function ID.
+uint32_t AMDILGlobalManager::getOrCreateFunctionID(const GlobalValue* func) {
+ if (func->getName().size()) {
+ return getOrCreateFunctionID(func->getName());
+ }
+ uint32_t id;
+ if (mFuncPtrNames.find(func) == mFuncPtrNames.end()) {
+ id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
+ mFuncPtrNames[func] = id;
+ } else {
+ id = mFuncPtrNames[func];
+ }
+ return id;
+}
+uint32_t AMDILGlobalManager::getOrCreateFunctionID(const std::string &func) {
+ uint32_t id;
+ if (mFuncNames.find(func) == mFuncNames.end()) {
+ id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
+ mFuncNames[func] = id;
+ } else {
+ id = mFuncNames[func];
+ }
+ return id;
+}
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.h b/src/gallium/drivers/radeon/AMDILGlobalManager.h
new file mode 100644
index 00000000000..1b0361e0174
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILGlobalManager.h
@@ -0,0 +1,256 @@
+//===-- AMDILGlobalManager.h - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// ==-----------------------------------------------------------------------===//
+//
+// Class that handles parsing and storing global variables that are relevant to
+// the compilation of the module.
+//
+// ==-----------------------------------------------------------------------===//
+
+#ifndef _AMDILGLOBALMANAGER_H_
+#define _AMDILGLOBALMANAGER_H_
+
+#include "AMDIL.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Module.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <set>
+#include <string>
+
+#define CB_BASE_OFFSET 2
+
+namespace llvm {
+
+class PointerType;
+class AMDILKernelManager;
+class AMDILSubtarget;
+class TypeSymbolTable;
+class Argument;
+class GlobalValue;
+class MachineFunction;
+
+/// structure that holds information for a single local/region address array
+typedef struct _arrayMemRec {
+ uint32_t vecSize; // size of each vector
+ uint32_t offset; // offset into the memory section
+ bool isHW; // flag to specify if HW is used or SW is used
+ bool isRegion; // flag to specify if GDS is used or not
+} arraymem;
+
+/// Structure that holds information for all local/region address
+/// arrays in the kernel
+typedef struct _localArgRec {
+ llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS> local;
+ std::string name; // Kernel Name
+} localArg;
+
+/// structure that holds information about a constant address
+/// space pointer that is a kernel argument
+typedef struct _constPtrRec {
+ const Value *base;
+ uint32_t size;
+ uint32_t offset;
+ uint32_t cbNum; // value of 0 means that it does not use hw CB
+ bool isArray;
+ bool isArgument;
+ bool usesHardware;
+ std::string name;
+} constPtr;
+
+/// Structure that holds information for each kernel argument
+typedef struct _kernelArgRec {
+ uint32_t reqGroupSize[3];
+ uint32_t reqRegionSize[3];
+ llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo;
+ bool mHasRWG;
+ bool mHasRWR;
+} kernelArg;
+
+/// Structure that holds information for each kernel
+typedef struct _kernelRec {
+ mutable uint32_t curSize;
+ mutable uint32_t curRSize;
+ mutable uint32_t curHWSize;
+ mutable uint32_t curHWRSize;
+ uint32_t constSize;
+ kernelArg *sgv;
+ localArg *lvgv;
+ llvm::SmallVector<struct _constPtrRec, DEFAULT_VEC_SLOTS> constPtr;
+ uint32_t constSizes[HW_MAX_NUM_CB];
+ llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly;
+ llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly;
+ llvm::SmallVector<std::pair<uint32_t, const Constant *>,
+ DEFAULT_VEC_SLOTS> CPOffsets;
+} kernel;
+
+class AMDILGlobalManager {
+public:
+ AMDILGlobalManager(bool debugMode = false);
+ ~AMDILGlobalManager();
+
+ /// Process the given module and parse out the global variable metadata passed
+ /// down from the frontend-compiler
+ void processModule(const Module &MF, const AMDILTargetMachine* mTM);
+
+ /// Returns whether the current name is the name of a kernel function or a
+ /// normal function
+ bool isKernel(const llvm::StringRef &name) const;
+
+ /// Returns true if the image ID corresponds to a read only image.
+ bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Returns true if the image ID corresponds to a write only image.
+ bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Returns the number of write only images for the kernel.
+ uint32_t getNumWriteImages(const llvm::StringRef &name) const;
+
+ /// Gets the group size of the kernel for the given dimension.
+ uint32_t getLocal(const llvm::StringRef &name, uint32_t dim) const;
+
+ /// Gets the region size of the kernel for the given dimension.
+ uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
+
+ /// Get the Region memory size in 1d for the given function/kernel.
+ uint32_t getRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the region memory size in 1d for the given function/kernel.
+ uint32_t getLocalSize(const llvm::StringRef &name) const;
+
+ // Get the max group size in one 1D for the given function/kernel.
+ uint32_t getMaxGroupSize(const llvm::StringRef &name) const;
+
+ // Get the max region size in one 1D for the given function/kernel.
+ uint32_t getMaxRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the constant memory size in 1d for the given function/kernel.
+ uint32_t getConstSize(const llvm::StringRef &name) const;
+
+ /// Get the HW local size in 1d for the given function/kernel We need to
+ /// seperate SW local and HW local for the case where some local memory is
+ /// emulated in global and some is using the hardware features. The main
+ /// problem is that in OpenCL 1.0/1.1 cl_khr_byte_addressable_store allows
+ /// these actions to happen on all memory spaces, but the hardware can only
+ /// write byte address stores to UAV and LDS, not GDS or Stack.
+ uint32_t getHWLocalSize(const llvm::StringRef &name) const;
+ uint32_t getHWRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the offset of the array for the kernel.
+ int32_t getArrayOffset(const llvm::StringRef &name) const;
+
+ /// Get the offset of the const memory for the kernel.
+ int32_t getConstOffset(const llvm::StringRef &name) const;
+
+ /// Get the boolean value if this particular constant uses HW or not.
+ bool getConstHWBit(const llvm::StringRef &name) const;
+
+ /// Get a reference to the kernel metadata information for the given function
+ /// name.
+ const kernel &getKernel(const llvm::StringRef &name) const;
+
+ /// Returns whether a reqd_workgroup_size attribute has been used or not.
+ bool hasRWG(const llvm::StringRef &name) const;
+
+ /// Returns whether a reqd_workregion_size attribute has been used or not.
+ bool hasRWR(const llvm::StringRef &name) const;
+
+
+ /// Dump the data section to the output stream for the given kernel.
+ void dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km);
+
+ /// Iterate through the constants that are global to the compilation unit.
+ StringMap<constPtr>::iterator consts_begin();
+ StringMap<constPtr>::iterator consts_end();
+
+ /// Query if the kernel has a byte store.
+ bool byteStoreExists(llvm::StringRef S) const;
+
+ /// Query if the kernel and argument uses hardware constant memory.
+ bool usesHWConstant(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query if the constant pointer is an argument.
+ bool isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query if the constant pointer is an array that is globally scoped.
+ bool isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the size of the constant pointer.
+ uint32_t getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the offset of the constant pointer.
+ uint32_t getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the constant buffer number for a constant pointer.
+ uint32_t getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the Value* that the constant pointer originates from.
+ const Value *getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Get the ID of the argument.
+ int32_t getArgID(const Argument *arg);
+
+ /// Get the unique function ID for the specific function name and create a new
+ /// unique ID if it is not found.
+ uint32_t getOrCreateFunctionID(const GlobalValue* func);
+ uint32_t getOrCreateFunctionID(const std::string& func);
+
+ /// Calculate the offsets of the constant pool for the given kernel and
+ /// machine function.
+ void calculateCPOffsets(const MachineFunction *MF, kernel &krnl);
+
+ /// Print the global manager to the output stream.
+ void print(llvm::raw_ostream& O);
+
+ /// Dump the global manager to the output stream - debug use.
+ void dump();
+
+private:
+ /// Various functions that parse global value information and store them in
+ /// the global manager. This approach is used instead of dynamic parsing as it
+ /// might require more space, but should allow caching of data that gets
+ /// requested multiple times.
+ kernelArg parseSGV(const GlobalValue *GV);
+ localArg parseLVGV(const GlobalValue *GV);
+ void parseGlobalAnnotate(const GlobalValue *G);
+ void parseImageAnnotate(const GlobalValue *G);
+ void parseConstantPtrAnnotate(const GlobalValue *G);
+ void printConstantValue(const Constant *CAval,
+ llvm::raw_ostream& O,
+ bool asByte);
+ void parseKernelInformation(const Value *V);
+ void parseAutoArray(const GlobalValue *G, bool isRegion);
+ void parseConstantPtr(const GlobalValue *G);
+ void allocateGlobalCB();
+ void dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, uint32_t id);
+ bool checkConstPtrsUseHW(Module::const_iterator *F);
+
+ llvm::StringMap<arraymem> mArrayMems;
+ llvm::StringMap<localArg> mLocalArgs;
+ llvm::StringMap<kernelArg> mKernelArgs;
+ llvm::StringMap<kernel> mKernels;
+ llvm::StringMap<constPtr> mConstMems;
+ llvm::StringMap<uint32_t> mFuncNames;
+ llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames;
+ llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;
+ std::set<llvm::StringRef> mByteStore;
+ std::set<llvm::StringRef> mIgnoreStr;
+ llvm::DenseMap<const Argument *, int32_t> mArgIDMap;
+ const char *symTab;
+ const AMDILSubtarget *mSTM;
+ size_t mOffset;
+ uint32_t mReservedBuffs;
+ uint32_t mCurrentCPOffset;
+ bool mDebugMode;
+};
+} // namespace llvm
+#endif // __AMDILGLOBALMANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILIOExpansion.cpp
new file mode 100644
index 00000000000..68d8eef344d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIOExpansion.cpp
@@ -0,0 +1,1160 @@
+//===----------- AMDILIOExpansion.cpp - IO Expansion Pass -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are
+// not expanded earlier in the pass because any pass before this can assume to
+// be able to generate a load/store instruction. So this pass can only have
+// passes that execute after it if no load/store instructions can be generated.
+//===----------------------------------------------------------------------===//
+#include "AMDILIOExpansion.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Value.h"
+
+using namespace llvm;
+
+char AMDILIOExpansion::ID = 0;
+namespace llvm {
+ FunctionPass*
+ createAMDILIOExpansion(TargetMachine &TM AMDIL_OPT_LEVEL_DECL)
+ {
+ return TM.getSubtarget<AMDILSubtarget>()
+ .device()->getIOExpansion(TM AMDIL_OPT_LEVEL_VAR);
+ }
+}
+
+AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) :
+ MachineFunctionPass(ID), TM(tm)
+{
+ mSTM = &tm.getSubtarget<AMDILSubtarget>();
+ mDebug = DEBUGME;
+ mTII = tm.getInstrInfo();
+ mKM = NULL;
+}
+
+AMDILIOExpansion::~AMDILIOExpansion()
+{
+}
+ bool
+AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF)
+{
+ mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
+ mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr *MI = MBI;
+ if (isIOInstruction(MI)) {
+ mBB = MBB;
+ saveInst = false;
+ expandIOInstruction(MI);
+ if (!saveInst) {
+ // erase returns the instruction after
+ // and we want the instruction before
+ MBI = MBB->erase(MI);
+ --MBI;
+ }
+ }
+ }
+ }
+ return false;
+}
+const char *AMDILIOExpansion::getPassName() const
+{
+ return "AMDIL Generic IO Expansion Pass";
+}
+ bool
+AMDILIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+ if (!MI) {
+ return false;
+ }
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ return true;
+ };
+ return false;
+}
+void
+AMDILIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+ assert(isIOInstruction(MI) && "Must be an IO instruction to "
+ "be passed to this function!");
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Not an IO Instruction!");
+ ExpandCaseToAllTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD);
+ expandGlobalLoad(MI);
+ break;
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD);
+ expandRegionLoad(MI);
+ break;
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
+ expandLocalLoad(MI);
+ break;
+ ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD);
+ expandConstantLoad(MI);
+ break;
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD);
+ expandPrivateLoad(MI);
+ break;
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+ expandConstantPoolLoad(MI);
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSTORE);
+ expandGlobalStore(MI);
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
+ expandPrivateStore(MI);
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE);
+ expandRegionStore(MI);
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
+ expandLocalStore(MI);
+ break;
+ }
+}
+ bool
+AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+ {
+ // This section of code is a workaround for the problem of
+ // globally scoped constant address variables. The problems
+ // comes that although they are declared in the constant
+ // address space, all variables must be allocated in the
+ // private address space. So when there is a load from
+ // the global address, it automatically goes into the private
+ // address space. However, the data section is placed in the
+ // constant address space so we need to check to see if our
+ // load base address is a global variable or not. Only if it
+ // is not a global variable can we do the address calculation
+ // into the private memory ring.
+
+ MachineMemOperand& memOp = (**MI->memoperands_begin());
+ const Value *V = memOp.getValue();
+ if (V) {
+ const GlobalValue *GV = dyn_cast<GlobalVariable>(V);
+ return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)
+ && !(GV);
+ } else {
+ return false;
+ }
+ }
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+ return MI->getOperand(1).isReg();
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
+ return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
+ return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem);
+ };
+ return false;
+
+}
+ bool
+AMDILIOExpansion::isExtendLoad(MachineInstr *MI)
+{
+ return isSExtLoadInst(TM.getInstrInfo(), MI) ||
+ isZExtLoadInst(TM.getInstrInfo(), MI) ||
+ isAExtLoadInst(TM.getInstrInfo(), MI)
+ || isSWSExtLoadInst(MI);
+}
+
+ bool
+AMDILIOExpansion::isHardwareRegion(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+ break;
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE)
+ return mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ };
+ return false;
+}
+ bool
+AMDILIOExpansion::isHardwareLocal(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+ break;
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+ return mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ };
+ return false;
+}
+ bool
+AMDILIOExpansion::isPackedData(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ default:
+ if (isTruncStoreInst(TM.getInstrInfo(), MI)) {
+ switch (MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ switch (getMemorySize(MI)) {
+ case 2:
+ case 4:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ switch (getMemorySize(MI)) {
+ case 4:
+ case 8:
+ return true;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ break;
+ ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD);
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSTORE);
+ return true;
+ }
+ return false;
+}
+
+ bool
+AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+ {
+ uint32_t x = 0;
+ uint32_t num = MI->getNumOperands();
+ for (x = 0; x < num; ++x) {
+ if (MI->getOperand(x).isCPI()) {
+ return true;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+ bool
+AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
+{
+ if (!mType) {
+ return false;
+ }
+ if (dyn_cast<PointerType>(mType)) {
+ PointerType *PT = dyn_cast<PointerType>(mType);
+ return isNbitType(PT->getElementType(), nBits);
+ } else if (dyn_cast<StructType>(mType)) {
+ return getTypeSize(mType) == nBits;
+ } else if (dyn_cast<VectorType>(mType)) {
+ VectorType *VT = dyn_cast<VectorType>(mType);
+ size_t size = VT->getScalarSizeInBits();
+ return (isScalar ?
+ VT->getNumElements() * size == nBits : size == nBits);
+ } else if (dyn_cast<ArrayType>(mType)) {
+ ArrayType *AT = dyn_cast<ArrayType>(mType);
+ size_t size = AT->getScalarSizeInBits();
+ return (isScalar ?
+ AT->getNumElements() * size == nBits : size == nBits);
+ } else if (mType->isSized()) {
+ return mType->getScalarSizeInBits() == nBits;
+ } else {
+ assert(0 && "Found a type that we don't know how to handle!");
+ return false;
+ }
+}
+
+ bool
+AMDILIOExpansion::isHardwareInst(MachineInstr *MI)
+{
+ AMDILAS::InstrResEnc curRes;
+ curRes.u16all = MI->getAsmPrinterFlags();
+ return curRes.bits.HardwareInst;
+}
+
+REG_PACKED_TYPE
+AMDILIOExpansion::getPackedID(MachineInstr *MI)
+{
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALSTORE_v2i8:
+ case AMDIL::LOCALSTORE_v2i8:
+ case AMDIL::REGIONSTORE_v2i8:
+ case AMDIL::PRIVATESTORE_v2i8:
+ return PACK_V2I8;
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALSTORE_v4i8:
+ case AMDIL::LOCALSTORE_v4i8:
+ case AMDIL::REGIONSTORE_v4i8:
+ case AMDIL::PRIVATESTORE_v4i8:
+ return PACK_V4I8;
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALSTORE_v2i16:
+ case AMDIL::LOCALSTORE_v2i16:
+ case AMDIL::REGIONSTORE_v2i16:
+ case AMDIL::PRIVATESTORE_v2i16:
+ return PACK_V2I16;
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ case AMDIL::GLOBALSTORE_v4i16:
+ case AMDIL::LOCALSTORE_v4i16:
+ case AMDIL::REGIONSTORE_v4i16:
+ case AMDIL::PRIVATESTORE_v4i16:
+ return PACK_V4I16;
+ case AMDIL::GLOBALLOAD_v2i8:
+ case AMDIL::GLOBALSEXTLOAD_v2i8:
+ case AMDIL::GLOBALAEXTLOAD_v2i8:
+ case AMDIL::GLOBALZEXTLOAD_v2i8:
+ case AMDIL::LOCALLOAD_v2i8:
+ case AMDIL::LOCALSEXTLOAD_v2i8:
+ case AMDIL::LOCALAEXTLOAD_v2i8:
+ case AMDIL::LOCALZEXTLOAD_v2i8:
+ case AMDIL::REGIONLOAD_v2i8:
+ case AMDIL::REGIONSEXTLOAD_v2i8:
+ case AMDIL::REGIONAEXTLOAD_v2i8:
+ case AMDIL::REGIONZEXTLOAD_v2i8:
+ case AMDIL::PRIVATELOAD_v2i8:
+ case AMDIL::PRIVATESEXTLOAD_v2i8:
+ case AMDIL::PRIVATEAEXTLOAD_v2i8:
+ case AMDIL::PRIVATEZEXTLOAD_v2i8:
+ case AMDIL::CONSTANTLOAD_v2i8:
+ case AMDIL::CONSTANTSEXTLOAD_v2i8:
+ case AMDIL::CONSTANTAEXTLOAD_v2i8:
+ case AMDIL::CONSTANTZEXTLOAD_v2i8:
+ return UNPACK_V2I8;
+ case AMDIL::GLOBALLOAD_v4i8:
+ case AMDIL::GLOBALSEXTLOAD_v4i8:
+ case AMDIL::GLOBALAEXTLOAD_v4i8:
+ case AMDIL::GLOBALZEXTLOAD_v4i8:
+ case AMDIL::LOCALLOAD_v4i8:
+ case AMDIL::LOCALSEXTLOAD_v4i8:
+ case AMDIL::LOCALAEXTLOAD_v4i8:
+ case AMDIL::LOCALZEXTLOAD_v4i8:
+ case AMDIL::REGIONLOAD_v4i8:
+ case AMDIL::REGIONSEXTLOAD_v4i8:
+ case AMDIL::REGIONAEXTLOAD_v4i8:
+ case AMDIL::REGIONZEXTLOAD_v4i8:
+ case AMDIL::PRIVATELOAD_v4i8:
+ case AMDIL::PRIVATESEXTLOAD_v4i8:
+ case AMDIL::PRIVATEAEXTLOAD_v4i8:
+ case AMDIL::PRIVATEZEXTLOAD_v4i8:
+ case AMDIL::CONSTANTLOAD_v4i8:
+ case AMDIL::CONSTANTSEXTLOAD_v4i8:
+ case AMDIL::CONSTANTAEXTLOAD_v4i8:
+ case AMDIL::CONSTANTZEXTLOAD_v4i8:
+ return UNPACK_V4I8;
+ case AMDIL::GLOBALLOAD_v2i16:
+ case AMDIL::GLOBALSEXTLOAD_v2i16:
+ case AMDIL::GLOBALAEXTLOAD_v2i16:
+ case AMDIL::GLOBALZEXTLOAD_v2i16:
+ case AMDIL::LOCALLOAD_v2i16:
+ case AMDIL::LOCALSEXTLOAD_v2i16:
+ case AMDIL::LOCALAEXTLOAD_v2i16:
+ case AMDIL::LOCALZEXTLOAD_v2i16:
+ case AMDIL::REGIONLOAD_v2i16:
+ case AMDIL::REGIONSEXTLOAD_v2i16:
+ case AMDIL::REGIONAEXTLOAD_v2i16:
+ case AMDIL::REGIONZEXTLOAD_v2i16:
+ case AMDIL::PRIVATELOAD_v2i16:
+ case AMDIL::PRIVATESEXTLOAD_v2i16:
+ case AMDIL::PRIVATEAEXTLOAD_v2i16:
+ case AMDIL::PRIVATEZEXTLOAD_v2i16:
+ case AMDIL::CONSTANTLOAD_v2i16:
+ case AMDIL::CONSTANTSEXTLOAD_v2i16:
+ case AMDIL::CONSTANTAEXTLOAD_v2i16:
+ case AMDIL::CONSTANTZEXTLOAD_v2i16:
+ return UNPACK_V2I16;
+ case AMDIL::GLOBALLOAD_v4i16:
+ case AMDIL::GLOBALSEXTLOAD_v4i16:
+ case AMDIL::GLOBALAEXTLOAD_v4i16:
+ case AMDIL::GLOBALZEXTLOAD_v4i16:
+ case AMDIL::LOCALLOAD_v4i16:
+ case AMDIL::LOCALSEXTLOAD_v4i16:
+ case AMDIL::LOCALAEXTLOAD_v4i16:
+ case AMDIL::LOCALZEXTLOAD_v4i16:
+ case AMDIL::REGIONLOAD_v4i16:
+ case AMDIL::REGIONSEXTLOAD_v4i16:
+ case AMDIL::REGIONAEXTLOAD_v4i16:
+ case AMDIL::REGIONZEXTLOAD_v4i16:
+ case AMDIL::PRIVATELOAD_v4i16:
+ case AMDIL::PRIVATESEXTLOAD_v4i16:
+ case AMDIL::PRIVATEAEXTLOAD_v4i16:
+ case AMDIL::PRIVATEZEXTLOAD_v4i16:
+ case AMDIL::CONSTANTLOAD_v4i16:
+ case AMDIL::CONSTANTSEXTLOAD_v4i16:
+ case AMDIL::CONSTANTAEXTLOAD_v4i16:
+ case AMDIL::CONSTANTZEXTLOAD_v4i16:
+ return UNPACK_V4I16;
+ };
+ return NO_PACKING;
+}
+
+ uint32_t
+AMDILIOExpansion::getPointerID(MachineInstr *MI)
+{
+ AMDILAS::InstrResEnc curInst;
+ getAsmPrinterFlags(MI, curInst);
+ return curInst.bits.ResourceID;
+}
+
+ uint32_t
+AMDILIOExpansion::getShiftSize(MachineInstr *MI)
+{
+ switch(getPackedID(MI)) {
+ default:
+ return 0;
+ case PACK_V2I8:
+ case PACK_V4I8:
+ case UNPACK_V2I8:
+ case UNPACK_V4I8:
+ return 1;
+ case PACK_V2I16:
+ case PACK_V4I16:
+ case UNPACK_V2I16:
+ case UNPACK_V4I16:
+ return 2;
+ }
+ return 0;
+}
+ uint32_t
+AMDILIOExpansion::getMemorySize(MachineInstr *MI)
+{
+ if (MI->memoperands_empty()) {
+ return 4;
+ }
+ return (uint32_t)((*MI->memoperands_begin())->getSize());
+}
+
+ void
+AMDILIOExpansion::expandLongExtend(MachineInstr *MI,
+ uint32_t numComps, uint32_t size, bool signedShift)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ switch(size) {
+ default:
+ assert(0 && "Found a case we don't handle!");
+ break;
+ case 8:
+ if (numComps == 1) {
+ expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32,
+ AMDIL::USHRVEC_i8,
+ 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE, signedShift);
+ } else if (numComps == 2) {
+ expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32,
+ AMDIL::USHRVEC_v2i8,
+ 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE_v2i64, signedShift);
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ break;
+ case 16:
+ if (numComps == 1) {
+ expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32,
+ AMDIL::USHRVEC_i16,
+ 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE, signedShift);
+ } else if (numComps == 2) {
+ expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32,
+ AMDIL::USHRVEC_v2i16,
+ 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE_v2i64, signedShift);
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ break;
+ case 32:
+ if (numComps == 1) {
+ if (signedShift) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(31));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ }
+ } else if (numComps == 2) {
+ if (signedShift) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(31));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1012);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ }
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ };
+}
+ void
+AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI,
+ unsigned SHLop, unsigned SHRop, unsigned USHRop,
+ unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+ unsigned LCRop, bool signedShift)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*mBB, MI, DL, mTII->get(SHLop), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(SHLimm));
+ if (signedShift) {
+ BuildMI(*mBB, MI, DL, mTII->get(LCRop), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(SHRop), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(SHRimm));
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(USHRop), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(USHRimm));
+ BuildMI(*mBB, MI, MI->getDebugLoc(), mTII->get(LCRop), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ }
+}
+
+ void
+AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop,
+ unsigned SHRop, unsigned offset)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ offset = mMFI->addi32Literal(offset);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(SHLop), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addImm(offset);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(SHRop), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addImm(offset);
+}
+ void
+AMDILIOExpansion::expandExtendLoad(MachineInstr *MI)
+{
+ if (!isExtendLoad(MI)) {
+ return;
+ }
+ Type *mType = NULL;
+ if (!MI->memoperands_empty()) {
+ MachineMemOperand *memOp = (*MI->memoperands_begin());
+ const Value *moVal = (memOp) ? memOp->getValue() : NULL;
+ mType = (moVal) ? moVal->getType() : NULL;
+ }
+ unsigned opcode = 0;
+ DebugLoc DL = MI->getDebugLoc();
+ if (isZExtLoadInst(TM.getInstrInfo(), MI) || isAExtLoadInst(TM.getInstrInfo(), MI) || isSExtLoadInst(TM.getInstrInfo(), MI)) {
+ switch(MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ assert(0 && "Found an extending load that we don't handle!");
+ break;
+ case AMDIL::GPRI16RegClassID:
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16;
+ expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24);
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24);
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24);
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24);
+ break;
+ case AMDIL::GPRI32RegClassID:
+ // We can be a i8 or i16 bit sign extended value
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24);
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ // We can be a v2i8 or v2i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ // We can be a v4i8 or v4i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) {
+ opcode = isSExtLoadInst(TM.getInstrInfo(), MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRI64RegClassID:
+ // We can be a i8, i16 or i32 bit sign extended value
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ expandLongExtend(MI, 1, 8, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ expandLongExtend(MI, 1, 16, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) {
+ expandLongExtend(MI, 1, 32, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ // We can be a v2i8, v2i16 or v2i32 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ expandLongExtend(MI, 2, 8, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ expandLongExtend(MI, 2, 16, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) {
+ expandLongExtend(MI, 2, 32, isSExtLoadInst(TM.getInstrInfo(), MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRF32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v2f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRF64RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v2f32),
+ AMDIL::R1012).addReg(AMDIL::R1011).addImm(2);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1012)
+ .addReg(AMDIL::R1012);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::VINSERT_v2f64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012)
+ .addImm(1 << 8).addImm(1 << 8);
+ break;
+ };
+ } else if (isSWSExtLoadInst(MI)) {
+ switch(MI->getDesc().OpInfo[0].RegClass) {
+ case AMDIL::GPRI8RegClassID:
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ expandIntegerExtend(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_i8, 24);
+ }
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v2i8, 24);
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v4i8, AMDIL::SHRVEC_v4i8, 24);
+ break;
+ case AMDIL::GPRI16RegClassID:
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ expandIntegerExtend(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_i16, 16);
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v2i16, 16);
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v4i16, AMDIL::SHRVEC_v4i16, 16);
+ break;
+
+ };
+ }
+}
+
+ void
+AMDILIOExpansion::expandTruncData(MachineInstr *MI)
+{
+ MachineBasicBlock::iterator I = *MI;
+ if (!isTruncStoreInst(TM.getInstrInfo(), MI)) {
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default:
+ MI->dump();
+ assert(!"Found a trunc store instructions we don't handle!");
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ case AMDIL::GLOBALTRUNCSTORE_i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ case AMDIL::GLOBALTRUNCSTORE_i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_i64i32:
+ case AMDIL::LOCALTRUNCSTORE_i64i32:
+ case AMDIL::REGIONTRUNCSTORE_i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_i64i32:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i32:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i32:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i32:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_f64f32:
+ case AMDIL::LOCALTRUNCSTORE_f64f32:
+ case AMDIL::REGIONTRUNCSTORE_f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_f64f32:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1011).addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GLOBALTRUNCSTORE_v2f64f32:
+ case AMDIL::LOCALTRUNCSTORE_v2f64f32:
+ case AMDIL::REGIONTRUNCSTORE_v2f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_v2f64f32:
+ BuildMI(*mBB, I, DL, mTII->get(AMDIL::VEXTRACT_v2f64),
+ AMDIL::R1012).addReg(AMDIL::R1011).addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1011).addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1012).addReg(AMDIL::R1012);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v2f32),
+ AMDIL::R1011).addReg(AMDIL::R1011).addReg(AMDIL::R1012)
+ .addImm(1 << 8).addImm(1 << 8);
+ break;
+ }
+}
+ void
+AMDILIOExpansion::expandAddressCalc(MachineInstr *MI)
+{
+ if (!isAddrCalcInstr(MI)) {
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ switch(MI->getOpcode()) {
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32),
+ AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::T1);
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32),
+ AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::T2);
+ break;
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32),
+ AMDIL::R1010).addReg(AMDIL::R1010).addReg(AMDIL::SDP);
+ break;
+ default:
+ return;
+ }
+}
+ void
+AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ if (MI->getOperand(2).isReg()) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_i32),
+ AMDIL::R1010).addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(2).getReg());
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::MOVE_i32),
+ AMDIL::R1010).addReg(MI->getOperand(1).getReg());
+ }
+ MI->getOperand(1).setReg(AMDIL::R1010);
+ expandAddressCalc(MI);
+}
+ void
+AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle,
+ int id, bool ExtFPLoad)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ switch(swizzle) {
+ default:
+ BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
+ ? AMDIL::DTOF : AMDIL::MOVE_i32),
+ MI->getOperand(0).getReg())
+ .addImm(id);
+ break;
+ case 1:
+ case 2:
+ case 3:
+ BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
+ ? AMDIL::DTOF : AMDIL::MOVE_i32), AMDIL::R1001)
+ .addImm(id);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v4i32),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(AMDIL::R1001)
+ .addImm(swizzle + 1);
+ break;
+ };
+}
+ void
+AMDILIOExpansion::emitCPInst(MachineInstr* MI,
+ const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad)
+{
+ if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ uint32_t val = (uint32_t)(CFP->getValueAPF().bitcastToAPInt()
+ .getZExtValue());
+ uint32_t id = mMFI->addi32Literal(val);
+ if (!id) {
+ const APFloat &APF = CFP->getValueAPF();
+ union dtol_union {
+ double d;
+ uint64_t ul;
+ } conv;
+ if (&APF.getSemantics()
+ == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+ float fval = APF.convertToFloat();
+ conv.d = (double)fval;
+ } else {
+ conv.d = APF.convertToDouble();
+ }
+ id = mMFI->addi64Literal(conv.ul);
+ }
+ emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+ } else {
+ const APFloat &APF = CFP->getValueAPF();
+ union ftol_union {
+ double d;
+ uint64_t ul;
+ } conv;
+ if (&APF.getSemantics()
+ == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+ float fval = APF.convertToFloat();
+ conv.d = (double)fval;
+ } else {
+ conv.d = APF.convertToDouble();
+ }
+ uint32_t id = mMFI->getLongLits(conv.ul);
+ if (!id) {
+ id = mMFI->getIntLits((uint32_t)conv.ul);
+ }
+ emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+ }
+ } else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) {
+ int64_t val = 0;
+ if (CI) {
+ val = CI->getSExtValue();
+ }
+ if (CI->getBitWidth() == 64) {
+ emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad);
+ } else {
+ emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad);
+ }
+ } else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) {
+ uint32_t size = CA->getNumOperands();
+ assert(size < 5 && "Cannot handle a constant array where size > 4");
+ if (size > 4) {
+ size = 4;
+ }
+ for (uint32_t x = 0; x < size; ++x) {
+ emitCPInst(MI, CA->getOperand(0), KM, x, ExtFPLoad);
+ }
+ } else if (const ConstantAggregateZero* CAZ
+ = dyn_cast<ConstantAggregateZero>(C)) {
+ if (CAZ->isNullValue()) {
+ emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad);
+ }
+ } else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) {
+ uint32_t size = CS->getNumOperands();
+ assert(size < 5 && "Cannot handle a constant array where size > 4");
+ if (size > 4) {
+ size = 4;
+ }
+ for (uint32_t x = 0; x < size; ++x) {
+ emitCPInst(MI, CS->getOperand(0), KM, x, ExtFPLoad);
+ }
+ } else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) {
+ // TODO: Make this handle vectors natively up to the correct
+ // size
+ uint32_t size = CV->getNumOperands();
+ assert(size < 5 && "Cannot handle a constant array where size > 4");
+ if (size > 4) {
+ size = 4;
+ }
+ for (uint32_t x = 0; x < size; ++x) {
+ emitCPInst(MI, CV->getOperand(0), KM, x, ExtFPLoad);
+ }
+ } else {
+ // TODO: Do we really need to handle ConstantPointerNull?
+ // What about BlockAddress, ConstantExpr and Undef?
+ // How would these even be generated by a valid CL program?
+ assert(0 && "Found a constant type that I don't know how to handle");
+ }
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.h b/src/gallium/drivers/radeon/AMDILIOExpansion.h
new file mode 100644
index 00000000000..af4709a892c
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIOExpansion.h
@@ -0,0 +1,320 @@
+//===----------- AMDILIOExpansion.h - IO Expansion Pass -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are
+// not expanded earlier in the backend because any pass before this can assume to
+// be able to generate a load/store instruction. So this pass can only have
+// passes that execute after it if no load/store instructions can be generated
+// in those passes.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILIOEXPANSION_H_
+#define _AMDILIOEXPANSION_H_
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "IOExpansion"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MachineFunction;
+ class AMDILKernelManager;
+ class AMDILMachineFunctionInfo;
+ class AMDILSubtarget;
+ class MachineInstr;
+ class Constant;
+ class TargetInstrInfo;
+ class Type;
+ typedef enum {
+ NO_PACKING = 0,
+ PACK_V2I8,
+ PACK_V4I8,
+ PACK_V2I16,
+ PACK_V4I16,
+ UNPACK_V2I8,
+ UNPACK_V4I8,
+ UNPACK_V2I16,
+ UNPACK_V4I16,
+ UNPACK_LAST
+ } REG_PACKED_TYPE;
+ class AMDILIOExpansion : public MachineFunctionPass
+ {
+ public:
+ virtual ~AMDILIOExpansion();
+ virtual const char* getPassName() const;
+ bool runOnMachineFunction(MachineFunction &MF);
+ static char ID;
+ protected:
+ AMDILIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ TargetMachine &TM;
+ //
+ // @param MI Machine instruction to check.
+ // @brief checks to see if the machine instruction
+ // is an I/O instruction or not.
+ //
+ // @return true if I/O, false otherwise.
+ //
+ virtual bool
+ isIOInstruction(MachineInstr *MI);
+ // Wrapper function that calls the appropriate I/O
+ // expansion function based on the instruction type.
+ virtual void
+ expandIOInstruction(MachineInstr *MI);
+ virtual void
+ expandGlobalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionStore(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateStore(MachineInstr *MI) = 0;
+ virtual void
+ expandGlobalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandConstantLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandConstantPoolLoad(MachineInstr *MI) = 0;
+ bool
+ isAddrCalcInstr(MachineInstr *MI);
+ bool
+ isExtendLoad(MachineInstr *MI);
+ bool
+ isHardwareRegion(MachineInstr *MI);
+ bool
+ isHardwareLocal(MachineInstr *MI);
+ bool
+ isPackedData(MachineInstr *MI);
+ bool
+ isStaticCPLoad(MachineInstr *MI);
+ bool
+ isNbitType(Type *MI, uint32_t nBits, bool isScalar = true);
+ bool
+ isHardwareInst(MachineInstr *MI);
+ uint32_t
+ getMemorySize(MachineInstr *MI);
+ REG_PACKED_TYPE
+ getPackedID(MachineInstr *MI);
+ uint32_t
+ getShiftSize(MachineInstr *MI);
+ uint32_t
+ getPointerID(MachineInstr *MI);
+ void
+ expandTruncData(MachineInstr *MI);
+ void
+ expandLoadStartCode(MachineInstr *MI);
+ virtual void
+ expandStoreSetupCode(MachineInstr *MI) = 0;
+ void
+ expandAddressCalc(MachineInstr *MI);
+ void
+ expandLongExtend(MachineInstr *MI,
+ uint32_t numComponents, uint32_t size, bool signedShift);
+ void
+ expandLongExtendSub32(MachineInstr *MI,
+ unsigned SHLop, unsigned SHRop, unsigned USHRop,
+ unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+ unsigned LCRop, bool signedShift);
+ void
+ expandIntegerExtend(MachineInstr *MI, unsigned, unsigned, unsigned);
+ void
+ expandExtendLoad(MachineInstr *MI);
+ virtual void
+ expandPackedData(MachineInstr *MI) = 0;
+ void
+ emitCPInst(MachineInstr* MI, const Constant* C,
+ AMDILKernelManager* KM, int swizzle, bool ExtFPLoad);
+
+ bool mDebug;
+ const AMDILSubtarget *mSTM;
+ AMDILKernelManager *mKM;
+ MachineBasicBlock *mBB;
+ AMDILMachineFunctionInfo *mMFI;
+ const TargetInstrInfo *mTII;
+ bool saveInst;
+ private:
+ void
+ emitStaticCPLoad(MachineInstr* MI, int swizzle, int id,
+ bool ExtFPLoad);
+ }; // class AMDILIOExpansion
+
+ // Intermediate class that holds I/O code expansion that is common to the
+ // 7XX, Evergreen and Northern Island family of chips.
+ class AMDIL789IOExpansion : public AMDILIOExpansion {
+ public:
+ virtual ~AMDIL789IOExpansion();
+ virtual const char* getPassName() const;
+ protected:
+ AMDIL789IOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ virtual void
+ expandGlobalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionStore(MachineInstr *MI) = 0;
+ virtual void
+ expandGlobalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateStore(MachineInstr *MI);
+ virtual void
+ expandConstantLoad(MachineInstr *MI);
+ virtual void
+ expandPrivateLoad(MachineInstr *MI) ;
+ virtual void
+ expandConstantPoolLoad(MachineInstr *MI);
+ void
+ expandStoreSetupCode(MachineInstr *MI);
+ virtual void
+ expandPackedData(MachineInstr *MI);
+ private:
+ void emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
+ bool needsSelect);
+ void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit);
+ void emitComponentExtract(MachineInstr *MI, unsigned flag, unsigned src,
+ unsigned dst, bool beforeInst);
+ void emitDataLoadSelect(MachineInstr *MI);
+ }; // class AMDIL789IOExpansion
+ // Class that handles I/O emission for the 7XX family of devices.
+ class AMDIL7XXIOExpansion : public AMDIL789IOExpansion {
+ public:
+ AMDIL7XXIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ ~AMDIL7XXIOExpansion();
+ const char* getPassName() const;
+ protected:
+ void
+ expandGlobalStore(MachineInstr *MI);
+ void
+ expandLocalStore(MachineInstr *MI);
+ void
+ expandRegionStore(MachineInstr *MI);
+ void
+ expandGlobalLoad(MachineInstr *MI);
+ void
+ expandRegionLoad(MachineInstr *MI);
+ void
+ expandLocalLoad(MachineInstr *MI);
+ }; // class AMDIL7XXIOExpansion
+
+ // Class that handles image functions to expand them into the
+ // correct set of I/O instructions.
+ class AMDILImageExpansion : public AMDIL789IOExpansion {
+ public:
+ AMDILImageExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ virtual ~AMDILImageExpansion();
+ protected:
+ //
+ // @param MI Instruction iterator that has the sample instruction
+ // that needs to be taken care of.
+ // @brief transforms the __amdil_sample_data function call into a
+ // sample instruction in IL.
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+ //
+ // @param MI Instruction iterator that has the write instruction that
+ // needs to be taken care of.
+ // @brief transforms the __amdil_write_data function call into a
+ // simple UAV write instruction in IL.
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageStore(MachineBasicBlock *BB, MachineInstr *MI);
+ //
+ // @param MI Instruction interator that has the image parameter
+ // instruction
+ // @brief transforms the __amdil_get_image_params function call into
+ // a copy of data from a specific constant buffer to the register
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageParam(MachineBasicBlock *BB, MachineInstr *MI);
+
+ //
+ // @param MI Insturction that points to the image
+ // @brief transforms __amdil_sample_data into a sequence of
+ // if/else that selects the correct sample instruction.
+ //
+ // @warning This function is inefficient and works with no
+ // inlining.
+ //
+ virtual void
+ expandInefficientImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+ private:
+ AMDILImageExpansion(); // Do not implement.
+
+ }; // class AMDILImageExpansion
+
+ // Class that expands IO instructions for Evergreen and Northern
+ // Island family of devices.
+ class AMDILEGIOExpansion : public AMDILImageExpansion {
+ public:
+ AMDILEGIOExpansion(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ virtual ~AMDILEGIOExpansion();
+ const char* getPassName() const;
+ protected:
+ virtual bool
+ isIOInstruction(MachineInstr *MI);
+ virtual void
+ expandIOInstruction(MachineInstr *MI);
+ bool
+ isImageIO(MachineInstr *MI);
+ virtual void
+ expandGlobalStore(MachineInstr *MI);
+ void
+ expandLocalStore(MachineInstr *MI);
+ void
+ expandRegionStore(MachineInstr *MI);
+ virtual void
+ expandGlobalLoad(MachineInstr *MI);
+ void
+ expandRegionLoad(MachineInstr *MI);
+ void
+ expandLocalLoad(MachineInstr *MI);
+ virtual bool
+ isCacheableOp(MachineInstr *MI);
+ void
+ expandStoreSetupCode(MachineInstr *MI);
+ void
+ expandPackedData(MachineInstr *MI);
+ private:
+ bool
+ isArenaOp(MachineInstr *MI);
+ void
+ expandArenaSetup(MachineInstr *MI);
+ }; // class AMDILEGIOExpansion
+} // namespace llvm
+#endif // _AMDILIOEXPANSION_H_
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
new file mode 100644
index 00000000000..ff04d9d55bf
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,457 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
+// //for SelectionDAG operations.
+//
+namespace {
+class AMDILDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDILSubtarget &Subtarget;
+public:
+ AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+ virtual ~AMDILDAGToDAGISel();
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
+ SDNode *Select(SDNode *N);
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ virtual const char *getPassName() const;
+private:
+ SDNode *xformAtomicInst(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDILGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AMDILDAGToDAGISel::AMDILDAGToDAGISel(AMDILTargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL)
+ : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget<AMDILSubtarget>())
+{
+}
+
+AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {
+}
+
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDILDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::FrameIndex:
+ {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ unsigned int FI = FIN->getIndex();
+ EVT OpVT = N->getValueType(0);
+ unsigned int NewOpc = AMDIL::MOVE_i32;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+ }
+ }
+ break;
+ }
+ // For all atomic instructions, we need to add a constant
+ // operand that stores the resource ID in the instruction
+ if (Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC) {
+ N = xformAtomicInst(N);
+ }
+ return SelectCode(N);
+}
+
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+}
+
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
+ && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
+ {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDILDAGToDAGISel::getPassName() const {
+ return "AMDIL DAG->DAG Pattern Instruction Selection";
+}</