summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2011-10-05 10:56:10 -0400
committerTom Stellard <tstellar@gmail.com>2012-01-14 10:00:15 -0500
commite93a7d821c22595e2eaed70c6443f7692f130e30 (patch)
tree2b232fa23603d7c904cd7d69d391ed0d3728917a
parent0cd959dca725c98ad2e5006db498f0e5d6e6c23e (diff)
r600g: Add LLVM backendtgsi-rewrite-needs-testing
Commits by other contributors squashed into this one: Author: Vadim Girlin <vadimgirlin@gmail.com> Date: Fri Dec 16 07:32:09 2011 +0400 gallium/radeon: rework R600LowerShaderInstructionsPass Signed-off-by: Tom Stellard <thomas.stellard@amd.com> Author: Vadim Girlin <vadimgirlin@gmail.com> Date: Fri Dec 16 07:32:08 2011 +0400 gallium/radeon: disable NEG propagation Signed-off-by: Tom Stellard <thomas.stellard@amd.com> Author: Vadim Girlin <vadimgirlin@gmail.com> Date: Wed Dec 14 21:14:49 2011 +0400 gallivm: increase visibility of the get_output_ptr Signed-off-by: Tom Stellard <thomas.stellard@amd.com>
-rw-r--r--configs/autoconf.in6
-rw-r--r--configure.ac8
-rw-r--r--src/gallium/Makefile.template2
-rw-r--r--src/gallium/drivers/r600/Makefile31
-rw-r--r--src/gallium/drivers/r600/Makefile.sources1
-rw-r--r--src/gallium/drivers/r600/r600_asm.c6
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c433
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h25
-rw-r--r--src/gallium/drivers/r600/r600_shader.c297
-rw-r--r--src/gallium/drivers/radeon/AMDIL.h334
-rw-r--r--src/gallium/drivers/radeon/AMDIL.td54
-rw-r--r--src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp756
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.cpp186
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.h102
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.cpp191
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXDevice.h117
-rw-r--r--src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp590
-rw-r--r--src/gallium/drivers/radeon/AMDILAlgorithms.tpp134
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmBackend.cpp151
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmBackend.h94
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmPrinter.cpp879
-rw-r--r--src/gallium/drivers/radeon/AMDILAsmPrinter.h284
-rw-r--r--src/gallium/drivers/radeon/AMDILBarrierDetect.cpp326
-rw-r--r--src/gallium/drivers/radeon/AMDILBase.td149
-rw-r--r--src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp3304
-rw-r--r--src/gallium/drivers/radeon/AMDILCallingConv.td116
-rw-r--r--src/gallium/drivers/radeon/AMDILCompilerErrors.h116
-rw-r--r--src/gallium/drivers/radeon/AMDILCompilerWarnings.h72
-rw-r--r--src/gallium/drivers/radeon/AMDILConversions.td1062
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.cpp176
-rw-r--r--src/gallium/drivers/radeon/AMDILDevice.h171
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.cpp126
-rw-r--r--src/gallium/drivers/radeon/AMDILDeviceInfo.h125
-rw-r--r--src/gallium/drivers/radeon/AMDILDevices.h57
-rw-r--r--src/gallium/drivers/radeon/AMDILEGAsmPrinter.cpp198
-rw-r--r--src/gallium/drivers/radeon/AMDILEGAsmPrinter.h101
-rw-r--r--src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp1126
-rw-r--r--src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp120
-rw-r--r--src/gallium/drivers/radeon/AMDILELFWriterInfo.h101
-rw-r--r--src/gallium/drivers/radeon/AMDILEnumeratedTypes.td569
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp244
-rw-r--r--src/gallium/drivers/radeon/AMDILEvergreenDevice.h133
-rw-r--r--src/gallium/drivers/radeon/AMDILFixupKinds.h73
-rw-r--r--src/gallium/drivers/radeon/AMDILFormats.td492
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.cpp104
-rw-r--r--src/gallium/drivers/radeon/AMDILFrameLowering.h96
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.cpp1386
-rw-r--r--src/gallium/drivers/radeon/AMDILGlobalManager.h294
-rw-r--r--src/gallium/drivers/radeon/AMDILIOExpansion.cpp1216
-rw-r--r--src/gallium/drivers/radeon/AMDILIOExpansion.h360
-rw-r--r--src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp506
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.cpp5754
-rw-r--r--src/gallium/drivers/radeon/AMDILISelLowering.h576
-rw-r--r--src/gallium/drivers/radeon/AMDILImageExpansion.cpp210
-rw-r--r--src/gallium/drivers/radeon/AMDILInliner.cpp318
-rw-r--r--src/gallium/drivers/radeon/AMDILInstPrinter.cpp67
-rw-r--r--src/gallium/drivers/radeon/AMDILInstPrinter.h72
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.cpp763
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.h217
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrInfo.td156
-rw-r--r--src/gallium/drivers/radeon/AMDILInstrPatterns.td107
-rw-r--r--src/gallium/drivers/radeon/AMDILInstructions.td2476
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp237
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsicInfo.h90
-rw-r--r--src/gallium/drivers/radeon/AMDILIntrinsics.td746
-rw-r--r--src/gallium/drivers/radeon/AMDILKernel.h124
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.cpp1387
-rw-r--r--src/gallium/drivers/radeon/AMDILKernelManager.h216
-rw-r--r--src/gallium/drivers/radeon/AMDILLLVMPC.h61
-rw-r--r--src/gallium/drivers/radeon/AMDILLLVMVersion.h60
-rw-r--r--src/gallium/drivers/radeon/AMDILLiteralManager.cpp171
-rw-r--r--src/gallium/drivers/radeon/AMDILMCAsmInfo.cpp169
-rw-r--r--src/gallium/drivers/radeon/AMDILMCAsmInfo.h68
-rw-r--r--src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp200
-rw-r--r--src/gallium/drivers/radeon/AMDILMachineFunctionInfo.cpp643
-rw-r--r--src/gallium/drivers/radeon/AMDILMachineFunctionInfo.h461
-rw-r--r--src/gallium/drivers/radeon/AMDILMachinePeephole.cpp217
-rw-r--r--src/gallium/drivers/radeon/AMDILModuleInfo.cpp1300
-rw-r--r--src/gallium/drivers/radeon/AMDILModuleInfo.h199
-rw-r--r--src/gallium/drivers/radeon/AMDILMultiClass.td1480
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.cpp110
-rw-r--r--src/gallium/drivers/radeon/AMDILNIDevice.h99
-rw-r--r--src/gallium/drivers/radeon/AMDILNodes.td366
-rw-r--r--src/gallium/drivers/radeon/AMDILOperands.td78
-rw-r--r--src/gallium/drivers/radeon/AMDILPatterns.td545
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp1412
-rw-r--r--src/gallium/drivers/radeon/AMDILPointerManager.cpp2710
-rw-r--r--src/gallium/drivers/radeon/AMDILPointerManager.h249
-rw-r--r--src/gallium/drivers/radeon/AMDILPrintfConvert.cpp357
-rw-r--r--src/gallium/drivers/radeon/AMDILProfiles.td215
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterDefsScalar.td816
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterDefsV2.td408
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterDefsV4.td204
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.cpp275
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.h152
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterInfo.td1005
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterUsesScalar.td1
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterUsesV2.td1
-rw-r--r--src/gallium/drivers/radeon/AMDILRegisterUsesV4.td1
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.cpp222
-rw-r--r--src/gallium/drivers/radeon/AMDILSubtarget.h115
-rw-r--r--src/gallium/drivers/radeon/AMDILSwizzleEncoder.cpp1225
-rw-r--r--src/gallium/drivers/radeon/AMDILSwizzleEncoder.h81
-rw-r--r--src/gallium/drivers/radeon/AMDILTargetMachine.cpp259
-rw-r--r--src/gallium/drivers/radeon/AMDILTargetMachine.h155
-rw-r--r--src/gallium/drivers/radeon/AMDILTokenDesc.td166
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp727
-rw-r--r--src/gallium/drivers/radeon/AMDILUtilityFunctions.h403
-rw-r--r--src/gallium/drivers/radeon/AMDILVersion.td116
-rw-r--r--src/gallium/drivers/radeon/AMDISA.h55
-rw-r--r--src/gallium/drivers/radeon/AMDISA.td30
-rw-r--r--src/gallium/drivers/radeon/AMDISAConstants.pm35
-rw-r--r--src/gallium/drivers/radeon/AMDISAConvertToISA.cpp91
-rw-r--r--src/gallium/drivers/radeon/AMDISADelimitInstGroups.cpp194
-rw-r--r--src/gallium/drivers/radeon/AMDISAFixRegClasses.cpp85
-rw-r--r--src/gallium/drivers/radeon/AMDISAGenInstrEnums.pl130
-rw-r--r--src/gallium/drivers/radeon/AMDISAGenShaderPatterns.pl40
-rw-r--r--src/gallium/drivers/radeon/AMDISAISelLowering.cpp43
-rw-r--r--src/gallium/drivers/radeon/AMDISAISelLowering.h44
-rw-r--r--src/gallium/drivers/radeon/AMDISAInstrEnums.h72
-rw-r--r--src/gallium/drivers/radeon/AMDISAInstrInfo.cpp126
-rw-r--r--src/gallium/drivers/radeon/AMDISAInstrInfo.h75
-rw-r--r--src/gallium/drivers/radeon/AMDISAInstructions.td93
-rw-r--r--src/gallium/drivers/radeon/AMDISAIntrinsics.td68
-rw-r--r--src/gallium/drivers/radeon/AMDISALowerShaderInstructions.cpp19
-rw-r--r--src/gallium/drivers/radeon/AMDISALowerShaderInstructions.h24
-rw-r--r--src/gallium/drivers/radeon/AMDISARegisterInfo.cpp39
-rw-r--r--src/gallium/drivers/radeon/AMDISARegisterInfo.h60
-rw-r--r--src/gallium/drivers/radeon/AMDISARegisterInfo.td28
-rw-r--r--src/gallium/drivers/radeon/AMDISAReorderPreloadInstructions.cpp81
-rw-r--r--src/gallium/drivers/radeon/AMDISATargetMachine.cpp304
-rw-r--r--src/gallium/drivers/radeon/AMDISATargetMachine.h101
-rw-r--r--src/gallium/drivers/radeon/AMDISAUtil.cpp161
-rw-r--r--src/gallium/drivers/radeon/AMDISAUtil.h61
-rw-r--r--src/gallium/drivers/radeon/LICENSE.TXT48
-rw-r--r--src/gallium/drivers/radeon/Makefile68
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources78
-rw-r--r--src/gallium/drivers/radeon/Processors.td66
-rw-r--r--src/gallium/drivers/radeon/R600CodeEmitter.cpp752
-rw-r--r--src/gallium/drivers/radeon/R600GenRegisterInfo.pl132
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.cpp59
-rw-r--r--src/gallium/drivers/radeon/R600InstrInfo.h83
-rw-r--r--src/gallium/drivers/radeon/R600Instructions.td546
-rw-r--r--src/gallium/drivers/radeon/R600Intrinsics.td30
-rw-r--r--src/gallium/drivers/radeon/R600LowerInstructions.cpp282
-rw-r--r--src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp184
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp25
-rw-r--r--src/gallium/drivers/radeon/R600MachineFunctionInfo.h25
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.cpp93
-rw-r--r--src/gallium/drivers/radeon/R600RegisterInfo.h55
-rw-r--r--src/gallium/drivers/radeon/TargetInfo/AMDILTargetInfo.cpp61
-rw-r--r--src/gallium/drivers/radeon/TargetInfo/CMakeLists.txt7
-rw-r--r--src/gallium/drivers/radeon/generateRegisters.pl135
-rw-r--r--src/gallium/drivers/radeon/macrodata.cpp338
-rw-r--r--src/gallium/drivers/radeon/macrodata.h95
-rw-r--r--src/gallium/drivers/radeon/macrodb.h108
-rw-r--r--src/gallium/drivers/radeon/macrodb_gen.h32186
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h132
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.cpp143
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c527
160 files changed, 86909 insertions, 26 deletions
diff --git a/configs/autoconf.in b/configs/autoconf.in
index 9ea7588a118..757992bc495 100644
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@@ -32,9 +32,12 @@ INTEL_LIBS = @INTEL_LIBS@
INTEL_CFLAGS = @INTEL_CFLAGS@
X11_LIBS = @X11_LIBS@
X11_CFLAGS = @X11_CFLAGS@
+LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBS = @LLVM_LIBS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
GLW_CFLAGS = @GLW_CFLAGS@
GLX_TLS = @GLX_TLS@
DRI_CFLAGS = @DRI_CFLAGS@
@@ -58,6 +61,9 @@ AWK = @AWK@
GREP = @GREP@
NM = @NM@
+# Perl
+PERL = @PERL@
+
# Python and flags (generally only needed by the developers)
PYTHON2 = @PYTHON2@
PYTHON_FLAGS = -t -O -O
diff --git a/configure.ac b/configure.ac
index 7c50e3c009d..5c1b5b460dd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -51,6 +51,8 @@ test "x$FLEX" = "x" && AC_MSG_ERROR([flex is needed to build Mesa])
AC_PATH_PROG([BISON], [bison])
test "x$BISON" = "x" && AC_MSG_ERROR([bison is needed to build Mesa])
+AC_PATH_PROG([PERL], [perl])
+
dnl Our fallback install-sh is a symlink to minstall. Use the existing
dnl configuration in that case.
AC_PROG_INSTALL
@@ -1645,9 +1647,12 @@ if test "x$with_gallium_drivers" != x; then
SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets"
fi
+AC_SUBST([LLVM_BINDIR])
AC_SUBST([LLVM_CFLAGS])
+AC_SUBST([LLVM_CXXFLAGS])
AC_SUBST([LLVM_LIBS])
AC_SUBST([LLVM_LDFLAGS])
+AC_SUBST([LLVM_INCLUDEDIR])
AC_SUBST([LLVM_VERSION])
case "x$enable_opengl$enable_gles1$enable_gles2" in
@@ -1773,6 +1778,9 @@ if test "x$enable_gallium_llvm" = xyes; then
LLVM_LIBS="`$LLVM_CONFIG --libs`"
LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
+ LLVM_BINDIR=`$LLVM_CONFIG --bindir`
+ LLVM_CXXFLAGS=`$LLVM_CONFIG --cxxflags`
+ LLVM_INCLUDEDIR=`$LLVM_CONFIG --includedir`
DEFINES="$DEFINES -D__STDC_CONSTANT_MACROS"
MESA_LLVM=1
else
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template
index de40f126ce5..6f3c0222196 100644
--- a/src/gallium/Makefile.template
+++ b/src/gallium/Makefile.template
@@ -48,7 +48,7 @@ tags:
etags `find . -name \*.[ch]` `find $(TOP)/src/gallium/include -name \*.h`
# Remove .o and backup files
-clean:
+clean: $(CLEAN_DEPS)
rm -f $(OBJECTS) $(GENERATED_SOURCES) $(PROGS) lib$(LIBNAME).a depend depend.bak $(CLEAN_EXTRA)
# Dummy target
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index 80f8cbfc803..9f38e466bea 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -3,9 +3,38 @@ include $(TOP)/configs/current
LIBNAME = r600
-LIBRARY_INCLUDES = -I$(TOP)/include
+LIBRARY_INCLUDES = \
+ -I$(TOP)/include \
+ -I$(TOP)/src/gallium/drivers/radeon/
+
# get C_SOURCES
include Makefile.sources
+ifeq ($(HAVE_LLVM),0x0300)
+
+LIBRADEON = $(TOP)/src/gallium/drivers/radeon/libradeon.a
+
+EXTRA_OBJECTS = \
+ $(LIBRADEON)
+
+CLEAN_DEPS = \
+ clean_radeon
+
+DEFAULT_DEPS = \
+ make_radeon
+
+endif
+
include ../../Makefile.template
+
+$(LIBRADEON): make_radeon
+
+.PHONY: make_radeon clean_radeon
+
+make_radeon:
+ touch $(LIBRADEON)
+ $(MAKE) -C $(TOP)/src/gallium/drivers/radeon/ default
+
+clean_radeon:
+ $(MAKE) -C $(TOP)/src/gallium/drivers/radeon/ clean
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
index e7813ef51c8..b450a1d73f1 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -3,6 +3,7 @@ C_SOURCES := \
r600_blit.c \
r600_buffer.c \
r600_hw_context.c \
+ r600_llvm.c \
r600_pipe.c \
r600_query.c \
r600_resource.c \
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index e07b2cbffdd..c7c1a30b960 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -997,8 +997,14 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu
return 0;
have_mova = 1;
}
+ /* check for reduction instructions in the previous group */
+ if (is_alu_reduction_inst(bc, prev[i]))
+ return 0;
num_once_inst += is_alu_once_inst(bc, prev[i]);
}
+ /* check for reduction instructions in this group */
+ if (slots[i] && is_alu_reduction_inst(bc, slots[i]))
+ return 0;
if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
return 0;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
new file mode 100644
index 00000000000..b4be251b903
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -0,0 +1,433 @@
+
+#include "r600_llvm.h"
+
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_gather.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_double_list.h"
+
+#include "r600.h"
+#include "r600_asm.h"
+#include "r600_opcodes.h"
+#include "r600_shader.h"
+#include "radeon_llvm.h"
+
+#include <llvm-c/Transforms/Scalar.h>
+#include <stdio.h>
+
+static unsigned reg_index_soa(
+ unsigned index,
+ unsigned chan)
+{
+ return (index * 4) + chan;
+}
+
+static LLVMValueRef
+emit_array_index(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
+
+ LLVMValueRef addr = LLVMBuildLoad(gallivm->builder,
+ bld->addr[reg->Indirect.Index][swizzle], "");
+ LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index);
+ LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, "");
+ LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index,
+lp_build_const_int32(gallivm, 4), "");
+ LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index,
+lp_build_const_int32(gallivm, swizzle), "");
+
+ return array_index;
+}
+
+static LLVMValueRef
+emit_fetch_immediate(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ return bld->immediates[reg->Register.Index][swizzle];
+}
+
+static LLVMValueRef
+emit_fetch_input(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ struct radeon_llvm_context * ctx = bld_base->userdata;
+ if (swizzle == ~0) {
+ LLVMValueRef values[NUM_CHANNELS] = {};
+ unsigned chan;
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ values[chan] = ctx->inputs[reg_index_soa(
+ reg->Register.Index, chan)];
+ }
+ return lp_build_gather_values(bld_base->base.gallivm, values,
+ NUM_CHANNELS);
+ } else {
+ return ctx->inputs[reg_index_soa(reg->Register.Index, swizzle)];
+ }
+}
+
+static LLVMValueRef
+emit_fetch_temporary(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ if (reg->Register.Indirect) {
+LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
+LLVMValueRef ptr = LLVMBuildGEP(builder, bld->temps_array, &array_index,
+1, "");
+return LLVMBuildLoad(builder, ptr, "");
+ } else {
+LLVMValueRef temp_ptr;
+temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
+return LLVMBuildLoad(builder, temp_ptr, "");
+ }
+}
+
+static LLVMValueRef
+emit_fetch_output(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ if (reg->Register.Indirect) {
+LLVMValueRef array_index = emit_array_index(bld, reg, swizzle);
+LLVMValueRef ptr = LLVMBuildGEP(builder, bld->outputs_array, &array_index,
+1, "");
+return LLVMBuildLoad(builder, ptr, "");
+ } else {
+LLVMValueRef temp_ptr;
+temp_ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+return LLVMBuildLoad(builder, temp_ptr, "");
+ }
+}
+
+static LLVMValueRef llvm_fetch_const(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register *reg,
+ unsigned swizzle)
+{
+ return lp_build_intrinsic_unary(bld_base->base.gallivm->builder,
+ "llvm.AMDISA.load.const", bld_base->base.elem_type,
+ lp_build_const_int32(bld_base->base.gallivm,
+ reg_index_soa(reg->Register.Index, swizzle)));
+}
+
+static void llvm_load_input(
+ struct radeon_llvm_context * ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl)
+{
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+ unsigned soa_index = reg_index_soa(input_index, chan);
+
+ /* The * 4 is assuming that we are in soa mode. */
+ LLVMValueRef reg = lp_build_const_int32(
+ ctx->soa.bld_base.base.gallivm,
+ soa_index + (ctx->reserved_reg_count * 4));
+ ctx->inputs[soa_index] = lp_build_intrinsic_unary(
+ ctx->soa.bld_base.base.gallivm->builder,
+ "llvm.R600.load.input",
+ ctx->soa.bld_base.base.elem_type, reg);
+ }
+}
+
+static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
+{
+ struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+ struct lp_build_context * base = &bld_base->base;
+ unsigned i;
+
+ /* Reserve special input registers */
+ for (i = 0; i < ctx->reserved_reg_count; i++) {
+ unsigned chan;
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ LLVMValueRef reg;
+ LLVMValueRef reg_index = lp_build_const_int32(
+ base->gallivm,
+ reg_index_soa(i, chan));
+ reg = lp_build_intrinsic_unary(base->gallivm->builder,
+ "llvm.AMDISA.reserve.reg",
+ base->elem_type, reg_index);
+ lp_build_intrinsic_unary(base->gallivm->builder,
+ "llvm.AMDISA.export.reg",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ reg);
+ }
+ }
+}
+
+static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+ struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+ struct lp_build_context * base = &bld_base->base;
+ unsigned i;
+
+ /* Add the necessary export instructions */
+ for (i = 0; i < ctx->output_reg_count; i++) {
+ unsigned chan;
+ for (chan = 0; chan < NUM_CHANNELS; chan++) {
+ LLVMValueRef output;
+ LLVMValueRef store_output;
+ unsigned adjusted_reg_idx = i +
+ ctx->reserved_reg_count;
+ LLVMValueRef reg_index = lp_build_const_int32(
+ base->gallivm,
+ reg_index_soa(adjusted_reg_idx, chan));
+
+ output = LLVMBuildLoad(base->gallivm->builder,
+ ctx->soa.outputs[i][chan], "");
+
+ store_output = lp_build_intrinsic_binary(
+ base->gallivm->builder,
+ "llvm.AMDISA.store.output",
+ base->elem_type,
+ output, reg_index);
+
+ lp_build_intrinsic_unary(base->gallivm->builder,
+ "llvm.AMDISA.export.reg",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ store_output);
+ }
+ }
+}
+
+static void llvm_emit_tex(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMValueRef args[3];
+
+ args[0] = emit_data->args[0];
+ args[1] = lp_build_const_int32(gallivm,
+ emit_data->inst->Src[1].Register.Index);
+ args[2] = lp_build_const_int32(gallivm,
+ emit_data->inst->Texture.Texture);
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
+ action->intr_name,
+ emit_data->dst_type, args, 3);
+}
+
+static void dp_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_context * base = &bld_base->base;
+ unsigned chan;
+ LLVMValueRef elements[2][4];
+ unsigned opcode = emit_data->inst->Instruction.Opcode;
+ unsigned dp_components = (opcode == TGSI_OPCODE_DP2 ? 2 :
+ (opcode == TGSI_OPCODE_DP3 ? 3 : 4));
+ for (chan = 0 ; chan < dp_components; chan++) {
+ elements[0][chan] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0, chan);
+ elements[1][chan] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1, chan);
+ }
+
+ for ( ; chan < 4; chan++) {
+ elements[0][chan] = base->zero;
+ elements[1][chan] = base->zero;
+ }
+
+ /* Fix up for DPH */
+ if (opcode == TGSI_OPCODE_DPH) {
+ elements[0][CHAN_W] = base->one;
+ }
+
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ elements[0], 4);
+ emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
+ elements[1], 4);
+ emit_data->arg_count = 2;
+
+ emit_data->dst_type = base->elem_type;
+}
+
+static struct lp_build_opcode_action dot_action = {
+ .fetch_args = dp_fetch_args,
+ .emit = lp_build_tgsi_intrinsic,
+ .intr_name = "llvm.AMDISA.dp4"
+};
+
+#if 0
+
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TXL:
+emit_data.dst_type = LLVMVectorType(base->elem_type, 4);
+tgsi_llvm_fetch_args_tex_soa(bld_base, &emit_data);
+dst = action->emit(action, bld_base, &emit_data);
+store_vec4_soa(bld_base, inst, dst);
+break;
+
+#endif
+
+static void txp_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef src_w;
+ unsigned chan;
+ LLVMValueRef coords[4];
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+ src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, CHAN_W);
+
+ for (chan = 0; chan < 3; chan++ ) {
+ LLVMValueRef arg = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0, chan);
+ coords[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DIV, arg, src_w);
+ }
+ coords[3] = bld_base->base.one;
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->arg_count = 1;
+}
+
+#if 0
+store_vec4_soa(bld_base, inst, dst);
+#endif
+LLVMModuleRef r600_tgsi_llvm(
+ struct radeon_llvm_context * ctx,
+ const struct tgsi_token * tokens)
+{
+ struct tgsi_shader_info shader_info;
+ struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
+ radeon_llvm_context_init(ctx);
+ tgsi_scan_shader(tokens, &shader_info);
+
+ bld_base->info = &shader_info;
+ bld_base->userdata = ctx;
+ bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
+ bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
+ bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_output;
+ bld_base->emit_prologue = llvm_emit_prologue;
+ bld_base->emit_epilogue = llvm_emit_epilogue;
+ ctx->userdata = ctx;
+ ctx->load_input = llvm_load_input;
+
+ bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+
+ lp_build_tgsi_llvm(bld_base, tokens);
+
+ /* End the main function with a void return */
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+ /* Create the pass manager */
+ ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
+ bld_base->base.gallivm->module);
+
+ /* This pass should eliminate all the load and store instructions */
+ LLVMAddPromoteMemoryToRegisterPass(bld_base->base.gallivm->passmgr);
+
+ /* Add some optimization passes */
+ LLVMAddScalarReplAggregatesPass(bld_base->base.gallivm->passmgr);
+ LLVMAddCFGSimplificationPass(bld_base->base.gallivm->passmgr);
+
+ /* Run the passes */
+ LLVMRunFunctionPassManager(bld_base->base.gallivm->passmgr, ctx->main_fn);
+
+ LLVMDisposeBuilder(bld_base->base.gallivm->builder);
+ LLVMDisposePassManager(bld_base->base.gallivm->passmgr);
+
+ return ctx->gallivm.module;
+}
+
+unsigned r600_llvm_compile(
+ LLVMModuleRef mod,
+ unsigned char ** inst_bytes,
+ unsigned * inst_byte_count,
+ enum radeon_family family,
+ unsigned dump)
+{
+ const char * gpu_family;
+
+ switch (family) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ gpu_family = "rv710";
+ break;
+ case CHIP_RV730:
+ gpu_family = "rv730";
+ break;
+ case CHIP_RV770:
+ gpu_family = "rv770";
+ break;
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_CEDAR:
+ gpu_family = "cedar";
+ break;
+ case CHIP_REDWOOD:
+ gpu_family = "redwood";
+ break;
+ case CHIP_JUNIPER:
+ gpu_family = "juniper";
+ break;
+ case CHIP_HEMLOCK:
+ case CHIP_CYPRESS:
+ gpu_family = "cypress";
+ break;
+ case CHIP_BARTS:
+ gpu_family = "barts";
+ break;
+ case CHIP_TURKS:
+ gpu_family = "turks";
+ break;
+ case CHIP_CAICOS:
+ gpu_family = "caicos";
+ break;
+ case CHIP_CAYMAN:
+ gpu_family = "cayman";
+ break;
+ default:
+ gpu_family = "";
+ fprintf(stderr, "Chip not supported by r600 llvm "
+ "backend, please file a bug at bugs.freedesktop.org\n");
+ break;
+ }
+
+ return radeon_llvm_compile(mod, inst_bytes, inst_byte_count,
+ gpu_family, dump);
+}
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
new file mode 100644
index 00000000000..527f8ad11df
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -0,0 +1,25 @@
+
+#ifndef R600_LLVM_H
+#define R600_LLVM_H
+
+#include <llvm-c/Core.h>
+
+#include "gallivm/lp_bld_tgsi.h"
+
+struct r600_shader_ctx;
+struct radeon_llvm_context;
+enum radeon_family;
+
+LLVMModuleRef r600_tgsi_llvm(
+ struct radeon_llvm_context * ctx,
+ const struct tgsi_token * tokens);
+
+unsigned r600_llvm_compile(
+ LLVMModuleRef mod,
+ unsigned char ** inst_bytes,
+ unsigned * inst_byte_count,
+ enum radeon_family family,
+ unsigned dump);
+
+
+#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index ad4aded95cb..10bba46a4c9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -26,12 +26,15 @@
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "r600_llvm.h"
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
+#include "radeon_llvm.h"
#include <stdio.h>
#include <errno.h>
#include <byteswap.h>
@@ -202,6 +205,196 @@ struct r600_shader_tgsi_instruction {
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
+static int tgsi_else(struct r600_shader_ctx *ctx);
+static int tgsi_endif(struct r600_shader_ctx *ctx);
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
+static int tgsi_endloop(struct r600_shader_ctx *ctx);
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+
+/*
+ * bytestream -> r600 shader
+ */
+
+/*
+ * Decode one ALU source operand from the serialized instruction stream.
+ *
+ * Ten bytes are consumed, in this order: sel (2 bytes, low byte first),
+ * chan, neg, abs, rel, then a 4-byte value (least-significant byte first).
+ *
+ * bytes       serialized instruction stream
+ * bytes_read  offset of the first byte of this operand
+ * alu         instruction receiving the operand; the decoded value is OR-ed
+ *             into src[src_idx].value, so that field must already be zero
+ *             (the callers in this file memset the whole struct first)
+ * src_idx     index into alu->src of the operand to fill in
+ *
+ * Returns the offset of the first byte after the operand. No bounds checking
+ * is performed against the length of the stream.
+ */
+static unsigned r600_src_from_byte_stream(unsigned char * bytes,
+		unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
+{
+	unsigned i;
+	unsigned sel0, sel1;
+	sel0 = bytes[bytes_read++];
+	sel1 = bytes[bytes_read++];
+	alu->src[src_idx].sel = sel0 | (sel1 << 8);
+	alu->src[src_idx].chan = bytes[bytes_read++];
+	alu->src[src_idx].neg = bytes[bytes_read++];
+	alu->src[src_idx].abs = bytes[bytes_read++];
+	alu->src[src_idx].rel = bytes[bytes_read++];
+	for (i = 0; i < 4; i++) {
+		/* Widen to unsigned before shifting: an unsigned char is promoted
+		 * to int, and shifting a byte >= 0x80 left by 24 overflows a
+		 * signed int, which is undefined behaviour. */
+		alu->src[src_idx].value |=
+			(unsigned)bytes[bytes_read++] << (i * 8);
+	}
+	return bytes_read;
+}
+
+/*
+ * Decode one serialized ALU instruction and append it to ctx->bc.
+ *
+ * Stream layout, starting at bytes_read: three source operands (see
+ * r600_src_from_byte_stream), then dst.sel, dst.chan, dst.clamp, dst.write,
+ * dst.rel, the opcode (2 bytes, low byte first), last, is_op3, predicate,
+ * bank_swizzle, bank_swizzle_force and omod, one byte each.
+ *
+ * Returns the offset of the first byte after the instruction.
+ */
+static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
+		unsigned char * bytes, unsigned bytes_read)
+{
+	struct r600_bytecode_alu alu;
+	unsigned pos = bytes_read;
+	unsigned opcode_lo, opcode_hi;
+	unsigned operand;
+
+	memset(&alu, 0, sizeof(alu));
+
+	/* Source operands come first in the stream. */
+	for (operand = 0; operand < 3; ++operand)
+		pos = r600_src_from_byte_stream(bytes, pos, &alu, operand);
+
+	/* Destination register description. */
+	alu.dst.sel = bytes[pos++];
+	alu.dst.chan = bytes[pos++];
+	alu.dst.clamp = bytes[pos++];
+	alu.dst.write = bytes[pos++];
+	alu.dst.rel = bytes[pos++];
+
+	/* 16-bit opcode, low byte first. */
+	opcode_lo = bytes[pos++];
+	opcode_hi = bytes[pos++];
+	alu.inst = opcode_lo | (opcode_hi << 8);
+
+	/* Remaining per-instruction flags. */
+	alu.last = bytes[pos++];
+	alu.is_op3 = bytes[pos++];
+	alu.predicate = bytes[pos++];
+	alu.bank_swizzle = bytes[pos++];
+	alu.bank_swizzle_force = bytes[pos++];
+	alu.omod = bytes[pos++];
+
+	r600_bytecode_add_alu(ctx->bc, &alu);
+
+	/* XXX: Handle other KILL instructions */
+	if (alu.inst != CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT))
+		return pos;
+
+	/* KILLGT was emitted: record it on the shader. */
+	ctx->shader->uses_kill = 1;
+	/* XXX: This should be enforced in the LLVM backend. */
+	ctx->bc->force_add_cf = 1;
+	return pos;
+}
+
+/*
+ * Open a conditional block driven by a predicate-setting ALU instruction.
+ *
+ * The caller supplies alu with src[0] already filled in. This helper turns
+ * it into pred_inst with src[1] set to V_SQ_ALU_SRC_0 (presumably the inline
+ * zero constant -- confirm against r600_sq.h), emits it as an
+ * ALU_PUSH_BEFORE clause, adds a JUMP CF instruction, and pushes an FC_IF
+ * level on the flow-control stack.
+ */
+static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
+		unsigned pred_inst)
+{
+	/* Second operand of the comparison. */
+	alu->src[1].chan = 0;
+	alu->src[1].sel = V_SQ_ALU_SRC_0;
+
+	/* Predicate instruction that closes its ALU group. */
+	alu->predicate = 1;
+	alu->last = 1;
+	alu->inst = pred_inst;
+
+	r600_bytecode_add_alu_type(ctx->bc, alu,
+			CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
+
+	fc_pushlevel(ctx, FC_IF);
+	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+}
+
+/*
+ * Decode one serialized flow-control record and emit the matching bytecode.
+ *
+ * A record is one source operand (see r600_src_from_byte_stream) followed by
+ * a one-byte operation code:
+ *   0  if (operand != 0), via PRED_SETNE
+ *   1  else
+ *   2  endif
+ *   3  begin loop
+ *   4  end loop
+ *   5  conditional break: if (PRED_SETE on operand) { BRK } endif
+ * Any other code emits nothing.
+ *
+ * Returns the offset of the first byte after the record.
+ */
+static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
+		unsigned char * bytes, unsigned bytes_read)
+{
+	struct r600_bytecode_alu cond;
+	unsigned fc_op;
+
+	memset(&cond, 0, sizeof(cond));
+	bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &cond, 0);
+	fc_op = bytes[bytes_read++];
+
+	if (fc_op == 0) {
+		llvm_if(ctx, &cond,
+			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
+	} else if (fc_op == 1) {
+		tgsi_else(ctx);
+	} else if (fc_op == 2) {
+		tgsi_endif(ctx);
+	} else if (fc_op == 3) {
+		tgsi_bgnloop(ctx);
+	} else if (fc_op == 4) {
+		tgsi_endloop(ctx);
+	} else if (fc_op == 5) {
+		/* tgsi_loop_brk_cont() reads ctx->inst_info, so point it at the
+		 * BRK entry of the table for this chip class first. */
+		/* XXX: This should be a helper function */
+		if (ctx->bc->chip_class == CAYMAN)
+			ctx->inst_info = &cm_shader_tgsi_instruction[TGSI_OPCODE_BRK];
+		else if (ctx->bc->chip_class >= EVERGREEN)
+			ctx->inst_info = &eg_shader_tgsi_instruction[TGSI_OPCODE_BRK];
+		else
+			ctx->inst_info = &r600_shader_tgsi_instruction[TGSI_OPCODE_BRK];
+
+		llvm_if(ctx, &cond,
+			CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE));
+		tgsi_loop_brk_cont(ctx);
+		tgsi_endif(ctx);
+	}
+
+	return bytes_read;
+}
+
+/*
+ * Decode one serialized texture instruction and append it to ctx->bc.
+ *
+ * Twenty-three single-byte fields are consumed starting at bytes_read, in
+ * the order of the assignments below.
+ *
+ * Returns the offset of the first byte after the instruction. No bounds
+ * checking is performed against the length of the stream.
+ */
+static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
+		unsigned char * bytes, unsigned bytes_read)
+{
+	struct r600_bytecode_tex tex;
+
+	/* Start from a zeroed struct, as the ALU and flow-control decoders do;
+	 * any member not carried in the byte stream would otherwise be handed
+	 * to r600_bytecode_add_tex() with an indeterminate value. */
+	memset(&tex, 0, sizeof(tex));
+
+	tex.inst = bytes[bytes_read++];
+	tex.resource_id = bytes[bytes_read++];
+	tex.src_gpr = bytes[bytes_read++];
+	tex.src_rel = bytes[bytes_read++];
+	tex.dst_gpr = bytes[bytes_read++];
+	tex.dst_rel = bytes[bytes_read++];
+	tex.dst_sel_x = bytes[bytes_read++];
+	tex.dst_sel_y = bytes[bytes_read++];
+	tex.dst_sel_z = bytes[bytes_read++];
+	tex.dst_sel_w = bytes[bytes_read++];
+	tex.lod_bias = bytes[bytes_read++];
+	tex.coord_type_x = bytes[bytes_read++];
+	tex.coord_type_y = bytes[bytes_read++];
+	tex.coord_type_z = bytes[bytes_read++];
+	tex.coord_type_w = bytes[bytes_read++];
+	tex.offset_x = bytes[bytes_read++];
+	tex.offset_y = bytes[bytes_read++];
+	tex.offset_z = bytes[bytes_read++];
+	tex.sampler_id = bytes[bytes_read++];
+	tex.src_sel_x = bytes[bytes_read++];
+	tex.src_sel_y = bytes[bytes_read++];
+	tex.src_sel_z = bytes[bytes_read++];
+	tex.src_sel_w = bytes[bytes_read++];
+
+	r600_bytecode_add_tex(ctx->bc, &tex);
+
+	return bytes_read;
+}
+
+/*
+ * Translate a whole serialized instruction stream into r600 bytecode.
+ *
+ * Each record starts with a one-byte type tag: 0 = ALU, 1 = texture,
+ * 2 = flow control. The matching decoder consumes the rest of the record and
+ * reports where the next one begins. A byte with any other value is skipped
+ * on its own.
+ */
+static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
+		unsigned char * bytes, unsigned num_bytes)
+{
+	unsigned offset;
+
+	for (offset = 0; offset < num_bytes; ) {
+		char record_type = bytes[offset++];
+
+		if (record_type == 0) {
+			offset = r600_alu_from_byte_stream(ctx, bytes, offset);
+		} else if (record_type == 1) {
+			offset = r600_tex_from_byte_stream(ctx, bytes, offset);
+		} else if (record_type == 2) {
+			offset = r600_fc_from_byte_stream(ctx, bytes, offset);
+		}
+		/* XXX: Error here (unknown record type) */
+	}
+}
+
+
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
@@ -690,6 +883,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
return 0;
}
+
static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
{
struct r600_shader *shader = &pipeshader->shader;
@@ -702,6 +896,11 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
unsigned output_done, noutput;
unsigned opcode;
int i, j, r = 0, pos0;
+ struct radeon_llvm_context radeon_llvm_ctx;
+ LLVMModuleRef mod;
+ unsigned char * inst_bytes;
+ unsigned inst_byte_count;
+ unsigned use_llvm;
ctx.bc = &shader->bc;
ctx.shader = shader;
@@ -718,6 +917,23 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
shader->nr_cbufs = rctx->nr_cbufs;
+ use_llvm = (ctx.type == TGSI_PROCESSOR_FRAGMENT
+ && debug_get_bool_option("R600_LLVM_FS", TRUE))
+ || (ctx.type == TGSI_PROCESSOR_VERTEX
+ && debug_get_bool_option("R600_LLVM_VS", TRUE));
+#if HAVE_LLVM != 0x0300
+ if (use_llvm) {
+ fprintf(stderr, "Warning: R600 LLVM backend requires LLVM v3.0\n");
+ use_llvm = 0;
+ }
+#endif
+ if (use_llvm && ctx.info.indirect_files) {
+ fprintf(stderr, "Warning: R600 LLVM backend does not support "
+ "indirect adressing. Falling back to TGSI "
+ "backend.\n");
+ use_llvm = 0;
+ }
+
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
@@ -753,8 +969,34 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
}
- ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
- ctx.info.file_max[TGSI_FILE_INPUT] + 1;
+
+ if (use_llvm) {
+ unsigned dump = 0;
+ memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
+ radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
+ mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
+ if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
+ dump = 1;
+ LLVMDumpModule(mod);
+ }
+ if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
+ rctx->family, dump)) {
+ FREE(inst_bytes);
+ radeon_llvm_dispose(&radeon_llvm_ctx);
+ use_llvm = 0;
+ fprintf(stderr, "R600 LLVM backend failed to compile "
+ "shader. Falling back to TGSI\n");
+ } else {
+ ctx.file_offset[TGSI_FILE_OUTPUT] =
+ ctx.file_offset[TGSI_FILE_INPUT];
+ }
+ }
+
+ if (!use_llvm) {
+ ctx.file_offset[TGSI_FILE_OUTPUT] =
+ ctx.file_offset[TGSI_FILE_INPUT] +
+ ctx.info.file_max[TGSI_FILE_INPUT] + 1;
+ }
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
@@ -792,27 +1034,29 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
goto out_err;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- r = tgsi_is_supported(&ctx);
- if (r)
- goto out_err;
- ctx.max_driver_temp_used = 0;
- /* reserve first tmp for everyone */
- r600_get_temp(&ctx);
-
- opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
- if ((r = tgsi_split_constant(&ctx)))
- goto out_err;
- if ((r = tgsi_split_literal_constant(&ctx)))
- goto out_err;
- if (ctx.bc->chip_class == CAYMAN)
- ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
- else if (ctx.bc->chip_class >= EVERGREEN)
- ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
- else
- ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
- r = ctx.inst_info->process(&ctx);
- if (r)
- goto out_err;
+ if (!use_llvm) {
+ r = tgsi_is_supported(&ctx);
+ if (r)
+ goto out_err;
+ ctx.max_driver_temp_used = 0;
+ /* reserve first tmp for everyone */
+ r600_get_temp(&ctx);
+
+ opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
+ if (ctx.bc->chip_class == CAYMAN)
+ ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+ else if (ctx.bc->chip_class >= EVERGREEN)
+ ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
+ else
+ ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
+ r = ctx.inst_info->process(&ctx);
+ if (r)
+ goto out_err;
+ }
break;
case TGSI_TOKEN_TYPE_PROPERTY:
property = &ctx.parse.FullToken.FullProperty;
@@ -827,7 +1071,12 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
goto out_err;
}
}
-
+ /* Get instructions */
+ if (use_llvm) {
+ r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
+ FREE(inst_bytes);
+ radeon_llvm_dispose(&radeon_llvm_ctx);
+ }
noutput = shader->noutput;
/* clamp color outputs */
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
new file mode 100644
index 00000000000..d264de3fbd5
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -0,0 +1,334 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+#include "AMDILLLVMPC.h"
+#include "AMDILLLVMVersion.h"
+#include "AMDILInstPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 2
+#define AMDIL_MINOR_VERSION 0
+#define AMDIL_REVISION_NUMBER 74
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+// SC->CAL version matchings.
+#define CAL_VERSION_SC_150 1700
+#define CAL_VERSION_SC_149 1700
+#define CAL_VERSION_SC_148 1525
+#define CAL_VERSION_SC_147 1525
+#define CAL_VERSION_SC_146 1525
+#define CAL_VERSION_SC_145 1451
+#define CAL_VERSION_SC_144 1451
+#define CAL_VERSION_SC_143 1441
+#define CAL_VERSION_SC_142 1441
+#define CAL_VERSION_SC_141 1420
+#define CAL_VERSION_SC_140 1400
+#define CAL_VERSION_SC_139 1387
+#define CAL_VERSION_SC_138 1387
+#define CAL_APPEND_BUFFER_SUPPORT 1340
+#define CAL_VERSION_SC_137 1331
+#define CAL_VERSION_SC_136 982
+#define CAL_VERSION_SC_135 950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDILInstrPrinter;
+class AMDILTargetMachine;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+ createAMDILISelDag(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILPrintfConvert(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILInlinePass(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILPeepholeOpt(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre regalloc passes.
+FunctionPass*
+ createAMDILPointerManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILMachinePeephole(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Pre emit passes.
+FunctionPass*
+ createAMDILCFGPreparationPass(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILCFGStructurizerPass(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILLiteralManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+FunctionPass*
+ createAMDILSwizzleEncoder(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+/// Instruction Emission Passes
+AMDILInstPrinter *createAMDILInstPrinter(const MCAsmInfo &MAI);
+
+//extern Target TheAMDILTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDILGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDILGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDILAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory.
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5 // Address space for unknown memory.
+};
+
+// We are piggybacking on the CommentFlag enum in MachineInstr.h to
+// set bits in AsmPrinterFlags of the MachineInstruction. We will
+// start at bit 16 and allocate down while LLVM will start at bit
+// 1 and allocate up.
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+ // The two branches below declare the same seven bit-fields (16 bits in
+ // total) in opposite order; u16all aliases them as one unsigned short.
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned short isImage : 1; // Reserved for future use/llvm.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
+ // conflict.
+ unsigned short ByteStore : 1; // Flag to specify if the op is a byte
+ // store op.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+#else
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ByteStore : 1; // Flag to specify if the op is byte
+ // store op.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
+ // a conflict.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op.
+ unsigned short isImage : 1; // Reserved for future use.
+#endif
+ } bits;
+ unsigned short u16all;
+} InstrResEnc;
+
+} // namespace AMDILAS
+
+// The OpSwizzle encodes a subset of all possible
+// swizzle combinations into a number of bits using
+// only the combinations utilized by the backend.
+// The lower 128 are for source swizzles and the
+// upper 128 are for destination swizzles.
+// The valid mappings can be found in the
+// getSrcSwizzle and getDstSwizzle functions of
+// AMDILUtilityFunctions.cpp.
+typedef union SwizzleRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned char dst : 1;
+ unsigned char swizzle : 7;
+#else
+ unsigned char swizzle : 7;
+ unsigned char dst : 1;
+#endif
+ } bits;
+ unsigned char u8all;
+} OpSwizzle;
+// Enums corresponding to AMDIL condition codes for IL. These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC {
+enum CondCodes {
+ // AMDIL specific condition codes. These correspond to the IL_CC_*
+ // in AMDILInstrInfo.td and must be kept in the same order.
+ IL_CC_D_EQ = 0, // DEQ instruction.
+ IL_CC_D_GE = 1, // DGE instruction.
+ IL_CC_D_LT = 2, // DLT instruction.
+ IL_CC_D_NE = 3, // DNE instruction.
+ IL_CC_F_EQ = 4, // EQ instruction.
+ IL_CC_F_GE = 5, // GE instruction.
+ IL_CC_F_LT = 6, // LT instruction.
+ IL_CC_F_NE = 7, // NE instruction.
+ IL_CC_I_EQ = 8, // IEQ instruction.
+ IL_CC_I_GE = 9, // IGE instruction.
+ IL_CC_I_LT = 10, // ILT instruction.
+ IL_CC_I_NE = 11, // INE instruction.
+ IL_CC_U_GE = 12, // UGE instruction.
+ IL_CC_U_LT = 13, // ULT instruction.
+ // Pseudo IL Comparison instructions here.
+ IL_CC_F_GT = 14, // GT instruction.
+ IL_CC_U_GT = 15,
+ IL_CC_I_GT = 16,
+ IL_CC_D_GT = 17,
+ IL_CC_F_LE = 18, // LE instruction
+ IL_CC_U_LE = 19,
+ IL_CC_I_LE = 20,
+ IL_CC_D_LE = 21,
+ IL_CC_F_UNE = 22,
+ IL_CC_F_UEQ = 23,
+ IL_CC_F_ULT = 24,
+ IL_CC_F_UGT = 25,
+ IL_CC_F_ULE = 26,
+ IL_CC_F_UGE = 27,
+ IL_CC_F_ONE = 28,
+ IL_CC_F_OEQ = 29,
+ IL_CC_F_OLT = 30,
+ IL_CC_F_OGT = 31,
+ IL_CC_F_OLE = 32,
+ IL_CC_F_OGE = 33,
+ IL_CC_D_UNE = 34,
+ IL_CC_D_UEQ = 35,
+ IL_CC_D_ULT = 36,
+ IL_CC_D_UGT = 37,
+ IL_CC_D_ULE = 38,
+ IL_CC_D_UGE = 39,
+ IL_CC_D_ONE = 40,
+ IL_CC_D_OEQ = 41,
+ IL_CC_D_OLT = 42,
+ IL_CC_D_OGT = 43,
+ IL_CC_D_OLE = 44,
+ IL_CC_D_OGE = 45,
+ IL_CC_U_EQ = 46,
+ IL_CC_U_NE = 47,
+ IL_CC_F_O = 48,
+ IL_CC_D_O = 49,
+ IL_CC_F_UO = 50,
+ IL_CC_D_UO = 51,
+ IL_CC_L_LE = 52,
+ IL_CC_L_GE = 53,
+ IL_CC_L_EQ = 54,
+ IL_CC_L_NE = 55,
+ IL_CC_L_LT = 56,
+ IL_CC_L_GT = 57,
+ IL_CC_UL_LE = 58,
+ IL_CC_UL_GE = 59,
+ IL_CC_UL_EQ = 60,
+ IL_CC_UL_NE = 61,
+ IL_CC_UL_LT = 62,
+ IL_CC_UL_GT = 63,
+ COND_ERROR = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_
diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td
new file mode 100644
index 00000000000..33acf727ad0
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL.td
@@ -0,0 +1,54 @@
+// Main file for AMDIL Tablegen for the PC platform
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// This file specifies where the base TD file exists
+// and where the version specific TD file exists.
+include "AMDILBase.td"
+include "AMDILVersion.td"
+
diff --git a/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp
new file mode 100644
index 00000000000..417d62b21eb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL789IOExpansion.cpp
@@ -0,0 +1,756 @@
+
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// @file AMDIL789IOExpansion.cpp
+// @details Implementation of the IO expansion class for 789 devices.
+//
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+: AMDILIOExpansion(tm, OptLevel) // Both arguments are forwarded unchanged to the base class.
+{
+}
+
+AMDIL789IOExpansion::~AMDIL789IOExpansion() { // Empty body: nothing is released here.
+}
+
+const char *AMDIL789IOExpansion::getPassName() const
+{
+ return "AMDIL 789 IO Expansion Pass"; // Constant, human-readable name of this pass.
+}
+// Emits the following pseudo-IL at the *MI insertion point, leaving the selected component of $src in $dst.x (r1006/r1007 are temporaries):
+// mov r1007, $src.y000
+// cmov_logical r1007.x___, $flag.yyyy, r1007.xxxx, $src.xxxx
+// mov r1006, $src.z000
+// cmov_logical r1007.x___, $flag.zzzz, r1006.xxxx, r1007.xxxx
+// mov r1006, $src.w000
+// cmov_logical $dst.x___, $flag.wwww, r1006.xxxx, r1007.xxxx
+void
+AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
+ unsigned flag, unsigned src, unsigned dst, bool before) // NOTE: 'before' is not referenced anywhere in this body.
+{
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(src)
+ .addImm(2); // 2 -> the ".y" extract in the pseudo-IL above
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(flag)
+ .addReg(AMDIL::R1007)
+ .addReg(src);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
+ .addReg(src)
+ .addImm(3); // 3 -> the ".z" extract in the pseudo-IL above
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1007)
+ .addReg(flag)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1006)
+ .addReg(src)
+ .addImm(4); // 4 -> the ".w" extract in the pseudo-IL above
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_W_i32), dst)
+ .addReg(flag)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1007);
+
+}
+// The load fetched 128 bits into r1011 but the value is only 8/16/32 bits wide,
+// so first select the right 32-bit component (r1008 holds the per-component
+// flags) and then, for the 8- and 16-bit cases, pick the right bits out of
+// that component using the low bits of the address in r1010. For 32 bits
+// we just need to select the correct component.
+ void
+AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
+ if (getMemorySize(MI) == 1) {
+ // This produces the following pseudo-IL:
+ // iand r1006.x___, r1010.xxxx, 3
+ // mov r1006, r1006.xxxx
+ // iadd r1006, r1006, {0, -1, -2, -3}
+ // ieq r1008, r1006, 0
+ // mov r1011, r1011.xxxx
+ // ishr r1011, r1011, {0, 8, 16, 24}
+ // mov r1007, r1011.y000
+ // cmov_logical r1007.x___, r1008.yyyy, r1007.xxxx, r1011.xxxx
+ // mov r1006, r1011.z000
+ // cmov_logical r1007.x___, r1008.zzzz, r1006.xxxx, r1007.xxxx
+ // mov r1006, r1011.w000
+ // cmov_logical r1011.x___, r1008.wwww, r1006.xxxx, r1007.xxxx
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1006)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1006)
+ .addReg(AMDIL::R1006);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
+ .addReg(AMDIL::R1006)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); // 32-bit lanes {0, -1, -2, -3}
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1006)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32))); // 32-bit lanes {0, 8, 16, 24}
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1011, AMDIL::R1011, false);
+ } else if (getMemorySize(MI) == 2) {
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, 1
+ // iand r1008.x___, r1007.xxxx, 1
+ // ishr r1007.x___, r1011.xxxx, 16
+ // cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addReg(AMDIL::R1011);
+ }
+}
+// Splits the address in r1010 into a 128-bit vector index (r1007.x) and a component index (r1008.x), for use with
+// vector-register loads/stores; when needsSelect is set, r1008 is further expanded into a per-component equality mask.
+ void
+AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ // This produces the following pseudo-IL:
+ // ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
+ // iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
+ // ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal((is32bit) ? 2 : 1)); // Total right shift of r1010 is 4 on both paths.
+ if (needsSelect) {
+ // If the component selection is required, the following
+ // pseudo-IL is produced.
+ // mov r1008, r1008.xxxx
+ // iadd r1008, r1008, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
+ // ieq r1008, r1008, 0
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
+ (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
+ -1ULL));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ }
+}
+// This function emits a switch statement on r1008.x and writes the 32bit/64bit
+// value held in r1011 to the matching component(s) of a 128bit vector register.
+ void
+AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
+{
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n"); // NOTE(review): assert only -- unlike expandPrivateStore, no fallback ID is substituted here when asserts are compiled out.
+ // This section generates the following pseudo-IL:
+ // switch r1008.x
+ // default
+ // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
+ // break
+ // case 1
+ // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
+ // break
+ // if is32bit is true, case 2 and 3 are emitted.
+ // case 2
+ // mov x1[r1007.x].__z_, r1011.x
+ // break
+ // case 3
+ // mov x1[r1007.x].___w, r1011.x
+ // break
+ // endswitch
+ DebugLoc DL; // Default-constructed: only the SWITCH below carries MI's debug location.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::SWITCH))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::DEFAULT));
+ BuildMI(*mBB, *MI, DL,
+ mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_X : AMDIL::SCRATCHSTORE_XY)
+ , AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CASE)).addImm(1);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get((is32bit) ? AMDIL::SCRATCHSTORE_Y : AMDIL::SCRATCHSTORE_ZW), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BREAK));
+ if (is32bit) { // Cases 2 and 3 exist only for 32-bit writes (four components per vector).
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CASE)).addImm(2);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE_Z), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BREAK));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CASE)).addImm(3);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHSTORE_W), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BREAK));
+ }
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ENDSWITCH));
+
+}
+ void
+AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
+{ // Expands a private-memory load into a SCRATCHLOAD sequence that leaves the value in R1011, then copies it to MI's destination.
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalLoad(MI); // No hardware private memory, or PrivateUAV is supported: handle as a global load.
+ }
+ if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
+ if (!xID) { // Only reachable when the assert above is compiled out: fall back to the SCRATCH_ID resource.
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Since the private register is 128-bit aligned, we have to align the address
+ // first (our source address is 32-bit aligned) and then load the data.
+ // This produces the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov r1011, x1[r1010.x]
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(xID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ emitVectorAddressCalc(MI, true, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ // Post-load fix-up of r1011 (emitted at the same *MI insertion point as the code above).
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, true);
+ // This produces the following pseudo-IL:
+ // mov r1011, x1[r1007.x]
+ // mov r1007, r1011.zw00
+ // cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1007.zw
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ // Post-load fix-up of r1011 (emitted at the same *MI insertion point as the code above).
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ break;
+ }
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)),
+ MI->getOperand(0).getReg())
+ .addReg(AMDIL::R1011); // Copy the loaded value from R1011 into MI's destination register (operand 0).
+}
+
+// Expands a constant-memory load via CBLOAD; non-hardware, memoperand-less, or cID < 2 loads are handed to expandGlobalLoad().
+ void
+AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
+{
+ if (!isHardwareInst(MI) || MI->memoperands_empty()) {
+ return expandGlobalLoad(MI);
+ }
+ uint32_t cID = getPointerID(MI);
+ if (cID < 2) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::CONSTANT_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(cID);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ emitVectorAddressCalc(MI, true, true);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(cID);
+ // Post-load fix-up of r1011 (emitted at the same *MI insertion point as the code above).
+ emitDataLoadSelect(MI);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, true);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1007)
+ .addImm(cID);
+ // Post-load fix-up of r1011 (emitted at the same *MI insertion point as the code above).
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::VEXTRACT_v2i64), AMDIL::R1007)
+ .addReg(AMDIL::R1011)
+ .addImm(2);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::CMOVLOG_i64), AMDIL::R1011)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ break;
+ }
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)),
+ MI->getOperand(0).getReg())
+ .addReg(AMDIL::R1011); // Copy the loaded value from R1011 into MI's destination register (operand 0).
+ MI->getOperand(0).setReg(AMDIL::R1011); // NOTE(review): expandPrivateLoad has no matching setReg -- confirm which behaviour is intended.
+}
+// Static constant-pool loads are emitted through emitCPInst() from the pool entry; anything else goes to expandConstantLoad().
+ void
+AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
+{
+ if (!isStaticCPLoad(MI)) {
+ return expandConstantLoad(MI);
+ } else {
+ uint32_t idx = MI->getOperand(1).getIndex(); // Operand 1 carries the constant-pool index.
+ const MachineConstantPool *MCP = MI->getParent()->getParent()
+ ->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &consts
+ = MCP->getConstants();
+ const Constant *C = consts[idx].Val.ConstVal;
+ emitCPInst(MI, C, mKM, 0, isExtendLoad(MI));
+ }
+}
+// Expands a private (scratch) store; 1- and 2-byte stores first reload the containing dword and merge the new bits into it.
+ void
+AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
+{
+ bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+ if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
+ return expandGlobalStore(MI); // No hardware private memory, or PrivateUAV is supported: handle as a global store.
+ }
+ if (!mMFI->usesMem(AMDILDevice::SCRATCH_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t xID = getPointerID(MI);
+ assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ if (!xID) { // Only reachable when the assert above is compiled out: fall back to the SCRATCH_ID resource.
+ xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // This section generates the following pseudo-IL:
+ // ishr r1010.x___, r1010.xxxx, 4
+ // mov x1[r1010.x], r1011
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SHR_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(4));
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(AMDIL::SCRATCHSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(xID);
+ break;
+ case 1:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
+ // This section generates the following pseudo-IL:
+ // iand r1003.x, r1010.x, 3
+ // mov r1003, r1003.xxxx
+ // iadd r1001, r1003, {0, -1, -2, -3}
+ // ieq r1001, r1001, 0
+ // mov r1002, r1002.xxxx
+ // ishr r1002, r1002, {0, 8, 16, 24}
+ // mov r1011, r1011.xxxx
+ // cmov_logical r1002, r1001, r1011, r1002
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1003)
+ .addReg(AMDIL::R1003);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::R1003)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1002);
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002, r1002, 0xFF
+ // ishl r1002, r1002, {0, 8, 16, 24}
+ // ior r1002.xy, r1002.xy, r1002.zw
+ // ior r1011.x, r1002.x, r1002.y
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1002)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1002);
+ } else {
+ // This section generates the following pseudo-IL:
+ // mov r1001.xy, r1002.yw
+ // mov r1002.xy, r1002.xz
+ // ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
+ // mov r1001.x, r1002.y
+ // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1002)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(8))
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI), AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ }
+ emitVectorAddressCalc(MI, true, false); // Recompute r1007/r1008 (no mask needed) before the switch-based write.
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 2:
+ emitVectorAddressCalc(MI, true, true);
+ // This section generates the following pseudo-IL:
+ // mov r1002, x1[r1007.x]
+ BuildMI(*mBB, *MI, DL,
+ mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
+ .addReg(AMDIL::R1007)
+ .addImm(xID);
+ emitComponentExtract(MI, AMDIL::R1008, AMDIL::R1002, AMDIL::R1002, true);
+ // This section generates the following pseudo-IL:
+ // ishr r1003.x, r1010.x, 1
+ // iand r1003.x, r1003.x, 1
+ // ishr r1001.x, r1002.x, 16
+ // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
+ // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1003)
+ .addReg(AMDIL::R1003)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1002)
+ .addReg(AMDIL::R1003)
+ .addReg(AMDIL::R1002)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1003)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1001);
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // This section generates the following pseudo-IL:
+ // iand r1002.x, r1002.x, 0xFFFF
+ // iand r1001.x, r1001.x, 0xFFFF
+ // ishl r1001.x, r1001.x, 16
+ // ior r1011.x, r1002.x, r1001.x
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1002)
+ .addReg(AMDIL::R1002)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1001)
+ .addReg(AMDIL::R1001)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1002).addReg(AMDIL::R1001);
+ } else {
+ // This section generates the following pseudo-IL:
+ // ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1001)
+ .addReg(AMDIL::R1002);
+ }
+ emitVectorAddressCalc(MI, true, false); // Recompute r1007/r1008 (no mask needed) before the switch-based write.
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 4:
+ emitVectorAddressCalc(MI, true, false);
+ emitVectorSwitchWrite(MI, true);
+ break;
+ case 8:
+ emitVectorAddressCalc(MI, false, false);
+ emitVectorSwitchWrite(MI, false);
+ break;
+ };
+}
+ void
+AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{ // Stages a store: the value to write goes into R1011 and the address into R1010.
+ DebugLoc DL;
+ if (MI->getOperand(0).isUndef()) { // Undefined source value: move a literal 0 into R1011 instead.
+ BuildMI(*mBB, *MI, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ .addReg(MI->getOperand(0).getReg());
+ }
+ expandTruncData(MI);
+ if (MI->getOperand(2).isReg()) { // Register offset present: R1010 = operand 1 + operand 2.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_i32), AMDIL::R1010)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(2).getReg());
+ } else { // Otherwise R1010 is just a copy of operand 1.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::MOVE_i32), AMDIL::R1010)
+ .addReg(MI->getOperand(1).getReg());
+ }
+ expandAddressCalc(MI);
+ expandPackedData(MI);
+}
+
+// Packs or unpacks the value held in R1011 according to getPackedID(MI); does nothing when MI does not carry packed data.
+void
+AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
+{
+ if (!isPackedData(MI)) {
+ return;
+ }
+ DebugLoc DL;
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addImm(mMFI->addi64Literal(8ULL << 32));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I8:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V2I16:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL << 32));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I16:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL << 32)); // NOTE(review): 64-bit literal with a v4i32 shift (PACK_V4I8 uses a 128-bit one) -- confirm intent.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+ }
+ break;
+ case UNPACK_V2I8:
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ break;
+ case UNPACK_V4I8:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
+ }
+ break;
+ case UNPACK_V2I16:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ }
+ break;
+ case UNPACK_V4I16:
+ {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ }
+ break;
+ };
+}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.cpp
new file mode 100644
index 00000000000..39e96b5b68b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.cpp
@@ -0,0 +1,186 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+// Constructs the 7XX printer by handing the printer arguments straight to
+// the AMDILAsmPrinter base class; no further setup is done here.
+// TODO: Add support for verbose.
+AMDIL7XXAsmPrinter::AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS) {}
+
+// Destructor; the body is intentionally empty.
+AMDIL7XXAsmPrinter::~AMDIL7XXAsmPrinter() {}
+///
+/// @param name  full function name, e.g. "__OpenCL_foo_kernel"
+/// @brief strips KERNEL_PREFIX ("__OpenCL_") and KERNEL_SUFFIX ("_kernel")
+/// from the name and returns what lies between them.
+/// @return the stripped name when the name starts with the prefix, ends with
+/// the suffix and the two do not overlap; otherwise the name unchanged.
+///
+static std::string
+Strip(const std::string &name)
+{
+  const std::string prefix("__OpenCL_");
+  const std::string suffix("_kernel");
+  // Too short to hold both tokens without overlapping (e.g.
+  // "__OpenCL_kernel"); computing a length here would underflow size_t.
+  if (name.length() < prefix.length() + suffix.length()) {
+    return name;
+  }
+  // Only strip tokens that sit at the very start and the very end, so no
+  // unrelated characters are ever dropped from the name.
+  if (name.compare(0, prefix.length(), prefix) != 0) {
+    return name;
+  }
+  const size_t suffixPos = name.length() - suffix.length();
+  if (name.compare(suffixPos, suffix.length(), suffix) != 0) {
+    return name;
+  }
+  return name.substr(prefix.length(), suffixPos - prefix.length());
+}
+// Emits the macro call for MI. The macro name is taken from operand 0 when
+// that operand is a global ("unknown" otherwise); on devices that use
+// hardware double ops, names beginning with "__sqrt_f64" are redirected to
+// "__sqrt_f64_7xx".
+void
+AMDIL7XXAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                  OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+  llvm::StringRef globalName;
+  const char *macroName = "unknown";
+
+  // Without a global operand there is nothing to look up.
+  if (!MI->getOperand(0).isGlobal()) {
+    emitMCallInst(MI, O, macroName);
+    return;
+  }
+
+  globalName = MI->getOperand(0).getGlobal()->getName();
+  macroName = globalName.data();
+  if (subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) {
+    if (::strncmp(macroName, "__sqrt_f64", 10) == 0) {
+      macroName = "__sqrt_f64_7xx";
+    }
+  }
+  emitMCallInst(MI, O, macroName);
+}
+
+// Records the function being printed, stores both its full and stripped
+// names, then emits the function header and body. Always returns false.
+bool
+AMDIL7XXAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  SetupMachineFunction(lMF);
+
+  // Keep the unmodified name as well as the one with the kernel
+  // prefix/suffix removed.
+  const std::string fullName = MF->getFunction()->getName();
+  mName = Strip(fullName);
+  mKernelName = fullName;
+
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+// Builds the text for one machine instruction in a local string buffer and
+// hands it to the streamer as raw text. Macro functions are emitted through
+// emitMacroFunc; everything else is printed normally, except instructions
+// flagged by useCompilerWrite, which get a fixed store sequence instead.
+void
+AMDIL7XXAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  std::string text;
+  raw_string_ostream textStream(text);
+  formatted_raw_ostream O(textStream);
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+
+  // In debug mode the instruction itself is dumped behind a ';'.
+  if (mDebugMode) {
+    O << ";" ;
+    II->print(O);
+  }
+
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+  } else {
+    if (isMacroCall(II)) {
+      // The macro name is the opcode name with its first five characters
+      // skipped.
+      const char *macroName = II->getDesc().getName() + 5;
+      int macroId = amd::MacroDBFindMacro(macroName);
+      O << "\t;"<< macroName<<"\n";
+      O << "\tmcall("<<macroId<<")";
+      if (subtarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+        mMacroIDs.insert(macroId);
+      } else {
+        mMFI->addCalledIntr(macroId);
+      }
+    }
+
+    // Print the assembly for the instruction.
+    // We want to make sure that we do HW constants
+    // before we do arena segment
+    if (!mMeta->useCompilerWrite(II)) {
+      printInstruction(II, O);
+    } else {
+      // TODO: This is a hack to get around some
+      // conformance failures.
+      O << "\tif_logicalz cb0[0].x\n";
+      O << "\tuav_raw_store_id("
+        << subtarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+        << ") ";
+      O << "mem0.x___, cb0[3].x, r0.0\n";
+      O << "\tendif\n";
+      mMFI->addMetadata(";memory:compilerwrite");
+    }
+  }
+
+  // Single exit: push whatever was accumulated out to the streamer.
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(text));
+}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.h b/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.h
new file mode 100644
index 00000000000..99632e744c4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXAsmPrinter.h
@@ -0,0 +1,102 @@
+//===----- AMDIL7XXAsmPrinter.h --- AMDIL 7XX Asm Printer class ---------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Asm Printer class for 7XX generation of cards.
+// This class handles all of the items that are
+// unique to these devices that must be handled by
+// the AsmPrinter.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_7XX_ASM_PRINTER_H_
+#define _AMDIL_7XX_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+  // AsmPrinter specialization for the 7XX generation of cards.
+  class LLVM_LIBRARY_VISIBILITY AMDIL7XXAsmPrinter : public AMDILAsmPrinter
+  {
+    public:
+      //
+      // Constructor for the AMDIL 7XX specific AsmPrinter class. The
+      // argument list is defined by LLVM proper; refer there for more
+      // information.
+      //
+      AMDIL7XXAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+      //
+      // Destructor for the 7XX Asm Printer class.
+      //
+      virtual ~AMDIL7XXAsmPrinter();
+
+      //
+      // @param MI Machine instruction to print
+      // @brief emit the assembly text for a single instruction
+      //
+      void EmitInstruction(const MachineInstr *MI);
+
+      //
+      // @param F MachineFunction to print the assembly for
+      // @brief parse the specified machine function and print
+      // out the assembly for all the instructions in the function
+      //
+      bool runOnMachineFunction(MachineFunction &F);
+
+    protected:
+      //
+      // @param MI Machine instruction to emit the macro code for
+      // @param O  stream the macro text is written to
+      //
+      // Emits a fully functional macro function that uses the argument
+      // registers as the macro arguments.
+      //
+      virtual void emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+  }; // AMDIL7XXAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_7XX_ASM_PRINTER_H_
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
new file mode 100644
index 00000000000..f18c9abffb7
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
@@ -0,0 +1,191 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDIL7XXAsmPrinter.h"
+#include "AMDILDevice.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+
+using namespace llvm;
+
+// Sets the capability bits and picks the device flag from the subtarget's
+// device name: "rv710" and "rv730" get their own flags, every other name is
+// treated as an RV770.
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+  setCaps();
+  const std::string chip = mSTM->getDeviceName();
+  if (chip == "rv730") {
+    mDeviceFlag = OCL_DEVICE_RV730;
+  } else if (chip == "rv710") {
+    mDeviceFlag = OCL_DEVICE_RV710;
+  } else {
+    mDeviceFlag = OCL_DEVICE_RV770;
+  }
+}
+
+// Destructor; the body is intentionally empty.
+AMDIL7XXDevice::~AMDIL7XXDevice() {}
+
+// Marks local memory in the software capability bits for the generic 7XX
+// device.
+void
+AMDIL7XXDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+// Returns MAX_LDS_SIZE_700 when local memory is used in hardware, and 0
+// otherwise.
+size_t
+AMDIL7XXDevice::getMaxLDSSize() const
+{
+  if (!usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return 0;
+  }
+  return MAX_LDS_SIZE_700;
+}
+
+// The generic 7XX device reports the half wavefront size.
+size_t
+AMDIL7XXDevice::getWavefrontSize() const
+{
+  return AMDILDevice::HalfWavefrontSize;
+}
+
+// Every 7XX device reports the HD4XXX generation.
+uint32_t
+AMDIL7XXDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD4XXX;
+}
+
+// Maps a resource kind to its resource ID on 7XX hardware.
+// LDS and scratch return their default IDs only when the matching memory is
+// used in hardware; global, constant, raw UAV and arena UAV return 0. GDS
+// and unknown kinds assert, and yield DEFAULT_GDS_ID / 0 when asserts are
+// compiled out.
+uint32_t
+AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+  switch (DeviceID) {
+  case LDS_ID:
+    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+      return DEFAULT_LDS_ID;
+    }
+    return 0;
+  case SCRATCH_ID:
+    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      return DEFAULT_SCRATCH_ID;
+    }
+    return 0;
+  case GDS_ID:
+    assert(0 && "GDS UAV ID is not supported on this chip");
+    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+      return DEFAULT_GDS_ID;
+    }
+    return 0;
+  case GLOBAL_ID:
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+  case ARENA_UAV_ID:
+    return 0;
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    return 0;
+  };
+
+  return 0;
+}
+
+// 7XX devices report a single UAV.
+uint32_t
+AMDIL7XXDevice::getMaxNumUAVs() const
+{
+  return 1;
+}
+
+// Allocates a new 7XX IO expansion pass for the given target machine and
+// optimization level.
+FunctionPass*
+AMDIL7XXDevice::getIOExpansion(TargetMachine& TM,
+                               CodeGenOpt::Level OptLevel) const
+{
+  return new AMDIL7XXIOExpansion(TM, OptLevel);
+}
+
+// Allocates a new 7XX asm printer from the forwarded printer arguments.
+AsmPrinter*
+AMDIL7XXDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+  return new AMDIL7XXAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+// Allocates a new AMDILPointerManager pass for the given target machine and
+// optimization level.
+FunctionPass*
+AMDIL7XXDevice::getPointerManager(TargetMachine& TM,
+                                  CodeGenOpt::Level OptLevel) const
+{
+  return new AMDILPointerManager(TM, OptLevel);
+}
+
+// Builds the base 7XX device, then applies the RV770-specific capability
+// bits through this class's setCaps().
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST)
+  : AMDIL7XXDevice(ST)
+{
+  setCaps();
+}
+
+// Destructor; the body is intentionally empty.
+AMDIL770Device::~AMDIL770Device() {}
+
+// Capability bits for the RV770. When the subtarget overrides DoubleOps,
+// double ops are flagged as hardware and FMA as software. Long ops are
+// always moved from the hardware bits to the software bits; barrier
+// detection and local memory are flagged as software.
+void
+AMDIL770Device::setCaps()
+{
+  const bool doubleOverride = mSTM->isOverride(AMDILDeviceInfo::DoubleOps);
+  if (doubleOverride) {
+    mSWBits.set(AMDILDeviceInfo::FMA);
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+  }
+
+  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+
+  // Long ops: clear the hardware bit, set the software bit.
+  mHWBits.reset(AMDILDeviceInfo::LongOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+
+  mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+// The RV770 reports the full wavefront size.
+size_t
+AMDIL770Device::getWavefrontSize() const
+{
+  return AMDILDevice::WavefrontSize;
+}
+
+// Builds the base 7XX device; nothing else is configured here.
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST)
+  : AMDIL7XXDevice(ST) {}
+
+// Destructor; the body is intentionally empty.
+AMDIL710Device::~AMDIL710Device() {}
+
+// The RV710 reports the quarter wavefront size.
+size_t
+AMDIL710Device::getWavefrontSize() const
+{
+  return AMDILDevice::QuarterWavefrontSize;
+}
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.h b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
new file mode 100644
index 00000000000..911b6cc63ce
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.h
@@ -0,0 +1,117 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ---*- C++ -*--===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm {
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// AMDIL7XXDevice is the generic 7XX device and the base class of every 7XX
+// device. It supports only the minimal feature set needed to be considered
+// OpenCL 1.0 compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice {
+public:
+  AMDIL7XXDevice(AMDILSubtarget *ST);
+  virtual ~AMDIL7XXDevice();
+
+  // Capability and resource queries.
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getResourceID(uint32_t DeviceID) const;
+  virtual uint32_t getMaxNumUAVs() const;
+
+  // Accessors for the IO expansion pass, the asm printer and the pointer
+  // manager pass used with this device.
+  FunctionPass* getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+  AsmPrinter* getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+  FunctionPass* getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+
+protected:
+  // Sets the capability bits for this device.
+  virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// AMDIL770Device represents the RV770 chip and its derivative cards.
+// Compared with the base class, this device adds support for double
+// precision and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice {
+public:
+  AMDIL770Device(AMDILSubtarget *ST);
+  virtual ~AMDIL770Device();
+  virtual size_t getWavefrontSize() const;
+
+private:
+  // Sets the capability bits specific to this device.
+  virtual void setCaps();
+}; // AMDIL770Device
+
+// AMDIL710Device derives from the 7XX base class but describes a smaller
+// derivative, so it overrides the functions needed to report that
+// correctly.
+class AMDIL710Device : public AMDIL7XXDevice {
+public:
+  AMDIL710Device(AMDILSubtarget *ST);
+  virtual ~AMDIL710Device();
+  virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDIL7XXDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp b/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp
new file mode 100644
index 00000000000..8a7740d2135
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDIL7XXIOExpansion.cpp
@@ -0,0 +1,590 @@
+//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDIL7XXIOExpansion.cpp
+// @details Implementation of the IO Expansion class for 7XX devices
+//
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+
+using namespace llvm;
+// Constructs the 7XX IO expansion pass by forwarding the target machine and
+// optimization level to the AMDIL789IOExpansion base class.
+AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm,
+                                         CodeGenOpt::Level OptLevel)
+  : AMDIL789IOExpansion(tm, OptLevel) {}
+
+// Destructor; the body is intentionally empty.
+AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion()
+{
+}
+// Returns the fixed, human-readable name of this pass.
+const char *
+AMDIL7XXIOExpansion::getPassName() const
+{
+  return "AMDIL 7XX IO Expansion Pass";
+}
+
+ // Expands a global-memory load into a UAV raw-load sequence.
+ // The sequence is selected by getMemorySize(MI): 4 and 8 map to the i32
+ // and v2i32 raw loads, 1 and 2 load the containing 32-bit word and shift
+ // the requested bytes down, and any other size uses the v4i32 raw load.
+ // Every path reads its address from R1010 and leaves its result in R1011;
+ // a final move built from MI's operand 0 and R1011 is then emitted and
+ // MI's operand 0 is redirected to R1011.
+ // @param MI the load instruction being expanded
+ void
+AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ DebugLoc DL;
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ switch(getMemorySize(MI)) {
+ default:
+ // Any size not listed below: one v4i32 raw load.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 4:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ break;
+ case 1:
+ // Byte load: R1008 = address & 3, R1010 = address & ~3.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ // NOTE(review): the 128-bit literal presumably holds the per-lane
+ // constants 0, -1, -2, -3, so that after the add exactly one lane is
+ // zero and the IEQ/CMOVLOG chain below selects a shift amount of 0, 8,
+ // 16 or 24 from it -- confirm against the opcode definitions.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008);
+ // Load the aligned word, then shift it right by the amount in R1008.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ break;
+ case 2:
+ // 16-bit load: R1008 = (address & 3) >> 1, R1010 = address & ~3.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ // NOTE(review): presumably selects a shift of 16 when R1008 is non-zero
+ // and 0 otherwise -- confirm the CMOVLOG operand order.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ // Load the aligned word, then shift it right by the amount in R1008.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ break;
+ }
+ // These instructions go after the current MI.
+ // NOTE(review): the BuildMI below passes MI as its insertion point exactly
+ // like the ones above -- confirm that "after" describes the resulting
+ // order rather than the insertion position.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ // Build a move (opcode chosen from operand 0's register class) from MI's
+ // original operand 0 and R1011, then point MI's operand 0 at R1011.
+ BuildMI(*mBB, MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ void // Expands a region-memory load (MI) into an explicit AMDIL instruction sequence.
+AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { // no region memory: record an error and emit nothing
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) { // not a hardware region access: expand as a global load
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); // recorded only; expansion still continues
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ if (!gID) { // reachable only when the assert above is compiled out
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ DebugLoc DL; // default-constructed, unlike the final move below which uses MI->getDebugLoc()
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) { // presumably the access size in bytes - TODO confirm
+ default: // any other size: four loads (GDSLOAD, _Y, _Z, _W) into R1011
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); // 32-bit fields 0,1 / 2,3 - presumably per-component offsets
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 1: // size 1: load at the masked address, then shift and narrow the result
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) // R1008 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) // R1008 *= 8 (used as the shift amount below)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) // clear the low two bits of R1010
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // The load itself ends here; the instructions below post-process R1011.
+ // (The "afterInst vector" of earlier wording does not appear in this function.)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // R1011 >>= R1008
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // shift left then right by 24: keeps the low 8 bits
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // sign vs. zero fill of SHR_i32 is not visible here
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ break;
+ case 2: // size 2: same sequence as size 1, narrowing to the low 16 bits
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) // R1008 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) // R1008 *= 8
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) // clear the low two bits of R1010
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // The load itself ends here; the instructions below post-process R1011.
+ // (The "afterInst vector" of earlier wording does not appear in this function.)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // R1011 >>= R1008
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // shift left then right by 16: keeps the low 16 bits
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ break;
+ case 4: // size 4: a single GDSLOAD, no address adjustment
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 8: // size 8: two loads (GDSLOAD, GDSLOAD_Y)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) // NOTE(review): v4i32 add after a v2i32 create with a 64-bit literal - confirm intended
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ }
+
+ // These instructions go after the current MI. NOTE(review): the BuildMI call below has the same form as the "before" ones above - confirm placement.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, MI, MI->getDebugLoc(), // move R1011 into MI's original operand 0
+ mTII->get(getMoveInstFromID( // move opcode is chosen from operand 0's register class
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011); // MI's own operand 0 now names R1011
+}
+ void // Expands a local-memory (LDS) load (MI) into an explicit AMDIL instruction sequence.
+AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) { // not a hardware local access: expand as a global load
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); // recorded only; expansion still continues
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ if (!lID) { // reachable only when the assert above is compiled out
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL; // default-constructed, unlike the final move below which uses MI->getDebugLoc()
+ // These instructions go before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) { // presumably the access size in bytes - TODO confirm
+ default: // every size other than 1, 2 and 4 shares the vector load with case 8
+ case 8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 4: // size 4: a single LDSLOAD, no address adjustment
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 1: // size 1: load at the masked address, then shift and narrow the result
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) // R1008 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) // R1008 *= 8 (used as the shift amount below)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) // clear the low two bits of R1010
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // R1011 >>= R1008
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // shift left then right by 24: keeps the low 8 bits
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // sign vs. zero fill of SHR_i32 is not visible here
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(24));
+ break;
+ case 2: // size 2: same sequence as size 1, narrowing to the low 16 bits
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) // R1008 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1008) // R1008 *= 8
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010) // clear the low two bits of R1010
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011) // R1011 >>= R1008
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // shift left then right by 16: keeps the low 16 bits
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16));
+ break;
+ }
+
+ // These instructions go after the current MI. NOTE(review): the BuildMI call below has the same form as the "before" ones above - confirm placement.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ BuildMI(*mBB, MI, MI->getDebugLoc(), // move R1011 into MI's original operand 0
+ mTII->get(getMoveInstFromID( // move opcode is chosen from operand 0's register class
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011); // MI's own operand 0 now names R1011
+}
+
+ void // Expands a global-memory store (MI) into a UAVRAWSTORE_* instruction selected by access size.
+AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+ uint32_t ID = getPointerID(MI); // passed as the immediate operand of every store below
+ mKM->setOutputInst();
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) { // presumably the access size in bytes - TODO confirm
+ default: // any other size: four-component store
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 1: // size 1: records BYTE_STORE_ERROR, then still emits an i32 store
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 2: // size 2: same handling as size 1, including the BYTE_STORE_ERROR message
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 4: // size 4: plain i32 store
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ case 8: // size 8: two-component store
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(ID);
+ break;
+ };
+}
+
+ void // Expands a region-memory store (MI) into an explicit AMDIL instruction sequence.
+AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) { // no region memory: record an error and emit nothing
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) { // not a hardware region access: expand as a global store
+ return expandGlobalStore(MI);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]); // recorded only; expansion still continues
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ if (!gID) { // reachable only when the assert above is compiled out
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ switch (getMemorySize(MI)) { // presumably the access size in bytes - TODO confirm
+ default: // any other size: four stores (GDSSTORE, _Y, _Z, _W) of R1011
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32))); // 32-bit fields 0,1 / 2,3 - presumably per-component offsets
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 1: // size 1: records BYTE_STORE_ERROR, then still emits the sequence below
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) // R1011 &= 0xFF
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012) // R1012 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008) // NOTE(review): reads R1008 before any write visible in this case; R1012 may have been intended - confirm
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32)))); // 32-bit fields 0, 0xFFFFFFFF / 0xFFFFFFFE, 0xFFFFFFFD
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::R1006) // NOTE(review): R1006 is not read again in this case
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007) // selects between the two byte-mask literals
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012) // NOTE(review): overwrites R1012 (unread since the AND above) and R1012 is not read afterwards - confirm
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // NOTE(review): shifts by R1007, which was built from mask literals - confirm
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 2: // size 2: records BYTE_STORE_ERROR, then still emits the sequence below
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011) // R1011 &= 0xFFFF
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008) // R1008 = R1010 & 3
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008) // R1008 >>= 1
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012) // NOTE(review): R1012 is not read again in this case
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008) // R1008 becomes 16 or 0, used as the shift amount below
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011) // R1011 <<= R1008
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 4: // size 4: a single GDSSTORE, no adjustment
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 8: // size 8: two stores (GDSSTORE, GDSSTORE_Y)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010) // NOTE(review): v4i32 add after a v2i32 create with a 64-bit literal - confirm intended
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ };
+}
+
+ void // Expands a local-memory (LDS) store (MI) into a single LDSSTOREVEC instruction.
+AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) { // not a hardware local access: expand as a global store
+ return expandGlobalStore(MI);
+ }
+ uint32_t lID = getPointerID(MI); // NOTE(review): no MEMOP_NO_ALLOCATION check here, unlike expandLocalLoad - confirm intended
+ assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+ if (!lID) { // reachable only when the assert above is compiled out
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ // These instructions go before the current MI.
+ expandStoreSetupCode(MI);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM) // same opcode for every access size; getMemorySize() is not consulted
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+}
diff --git a/src/gallium/drivers/radeon/AMDILAlgorithms.tpp b/src/gallium/drivers/radeon/AMDILAlgorithms.tpp
new file mode 100644
index 00000000000..24a773947be
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAlgorithms.tpp
@@ -0,0 +1,134 @@
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides template algorithms that extend the STL algorithms, but
+// are useful for the AMDIL backend
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each dereferenced element to the function. This is based
+// on the for_each STL function, but allows a reference to the second argument.
+// binaryForEach ignores the result of F. In safeBinaryForEach, if the function
+// returns true the iterator is moved back one step before the loop advances
+// it again, so the same position is visited a second time.
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ F(*First, Second); // result of F, if any, is discarded
+ }
+ return F; // returns the function object by value
+}
+
+template<class InputIterator, class Function, typename Arg> // Variant of binaryForEach that reacts to the result of F.
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ if (F(*First, Second)) { // true: step back so the ++First of the loop lands on the same position again
+ --First; // NOTE(review): steps before the start of the range if F returns true for the first element
+ }
+ }
+ return F; // returns the function object by value
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument: every element of [First, Last) is a
+// range with begin()/end(), and binaryForEach is applied to each such range.
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ binaryForEach(First->begin(), First->end(), F, Second); // F is passed by value; the returned copy is discarded
+ }
+ return F;
+}
+template<class InputIterator, class Function, typename Arg> // Nested form of safeBinaryForEach: applies it to every inner range.
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ safeBinaryForEach(First->begin(), First->end(), F, Second); // F is passed by value; the returned copy is discarded
+ }
+ return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
+// versions of these functions. This allows the function to handle situations
+// such as invalidated iterators.
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+ for ( ; First!=Last; ++First ) F(&First)
+ ; // Terminates the F(&First) statement above; the result of F is discarded.
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current inner iterator. The inner iterator
+// is advanced here only when F returns false.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) // S is never read; it only fixes the SecondIterator type
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end(); // sl is captured once per inner range
+ sf != sl; ) {
+ if (!F(&sf)) { // F returned true: sf is left as F set it (F receives a pointer to it)
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
diff --git a/src/gallium/drivers/radeon/AMDILAsmBackend.cpp b/src/gallium/drivers/radeon/AMDILAsmBackend.cpp
new file mode 100644
index 00000000000..2ede6a57190
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmBackend.cpp
@@ -0,0 +1,151 @@
+//===------ AMDILAsmBackend.cpp - AMDIL Assembly Backend ---===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//
+#include "AMDILAsmBackend.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+namespace llvm { // Factory function for the AMDIL assembly backend.
+#if LLVM_VERSION < 2500
+ TargetAsmBackend* createAMDILAsmBackend(const Target &T,
+#else
+ ASM_BACKEND_CLASS* createAMDILAsmBackend(const ASM_BACKEND_CLASS &T,
+#endif
+ const std::string &TT) // TT is not used in the body
+ {
+ return new AMDILAsmBackend(T); // caller receives a new-allocated backend object
+ }
+} // namespace llvm
+
+//===--------------------- Default AMDIL Asm Backend ---------------------===//
+#if LLVM_VERSION < 2500
+AMDILAsmBackend::AMDILAsmBackend(const Target &T)
+ : TargetAsmBackend(T) // older API: the target is forwarded to the base class
+#else
+AMDILAsmBackend::AMDILAsmBackend(const ASM_BACKEND_CLASS &T)
+ : ASM_BACKEND_CLASS() // newer API: base is default-constructed and T is not used
+#endif
+{
+}
+
+MCObjectWriter * // Stub: no object writer is created for AMDIL.
+AMDILAsmBackend::createObjectWriter(raw_ostream &OS) const
+{
+ return 0; // always a null pointer; OS is not used
+}
+
+bool // Stub: answers false for every section.
+AMDILAsmBackend::doesSectionRequireSymbols(const MCSection &Section) const
+{
+ return false; // Section is not inspected
+}
+
+bool // Stub: answers true for every section.
+AMDILAsmBackend::isSectionAtomizable(const MCSection &Section) const
+{
+ return true; // Section is not inspected
+}
+
+bool // Stub: answers false for every section.
+AMDILAsmBackend::isVirtualSection(const MCSection &Section) const
+{
+ return false; // Section is not inspected
+ // Disabled alternative: treat ELF sections of type SHT_NOBITS as virtual.
+ //const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
+ //return SE.getType() == MCSectionELF::SHT_NOBITS;
+}
+#if LLVM_VERSION >= 2500
+void // Stub: fixups are ignored (empty body for both API versions).
+AMDILAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const
+#else
+void // Stub: fixups are ignored (empty body for both API versions).
+AMDILAsmBackend::ApplyFixup(const MCAsmFixup &Fixup,
+ MCDataFragment &Fragment, uint64_t Value) const
+#endif
+{
+}
+
+bool // Stub: answers false for every instruction.
+AMDILAsmBackend::MayNeedRelaxation(const MCInst &Inst
+#if LLVM_VERSION < 2500
+ , const SmallVectorImpl<MCAsmFixup> &Fixups
+#endif
+ ) const
+{
+ return false; // no parameter is inspected
+}
+
+#if LLVM_VERSION < 2500
+void // Stub: empty body, Res is left untouched.
+AMDILAsmBackend::RelaxInstruction(const MCInstFragment *IF,
+ MCInst &Res) const
+{
+}
+#else
+void // Stub: empty body, Res is left untouched.
+AMDILAsmBackend::RelaxInstruction(const MCInst &Inst,
+ MCInst &Res) const
+{
+}
+#endif
+
+bool // Stub: writes nothing and always returns false.
+AMDILAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const
+{
+ return false; // Count and OW are not used
+}
+
+#if LLVM_VERSION >= 2500
+unsigned // Only compiled for LLVM_VERSION >= 2500.
+AMDILAsmBackend::getNumFixupKinds() const
+{
+ return 0; // always reports zero fixup kinds
+}
+#endif
diff --git a/src/gallium/drivers/radeon/AMDILAsmBackend.h b/src/gallium/drivers/radeon/AMDILAsmBackend.h
new file mode 100644
index 00000000000..d96de1bf4f3
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmBackend.h
@@ -0,0 +1,94 @@
+//==-----------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef AMDIL_ASM_BACKEND_H
+#define AMDIL_ASM_BACKEND_H
+#include "AMDIL.h"
+#include "llvm/MC/MCAsmBackend.h"
+#define ASM_BACKEND_CLASS MCAsmBackend // base-class alias, also referenced by AMDILAsmBackend.cpp
+
+using namespace llvm;
+namespace llvm {
+ class AMDILAsmBackend : public ASM_BACKEND_CLASS { // MC assembly backend for AMDIL; signatures vary with LLVM_VERSION
+ public:
+#if LLVM_VERSION < 2500
+ AMDILAsmBackend(const Target &T);
+#else
+ AMDILAsmBackend(const ASM_BACKEND_CLASS &T);
+#endif
+ virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const; // implementation returns a null pointer
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const; // implementation returns false
+ virtual bool isSectionAtomizable(const MCSection &Section) const; // implementation returns true
+ virtual bool isVirtualSection(const MCSection &Section) const; // implementation returns false
+#if LLVM_VERSION < 2500
+ virtual void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &Fragment,
+ uint64_t Value) const; // implementation is empty
+#else
+ virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const; // implementation is empty
+#endif
+ virtual bool
+ MayNeedRelaxation(const MCInst &Inst
+#if LLVM_VERSION < 2500
+ , const SmallVectorImpl<MCAsmFixup> &Fixups
+#endif
+ ) const; // implementation returns false
+#if LLVM_VERSION < 2500
+ virtual void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const; // implementation is empty
+#else
+ virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; // implementation is empty
+#endif
+ virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; // implementation returns false
+#if LLVM_VERSION >= 2500
+ unsigned getNumFixupKinds() const; // implementation returns 0
+#endif
+ }; // class AMDILAsmBackend;
+} // llvm namespace
+
+#endif // AMDIL_ASM_BACKEND_H
diff --git a/src/gallium/drivers/radeon/AMDILAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDILAsmPrinter.cpp
new file mode 100644
index 00000000000..9108fb1506d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmPrinter.cpp
@@ -0,0 +1,879 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#define DEBUG_TYPE "asm-printer"
+#if !defined(NDEBUG)
+# define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+# define DEBUGME (false)
+#endif
+#include "AMDILAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <sstream>
+using namespace llvm;
+/// createAMDILCodePrinterPass - Factory for the AMDIL assembly printer.
+/// Fetches the AMDILSubtarget from TM and asks that subtarget's device
+/// object for its assembly printer, forwarding ASM_PRINTER_ARGUMENTS
+/// unchanged. The concrete printer type is therefore chosen by the device.
+///
+ASMPRINTER_RETURN_TYPE
+createAMDILCodePrinterPass(AMDIL_ASM_PRINTER_ARGUMENTS)
+{
+  const AMDILSubtarget &subtarget = TM.getSubtarget<AMDILSubtarget>();
+  return subtarget.device()->getAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+#include "AMDILGenAsmWriter.inc"
+
+/*
+// Force static initialization
+extern "C" void LLVMInitializeAMDILAsmPrinter() {
+ llvm::TargetRegistry::RegisterAsmPrinter(TheAMDILTarget,
+ createAMDILCodePrinterPass);
+}
+*/
+/// Allocates a new AMDILInstPrinter constructed from MAI and returns the
+/// raw pointer; nothing in this function releases it.
+AMDILInstPrinter *llvm::createAMDILInstPrinter(const MCAsmInfo &MAI) {
+  AMDILInstPrinter *printer = new AMDILInstPrinter(MAI);
+  return printer;
+}
+
+//
+// @param name  symbol name of a function, e.g. "__OpenCL_foo_kernel"
+// @brief strips the "__OpenCL_" prefix and the "_kernel" suffix from the
+// name and returns what is left in between ("foo" in the example).
+// The name is returned unchanged unless it really starts with the prefix,
+// really ends with the suffix, and the two do not overlap. Merely
+// containing both tokens somewhere is not enough, because cutting at
+// fixed offsets would then produce a mangled name.
+//
+static std::string
+Strip(const std::string &name)
+{
+  static const char prefix[] = "__OpenCL_";
+  static const char suffix[] = "_kernel";
+  const size_t prefixLen = sizeof(prefix) - 1;
+  const size_t suffixLen = sizeof(suffix) - 1;
+
+  // Too short to hold both tokens without them overlapping.
+  if (name.length() < prefixLen + suffixLen) {
+    return name;
+  }
+  if (name.compare(0, prefixLen, prefix) != 0
+      || name.compare(name.length() - suffixLen, suffixLen, suffix) != 0) {
+    return name;
+  }
+  return name.substr(prefixLen, name.length() - prefixLen - suffixLen);
+}
+// TODO: Add support for verbose.
+/// Builds the printer on top of AsmPrinter and creates the two helper
+/// objects it owns: the global manager and the kernel manager (both are
+/// deleted in the destructor). The debug flag comes from DEBUGME and is
+/// also pushed into the target machine.
+AMDILAsmPrinter::AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+  // Plain state first; none of it depends on the objects created below.
+  mBuffer = 0;
+  mNeedVersion = false;
+  mDebugMode = DEBUGME;
+
+  mTM = reinterpret_cast<AMDILTargetMachine*>(&TM);
+  mTM->setDebug(mDebugMode);
+
+  // The kernel manager is handed the global manager, so create that first.
+  mGlobal = new AMDILGlobalManager(mDebugMode);
+  mMeta = new AMDILKernelManager(mTM, mGlobal);
+}
+
+// Releases the kernel manager and the global manager allocated in the
+// constructor. The kernel manager was constructed with a pointer to the
+// global manager, and it is deleted first.
+// NOTE(review): EmitStartOfAsmFile hands both pointers to the subtarget
+// (setGlobalManager / setKernelManager); confirm nothing uses them through
+// the subtarget after this printer is destroyed.
+AMDILAsmPrinter::~AMDILAsmPrinter()
+{
+ delete mMeta;
+ delete mGlobal;
+}
+/// Returns the fixed, human-readable name of this pass.
+const char *AMDILAsmPrinter::getPassName() const
+{
+  return "AMDIL Assembly Printer";
+}
+
+/// Emits the text for one machine instruction.
+///
+/// The text is collected in a local string and handed to the streamer as
+/// raw text in one piece. Three shapes are possible:
+///  - a call that isMacroFunc() recognises: only the macro call is emitted;
+///  - an instruction that isMacroCall() recognises: a comment line and an
+///    "mcall(<id>)" prefix are written, then the instruction is printed;
+///  - anything else: the instruction is printed as is.
+/// In debug mode the MachineInstr itself is printed first, after a ';'.
+void
+AMDILAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  std::string text;
+  raw_string_ostream textStream(text);
+  formatted_raw_ostream O(textStream);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+
+  if (mDebugMode) {
+    O << ";" ;
+    II->print(O);
+  }
+
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+  } else {
+    if (isMacroCall(II)) {
+      // Skip the five leading characters that isMacroCall() matched.
+      const char *macroName = II->getDesc().getName() + 5;
+      const int macroId = amd::MacroDBFindMacro(macroName);
+      O << "\t;"<< macroName<<"\n";
+      O << "\tmcall("<<macroId<<")";
+      // Record the macro either for later expansion from the macro
+      // database or as a called intrinsic of the current function.
+      if (curTarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+        mMacroIDs.insert(macroId);
+      } else {
+        mMFI->addCalledIntr(macroId);
+      }
+    }
+    printInstruction(II, O);
+  }
+
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(text));
+}
+/// Emits the macro call for a call instruction whose callee is a macro.
+///
+/// The macro name is the name of the global in operand 0; when operand 0
+/// is not a global the placeholder name "unknown" is used instead.
+///
+/// The name is copied into a std::string before being passed on as a C
+/// string: StringRef::data() is not guaranteed to be NUL-terminated, and
+/// emitMCallInst() treats its argument as a C string.
+void
+AMDILAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+    OSTREAM_TYPE &O)
+{
+  std::string name("unknown");
+  if (MI->getOperand(0).isGlobal()) {
+    name = MI->getOperand(0).getGlobal()->getName().str();
+  }
+  emitMCallInst(MI, O, name.c_str());
+}
+
+/// Per-function entry point of the pass.
+///
+/// Points the printer and the kernel manager at the new function, caches
+/// its AMDILMachineFunctionInfo, records both the full symbol name
+/// (mKernelName) and its Strip()ped form (mName), then emits the function
+/// header and body. Always returns false.
+bool
+AMDILAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  MachineFunction *const current = &lMF;
+  this->MF = current;
+  mMeta->setMF(current);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  SetupMachineFunction(lMF);
+
+  // CurrentFnSym is read only after SetupMachineFunction() has run.
+  const std::string symbolName = CurrentFnSym->getName();
+  mName = Strip(symbolName);
+  mKernelName = symbolName;
+
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+/// Registers the literal(s) contained in constant C with the current
+/// function's AMDILMachineFunctionInfo.
+///
+///  - floating point: added as f32 when the type is float, else as f64;
+///  - integer: 64 bit values go to the i64 table, everything else to the
+///    i32 table tagged with the LOADCONST opcode for 8, 16 or 32 bits;
+///  - array / struct / vector (and union for LLVM_VERSION < 2500): every
+///    operand is processed recursively;
+///  - aggregate zero: a zero is added to each of the four literal tables;
+///  - anything else trips an assert.
+void
+AMDILAsmPrinter::addCPoolLiteral(const Constant *C)
+{
+  if (const ConstantFP *fp = dyn_cast<ConstantFP>(C)) {
+    if (fp->getType()->isFloatTy()) {
+      mMFI->addf32Literal(fp);
+    } else {
+      mMFI->addf64Literal(fp);
+    }
+    return;
+  }
+
+  if (const ConstantInt *integer = dyn_cast<ConstantInt>(C)) {
+    const int64_t val = integer->getSExtValue();
+    switch (integer->getBitWidth()) {
+      case 64:
+        mMFI->addi64Literal(val);
+        break;
+      case 8:
+        mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i8);
+        break;
+      case 16:
+        mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i16);
+        break;
+      default:
+        mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i32);
+        break;
+    }
+    return;
+  }
+
+  if (const ConstantArray *array = dyn_cast<ConstantArray>(C)) {
+    for (uint32_t i = 0, n = array->getNumOperands(); i < n; ++i) {
+      addCPoolLiteral(array->getOperand(i));
+    }
+    return;
+  }
+
+  if (const ConstantAggregateZero *zero
+      = dyn_cast<ConstantAggregateZero>(C)) {
+    if (zero->isNullValue()) {
+      mMFI->addi32Literal(0, AMDIL::LOADCONST_i32);
+      mMFI->addi64Literal(0);
+      mMFI->addf64Literal(0);
+      mMFI->addf32Literal(0);
+    }
+    return;
+  }
+
+  if (const ConstantStruct *record = dyn_cast<ConstantStruct>(C)) {
+    for (uint32_t i = 0, n = record->getNumOperands(); i < n; ++i) {
+      addCPoolLiteral(record->getOperand(i));
+    }
+    return;
+  }
+
+#if LLVM_VERSION < 2500
+  if (const ConstantUnion *un = dyn_cast<ConstantUnion>(C)) {
+    for (uint32_t i = 0, n = un->getNumOperands(); i < n; ++i) {
+      addCPoolLiteral(un->getOperand(i));
+    }
+    return;
+  }
+#endif
+
+  if (const ConstantVector *vec = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    for (uint32_t i = 0, n = vec->getNumOperands(); i < n; ++i) {
+      addCPoolLiteral(vec->getOperand(i));
+    }
+    return;
+  }
+
+  // TODO: Do we really need to handle ConstantPointerNull?
+  // What about BlockAddress, ConstantExpr and Undef?
+  // How would these even be generated by a valid CL program?
+  assert(0 && "Found a constant type that I don't know how to handle");
+}
+
+/// Emits a ".global@<name>" entry for GV.
+///
+/// The subtarget's global manager is asked for the array offset and the
+/// constant offset of the name. The first of the two that is not -1 is
+/// appended as ":<offset>" followed by a newline.
+// NOTE(review): when both lookups return -1 neither the offset nor the
+// newline is written, so the entry is left unterminated. Confirm that is
+// intended.
+void
+AMDILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV)
+{
+  llvm::StringRef name = GV->getName();
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  const int32_t arrayOffset
+    = curTarget->getGlobalManager()->getArrayOffset(name);
+  const int32_t constOffset
+    = curTarget->getGlobalManager()->getConstOffset(name);
+
+  O << ".global@" << name;
+  // The array offset wins when both are known.
+  const int32_t offset = (arrayOffset != -1) ? arrayOffset : constOffset;
+  if (offset != -1) {
+    O << ":" << offset << "\n";
+  }
+  O.flush();
+  OutStreamer.EmitRawText(O.str());
+}
+
+// Prints operand number opNum of MI to O. What is written depends on the
+// operand type:
+//  - register: "r<n>" for register numbers that are negative when read as
+//    signed, otherwise the register name; the swizzle from getSwizzle()
+//    follows in both cases;
+//  - immediate / FP immediate: depending on opcode and operand position
+//    either the bare value, nothing at all, or "l<value>" plus swizzle;
+//  - basic block: the block start is emitted and the function returns;
+//  - global address / external symbol: for CALL the numeric function id
+//    followed by " ; <name>"; global addresses may otherwise resolve to a
+//    literal for a local array or constant offset;
+//  - constant pool index: the literal that holds the pool entry's offset.
+// The error paths assert; when asserts are compiled out they record an
+// error message on the machine function info instead.
+// The O parameter only exists for LLVM_VERSION >= 2351.
+void
+AMDILAsmPrinter::printOperand(const MachineInstr *MI, int opNum
+#if LLVM_VERSION >= 2351
+ , OSTREAM_TYPE &O
+#endif
+ )
+{
+ const MachineOperand &MO = MI->getOperand (opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (MO.isReg()) {
+ if ((signed)MO.getReg() < 0) {
+ // FIXME: we need to remove all virtual register creation after register allocation.
+ // This is a work-around to make sure that the virtual register range does not
+ // clobber the physical register range.
+ O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+ } else {
+ O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+ }
+ } else {
+ assert(0 && "Invalid Register type");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_FPImmediate:
+ {
+ // NOTE(review): every branch below reads the value with getImm(), also
+ // for MO_FPImmediate operands; confirm that is valid for FP immediates.
+ unsigned opcode = MI->getOpcode();
+ // Last operand of the listed opcode ranges (or CBLOAD / CASE): the bare
+ // value is printed. Inside the second parenthesis the leading
+ // "a && b" binds tighter than the "||" alternatives that follow it.
+ if ((opNum == (int)(MI->getNumOperands() - 1))
+ && (opcode >= AMDIL::ATOM_A_ADD
+ && opcode <= AMDIL::ATOM_R_XOR_NORET
+ || (opcode >= AMDIL::SCRATCHLOAD
+ && opcode <= AMDIL::SCRATCHSTORE_ZW)
+ || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+ || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE_Z)
+ || (opcode >= AMDIL::UAVARENALOAD_W_i32
+ && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+ || opcode == AMDIL::CBLOAD
+ || opcode == AMDIL::CASE)
+ ){
+ O << MO.getImm();
+ } else if (((opcode >= AMDIL::VEXTRACT_v2f32
+ && opcode <= AMDIL::VEXTRACT_v4i8)
+ && (opNum == 2))) {
+ // The swizzle is encoded in the operand so the
+ // literal that represents the swizzle out of ISel
+ // can be ignored.
+ } else if ((opcode >= AMDIL::VINSERT_v2f32)
+ && (opcode <= AMDIL::VINSERT_v4i8)
+ && ((opNum == 3) || (opNum == 4))) {
+ // The swizzle is encoded in the operand so the
+ // literal that represents the swizzle out of ISel
+ // can be ignored.
+ } else if (opNum == 1 &&
+ (opcode == AMDIL::APPEND_ALLOC
+ || opcode == AMDIL::APPEND_ALLOC_NORET
+ || opcode == AMDIL::APPEND_CONSUME
+ || opcode == AMDIL::APPEND_CONSUME_NORET
+ || opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE3D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::CBLOAD)) {
+ // We don't need to emit the 'l' so we just emit
+ // the immediate as it stores the resource ID and
+ // is not a true literal.
+ O << MO.getImm();
+ } else if (opNum == 0 &&
+ (opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE3D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::IMAGE2D_WRITE
+ || opcode == AMDIL::IMAGE3D_WRITE)) {
+ O << MO.getImm();
+ } else if (opNum == 3 &&
+ (opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ)) {
+ O << MO.getImm();
+ } else if (MO.isImm() || MO.isFPImm()) {
+ // Default case: the value is used as the number of a literal, printed
+ // as "l<value>" followed by the swizzle.
+ O << "l" << MO.getImm() << getSwizzle(MI, opNum);
+ } else {
+ assert(0 && "Invalid literal/constant type");
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ }
+ }
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ EmitBasicBlockStart(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ {
+ int offset = 0;
+ const GlobalValue *gv = MO.getGlobal();
+ // Here we look up by the name for the corresponding number
+ // and we print that out instead of the name or the address
+ if (MI->getOpcode() == AMDIL::CALL) {
+ uint32_t funcNum;
+ llvm::StringRef name = gv->getName();
+ // Unnamed globals are looked up by pointer, named ones by name.
+ funcNum = name.empty()
+ ? mGlobal->getOrCreateFunctionID(gv)
+ : mGlobal->getOrCreateFunctionID(name);
+ mMFI->addCalledFunc(funcNum);
+ O << funcNum <<" ; "<< name;
+ } else if((offset = mGlobal->getArrayOffset(gv->getName()))
+ != -1) {
+ // The name has an array offset: mark the function as using local
+ // memory and print the literal that holds the offset.
+ mMFI->setUsesLocal();
+ O << "l" << mMFI->getIntLits(offset) << ".x";
+ } else if((offset = mGlobal->getConstOffset(gv->getName()))
+ != -1) {
+ // The name has a constant offset: request the data section through
+ // metadata and print the literal that holds the offset.
+ mMFI->addMetadata(";memory:datareqd");
+ O << "l" << mMFI->getIntLits(offset) << ".x";
+ } else {
+ assert(0 && "GlobalAddress without a function call!");
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+ }
+ }
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ {
+ if (MI->getOpcode() == AMDIL::CALL) {
+ uint32_t funcNum = mGlobal->getOrCreateFunctionID(
+ std::string(MO.getSymbolName()));
+ mMFI->addCalledFunc(funcNum);
+ O << funcNum << " ; "<< MO.getSymbolName();
+ // This is where pointers should get resolved
+ } else {
+ assert(0 && "ExternalSymbol without a function call!");
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[MISSING_FUNCTION_CALL]);
+ }
+ }
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ {
+ // Copies of constant buffers need to be done here
+ const kernel &tmp = mGlobal->getKernel(mKernelName);
+ O << "l" << mMFI->getIntLits(
+ tmp.CPOffsets[MO.getIndex()].first);
+ }
+ break;
+ default:
+ O << "<unknown operand type>"; break;
+ }
+}
+
+// Prints operand opNum of a memory instruction to O.
+// Every operand except number 1 is simply forwarded to printOperand().
+// Operand 1 is handled here with a reduced copy of the printOperand()
+// logic: registers, immediates and constant pool indices are supported,
+// any other operand type prints "<unknown operand type>". Unlike
+// printOperand(), the default immediate case prints "l<value>" without a
+// swizzle.
+// Modifier is not used by this function. The O parameter only exists for
+// LLVM_VERSION >= 2351.
+void
+AMDILAsmPrinter::printMemOperand(
+ const MachineInstr *MI,
+ int opNum,
+#if LLVM_VERSION >= 2351
+ OSTREAM_TYPE &O,
+#endif
+ const char *Modifier
+ )
+{
+ const MachineOperand &MO = MI->getOperand (opNum);
+ if (opNum != 1) {
+ printOperand(MI, opNum
+#if LLVM_VERSION >= 2351
+ , O
+#endif
+ );
+ } else {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (MO.isReg()) {
+ if ((signed)MO.getReg() < 0) {
+ // FIXME: we need to remove all virtual register creation after register allocation.
+ // This is a work-around to make sure that the virtual register range does not
+ // clobber the physical register range.
+ O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+ } else {
+ O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+ }
+ }
+ else {
+ assert(0 && "Invalid Register type");
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_FPImmediate:
+ {
+ // NOTE(review): opNum is always 1 on this path, so the "opNum == 0" and
+ // "opNum == 3" branches below can never be taken here.
+ unsigned opcode = MI->getOpcode();
+ // Last operand of the listed opcode ranges (or CBLOAD / CASE): the bare
+ // value is printed.
+ if ((opNum == (int)(MI->getNumOperands() - 1))
+ && (opcode >= AMDIL::ATOM_A_ADD
+ && opcode <= AMDIL::ATOM_R_XOR_NORET
+ || (opcode >= AMDIL::SCRATCHLOAD
+ && opcode <= AMDIL::SCRATCHSTORE_ZW)
+ || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
+ || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE_Z)
+ || (opcode >= AMDIL::UAVARENALOAD_W_i32
+ && opcode <= AMDIL::UAVRAWSTORE_v4i32)
+ || opcode == AMDIL::CBLOAD
+ || opcode == AMDIL::CASE)
+ ) {
+ O << MO.getImm();
+ } else if (opNum == 1 &&
+ (opcode == AMDIL::APPEND_ALLOC
+ || opcode == AMDIL::APPEND_ALLOC_NORET
+ || opcode == AMDIL::APPEND_CONSUME
+ || opcode == AMDIL::APPEND_CONSUME_NORET
+ || opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE3D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::CBLOAD)) {
+ // We don't need to emit the 'l' so we just emit
+ // the immediate as it stores the resource ID and
+ // is not a true literal.
+ O << MO.getImm();
+ } else if (opNum == 0 &&
+ (opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE3D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::IMAGE2D_WRITE
+ || opcode == AMDIL::IMAGE3D_WRITE)) {
+ O << MO.getImm();
+ } else if (opNum == 3 &&
+ (opcode == AMDIL::IMAGE2D_READ
+ || opcode == AMDIL::IMAGE2D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ_UNNORM
+ || opcode == AMDIL::IMAGE3D_READ)) {
+ O << MO.getImm();
+ } else if (MO.isImm() || MO.isFPImm()) {
+ // Default case: "l<value>", with no swizzle appended.
+ O << "l" << MO.getImm();
+ }
+ else {
+ assert(0 && "Invalid literal/constant type");
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ }
+ }
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ {
+ // Copies of constant buffers need to be done here
+ const kernel &tmp = mGlobal->getKernel(mKernelName);
+ O << "l" << mMFI->getIntLits(
+ tmp.CPOffsets[MO.getIndex()].first);
+ }
+ break;
+ default:
+ O << "<unknown operand type>"; break;
+ };
+ }
+}
+
+/// Returns the swizzle string for operand opNum of MI.
+///
+/// The operand's target flags are loaded into an OpSwizzle; its dst bit
+/// selects between the destination and the source swizzle table, and its
+/// swizzle field is the index passed to the chosen lookup.
+const char*
+AMDILAsmPrinter::getSwizzle(const MachineInstr *MI, int opNum)
+{
+  OpSwizzle flags;
+  flags.u8all = MI->getOperand(opNum).getTargetFlags();
+  if (flags.bits.dst) {
+    return getDstSwizzle(flags.bits.swizzle);
+  }
+  return getSrcSwizzle(flags.bits.swizzle);
+}
+
+/// Emits the fixed preamble of an output file.
+///
+/// Registers the global and kernel managers with the subtarget, lets the
+/// global manager process the module, assigns an id to every function in
+/// it, and then writes: an optional dummy macro definition (only when the
+/// device supports the macro database), the IL version line, the constant
+/// buffer declaration, the eight literals l0..l7, and the marker lines up
+/// to ";DEBUGSTART".
+/// For LLVM_VERSION >= 2351 the text is buffered locally and handed to the
+/// streamer at the end.
+void
+AMDILAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+#if LLVM_VERSION >= 2351
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+#endif
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+  subtarget->setGlobalManager(mGlobal);
+  subtarget->setKernelManager(mMeta);
+  mGlobal->processModule(M, mTM);
+
+  // Map all the known names to a unique number
+  for (Module::const_iterator fn = M.begin(), fnEnd = M.end();
+      fn != fnEnd; ++fn) {
+    mGlobal->getOrCreateFunctionID(fn);
+  }
+
+  if (subtarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+    // Since we are using the macro db, the first token must be a macro.
+    // So we make up a macro that is never used.
+    // -1 was the original choice for the id, but the IL text translator
+    // treats ids as unsigned integers.
+    O << "mdef(16383)_out(1)_in(2)\n"
+      << "mov r0, in0\n"
+      << "mov r1, in1\n"
+      << "div_zeroop(infinity) r0.x___, r0.x, r1.x\n"
+      << "mov out0, r0\n"
+      << "mend\n";
+  }
+
+  // We need to increase the number of reserved literals for
+  // any literals we output manually instead of via the
+  // emitLiteral function. This function should never
+  // have any executable code in it. Only declarations
+  // and the main function patch symbol.
+  const bool isHDTest
+    = subtarget->device()->getGeneration() == AMDILDeviceInfo::HDTEST;
+  O << (isHDTest ? "il_cs_3_0\n" : "il_cs_2_0\n");
+
+  O << "dcl_cb cb0[10] ; Constant buffer that holds ABI data\n"
+    << "dcl_literal l0, 4, 1, 2, 3\n"
+    << "dcl_literal l1, 0x00FFFFFF, -1, -2, -3\n"
+    << "dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE,0x000000FF,0xFFFFFFFC\n"
+    << "dcl_literal l3, 24, 16, 8, 0xFFFFFFFF\n"
+    << "dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF\n"
+    << "dcl_literal l5, 0, 4, 8, 12\n"
+    << "dcl_literal l6, 32, 32, 32, 32\n"
+    << "dcl_literal l7, 24, 31, 16, 31\n";
+  O << ";$$$$$$$$$$\n"
+    << "endmain\n"
+    << ";DEBUGSTART\n";
+#if LLVM_VERSION >= 2351
+  OutStreamer.EmitRawText(O.str());
+#endif
+}
+/// Emits the trailer of an output file.
+///
+/// Writes ";DEBUGEND", then (when the device supports the macro database)
+/// the text of every macro recorded in mMacroIDs, then the data section
+/// produced by the global manager and the final "end" line. In _DEBUG
+/// builds with debug mode on, the global manager and the target machine
+/// are dumped as well.
+///
+/// The line count handed to MacroDBGetMacro() starts at zero for every
+/// macro and a null result is skipped. A failed lookup therefore emits
+/// nothing instead of reading an uninitialised count or dereferencing a
+/// null table.
+void
+AMDILAsmPrinter::EmitEndOfAsmFile(Module &M)
+{
+#if LLVM_VERSION >= 2351
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+#endif
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  O << ";DEBUGEND\n";
+  if (curTarget->device()->isSupported(AMDILDeviceInfo::MacroDB)) {
+    for (llvm::DenseSet<uint32_t>::iterator msb = mMacroIDs.begin()
+        , mse = mMacroIDs.end(); msb != mse; ++msb) {
+      int idx = *msb;
+      int lines = 0;
+      const char* *macro = amd::MacroDBGetMacro(&lines, idx);
+      if (!macro) {
+        continue;
+      }
+      for (int k = 0; k < lines; ++k) {
+        O << macro[k];
+      }
+    }
+  }
+  mGlobal->dumpDataSection(O, mMeta);
+  O << "\nend\n";
+#ifdef _DEBUG
+  if (mDebugMode) {
+    mGlobal->print(O);
+    mTM->dump(O);
+  }
+#endif
+#if LLVM_VERSION >= 2351
+  OutStreamer.EmitRawText(O.str());
+#endif
+}
+// Placeholder: this hook is not implemented for AMDIL. It asserts
+// unconditionally; with asserts compiled out it does nothing.
+void
+AMDILAsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const
+{
+ assert(0 && "When is this function hit!");
+}
+/// Inline-asm operand printing is not implemented for AMDIL.
+///
+/// Asserts unconditionally. When asserts are compiled out it returns true,
+/// which is how AsmPrinter hooks of this kind report that the operand
+/// could not be printed; returning false would claim success although
+/// nothing was written.
+bool
+AMDILAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+    unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return true;
+}
+/// Inline-asm memory operand printing is not implemented for AMDIL.
+///
+/// Asserts unconditionally. When asserts are compiled out it returns true,
+/// which is how AsmPrinter hooks of this kind report that the operand
+/// could not be printed; returning false would claim success although
+/// nothing was written.
+bool
+AMDILAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+    unsigned int OpNo, unsigned int AsmVariant, const char *ExtraCode)
+{
+  assert(0 && "When is this function hit!");
+  return true;
+}
+// Placeholder: target-specific constant pool values are not implemented
+// for AMDIL. It asserts unconditionally; with asserts compiled out it does
+// nothing.
+ void
+AMDILAsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV)
+{
+ assert(0 && "When is this function hit!");
+}
+// Placeholder (two-argument overload): not implemented for AMDIL. It
+// asserts unconditionally; with asserts compiled out it does nothing.
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const
+{
+ assert(0 && "When is this function hit!");
+}
+// Placeholder (three-argument overload): not implemented for AMDIL. It
+// asserts unconditionally; with asserts compiled out it does nothing.
+void
+AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const
+{
+ assert(0 && "When is this function hit!");
+}
+// Placeholder: PIC jump table entries are not implemented for AMDIL. It
+// asserts unconditionally; with asserts compiled out it does nothing.
+void
+AMDILAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const
+{
+ assert(0 && "When is this function hit!");
+}
+
+// Emits everything that precedes the instructions of the current function.
+//
+// The text starts with ";DEBUGEND". The function id is looked up by the
+// stripped name (mName), or by the Function pointer when that name is
+// empty.
+//  - Kernel: the kernel manager prints the header, argument metadata,
+//    group size and declarations. Literals are then registered for the
+//    kernel's local arrays, the global constant arrays and the kernel's
+//    constant pointers before they are emitted. The wrapper then copies
+//    the arguments, calls the function by id, prints the footer and
+//    metadata, and opens the real body with "func <id>".
+//  - Non-kernel: only "func <id>" and the argument metadata are emitted.
+//    A function without a name is given the name "unknown_<id>".
+// The collected text is passed to the streamer as raw text.
+ void
+AMDILAsmPrinter::EmitFunctionBodyStart()
+{
+ SmallString<1024> Str;
+ raw_svector_ostream O(Str);
+
+ bool isKernel = false;
+ // Writing an empty string has no visible effect on the output.
+ O << "";
+ O << ";DEBUGEND\n";
+ // mBuffer is incremented once per function and passed to
+ // processArgMetadata() below.
+ ++mBuffer;
+ isKernel = mGlobal->isKernel(mKernelName);
+ uint32_t id = mName.empty()
+ ? mGlobal->getOrCreateFunctionID(MF->getFunction())
+ : mGlobal->getOrCreateFunctionID(mName);
+ mMeta->setKernel(isKernel);
+ mMeta->setID(id);
+ if (isKernel) {
+ mMeta->printHeader(this, O, mKernelName);
+ mMeta->processArgMetadata(O, mBuffer, isKernel);
+ mMeta->printGroupSize(O);
+ mMeta->printDecls(this, O);
+ // getKernel() returns a const reference; the const is cast away here.
+ kernel &tmp = const_cast<kernel&>(mGlobal->getKernel(mKernelName));
+ // add the literals for the offsets and sizes of
+ // all kernel declared local arrays
+ if (tmp.lvgv) {
+ localArg *lptr = tmp.lvgv;
+ llvm::SmallVector<arraymem*, DEFAULT_VEC_SLOTS>::iterator lmb, lme;
+ for (lmb = lptr->local.begin(), lme = lptr->local.end();
+ lmb != lme; ++lmb) {
+ mMFI->addi32Literal((*lmb)->offset);
+ mMFI->addi32Literal((*lmb)->vecSize);
+ mMFI->setUsesLocal();
+ }
+ }
+ // Add the literals for the offsets and sizes of
+ // all the globally scoped constant arrays
+ for (StringMap<constPtr>::iterator cmb = mGlobal->consts_begin(),
+ cme = mGlobal->consts_end(); cmb != cme; ++cmb) {
+ mMFI->addi32Literal((cmb)->second.offset);
+ mMFI->addi32Literal((cmb)->second.size);
+ mMFI->addMetadata(";memory:datareqd");
+ }
+
+ // Add the literals for the offsets and sizes of
+ // all the kernel constant arrays
+ // NOTE(review): this loop adds size before offset, while the two loops
+ // above add offset first; confirm the different order is intended.
+ llvm::SmallVector<constPtr, DEFAULT_VEC_SLOTS>::const_iterator cpb, cpe;
+ for (cpb = tmp.constPtr.begin(), cpe = tmp.constPtr.end();
+ cpb != cpe; ++cpb) {
+ mMFI->addi32Literal(cpb->size);
+ mMFI->addi32Literal(cpb->offset);
+ }
+ mMeta->emitLiterals(O);
+ // NOTE(review): an earlier comment here spoke of adding 1 to a size, but
+ // no such addition is visible in this function.
+ mMeta->printArgCopies(O, this);
+ O << "call " << id << " ; " << mName << "\n";
+ mMeta->printFooter(O);
+ mMeta->printMetaData(O, id, isKernel);
+ O << "func " << id << " ; " << mName << "\n";
+ } else {
+ if (mName.empty()) {
+ std::stringstream ss;
+ ss << "unknown_" << id;
+ mName = ss.str();
+ }
+ mMeta->setName(mName);
+ O << "func " << id << " ; " << mName << "\n";
+ mMeta->processArgMetadata(O, mBuffer, false);
+ }
+ O.flush();
+ OutStreamer.EmitRawText(O.str());
+}
+/// Emits everything that follows the instructions of the current function:
+/// the "endfunc" line, the function's metadata and the ";DEBUGSTART"
+/// marker. The kernel manager's per-function state is cleared before the
+/// marker is written.
+///
+/// A function without a name is given the name "unknown_<id>". For kernels
+/// the name is pushed into the kernel manager before the metadata is
+/// printed.
+void
+AMDILAsmPrinter::EmitFunctionBodyEnd()
+{
+  SmallString<1024> Str;
+  raw_svector_ostream O(Str);
+
+  uint32_t id;
+  if (mName.empty()) {
+    id = mGlobal->getOrCreateFunctionID(MF->getFunction());
+    std::stringstream generated;
+    generated << "unknown_" << id;
+    mName = generated.str();
+  } else {
+    id = mGlobal->getOrCreateFunctionID(mName);
+  }
+
+  const bool isKernel = mGlobal->isKernel(mKernelName);
+  O << "endfunc ; " << mName << "\n";
+  if (isKernel) {
+    mMeta->setName(mName);
+  }
+  mMeta->printMetaData(O, id, false);
+
+  mMeta->clear();
+  O << ";DEBUGSTART\n";
+  O.flush();
+  OutStreamer.EmitRawText(O.str());
+}
+/// Registers the constant pool of the current function with the literal
+/// tables. Nothing is done for functions that are not kernels.
+///
+/// The global manager first computes the constant pool offsets for the
+/// kernel. Every offset is then added as an i32 literal (together with a
+/// ";memory:datareqd" metadata entry), and every constant pool entry is
+/// passed to addCPoolLiteral().
+void
+AMDILAsmPrinter::EmitConstantPool()
+{
+  // If we aren't a kernel, we should not need to
+  // emit constant pool data yet
+  if (!mGlobal->isKernel(mKernelName)) {
+    return;
+  }
+
+  kernel &krnl = const_cast<kernel&>(mGlobal->getKernel(mKernelName));
+  mGlobal->calculateCPOffsets(MF, krnl);
+
+  // Add all the constant pool offsets to the literal table
+  for (uint32_t idx = 0; idx < krnl.CPOffsets.size(); ++idx) {
+    mMFI->addMetadata(";memory:datareqd");
+    mMFI->addi32Literal(krnl.CPOffsets[idx].first);
+  }
+
+  // Add all the constant pool constants to the literal tables
+  const std::vector<MachineConstantPoolEntry> &entries
+    = MF->getConstantPool()->getConstants();
+  const uint32_t numEntries = entries.size();
+  for (uint32_t idx = 0; idx < numEntries; ++idx) {
+    addCPoolLiteral(entries[idx].Val.ConstVal);
+  }
+}
+/// Intentionally emits nothing: no entry label is written here.
+/// (An assert that used to follow the return statement could never
+/// execute and has been removed.)
+void
+AMDILAsmPrinter::EmitFunctionEntryLabel()
+{
+}
+
+/// Returns true when the name of MI's instruction description starts with
+/// the five characters "MACRO".
+bool
+AMDILAsmPrinter::isMacroCall(const MachineInstr *MI) {
+  const char *descName = MI->getDesc().getName();
+  return strncmp(descName, "MACRO", 5) == 0;
+}
+
+/// Returns true when MI is a CALL whose operand 0 is a global with a name
+/// that the macro database knows.
+///
+/// Side effect: when the callee name starts with "__atom_" or "__atomic_"
+/// the kernel manager is told via setOutputInst(), whether or not the
+/// name is found in the macro database.
+///
+/// The name is copied into a std::string for the database lookup because
+/// StringRef::data() is not guaranteed to be NUL-terminated.
+bool
+AMDILAsmPrinter::isMacroFunc(const MachineInstr *MI) {
+  if (MI->getOpcode() != AMDIL::CALL) {
+    return false;
+  }
+  if (!MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  const llvm::StringRef nameRef = MI->getOperand(0).getGlobal()->getName();
+  if (nameRef.startswith("__atom_")
+      || nameRef.startswith("__atomic_")) {
+    mMeta->setOutputInst();
+  }
+  return amd::MacroDBFindMacro(nameRef.str().c_str()) != -1;
+}
+/// Emits one "\tmcall(<id>)(<outputs>),(<inputs>) ;<name>" line for the
+/// macro called `name`.
+///
+/// Nothing is emitted when the macro database does not know `name`. That
+/// check is made before the database is asked for the input and output
+/// counts, so it is never queried with the "not found" id -1.
+///
+/// The macro id is recorded either in mMacroIDs (device supports the macro
+/// database) or as a called intrinsic of the current function. Both
+/// register lists are taken from the GPRF32 register class, starting at
+/// its register 0; at least one register is printed in each list.
+void
+AMDILAsmPrinter::emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name)
+{
+  const int macronum = amd::MacroDBFindMacro(name);
+  if (macronum == -1) {
+    return;
+  }
+  const int numIn = amd::MacroDBNumInputs(macronum);
+  const int numOut = amd::MacroDBNumOutputs(macronum);
+
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  if (curTarget->device()->isSupported(
+        AMDILDeviceInfo::MacroDB)) {
+    mMacroIDs.insert(macronum);
+  } else {
+    mMFI->addCalledIntr(macronum);
+  }
+  const TargetRegisterClass *trc = MF->getTarget()
+    .getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
+  O << "\tmcall(" << macronum << ")(";
+  int x;
+  // All but the last register are followed by ", "; the last one closes
+  // the list.
+  for (x = 0; x < numOut - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x)) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x)) << "),(";
+  for (x = 0; x < numIn - 1; ++x) {
+    O << getRegisterName(trc->getRegister(x)) << ", ";
+  }
+  O << getRegisterName(trc->getRegister(x)) << ")";
+  O << " ;" << name <<"\n";
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILAsmPrinter.h b/src/gallium/drivers/radeon/AMDILAsmPrinter.h
new file mode 100644
index 00000000000..b88cb667722
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILAsmPrinter.h
@@ -0,0 +1,284 @@
+//===-------- AMDILAsmPrinter.h --- AMDIL Asm Printer class ---------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_ASM_PRINTER_H_
+#define _AMDIL_ASM_PRINTER_H_
+#include "AMDIL.h"
+#include "AMDILLLVMVersion.h"
+#include "macrodata.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm
+{
+  // Forward declarations of the types that the printer interface below
+  // only refers to by pointer or reference.
+  class AMDILKernelManager;
+  class AMDILGlobalManager;
+  class AMDILTargetMachine;
+  class AMDILMachineFunctionInfo;
+  class AnalysisUsage;
+  class Constant;
+  class Function;
+  class Module;
+  class MachineInstr;
+  class MachineBasicBlock;
+  class MachineConstantPoolValue;
+  class MachineFunction;
+  // Spelled to match the type used by printPICJumpTableEntry below.
+  class MachineJumpTableInfo;
+  class raw_ostream;
+  class MCStreamer;
+  class MCSymbol;
+  class MCInst;
+  class MCContext;
+
+
+ // AsmPrinter subclass that prints AMDIL assembly text.
+ class LLVM_LIBRARY_VISIBILITY AMDILAsmPrinter : public AsmPrinter
+ {
+ public:
+ //
+ // Constructor for the AMDIL specific AsmPrinter class.
+ // The interface is defined by LLVM proper; refer to the LLVM
+ // AsmPrinter documentation for more information.
+ //
+ explicit AMDILAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+ //
+ // Destructor for the AsmPrinter class that deletes all the
+ // allocated memory
+ //
+ virtual ~AMDILAsmPrinter();
+
+ //
+ // @param MI Machine instruction to print the operand of
+ // @param opNum operand to print from the specified machine instruction
+ // @param O The output stream for the operand (only part of the
+ // signature when LLVM_VERSION >= 2351)
+ // @brief Based on the register type, print out register specific
+ // information
+ // and add swizzle information in the cases that require it
+ //
+ virtual void
+ printOperand(const MachineInstr *MI, int opNum
+#if LLVM_VERSION >= 2351
+ , OSTREAM_TYPE &O
+#endif
+ );
+
+ void
+ EmitGlobalVariable(const GlobalVariable *GV);
+ // Overriding ALL AsmPrinter.h virtual functions to better
+ // understand how everything works
+ void
+ EmitStartOfAsmFile(Module &M);
+ void
+ EmitEndOfAsmFile(Module &M);
+ void
+ PrintSpecial(const MachineInstr *MI, const char *Code) const;
+ bool
+ PrintAsmOperand(const MachineInstr *MI, unsigned int OpNo,
+ unsigned int AsmVariant, const char *ExtraCode);
+ bool
+ PrintAsmMemoryOperand(const MachineInstr *MI, unsigned int OpNo,
+ unsigned int AsmVariant,
+ const char *ExtraCode);
+ void
+ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+ void
+ printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void
+ printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ void
+ printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const;
+ virtual void
+ EmitInstruction(const MachineInstr *MI);
+ void
+ EmitFunctionBodyStart();
+ void
+ EmitFunctionBodyEnd();
+ void
+ EmitConstantPool();
+ void
+ EmitFunctionEntryLabel();
+
+ //
+ // @param MI Machine instruction to print memory operand of
+ // @param opNum operand to print from the specified machine instruction
+ // @param O The output stream for the operand (only part of the
+ // signature when LLVM_VERSION >= 2351)
+ // @param Modifier optional modifier for the memory operand
+ // @brief Print the memory operand based on the register type
+ //
+ void
+ printMemOperand(const MachineInstr *MI, int opNum,
+#if LLVM_VERSION >= 2351
+ OSTREAM_TYPE &O,
+#endif
+ const char *Modifier = NULL);
+
+ //
+ // @param MI Machine instruction to print to the buffer
+ // @param O The output stream to print to
+ // @brief autogenerated function from tablegen files that prints out
+ // the assembly format of the specified instruction
+ //
+ void
+ printInstruction(const MachineInstr *MI , OSTREAM_TYPE &O); // autogenerated
+
+ //
+ // @param RegNo register number to look up
+ // @return the printable name of the register
+ //
+ const char *getRegisterName(unsigned RegNo);
+
+ //
+ // @param F MachineFunction to print the assembly for
+ // @brief parse the specified machine function and print
+ // out the assembly for all the instructions in the function
+ //
+ bool
+ runOnMachineFunction(MachineFunction &F);
+
+ //
+ // @param MI Machine Instruction to determine if it is a macro call
+ // @brief Query to see if the instruction is a Macro or not
+ // @return true if instruction is a macro
+ //
+ bool
+ isMacroCall(const MachineInstr *MI);
+
+ //
+ // @param MI Machine Instruction to determine if the function is a macro
+ // @brief determine if the function is a macro function or a normal
+ // function
+ // @return true if the function call should be transformed to a macro,
+ // false otherwise
+ //
+ bool
+ isMacroFunc(const MachineInstr *MI);
+
+
+ //
+ // @param MI Machine instruction to print swizzle for
+ // @param opNum the operand number to print swizzle for
+ // @brief print out the swizzle for a scalar register class
+ //
+ const char*
+ getSwizzle(const MachineInstr *MI, int opNum);
+
+ //
+ // @return the name of this specific pass
+ //
+ virtual const char*
+ getPassName() const;
+
+
+ protected:
+ //
+ // @param MI Machine instruction to emit the macro code for
+ // @param O The output stream to emit to
+ //
+ // Emits a fully functional macro function that uses the argument
+ // registers as the macro arguments.
+ //
+ virtual void
+ emitMacroFunc(const MachineInstr *MI , OSTREAM_TYPE &O);
+
+ // Flag whether to print out debug information
+ // or not.
+ bool mDebugMode;
+
+ //
+ // @param MI Machine instruction the macro call is emitted for
+ // @param O The output stream to emit to
+ // @param name name of the macro to look up in the macro database
+ // @brief Emit an mcall instruction for the named macro; nothing is
+ // emitted when the name is not found in the macro database.
+ //
+ void
+ emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name);
+
+ // Set of all macros that are used in this compilation unit.
+ llvm::DenseSet<uint32_t> mMacroIDs;
+
+ /// Pointer to the Target Machine that the asm printer
+ /// should be printing compatible code for.
+ AMDILTargetMachine *mTM;
+
+ /// pointer to the kernel manager that keeps track
+ /// of the metadata required by the runtime to
+ /// call a kernel correctly.
+ AMDILKernelManager *mMeta;
+
+ /// Global variable manager that is used to keep
+ /// track of the metadata passed to the backend
+ /// from the frontend of the compiler
+ AMDILGlobalManager *mGlobal;
+
+ /// Class that holds information about the current
+ /// function that is being processed.
+ AMDILMachineFunctionInfo *mMFI;
+
+ /// Name of the current function being printed
+ /// by the asm printer
+ std::string mName;
+
+ /// name of the kernel wrapper of the current function
+ std::string mKernelName;
+
+ private:
+ /// Registers a constant-pool constant as a literal.
+ void addCPoolLiteral(const Constant *C);
+
+ /// The constant buffer that the data should be
+ /// allocated in by the runtime
+ int mBuffer;
+
+ /// Flag to determine whether the printer needs
+ /// to print assembly version information in the metadata
+ bool mNeedVersion;
+ };
+
+
+} // end of llvm namespace
+
+#endif // _AMDIL_ASM_PRINTER_H_
diff --git a/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp b/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp
new file mode 100644
index 00000000000..4cb434a3acb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILBarrierDetect.cpp
@@ -0,0 +1,326 @@
+//===----- AMDILBarrierDetect.cpp - Barrier Detect pass -*- C++ -*- ------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+// Debug-type tag for this pass; DEBUGME below tests it.
+#define DEBUG_TYPE "BarrierDetect"
+// DEBUGME is the constant 0 unless DEBUG is defined. With DEBUG defined it
+// is true only when DebugFlag is set and DEBUG_TYPE is the current debug
+// type.
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+// The barrier detect pass determines if a barrier has been duplicated in the
+// source program which can cause undefined behaviour if more than a single
+// wavefront is executed in a group. This is because LLVM does not have an
+// execution barrier and if this barrier function gets duplicated, undefined
+// behaviour can occur. In order to work around this, we detect the duplicated
+// barrier and then make the work-group execute in a single wavefront mode,
+// essentially making the barrier a no-op.
+
+namespace
+{
+ // Function pass that looks for barrier and memory-fence calls and
+ // retargets them to more specific functions.
+ class LLVM_LIBRARY_VISIBILITY AMDILBarrierDetect : public FunctionPass
+ {
+ // Target machine the pass was created for; used to obtain the subtarget.
+ TargetMachine &TM;
+ // Pass identification; its address/value is handed to FunctionPass.
+ static char ID;
+ public:
+ AMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+ ~AMDILBarrierDetect();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ private:
+ // Per-instruction callbacks applied by runOnFunction.
+ bool detectBarrier(BasicBlock::iterator *BBI);
+ bool detectMemFence(BasicBlock::iterator *BBI);
+ // Value returned by runOnFunction; reset at the start of each function.
+ bool mChanged;
+ // Barrier IDs already seen in the current function; cleared per function.
+ SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+ // Subtarget of TM, refreshed at the start of each runOnFunction call.
+ const AMDILSubtarget *mStm;
+
+ // Constants used to define memory type.
+ static const unsigned int LOCAL_MEM_FENCE = 1<<0;
+ static const unsigned int GLOBAL_MEM_FENCE = 1<<1;
+ static const unsigned int REGION_MEM_FENCE = 1<<2;
+ };
+ char AMDILBarrierDetect::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+  // Factory for the barrier detect pass. The returned pass is allocated with
+  // new; the caller takes ownership of it.
+  FunctionPass *
+  createAMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+  {
+    FunctionPass *barrierPass = new AMDILBarrierDetect(TM, OptLevel);
+    return barrierPass;
+  }
+} // llvm namespace
+
+// Builds the pass for the given target machine.
+//
+// @param TM       target machine whose subtarget is queried in runOnFunction
+// @param OptLevel accepted for interface compatibility; not used
+//
+// mChanged and mStm are given defined values here so the object never holds
+// indeterminate state before the first runOnFunction call.
+AMDILBarrierDetect::AMDILBarrierDetect(TargetMachine &TM,
+    CodeGenOpt::Level OptLevel)
+  :
+#if LLVM_VERSION >= 2500
+  FunctionPass(ID),
+#else
+  FunctionPass((intptr_t)&ID),
+#endif
+  TM(TM),
+  mChanged(false),
+  mStm(0)
+{
+}
+
+// Destructor; the body is empty, so no explicit cleanup is performed here.
+AMDILBarrierDetect::~AMDILBarrierDetect()
+{
+}
+
+// Handles one instruction: if it is a call whose callee name starts with
+// "__amd_barrier", records duplicate-barrier metadata and retargets the call
+// to barrier / barrierGlobal / barrierLocal / barrierRegion according to the
+// constant flag in its last argument.
+//
+// @param BBI iterator positioned at the instruction to inspect
+// @return always false; rewritten calls are reported through mChanged
+//
+// Calls that lack arguments, whose flag argument is not a constant integer,
+// or whose callee is not a Function are left untouched instead of being
+// dereferenced blindly.
+bool AMDILBarrierDetect::detectBarrier(BasicBlock::iterator *BBI)
+{
+  Instruction *inst = (*BBI);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+
+  if (!CI || !CI->getNumOperands()) {
+    return false;
+  }
+  const unsigned numOps = inst->getNumOperands();
+  // The callee operand is stored first or last depending on LLVM_VERSION;
+  // the call arguments occupy the remaining slots in order.
+#if LLVM_VERSION < 2500
+  const unsigned calleeIdx = 0;
+  const unsigned firstArgIdx = 1;
+#else
+  const unsigned calleeIdx = numOps - 1;
+  const unsigned firstArgIdx = 0;
+#endif
+  const Value *funcVal = CI->getOperand(calleeIdx);
+  if (!funcVal
+      || strncmp(funcVal->getName().data(), "__amd_barrier", 13) != 0) {
+    return false;
+  }
+
+  // With two or more arguments the first one is the barrier ID. Seeing the
+  // same ID twice in one function means the barrier was duplicated.
+  if (numOps >= 3) {
+    const ConstantInt *idConst =
+      dyn_cast<ConstantInt>(inst->getOperand(firstArgIdx));
+    if (idConst) {
+      const int64_t barrierID = idConst->getSExtValue();
+      if (std::find(bVecMap.begin(), bVecMap.end(), barrierID)
+          == bVecMap.end()) {
+        bVecMap.push_back(barrierID);
+      } else if (mStm->device()->isSupported(AMDILDeviceInfo::BarrierDetect)) {
+        AMDILMachineFunctionInfo *MFI =
+          getAnalysis<MachineFunctionAnalysis>().getMF()
+          .getInfo<AMDILMachineFunctionInfo>();
+        MFI->addMetadata(";limitgroupsize");
+        MFI->addErrorMsg(amd::CompilerWarningMessage[BAD_BARRIER_OPT]);
+      }
+    }
+  }
+  if (mStm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    AMDILMachineFunctionInfo *MFI =
+      getAnalysis<MachineFunctionAnalysis>().getMF()
+      .getInfo<AMDILMachineFunctionInfo>();
+    MFI->addErrorMsg(amd::CompilerWarningMessage[LIMIT_BARRIER]);
+    MFI->addMetadata(";limitgroupsize");
+    MFI->setUsesLocal();
+  }
+
+  // A call without arguments has no flag operand to read.
+  if (numOps < 2) {
+    return false;
+  }
+  // Index of the last argument under either operand layout.
+  const unsigned lastArgIdx = firstArgIdx + numOps - 2;
+  const ConstantInt *flagConst =
+    dyn_cast<ConstantInt>(inst->getOperand(lastArgIdx));
+  Function *iF = dyn_cast<Function>(inst->getOperand(calleeIdx));
+  if (!flagConst || !iF) {
+    return false;
+  }
+  const int64_t flags = flagConst->getSExtValue();
+  if (flags <= 0) {
+    return false;
+  }
+
+  // Pick the specialised barrier; local and region variants are only used
+  // when the device implements that memory in hardware.
+  const char *name = "barrier";
+  if (flags == GLOBAL_MEM_FENCE) {
+    name = "barrierGlobal";
+  } else if (flags == LOCAL_MEM_FENCE
+      && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+    name = "barrierLocal";
+  } else if (flags == REGION_MEM_FENCE
+      && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+    name = "barrierRegion";
+  }
+  Module *M = iF->getParent();
+  Function *nF =
+    dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+  // getOrInsertFunction may hand back something other than a Function; in
+  // that case the call is left as it was.
+  if (!nF) {
+    return false;
+  }
+  inst->setOperand(calleeIdx, nF);
+  // The IR was modified, so runOnFunction must report a change.
+  mChanged = true;
+  return false;
+}
+
+// Handles one instruction: if it is a one-argument call to a function whose
+// name starts with mem_fence, read_mem_fence or write_mem_fence, retargets
+// the call to the _global / _local / _region variant selected by the
+// constant flag argument, or to plain "mem_fence" when no variant applies.
+//
+// @param BBI iterator positioned at the instruction to inspect
+// @return always false; rewritten calls are reported through mChanged
+//
+// Calls whose flag argument is not a constant integer are left untouched.
+bool AMDILBarrierDetect::detectMemFence(BasicBlock::iterator *BBI)
+{
+  Instruction *inst = (*BBI);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+
+  // Exactly two operands: the callee plus a single flag argument.
+  if (!CI || CI->getNumOperands() != 2) {
+    return false;
+  }
+
+  // Which of the two operands is the callee depends on LLVM_VERSION.
+#if LLVM_VERSION < 2500
+  const unsigned calleeIdx = 0;
+  const unsigned flagIdx = 1;
+#else
+  const unsigned calleeIdx = 1;
+  const unsigned flagIdx = 0;
+#endif
+  Function *iF = dyn_cast<Function>(inst->getOperand(calleeIdx));
+  if (!iF) {
+    return false;
+  }
+
+  const char *fence_local_name;
+  const char *fence_global_name;
+  const char *fence_region_name;
+  const char *fence_name = "mem_fence";
+  const char *calleeName = iF->getName().data();
+  if (strncmp(calleeName, "mem_fence", 9) == 0) {
+    fence_local_name = "mem_fence_local";
+    fence_global_name = "mem_fence_global";
+    fence_region_name = "mem_fence_region";
+  } else if (strncmp(calleeName, "read_mem_fence", 14) == 0) {
+    fence_local_name = "read_mem_fence_local";
+    fence_global_name = "read_mem_fence_global";
+    fence_region_name = "read_mem_fence_region";
+  } else if (strncmp(calleeName, "write_mem_fence", 15) == 0) {
+    fence_local_name = "write_mem_fence_local";
+    fence_global_name = "write_mem_fence_global";
+    fence_region_name = "write_mem_fence_region";
+  } else {
+    return false;
+  }
+
+  const ConstantInt *flagConst =
+    dyn_cast<ConstantInt>(inst->getOperand(flagIdx));
+  if (!flagConst) {
+    return false;
+  }
+  const int64_t flags = flagConst->getSExtValue();
+  if (flags <= 0) {
+    return false;
+  }
+
+  // Local and region variants are only used when the device implements that
+  // memory in hardware; otherwise the generic fence name is kept.
+  const char *name = fence_name;
+  if (flags == GLOBAL_MEM_FENCE) {
+    name = fence_global_name;
+  } else if (flags == LOCAL_MEM_FENCE
+      && mStm->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+    name = fence_local_name;
+  } else if (flags == REGION_MEM_FENCE
+      && mStm->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+    name = fence_region_name;
+  }
+  Module *M = iF->getParent();
+  Function *nF =
+    dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
+  // getOrInsertFunction may hand back something other than a Function; in
+  // that case the call is left as it was.
+  if (!nF) {
+    return false;
+  }
+  inst->setOperand(calleeIdx, nF);
+  // The IR was modified, so runOnFunction must report a change.
+  mChanged = true;
+  return false;
+}
+
+// Pass entry point: applies detectBarrier and then detectMemFence to the
+// function through safeNestedForEach.
+//
+// @param MF function to process
+// @return the value of mChanged after both sweeps
+bool AMDILBarrierDetect::runOnFunction(Function &MF)
+{
+  // Start every function with fresh per-function state.
+  mStm = &TM.getSubtarget<AMDILSubtarget>();
+  bVecMap.clear();
+  mChanged = false;
+
+  // First sweep: barrier calls.
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+      std::bind1st(
+        std::mem_fun(
+          &AMDILBarrierDetect::detectBarrier), this));
+
+  // Second sweep: memory fence calls.
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+      std::bind1st(
+        std::mem_fun(
+          &AMDILBarrierDetect::detectMemFence), this));
+
+  return mChanged;
+}
+
+// Returns the fixed, human-readable name of this pass.
+const char* AMDILBarrierDetect::getPassName() const
+{
+ return "AMDIL Barrier Detect Pass";
+}
+
+// Module-level initialization hook. Does nothing with M and always returns
+// false.
+bool AMDILBarrierDetect::doInitialization(Module &M)
+{
+ return false;
+}
+
+// Module-level finalization hook. Does nothing with M and always returns
+// false.
+bool AMDILBarrierDetect::doFinalization(Module &M)
+{
+ return false;
+}
+
+// Declares the analyses this pass depends on.
+//
+// @param AU usage record to fill in
+void AMDILBarrierDetect::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ // detectBarrier reaches AMDILMachineFunctionInfo through this analysis.
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ // All analyses are declared preserved.
+ AU.setPreservesAll();
+}
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td
new file mode 100644
index 00000000000..b15a3e72613
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -0,0 +1,149 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+// Capability features. The SubtargetFeature arguments are, in order: the
+// feature name, the subtarget attribute it sets, the value assigned to that
+// attribute, and a description. Every feature in this group assigns "true"
+// to one CapsOverride entry indexed by an AMDILDeviceInfo capability.
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDILDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDILDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDILDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDILDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDILDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDILDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDILDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+// Pointer-width features. Unlike the capability features, these set plain
+// subtarget members rather than CapsOverride entries.
+// NOTE(review): the value assigned to mIs64bit is "false", so this feature
+// does not appear to turn 64bit addressing on -- confirm this is intended.
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "mIs64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+// NOTE(review): the def is named Feature32on64BitPtr but the feature string
+// is "64on32BitPtr", and the assigned value is "false" -- confirm both.
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "mIs32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+// Sets the Debug entry of CapsOverride to "true".
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDILDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILCallingConv.td"
+include "AMDILInstrInfo.td"
+
+// Instruction-set record referenced by the AMDIL target definition; it adds
+// no fields of its own to InstrInfo.
+def AMDILInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+include "Processors.td"
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+// Assembly writer record: names the writer class and selects variant 0.
+def AMDILAsmWriter : AsmWriter {
+ string AsmWriterClassName = "AsmPrinter";
+ int Variant = 0;
+}
+
+// Assembly parser record: names the parser class, selects variant 0, and
+// sets ";" as the comment delimiter and "r" as the register prefix.
+def AMDILAsmParser : AsmParser {
+ string AsmParserClassName = "AsmParser";
+ int Variant = 0;
+
+ string CommentDelimiter = ";";
+
+ string RegisterPrefix = "r";
+
+}
+
+
+// The AMDIL target: ties together the instruction set, the assembly writer
+// and the assembly parser defined above.
+def AMDIL : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDILInstrInfo;
+ let AssemblyWriters = [AMDILAsmWriter];
+ let AssemblyParsers = [AMDILAsmParser];
+}
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
new file mode 100644
index 00000000000..7eb37a220cb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3304 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "structcfg" // debug-type tag tested by DEBUGME below
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) // true only when DebugFlag is set and this debug type is selected
+#else
+#define DEBUGME 0 // DEBUG not defined: every DEBUGME-guarded block is dead code
+#endif
+
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+// Expands to a begin() call on the pointer-like expression A. The argument
+// and the whole expansion are parenthesized so that any expression can be
+// passed and the result can be used inside a larger expression.
+#define FirstNonDebugInstr(A) ((A)->begin())
+using namespace llvm;
+
+// bixia TODO: move this out to analysis lib. Make this work for both target
+// AMDIL and CBackend.
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+// Sentinel values. The negative constant is parenthesized so that it stays a
+// single operand wherever the macro is expanded (e.g. after a binary minus).
+#define INVALIDSCCNUM (-1)
+#define INVALIDREGNUM 0
+
+/// Print every element reachable through LoopInfo's begin()/end() range to
+/// OS, passing 0 as the second argument of each element's print().
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+  typename LoopinfoT::iterator cur = LoopInfo.begin();
+  typename LoopinfoT::iterator stop = LoopInfo.end();
+  while (cur != stop) {
+    (*cur)->print(OS, 0);
+    ++cur;
+  }
+}
+
+/// Reverse the order of the pointers stored in Src, in place.
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+  // Walk inwards from both ends, exchanging one pair per step; a middle
+  // element (odd length) is left where it is.
+  size_t lo = 0;
+  size_t hi = Src.size();
+  while (hi - lo > 1) {
+    --hi;
+    NodeT *held = Src[lo];
+    Src[lo] = Src[hi];
+    Src[hi] = held;
+    ++lo;
+  }
+}
+
+} //end namespace llvmCFGStruct
+
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
+
+namespace llvm {
+
+/// MachinePostDominatorTree - Machine-function pass that owns a
+/// DominatorTreeBase<MachineBasicBlock> constructed in post-dominator mode
+/// and forwards the usual dominator-tree queries to it.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  DominatorTreeBase<MachineBasicBlock> *DT; // owned; deleted by the destructor
+
+  // The `true` argument selects post-dominator mode. Only the way the pass
+  // ID is handed to the base class depends on the LLVM version.
+#if LLVM_VERSION >= 2500
+  MachinePostDominatorTree()
+    : MachineFunctionPass(ID),
+#else
+  MachinePostDominatorTree()
+    : MachineFunctionPass(&ID),
+#endif
+      DT(new DominatorTreeBase<MachineBasicBlock>(true)) {
+  }
+
+  ~MachinePostDominatorTree();
+
+  /// Recompute the tree for the given function (defined out of line).
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  /// This pass preserves all other analyses.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // Everything below simply forwards to the wrapped tree.
+
+  const std::vector<MachineBasicBlock *> &getRoots() const {
+    return DT->getRoots();
+  }
+
+  MachineDomTreeNode *getRootNode() const {
+    return DT->getRootNode();
+  }
+
+  MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+    return DT->dominates(A, B);
+  }
+
+  bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+    return DT->dominates(A, B);
+  }
+
+  bool properlyDominates(const MachineDomTreeNode *A,
+                         MachineDomTreeNode *B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  bool properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
+                                                MachineBasicBlock *B) {
+    return DT->findNearestCommonDominator(A, B);
+  }
+
+  /// Print the wrapped tree; the module argument is not used.
+  virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+    DT->print(OS);
+  }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0; // definition of the static pass ID declared in the struct
+static RegisterPass<MachinePostDominatorTree>
+machinePostDominatorTreePass("machinepostdomtree", // pass argument string
+ "MachinePostDominator Tree Construction", // pass description string
+ true, true); // presumably the CFG-only and is-analysis flags of RegisterPass -- TODO confirm against the LLVM version in use
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { // rebuild the owned tree for F
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false; // always reports "not modified"
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() { // releases the tree allocated by the constructor
+ delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT> // Primary template is empty; CFGStructurizer calls static members of CFGStructTraits<PassT>, so presumably specializations provide them -- TODO confirm
+struct CFGStructTraits {
+};
+
+template <class InstrT> // Per-block bookkeeping record; InstrT is currently unused by the live members
+class BlockInformation {
+public:
+ bool isRetired; // starts false
+ int sccNum; // starts as INVALIDSCCNUM
+ //Disabled member: SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //(instructions defining the corresponding successor).
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT> // Per-loop record: the loop's landing block plus several register sets
+class LandInformation {
+public:
+ BlockT *landBlk; // starts NULL
+ std::set<RegiT> breakInitRegs; //Registers that need "reg = 0" emitted
+ //before WHILELOOP(thisloop), i.e. initialized
+ //before entering thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need "reg = 0" emitted
+ //after WHILELOOP(thisloop), i.e. initialized
+ //after entering thisloop.
+ std::set<RegiT> endbranchInitRegs; //Initialized before entering this loop;
+ //the loop land block branches on them.
+ std::set<RegiT> breakOnRegs; //Registers that need "if (reg) break
+ //endif" after ENDLOOP(thisloop), breaking
+ //out of outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //Registers that need "if (reg) continue
+ //endif" after ENDLOOP(thisloop), continuing
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT> // PassT supplies the instruction/function/analysis types used throughout
+class CFGStructurizer
+{
+public:
+ typedef enum { // result kind returned by singlePathTo()
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+ // Type aliases taken from PassT and from the graph traits built on them.
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; // block -> heap-allocated BlockInfo
+
+ typedef int RegiT; // register numbers are plain ints here
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass);
+
+private:
+ void orderBlocks(); // fills orderedBlks and records an SCC number per block
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock); // repeats patternMatchGroup until it reports no match
+ int patternMatchGroup(BlockT *CurBlock); // one round of the individual matchers below
+ // Individual pattern matchers; each returns an int match count.
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+ // Analyses, pass and function; assigned by prepare() and/or run().
+private:
+ DomTreeT *domTree; // assigned in run()
+ PostDomTreeT *postDomTree; // assigned in run()
+ LoopInfoT *loopInfo; // assigned in prepare() and run()
+ PassT *passRep;
+ FuncT *funcRep;
+ // Bookkeeping; the map values are heap objects that run() deletes before clearing the maps.
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; // filled by orderBlocks()
+
+}; //template class CFGStructurizer
+
+/// Construct a structurizer that is not yet attached to any analysis, pass or
+/// function. Every pointer member starts out NULL (previously passRep and
+/// funcRep were left uninitialized); prepare() and run() assign them.
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+  : domTree(NULL), postDomTree(NULL), loopInfo(NULL),
+    passRep(NULL), funcRep(NULL) {
+}
+
+/// Release the per-block and per-loop records owned by the structurizer.
+///
+/// run() deletes the values of both maps and clears them once it finishes,
+/// in which case both loops below do nothing. When the object is destroyed
+/// without that cleanup having happened, the remaining records of both maps
+/// are freed here (previously only blockInfoMap was handled).
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+       E = blockInfoMap.end(); I != E; ++I) {
+    delete I->second;
+  }
+  blockInfoMap.clear();
+
+  for (typename LoopLandInfoMap::iterator I = loopLandInfoMap.begin(),
+       E = loopLandInfoMap.end(); I != E; ++I) {
+    delete I->second;
+  }
+  loopLandInfoMap.clear();
+}
+
+/// Prepare the CFG of \p func for structurization.
+///
+/// Steps, in order:
+///  - fetch loop info through CFGTraits and compute the block ordering;
+///  - for every top-level loop without exiting blocks, call
+///    normalizeInfiniteLoopExit() and remember any block it returns;
+///  - for every ordered block, remove unconditional and redundant
+///    conditional branches and collect the return blocks;
+///  - when two or more blocks were collected, add a dummy exit block.
+///
+/// \param func  the function to prepare; stored in funcRep.
+/// \param pass  the pass supplying the analyses; stored in passRep.
+/// \returns true when a dummy exit block was added or
+///          normalizeInfiniteLoopExit() produced a block. The latter case is
+///          treated as a CFG modification (presumably a new block was created
+///          -- confirm against normalizeInfiniteLoopExit) and used to go
+///          unreported. Branch removal is still not reflected in the result.
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass) {
+  passRep = &pass;
+  funcRep = &func;
+
+  bool changed = false;
+
+  //to do, if not reducible flow graph, make it so ???
+
+  if (DEBUGME) {
+    errs() << "AMDILCFGStructurizer::prepare\n";
+  }
+
+  // Analyses are obtained through the traits class rather than
+  // pass.getAnalysis<>() (gcc complained about the latter).
+  loopInfo = CFGTraits::getLoopInfo(pass);
+  if (DEBUGME) {
+    errs() << "LoopInfo:\n";
+    PrintLoopinfo(*loopInfo, errs());
+  }
+
+  orderBlocks();
+  if (DEBUGME) {
+    errs() << "Ordered blocks:\n";
+    printOrderedBlocks(errs());
+  }
+
+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+  // Loops without any exiting block get a chance to be normalized.
+  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+       iterEnd = loopInfo->end();
+       iter != iterEnd; ++iter) {
+    LoopT* loopRep = (*iter);
+    BlockTSmallerVector exitingBlks;
+    loopRep->getExitingBlocks(exitingBlks);
+
+    if (exitingBlks.size() == 0) {
+      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+      if (dummyExitBlk != NULL) {
+        retBlks.push_back(dummyExitBlk);
+        changed = true;
+      }
+    }
+  }
+
+  // Remove unconditional branch instr.
+  // Add dummy exit block iff there are multiple returns.
+
+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+       iterBlk != iterEndBlk;
+       ++iterBlk) {
+    BlockT *curBlk = *iterBlk;
+    removeUnconditionalBranch(curBlk);
+    removeRedundantConditionalBranch(curBlk);
+    if (CFGTraits::isReturnBlock(curBlk)) {
+      retBlks.push_back(curBlk);
+    }
+    assert(curBlk->succ_size() <= 2);
+  } //for
+
+  if (retBlks.size() >= 2) {
+    addDummyExitBlock(retBlks);
+    changed = true;
+  }
+
+  return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT> // Reduce func's CFG by repeated pattern matching over the ordered blocks; always returns true.
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass) {
+ passRep = &pass;
+ funcRep = &func;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::run\n";
+ //errs() << func.getFunction()->getNameStr() << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //postDomTree = &pass.getAnalysis<PostDomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0; // number of whole-function passes; only used in debug output
+ bool finish = false; // set once the entry block has no successors left
+ BlockT *curBlk;
+ bool makeProgress = false; // whether the last pass lowered the active-block count (or left exactly one)
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+ // Repeat whole-function passes until finished or until a pass makes no progress.
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL; // NULL means "the next block starts a new SCC group"
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC; assigned when a group starts.
+ // Walk orderedBlks; consecutive blocks with the same SCC number form one group.
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { // no reduction in this group: move on
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { // some reduction: rescan the group from its first block
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+ // Decide whether another whole-function pass is needed.
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second; // the info record is freed whether or not the block was retired
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ //func.dump();
+ }
+ // The loop stopped for lack of progress: record an IRREDUCIBLE_CF error on the function info.
+ if (!finish) {
+ MachineFunction *MF = &func; // NOTE(review): requires FuncT to be MachineFunction despite the template
+ AMDILMachineFunctionInfo *mMFI =
+ MF->getInfo<AMDILMachineFunctionInfo>();
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[IRREDUCIBLE_CF]);
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Write one "BB<number>(<scc number>,<size>)" entry per ordered block to
+/// \p os. An entry is followed by a newline when its index is a non-zero
+/// multiple of 10 and by a single space otherwise.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+  const size_t count = orderedBlks.size();
+  for (size_t idx = 0; idx != count; ++idx) {
+    BlockT *blk = orderedBlks[idx];
+    os << "BB" << blk->getNumber();
+    os << "(" << getSCCNum(blk) << "," << blk->size() << ")";
+    const bool endOfRow = (idx != 0) && (idx % 10 == 0);
+    os << (endOfRow ? "\n" : " ");
+  }
+} //printOrderedBlocks
+
+/// Compute the block ordering (described upstream as the reversed DFS post
+/// order): append the blocks of every strongly connected component to
+/// orderedBlks in scc_iterator order and record, for each block, the running
+/// index of its component. Afterwards report every block of the function that
+/// still has no SCC number as unreachable.
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+  int sccIndex = 0;
+  scc_iterator<FuncT *> sccIt = scc_begin(funcRep);
+  scc_iterator<FuncT *> sccStop = scc_end(funcRep);
+  while (sccIt != sccStop) {
+    std::vector<BlockT *> &members = *sccIt;
+    for (size_t i = 0, n = members.size(); i != n; ++i) {
+      BlockT *blk = members[i];
+      orderedBlks.push_back(blk);
+      recordSccnum(blk, sccIndex);
+    }
+    ++sccIt;
+    ++sccIndex;
+  }
+
+  // Any block that was never visited above kept the invalid SCC number.
+  for (BlockIterator it = FuncGTraits::nodes_begin(funcRep),
+       stop = FuncGTraits::nodes_end(funcRep); it != stop; ++it) {
+    BlockT *blk = &(*it);
+    if (getSCCNum(blk) == INVALIDSCCNUM) {
+      errs() << "unreachable block BB" << blk->getNumber() << "\n";
+    }
+  }
+} //orderBlocks
+
+/// Apply patternMatchGroup() to \p curBlk over and over until a round
+/// reports no match (a result that is not positive).
+///
+/// \returns the sum of the match counts of all successful rounds.
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+  if (DEBUGME) {
+    errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+  }
+
+  int total = 0;
+  for (;;) {
+    const int round = patternMatchGroup(curBlk);
+    if (round <= 0) {
+      break;
+    }
+    total += round;
+  }
+
+  if (DEBUGME) {
+    errs() << "End patternMatch BB" << curBlk->getNumber()
+           << ", numMatch = " << total << "\n";
+  }
+
+  return total;
+} //patternMatch
+
+/// Run one round of the individual matchers on \p curBlk, in the fixed order
+/// serial, if, loop-end, loop. The switch matcher is currently not part of
+/// the round.
+///
+/// \returns the sum of the four match counts.
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+  // Each call may rewrite the CFG, so the order of these statements matters.
+  const int serialHits = serialPatternMatch(curBlk);
+  const int ifHits = ifPatternMatch(curBlk);
+  //switchPatternMatch(curBlk) is intentionally not invoked.
+  const int loopendHits = loopendPatternMatch(curBlk);
+  const int loopHits = loopPatternMatch(curBlk);
+  return serialHits + ifHits + loopendHits + loopHits;
+}//patternMatchGroup
+
+/// Merge \p curBlk with its only successor when that successor has
+/// \p curBlk as its only predecessor and is not an active loop header.
+///
+/// \returns 1 when the merge was performed, 0 otherwise.
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+  const bool hasSingleSucc = (curBlk->succ_size() == 1);
+  if (!hasSingleSucc) {
+    return 0;
+  }
+
+  BlockT *succBlk = *curBlk->succ_begin();
+  if (succBlk->pred_size() != 1) {
+    return 0;
+  }
+  if (isActiveLoophead(succBlk)) {
+    return 0;
+  }
+
+  mergeSerialBlock(curBlk, succBlk);
+  ++numSerialPatternMatch;
+  return 1;
+} //serialPatternMatch
+
+template<class PassT> // Try to fold a two-successor block and its branch targets into one if/else region; returns 0 when nothing matched.
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0; // number of blocks cloned while matching
+
+ // TODO: Simplify. The chain below picks landBlk from the shape of the two branch targets.
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { // both targets share one single successor
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { // neither target has a successor
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { // true target leads straight into the false target
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) { // false target leads straight into the true target
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else { // no simple shape: hand the case over to handleJumpintoIf
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+ // new BB created for landBlk==NULL may introduce new challenge to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); // may update landBlk through the pointer
+ }
+ // A remaining target with more than one predecessor is replaced by a clone made for curBlk.
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned; // one if-match plus the clones made on the way
+} //ifPatternMatch
+
+template<class PassT> // Placeholder: no switch pattern is recognized; curBlk is ignored.
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0; // always reports zero matches
+} //switchPatternMatch
+
+/// Run the loop-break and loop-continue matchers for every loop that encloses
+/// \p curBlk and has no landing block recorded yet.
+///
+/// Loops are visited from the outermost to the innermost one, so a "continue"
+/// to an outer loop is not mistaken for a "break" of the current loop. The
+/// walk stops at the first loop whose break matcher returns -1.
+///
+/// \returns the sum of the break and continue counts of the loops handled;
+///          0 when \p curBlk is not inside any loop.
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+  // Collect the loop nest around curBlk, innermost loop first.
+  std::vector<LoopT *> loopNest;
+  for (LoopT *cur = loopInfo->getLoopFor(curBlk); cur;
+       cur = cur->getParentLoop()) {
+    loopNest.push_back(cur);
+  }
+
+  int total = 0;
+  for (size_t left = loopNest.size(); left > 0; --left) {
+    LoopT *loopRep = loopNest[left - 1];
+
+    // A loop that already has a landing block needs no further matching.
+    if (getLoopLandBlock(loopRep) != NULL) {
+      continue;
+    }
+
+    BlockT *header = loopRep->getHeader();
+
+    const int breakHits = loopbreakPatternMatch(loopRep, header);
+    if (breakHits == -1) {
+      break;
+    }
+
+    const int contHits = loopcontPatternMatch(loopRep, header);
+    total += breakHits + contHits;
+  }
+
+  return total;
+} //loopendPatternMatch
+
+/// For a block without successors that is the header of one or more nested
+/// loops, merge the recorded landing information of each such loop into the
+/// block, walking from the innermost loop outwards for as long as
+/// \p curBlk is the loop's header.
+///
+/// \returns the number of loops merged; also added to numLoopPatternMatch.
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+  if (curBlk->succ_size() != 0) {
+    return 0;
+  }
+
+  int merged = 0;
+  for (LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+       loopRep && loopRep->getHeader() == curBlk;
+       loopRep = loopRep->getParentLoop()) {
+    LoopLandInfo *landInfo = getLoopLandInfo(loopRep);
+    if (!landInfo) {
+      continue;
+    }
+
+    BlockT *landBlk = landInfo->landBlk;
+    assert(landBlk);
+    if (isRetiredBlock(landBlk)) {
+      continue;
+    }
+
+    mergeLooplandBlock(curBlk, landInfo);
+    ++merged;
+  }
+
+  numLoopPatternMatch += merged;
+
+  return merged;
+} //loopPatternMatch
+/*
+ * Restructure the exits of loopRep so that they all funnel into one landing
+ * block, then fold every exit into its exiting block as a break.
+ *
+ * Steps, as implemented below:
+ *  - no exiting blocks: call setLoopLandBlock(loopRep) and return 0;
+ *  - one distinct exit block: that block becomes the landing block;
+ *  - several: start from findNearestCommonPostDom(exitBlkSet), require all
+ *    exit blocks to share the same singlePathTo() classification, fall back
+ *    to relocateLoopcontBlock() / addLoopEndbranchBlock() when none of them
+ *    reaches the landing block, clone blocks with side entries, and run
+ *    serialPatternMatch() on each exit block.
+ * Finally the landing block is recorded via recordLoopLandBlock() and
+ * handleLoopbreak() is called for every (exiting block, exit block) pair.
+ *
+ * Returns -1 when the exits cannot be handled, otherwise
+ * numBreak + numSerial + numCloned. loopHeader is not used in this function.
+ */
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1)
+ {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1; // no common post-dominator for the exit blocks
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) { // mixed classifications, or a Not_SinglePath
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) { // every exit path ends without reaching exitLandBlk
+#if 1
+
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+#else
+ return -1;
+#endif
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear(); // both containers are rebuilt from exitingBlks below
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { // exit block has other predecessors: give exitingBlk its own copy
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) { // verify each exit block is exitLandBlk or leads straight to it
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) { // the two vectors are walked in lock-step
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+// Turn every back edge into loopHeader that originates inside loopRep into a
+// continue: each predecessor of loopHeader contained in loopRep is passed to
+// handleLoopcontBlock(), and afterwards its edge to loopHeader is removed.
+//
+// Returns the number of such predecessors and adds it to
+// numLoopcontPatternMatch.
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+                                                 BlockT *loopHeader) {
+  typedef SmallVector<BlockT *, DEFAULT_VEC_SLOTS> BlockList;
+  typedef typename InvBlockGTraits::ChildIteratorType PredIter;
+
+  BlockList contingBlks;
+  for (PredIter pred = InvBlockGTraits::child_begin(loopHeader),
+       predEnd = InvBlockGTraits::child_end(loopHeader);
+       pred != predEnd; ++pred) {
+    BlockT *predBlk = *pred;
+    if (!loopRep->contains(predBlk)) {
+      continue;
+    }
+    handleLoopcontBlock(predBlk, loopInfo->getLoopFor(predBlk),
+                        loopHeader, loopRep);
+    contingBlks.push_back(predBlk);
+  }
+
+  // The edges are dropped only after the predecessor walk has finished
+  // (presumably so that the iteration above is not disturbed).
+  for (typename BlockList::iterator blk = contingBlks.begin(),
+       blkEnd = contingBlks.end(); blk != blkEnd; ++blk) {
+    (*blk)->removeSuccessor(loopHeader);
+  }
+
+  int numCont = static_cast<int>(contingBlks.size());
+  numLoopcontPatternMatch += numCont;
+  return numCont;
+} //loopcontPatternMatch
+
+
+// Return true iff src1Blk has no successors and src1Blk and src2Blk are
+// reported by loopInfo as belonging to the same loop, and that loop already
+// has a LoopLandInfo. Without explicitly keeping track of loopContBlks and
+// loopBreakBlks, this is a way to get that information.
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+                                                         BlockT *src2Blk) {
+  if (src1Blk->succ_size() != 0) {
+    return false;
+  }
+
+  LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+  if (loopRep == NULL || loopRep != loopInfo->getLoopFor(src2Blk)) {
+    return false;
+  }
+
+  // Use the getLoopLandInfo() lookup (the same one loopPatternMatch relies
+  // on) instead of loopLandInfoMap[loopRep]: operator[] would add an entry to
+  // the map for every loop this predicate merely asks about.
+  if (getLoopLandInfo(loopRep) == NULL) {
+    return false;
+  }
+
+  if (DEBUGME) {
+    errs() << "isLoopContBreakBlock yes src1 = BB"
+           << src1Blk->getNumber()
+           << " src2 = BB" << src2Blk->getNumber() << "\n";
+  }
+  return true;
+} //isSameloopDetachedContbreak
+
+// Try handleJumpintoIfImp() with the branches as given; if that achieves
+// nothing (returns 0), retry with trueBlk and falseBlk swapped.
+// Returns the result of whichever attempt was made last.
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+                                             BlockT *trueBlk,
+                                             BlockT *falseBlk) {
+  int matched = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+  if (matched != 0) {
+    return matched;
+  }
+
+  if (DEBUGME) {
+    errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+  }
+  return handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+}
+/*
+ * Walk down from trueBlk along single-successor edges, looking for a block
+ * downBlk that falseBlk reaches by a single path (singlePathTo() returns
+ * SinglePath_InPath). At the first such block, clone the side-entry blocks
+ * on both branches with cloneOnSideEntryTo(), then run serialPatternMatch()
+ * on the first two successors of headBlk and ifPatternMatch() on headBlk.
+ *
+ * Returns the accumulated number of clones and pattern matches, or 0 when
+ * no suitable downBlk is found.
+ */
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (//postDomTree->dominates(downBlk, falseBlk) &&
+ singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num; // at this point num counts clones only
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0); //
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; // stop at the first block that is not single-successor
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+// Debug dump for improveSimpleJumpintoIf(): prints the number and size of
+// headBlk, then number, size and predecessor count of each of trueBlk,
+// falseBlk and landBlk that is non-NULL, all on one line of errs().
+// When detail is set, every block is additionally printed in full.
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+                                                         BlockT *trueBlk,
+                                                         BlockT *falseBlk,
+                                                         BlockT *landBlk,
+                                                         bool detail) {
+  errs() << "head = BB" << headBlk->getNumber()
+         << " size = " << headBlk->size();
+  if (detail) {
+    errs() << "\n";
+    headBlk->print(errs());
+    errs() << "\n";
+  }
+
+  // The three optional blocks share one output format; only the label differs.
+  const char *const labels[] = { ", true = BB", ", false = BB", ", land = BB" };
+  BlockT *const blocks[] = { trueBlk, falseBlk, landBlk };
+
+  for (unsigned idx = 0; idx != 3; ++idx) {
+    BlockT *blk = blocks[idx];
+    if (blk == NULL) {
+      continue;
+    }
+
+    errs() << labels[idx] << blk->getNumber() << " size = "
+           << blk->size() << " numPred = " << blk->pred_size();
+    if (detail) {
+      errs() << "\n";
+      blk->print(errs());
+      errs() << "\n";
+    }
+  }
+
+  errs() << "\n";
+} //showImproveSimpleJumpintoIf
+/*
+ * Rewrite "headBlk => if () {trueBlk} else {falseBlk} => landBlk" so that
+ * the branch blocks selected for migration (needMigrateBlock()) have their
+ * instructions moved into landBlk, where they are re-selected through a
+ * fresh flag register initReg. The shape of the transformation is sketched
+ * in the "org: / new:" comment inside the body.
+ *
+ * trueBlk and falseBlk may be NULL; each must have at most one successor.
+ * *plandBlk is the current landing block or NULL; when NULL a new block is
+ * created. The landing block in use is stored back through plandBlk.
+ *
+ * Returns the number of blocks created (0 or 1). Returns 0 without changing
+ * anything when trueBlk == falseBlk or when neither block needs migrating.
+ */
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+ // May consider the # landBlk->pred_size() as it represents the number of
+ // assignment initReg = .. needed to insert.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate the other
+ // one if it has more than one predecessor. Without doing this, a predecessor
+ // other than headBlk would leave initReg with an undefined value.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ if (!migrateTrue || !migrateFalse) { // the side that is not migrated gets no assignment of its own below
+ int initVal = migrateTrue ? 0 : 1; // so headBlk pre-sets the value standing for that side
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) { // outer guard: if (initReg != 2) { ... }
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDIL::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to unconditionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to unconditionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+// Fold the exit edge exitingBlk -> exitBlk of exitLoop into exitingBlk.
+//
+// When exitingBlk sits in a different loop (exitingLoop) than the one being
+// left, a fresh flag register is created: it is registered as a break-init
+// register of exitLoop and as a break-on register of exitingLoop and of each
+// parent loop up to, but not including, exitLoop. The actual rewrite is then
+// delegated to mergeLoopbreakBlock(), which receives the flag register or
+// INVALIDREGNUM for a same-loop break.
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+                                             LoopT *exitingLoop,
+                                             BlockT *exitBlk,
+                                             LoopT *exitLoop,
+                                             BlockT *landBlk) {
+  if (DEBUGME) {
+    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+  }
+
+  RegiT breakReg = INVALIDREGNUM;
+  if (exitingLoop != exitLoop) {
+    breakReg = static_cast<int>
+      (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+    assert(breakReg != INVALIDREGNUM);
+    addLoopBreakInitReg(exitLoop, breakReg);
+
+    LoopT *walk = exitingLoop;
+    for (; walk != exitLoop && walk; walk = walk->getParentLoop()) {
+      addLoopBreakOnReg(walk, breakReg);
+    }
+    assert(walk == exitLoop);
+  }
+
+  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, breakReg);
+
+} //handleLoopbreak
+
+// Turn the back edge contingBlk -> contBlk (the header of contLoop) into a
+// continue.
+//
+// When contingBlk sits in a different loop (contingLoop) than contLoop, a
+// fresh flag register is created and registered as a cont-init register of
+// contLoop. Every loop from contingLoop upwards whose parent is not contLoop
+// breaks on that register; the loop whose parent is contLoop continues on
+// it. The block rewrite itself is delegated to settleLoopcontBlock(), which
+// receives the flag register or INVALIDREGNUM for a same-loop continue.
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+                                                 LoopT *contingLoop,
+                                                 BlockT *contBlk,
+                                                 LoopT *contLoop) {
+  if (DEBUGME) {
+    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+           << " header = BB" << contBlk->getNumber() << "\n";
+
+    errs() << "Trying to continue loop-depth = "
+           << getLoopDepth(contLoop)
+           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+  }
+
+  RegiT contReg = INVALIDREGNUM;
+  if (contingLoop != contLoop) {
+    contReg = static_cast<int>
+      (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+    assert(contReg != INVALIDREGNUM);
+    addLoopContInitReg(contLoop, contReg);
+
+    LoopT *walk = contingLoop;
+    for (; walk && walk->getParentLoop() != contLoop;
+         walk = walk->getParentLoop()) {
+      addLoopBreakOnReg(walk, contReg); //not addLoopContOnReg
+    }
+    assert(walk && walk->getParentLoop() == contLoop);
+    addLoopContOnReg(walk, contReg);
+  }
+
+  settleLoopcontBlock(contingBlk, contBlk, contReg);
+  //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+/*
+ * Merge srcBlk into dstBlk: the instructions of srcBlk, starting at
+ * FirstNonDebugInstr(srcBlk), are spliced onto the end of dstBlk; the edge
+ * dstBlk -> srcBlk is replaced by copies of srcBlk's successor edges; then
+ * srcBlk loses its successors and is retired.
+ */
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk); // must run before srcBlk's successor list is cleared below
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+/*
+ * Fold an if/else region rooted at curBlk into curBlk itself. The
+ * conditional branch branchInstr is replaced by the sequence
+ *     <branch-nonzero opcode>  trueBlk  ELSE  falseBlk  ENDIF
+ * where the instructions of trueBlk and falseBlk are spliced in place.
+ * Either of trueBlk / falseBlk may be NULL, in which case that arm stays
+ * empty. Both blocks are detached from curBlk and retired.
+ *
+ * landBlk may be NULL. The edges from the arms to landBlk are removed, and
+ * curBlk gets landBlk as a successor only when landBlk, trueBlk and
+ * falseBlk are all non-NULL.
+ */
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent(); // everything above was inserted before this instruction
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+// Wrap the loop body held in dstBlk in WHILELOOP / ENDLOOP, emit the
+// register bookkeeping recorded in loopLand, and append the loop's landing
+// block (loopLand->landBlk) to dstBlk.
+//
+// The statements below are order-sensitive: each insert*Before(dstBlk, ...)
+// call is issued relative to what the previous calls have already placed in
+// dstBlk, so the sequence must not be reshuffled.
+// After the merge dstBlk takes over the successors of the landing block,
+// which is then retired.
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+                                                LoopLandInfo *loopLand) {
+  // One iterator type for all register sets; the original spelled it out per
+  // loop and omitted `typename` on one of them.
+  typedef typename std::set<RegiT>::const_iterator RegIter;
+
+  BlockT *landBlk = loopLand->landBlk;
+
+  if (DEBUGME) {
+    errs() << "loopPattern header = BB" << dstBlk->getNumber()
+           << " land = BB" << landBlk->getNumber() << "\n";
+  }
+
+  // Loop contInitRegs are init at the beginning of the loop.
+  for (RegIter iter = loopLand->contInitRegs.begin(),
+       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  // We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+  // AMDIL::BREAK_LOGICALNZ statement in the current dstBlk. Search for the
+  // DebugLoc in that statement; if not found, use the empty/default DebugLoc.
+  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreak);
+
+  // Loop breakInitRegs are init before entering the loop.
+  for (RegIter iter = loopLand->breakInitRegs.begin(),
+       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  // Loop endbranchInitRegs are init before entering the loop.
+  for (RegIter iter = loopLand->endbranchInitRegs.begin(),
+       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+  }
+
+  // We last inserted the DebugLoc in the continue statement in the current
+  // dstBlk. Search for the DebugLoc in the continue statement; if not found,
+  // use the empty/default DebugLoc.
+  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+  CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
+
+  // Loop breakOnRegs are checked after the ENDLOOP: break the loop outside
+  // this loop.
+  for (RegIter iter = loopLand->breakOnRegs.begin(),
+       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+                                   *iter);
+  }
+
+  // Loop contOnRegs are checked after the ENDLOOP: continue the loop outside
+  // this loop.
+  for (RegIter iter = loopLand->contOnRegs.begin(),
+       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+    CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+                                   passRep, *iter);
+  }
+
+  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+    dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+  }
+
+  removeSuccessor(landBlk);
+  retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+/*
+ * Rewrite the conditional branch at the end of exitingBlk, which leaves the
+ * loop towards exitBlk, into structured break code.
+ *
+ *  - exitBlk == exitLandBlk and setReg == INVALIDREGNUM: the branch becomes
+ *    a single conditional break (non-zero or zero variant, depending on
+ *    whether exitBlk is the branch's true target).
+ *  - otherwise: the branch becomes "if (cond) { <exitBlk instructions when
+ *    exitBlk != exitLandBlk>; setReg = 1 when setReg is valid; BREAK }
+ *    ENDIF".
+ *
+ * Afterwards the edge exitingBlk -> exitBlk is removed, and an exitBlk that
+ * differs from exitLandBlk is detached from exitLandBlk and retired.
+ * exitingBlk must end in a conditional branch (asserted).
+ */
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+ : CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDIL::ENDIF, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //exitBlk's instructions were spliced into exitingBlk above
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+/*
+ * Rewrite contingBlk, a block with a back edge to the loop header contBlk,
+ * into structured continue code. The CFG edge itself is not touched here.
+ *
+ * If contingBlk still has a loop-end branch instruction (which must be
+ * conditional):
+ *  - setReg == INVALIDREGNUM and the branch is the last instruction of the
+ *    block: the branch becomes one conditional continue;
+ *  - otherwise: an "if" is opened at the branch position and, at the end of
+ *    the block, either "setReg = 1 (at the branch position); BREAK" when
+ *    setReg is valid or "CONTINUE" is emitted, followed by ENDIF.
+ *  The branch instruction is then erased.
+ *
+ * If there is no such branch instruction, an unconditional
+ * "setReg = 1; BREAK" (setReg valid) or "CONTINUE" is emitted, using the
+ * debug location returned by getLastDebugLocInBB(contingBlk).
+ */
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); // nothing follows the branch and no flag register is needed
+
+ if (useContinueLogical == false)
+ {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+ /* If we've arrived here then we've already erased the branch instruction.
+ * Travel back up the basic block to see the last reference of our debug location;
+ * we've just inserted that reference here so it should be representative. */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the break-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDIL::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// BBs in exitBlkSet are determined as in break-path for loopRep.
+// Before we can put code for those BBs inside the loop body of loopRep,
+// check whether they were determined as cont-BBs for parentLoopRep earlier.
+// If so, generate a new BB newBlk and
+// (1) make newBlk the common successor of the end blocks of those paths,
+// (2) erase the continue-instr from each of those end blocks,
+// (3) put a continue-instr in newBlk.
+// Returns newBlk, or NULL (nothing has been modified yet) when some path has
+// no single end block or that end block holds no continue-instr.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+// BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) { // first pass only validates and collects; the CFG is not changed here
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDIL::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop exiting branches.
+/*
+ * exitingBlks[i] / exitBlks[i] are the i-th exiting block of loopRep and the
+ * exit block it leaves to; at least two pairs are required (asserted).
+ *
+ * Every exiting block is redirected to the new landing block. Exiting block
+ * i (for i >= 1) first stores i into a fresh register endBranchReg, which is
+ * registered with loopRep through addLoopEndbranchInitReg(). The landing
+ * block then starts a chain of branch blocks: the block for index i - 1
+ * compares endBranchReg with i - 1 and goes to exitBlks[i - 1] on equality,
+ * otherwise to the next branch block; the last link goes directly to the
+ * final exit block.
+ *
+ * Returns the new landing block (the first branch block of the chain).
+ */
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two paths) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk; // last link: the "else" target is the final exit block itself
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
+ .addImm(i - 1); //preVal
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass));
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+// Classify the chain of single-successor blocks that starts at srcBlk:
+//   SinglePath_InPath     the chain reaches dstBlk (or srcBlk == dstBlk);
+//   SinglePath_NotInPath  the chain ends in a block without successors
+//                         before reaching dstBlk;
+//   Not_SinglePath        the chain hits a block with several successors,
+//                         or, when allowSideEntry is false, a block on the
+//                         chain (other than srcBlk) with several predecessors.
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+                                     bool allowSideEntry) {
+  assert(dstBlk);
+
+  BlockT *cur = srcBlk;
+  if (cur == dstBlk) {
+    return SinglePath_InPath;
+  }
+
+  while (cur != NULL && cur->succ_size() == 1) {
+    cur = *cur->succ_begin();
+
+    if (cur == dstBlk) {
+      return SinglePath_InPath;
+    }
+    if (!allowSideEntry && cur->pred_size() > 1) {
+      return Not_SinglePath;
+    }
+  }
+
+  bool endsWithoutSucc = (cur != NULL && cur->succ_size() == 0);
+  return endsWithoutSucc ? SinglePath_NotInPath : Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block
+// before dstBlk (srcBlk itself when srcBlk == dstBlk).
+// If there is a single path from srcBlk that ends, without passing dstBlk,
+// in a block without successors, return that last block.
+// Otherwise return NULL. With allowSideEntry == false a block on the path
+// (other than srcBlk) that has several predecessors also yields NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+                                      bool allowSideEntry) {
+  assert(dstBlk);
+
+  if (srcBlk == dstBlk) {
+    return srcBlk;
+  }
+
+  if (srcBlk->succ_size() == 0) {
+    return srcBlk;
+  }
+
+  while (srcBlk && srcBlk->succ_size() == 1) {
+    BlockT *preBlk = srcBlk;
+
+    srcBlk = *srcBlk->succ_begin();
+    // Stop as soon as the path arrives at dstBlk, as the contract above
+    // states. The original tested only for NULL here, so the walk ran
+    // straight through dstBlk; the NULL test is kept for safety.
+    if (srcBlk == NULL || srcBlk == dstBlk) {
+      return preBlk;
+    }
+
+    if (!allowSideEntry && srcBlk->pred_size() > 1) {
+      return NULL;
+    }
+  }
+
+  if (srcBlk && srcBlk->succ_size() == 0) {
+    return srcBlk;
+  }
+
+  return NULL;
+
+} //singlePathEnd
+
+// Walk the single-successor chain from srcBlk up to (not including) dstBlk.
+// Each block on the chain that has more than one predecessor is replaced, for
+// the edge coming from the previously visited block, by a private clone made
+// with cloneBlockForPredecessor. Returns the number of blocks cloned.
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+                                               BlockT *dstBlk) {
+  assert(preBlk->isSuccessor(srcBlk));
+
+  int numCloned = 0;
+  BlockT *prevBlk = preBlk;
+  BlockT *walkBlk = srcBlk;
+
+  while (walkBlk && walkBlk != dstBlk) {
+    assert(walkBlk->succ_size() == 1);
+
+    if (walkBlk->pred_size() > 1) {
+      // Continue the walk from the clone, which inherits the successor list.
+      walkBlk = cloneBlockForPredecessor(walkBlk, prevBlk);
+      ++numCloned;
+    }
+
+    prevBlk = walkBlk;
+    walkBlk = *walkBlk->succ_begin();
+  }
+
+  return numCloned;
+} //cloneOnSideEntryTo
+
+// Give predBlk its own private copy of curBlk.
+//
+// The instructions of curBlk are cloned into a new block, the branch in predBlk
+// that targeted curBlk is redirected to the clone, the CFG edge
+// predBlk -> curBlk is replaced by predBlk -> clone, and the clone receives the
+// same successor list as curBlk. curBlk itself stays in place for its other
+// predecessors. numClonedInstr is increased by the size of curBlk.
+//
+// Returns the newly created clone.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+                                                 BlockT *predBlk) {
+  assert(predBlk->isSuccessor(curBlk) &&
+         "curBlk is not a successor of predBlk");
+
+  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
+  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+  //srcBlk, oldBlk, newBlk
+
+  predBlk->removeSuccessor(curBlk);
+  predBlk->addSuccessor(cloneBlk);
+
+  // add all successor to cloneBlk
+  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+  numClonedInstr += curBlk->size();
+
+  if (DEBUGME) {
+    errs() << "Cloned block: " << "BB"
+           << curBlk->getNumber() << " size " << curBlk->size() << "\n";
+  }
+
+  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+  return cloneBlk;
+} //cloneBlockForPredecessor
+
+// Return the successor of exitingBlk that lies outside loopRep. The asserts
+// require that exactly one such successor exists.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+                                               BlockT *exitingBlk) {
+  BlockT *outsideBlk = NULL;
+
+  typename BlockT::succ_iterator succIt = exitingBlk->succ_begin();
+  typename BlockT::succ_iterator succEnd = exitingBlk->succ_end();
+  while (succIt != succEnd) {
+    BlockT *candidate = *succIt;
+    if (!loopRep->contains(candidate)) {
+      // A second out-of-loop successor would make the answer ambiguous.
+      assert(outsideBlk == NULL);
+      outsideBlk = candidate;
+    }
+    ++succIt;
+  }
+
+  assert(outsideBlk != NULL);
+
+  return outsideBlk;
+} //exitingBlock2ExitBlock
+
+// Move the instructions of srcBlk into dstBlk, inserting them before insertPos.
+// If srcBlk ends in a branch (as reported by
+// CFGTraits::getNormalBlockBranchInstr), only the instructions in front of that
+// branch are moved; otherwise the whole block is moved.
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+                                                BlockT *dstBlk,
+                                                InstrIterator insertPos) {
+  //look for the input branchinstr, not the AMDIL branchinstr
+  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+
+  // By default everything is spliced; a trailing branch shortens the range.
+  InstrIterator spliceEnd = srcBlk->end();
+  if (branchInstr != NULL) {
+    if (DEBUGME) {
+      errs() << "migrateInstruction see branch instr\n" ;
+      branchInstr->dump();
+    }
+    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+  } else {
+    if (DEBUGME) {
+      errs() << "migrateInstruction don't see branch instr\n" ;
+    }
+  }
+
+  if (DEBUGME) {
+    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+           << "srcSize = " << srcBlk->size() << "\n";
+  }
+
+  //splice insert before insertPos
+  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+  if (DEBUGME) {
+    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+           << "srcSize = " << srcBlk->size() << "\n";
+  }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes a loop latch of the form
+//   B1:
+//     uncond_br LoopHeader
+//
+// into
+//   B1:
+//     cond_br 1 LoopHeader dummyExit
+//
+// and returns the newly added dummy exit block. Returns NULL (and changes
+// nothing) when the loop has no header or no single latch, or when the latch's
+// branch is missing or is not an unconditional branch.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ // Create the (empty) block that will act as the loop's exit.
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ // Both new instructions go in front of the old unconditional branch:
+ // first "immReg = 1", then the conditional branch that tests immReg.
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(&AMDIL::GPRI32RegClass);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ // The old branch is only removed after insertPos has been used.
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+// Erase trailing unconditional branches from srcBlk, repeating until
+// CFGTraits::getLoopendBlockBranchInstr no longer reports one.
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+  // I saw two unconditional branch in one basic block in example
+  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
+  for (;;) {
+    InstrT *tailBranch = CFGTraits::getLoopendBlockBranchInstr(srcBlk);
+    if (!tailBranch || !CFGTraits::isUncondBranch(tailBranch)) {
+      break;
+    }
+
+    if (DEBUGME) {
+      errs() << "Removing unconditional branch instruction" ;
+      tailBranch->dump();
+    }
+    tailBranch->eraseFromParent();
+  }
+} //removeUnconditionalBranch
+
+// If srcBlk has exactly two successor entries and both name the same block,
+// the conditional branch decides nothing: erase it and drop one of the
+// duplicate successor entries. Any other shape is left alone.
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+  if (srcBlk->succ_size() != 2) {
+    return;
+  }
+
+  typename BlockT::succ_iterator succIt = srcBlk->succ_begin();
+  BlockT *firstSucc = *succIt;
+  ++succIt;
+  BlockT *secondSucc = *succIt;
+  if (firstSucc != secondSucc) {
+    return;
+  }
+
+  InstrT *condBranch = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+  assert(condBranch && CFGTraits::isCondBranch(condBranch));
+  if (DEBUGME) {
+    errs() << "Removing unneeded conditional branch instruction" ;
+    condBranch->dump();
+  }
+  condBranch->eraseFromParent();
+  SHOWNEWBLK(firstSucc, "Removing redundant successor");
+  srcBlk->removeSuccessor(firstSucc);
+} //removeRedundantConditionalBranch
+
+// Funnel every block in retBlks into one new exit block.
+//
+// A new block holding a single AMDIL::RETURN is appended to the function. For
+// each block in retBlks, its own trailing return instruction (if
+// CFGTraits::getReturnInstr finds one) is erased and the new block is added as
+// its successor.
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+// Disabled: would bypass a now-empty return block that has a single
+// predecessor by attaching the dummy exit to that predecessor instead.
+#if 0
+ if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+ if (DEBUGME) {
+ errs() << "Replace empty block BB" << curBlk->getNumber()
+ << " with dummyExitBlock\n";
+ }
+ BlockT *predb = *curBlk->pred_begin();
+ predb->removeSuccessor(curBlk);
+ curBlk = predb;
+ } //handle empty curBlk
+#endif
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+// Detach srcBlk from all of its successors, one edge at a time.
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+  while (srcBlk->succ_size() != 0) {
+    BlockT *firstSucc = *srcBlk->succ_begin();
+    srcBlk->removeSuccessor(firstSucc);
+  }
+} //removeSuccessor
+
+// Store sccNum in the BlockInfo record of srcBlk, creating the record if the
+// block does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+  // Reference into the map so that a freshly allocated record is stored there.
+  BlockInfo *&infoSlot = blockInfoMap[srcBlk];
+  if (!infoSlot) {
+    infoSlot = new BlockInfo();
+  }
+
+  infoSlot->sccNum = sccNum;
+} //recordSccnum
+
+// Return the SCC number recorded for srcBlk, or INVALIDSCCNUM when the block
+// has no BlockInfo record.
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+  BlockInfo *info = blockInfoMap[srcBlk];
+  if (info == NULL) {
+    return INVALIDSCCNUM;
+  }
+  return info->sccNum;
+} //getSCCNum
+
+// Mark srcBlk as retired in its BlockInfo record (created on demand). The block
+// must already be disconnected: the assert requires it to have neither
+// successors nor predecessors. dstBlk is not used by this function.
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+  if (DEBUGME) {
+    errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+  }
+
+  BlockInfo *&infoSlot = blockInfoMap[srcBlk];
+  if (!infoSlot) {
+    infoSlot = new BlockInfo();
+  }
+  infoSlot->isRetired = true;
+
+  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+         && "can't retire block yet");
+} //retireBlock
+
+// True when srcBlk has a BlockInfo record whose isRetired flag is set.
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+  BlockInfo *info = blockInfoMap[srcBlk];
+  if (info == NULL) {
+    return false;
+  }
+  return info->isRetired;
+} //isRetiredBlock
+
+// Decide whether curBlk is the header of a loop that is still "active".
+//
+// Starting with the innermost loop containing curBlk and moving outwards for as
+// long as curBlk is the header, the loop counts as active when it has no
+// landing info yet or when its landing block has not been retired. Returns
+// false if no such loop is found.
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+  for (LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+       loopRep && loopRep->getHeader() == curBlk;
+       loopRep = loopRep->getParentLoop()) {
+    LoopLandInfo *landInfo = getLoopLandInfo(loopRep);
+    if (landInfo == NULL) {
+      return true;
+    }
+
+    BlockT *landBlk = landInfo->landBlk;
+    assert(landBlk);
+    if (!isRetiredBlock(landBlk)) {
+      return true;
+    }
+  }
+
+  return false;
+} //isActiveLoophead
+
+// Heuristic: returns true only for a non-NULL block with more than one
+// predecessor that is larger than 30 instructions and for which
+// size * (predecessors - 1) exceeds 100.
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+  const unsigned blockSizeThreshold = 30;
+  const unsigned cloneInstrThreshold = 100;
+
+  if (blk == NULL || blk->pred_size() <= 1) {
+    return false;
+  }
+
+  unsigned blkSize = blk->size();
+  if (blkSize <= blockSizeThreshold) {
+    return false;
+  }
+
+  return blkSize * (blk->pred_size() - 1) > cloneInstrThreshold;
+} //needMigrateBlock
+
+// Record the landing block of loopRep, first making sure that block is entered
+// only from the loop or from the loop's exit blocks.
+//
+// If landBlk has predecessors that are neither contained in loopRep nor members
+// of exitBlkSet, a fresh block is created that falls through to landBlk, all
+// "in path" predecessors are redirected to the fresh block, and entries of
+// exitBlks equal to landBlk are updated to the fresh block.
+//
+// Returns the block registered via setLoopLandBlock (landBlk or the fresh one).
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ // Collect the predecessors of landBlk that belong to the loop or its exits.
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ // Redirect both the branch instruction and the CFG edge of each in-path
+ // predecessor from landBlk to newLandBlk.
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ // Keep the caller's exit-block list consistent with the redirection.
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopLandBlock
+
+// Register blk as the landing block of loopRep. The loop's LoopLandInfo record
+// is created on demand and must not have a landing block yet. When blk is NULL
+// a new empty block is appended to the function and used instead.
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+  assert(landInfo->landBlk == NULL);
+
+  BlockT *landBlk = blk;
+  if (!landBlk) {
+    landBlk = funcRep->CreateMachineBasicBlock();
+    funcRep->push_back(landBlk);  //insert to function
+    SHOWNEWBLK(landBlk, "DummyLandingBlock for loop without break: ");
+  }
+
+  landInfo->landBlk = landBlk;
+
+  if (DEBUGME) {
+    errs() << "setLoopLandBlock loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " landing-block = BB" << landBlk->getNumber() << "\n";
+  }
+} // setLoopLandBlock
+
+// Add regNum to the breakOnRegs set of loopRep's LoopLandInfo record, creating
+// the record if the loop does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+
+  landInfo->breakOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopBreakOnReg
+
+// Add regNum to the contOnRegs set of loopRep's LoopLandInfo record, creating
+// the record if the loop does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+
+  landInfo->contOnRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContOnReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopContOnReg
+
+// Add regNum to the breakInitRegs set of loopRep's LoopLandInfo record,
+// creating the record if the loop does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+
+  landInfo->breakInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopBreakInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopBreakInitReg
+
+// Add regNum to the contInitRegs set of loopRep's LoopLandInfo record, creating
+// the record if the loop does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+
+  landInfo->contInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopContInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopContInitReg
+
+// Add regNum to the endbranchInitRegs set of loopRep's LoopLandInfo record,
+// creating the record if the loop does not have one yet.
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+                                                     RegiT regNum) {
+  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
+  if (!landInfo) {
+    landInfo = new LoopLandInfo();
+  }
+
+  landInfo->endbranchInitRegs.insert(regNum);
+
+  if (DEBUGME) {
+    errs() << "addLoopEndbranchInitReg loop-header = BB"
+           << loopRep->getHeader()->getNumber()
+           << " regNum = " << regNum << "\n";
+  }
+} // addLoopEndbranchInitReg
+
+// Return the LoopLandInfo pointer stored for loopRep in loopLandInfoMap
+// (the lookup uses operator[] on the map).
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+  return loopLandInfoMap[loopRep];
+} // getLoopLandInfo
+
+// Return the landing block recorded for loopRep, or NULL when the loop has no
+// LoopLandInfo record.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+  LoopLandInfo *landInfo = loopLandInfoMap[loopRep];
+  if (landInfo == NULL) {
+    return NULL;
+  }
+  return landInfo->landBlk;
+} // getLoopLandBlock
+
+
+// True when curBlk belongs to a loop and the header of its innermost loop is
+// one of curBlk's successors.
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+  LoopT *innerLoop = loopInfo->getLoopFor(curBlk);
+  if (!innerLoop) {
+    return false;
+  }
+
+  return curBlk->isSuccessor(innerLoop->getHeader());
+} //hasBackEdge
+
+// Nesting depth of loopRep as reported by the loop itself; 0 for a NULL loop.
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+  if (!loopRep) {
+    return 0;
+  }
+  return loopRep->getLoopDepth();
+} //getLoopDepth
+
+// Count the blocks in [iterStart, iterEnd) that are not retired.
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+  int numActive = 0;
+  for (; iterStart != iterEnd; ++iterStart) {
+    if (isRetiredBlock(*iterStart)) {
+      continue;
+    }
+    ++numActive;
+  }
+
+  return numActive;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available for
+// post-dominator trees; a proper fix should go into Dominators.h.
+//
+// Returns the nearest block that post-dominates both blk1 and blk2, or NULL
+// when none is found.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ // One block post-dominating the other settles the question immediately.
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ // A block without a tree node that has a single successor is replaced by
+ // that successor and the query is retried.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ // Climb the immediate post-dominators of blk1 until one also post-dominates
+ // blk2.
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+// Fold the two-block findNearestCommonPostDom over every block in blks.
+//
+// Returns the common post-dominator of all blocks in the set, or NULL as soon
+// as an intermediate result is NULL. An empty set yields NULL; previously the
+// begin iterator of an empty set was dereferenced.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+  BlockT *commonDom = NULL;
+  typename std::set<BlockT *>::const_iterator iter = blks.begin();
+  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+
+  if (iter != iterEnd) {
+    // Seed with the first block, then combine with each remaining one.
+    commonDom = *iter;
+    for (++iter; iter != iterEnd && commonDom != NULL; ++iter) {
+      BlockT *curBlk = *iter;
+      if (curBlk != commonDom) {
+        commonDom = findNearestCommonPostDom(curBlk, commonDom);
+      }
+    }
+  }
+
+  if (DEBUGME) {
+    errs() << "Common post dominator for exit blocks is ";
+    if (commonDom) {
+      errs() << "BB" << commonDom->getNumber() << "\n";
+    } else {
+      errs() << "NULL\n";
+    }
+  }
+
+  return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+// Common base class of the AMDIL CFG passes declared in this file
+// (AMDILCFGPrepare and AMDILCFGPerform). It supplies the type aliases used by
+// the structurizer code and holds the target machine together with its
+// instruction info.
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+ // Machine-level types used by the structurizer.
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+//private:
+ // Note: the "private:" label above is commented out, so these two members
+ // are public.
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+//public:
+// static char ID;
+
+public:
+ // The type of the pass-id argument depends on LLVM_VERSION.
+#if LLVM_VERSION >= 2500
+ AMDILCFGStructurizer(char &pid, TargetMachine &tm, CodeGenOpt::Level OL);
+#else
+ AMDILCFGStructurizer(intptr_t pid, TargetMachine &tm, CodeGenOpt::Level OL);
+#endif
+ // Returns the instruction info captured at construction time.
+ const TargetInstrInfo *getTargetInstrInfo() const;
+ //bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGStructurizer
+
+//char AMDILCFGStructurizer::ID = 0;
+} //end of namespace llvm
+// Constructor: forwards pid to MachineFunctionPass, stores the target machine
+// and caches tm.getInstrInfo(). OL is not used here. The first line of the
+// signature is selected by LLVM_VERSION; the remainder is shared.
+#if LLVM_VERSION >= 2500
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm,
+#else
+AMDILCFGStructurizer::AMDILCFGStructurizer(intptr_t pid, TargetMachine &tm,
+#endif
+ CodeGenOpt::Level OL)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()) {
+}
+
+// Accessor for the TargetInstrInfo pointer cached by the constructor.
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
+  const TargetInstrInfo *instrInfo = TII;
+  return instrInfo;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+// Machine-function pass reported as "AMD IL Control Flow Graph Preparation
+// Pass". runOnMachineFunction is only declared here; its definition is not
+// part of this declaration.
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+ // Pass identification object handed to the base-class constructor.
+ static char ID;
+
+public:
+ AMDILCFGPrepare(TargetMachine &tm, CodeGenOpt::Level OL);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+// Constructor: passes the pass id to the base class, either as a reference to
+// ID or as the address of ID cast to intptr_t, depending on LLVM_VERSION.
+AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+ : AMDILCFGStructurizer(ID, tm, OL)
+#else
+ : AMDILCFGStructurizer((intptr_t)&ID, tm, OL)
+#endif
+{
+}
+// Human-readable name of this pass.
+const char *AMDILCFGPrepare::getPassName() const {
+  static const char passName[] = "AMD IL Control Flow Graph Preparation Pass";
+  return passName;
+}
+
+// Declare the analyses this pass depends on: the machine function analysis
+// (also marked as preserved), the machine dominator and post-dominator trees,
+// and machine loop info.
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+// Machine-function pass reported as "AMD IL Control Flow Graph structurizer
+// Pass". runOnMachineFunction is only declared here; its definition is not
+// part of this declaration.
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+ // Pass identification object handed to the base-class constructor.
+ static char ID;
+
+public:
+ AMDILCFGPerform(TargetMachine &tm, CodeGenOpt::Level OL);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+ // Constructor: passes the pass id to the base class, either as a reference to
+ // ID or as the address of ID cast to intptr_t, depending on LLVM_VERSION.
+ AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+: AMDILCFGStructurizer(ID, tm, OL)
+#else
+: AMDILCFGStructurizer((intptr_t)&ID, tm, OL)
+#endif
+{
+}
+
+// Human-readable name of this pass.
+const char *AMDILCFGPerform::getPassName() const {
+  static const char passName[] = "AMD IL Control Flow Graph structurizer Pass";
+  return passName;
+}
+
+// Declare the analyses this pass depends on: the machine function analysis
+// (also marked as preserved), the machine dominator and post-dominator trees,
+// and machine loop info.
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// this class is tailor to the AMDIL backend
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer>
+{
+ typedef int RegiT;
+
+ // Translate a BRANCH_COND opcode into the matching BREAK_LOGICALNZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getBreakNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ // Translate a BRANCH_COND opcode into the matching BREAK_LOGICALZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getBreakZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ // Translate a BRANCH_COND opcode into the matching IF_LOGICALNZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ // Translate a BRANCH_COND opcode into the matching IF_LOGICALZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ // Translate a BRANCH_COND opcode into the matching CONTINUE_LOGICALNZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ // Translate a BRANCH_COND opcode into the matching CONTINUE_LOGICALZ opcode.
+ // The case labels come from ExpandCaseToAllScalarReturn, a macro defined
+ // elsewhere (presumably one returning case per scalar type -- confirm).
+ // Any other opcode trips the assert; without asserts the result is -1.
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+ // The branch target block is stored as operand 0 of the branch instruction.
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+   MachineOperand &targetOp = instr->getOperand(0);
+   return targetOp.getMBB();
+ }
+
+ // Retarget the branch by overwriting its operand 0 with blk.
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+   MachineOperand &targetOp = instr->getOperand(0);
+   targetOp.setMBB(blk);
+ }
+
+ // For a block with exactly two successors, return the one that is not the
+ // explicit (true) target of the branch instruction instr.
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+   assert(blk->succ_size() == 2);
+
+   MachineBasicBlock *takenBlk = getTrueBranch(instr);
+   MachineBasicBlock::succ_iterator firstIt = blk->succ_begin();
+   MachineBasicBlock::succ_iterator secondIt = firstIt;
+   ++secondIt;
+
+   if (*firstIt == takenBlk) {
+     return *secondIt;
+   }
+   return *firstIt;
+ }
+
+ // True when instr's opcode matches one of the case labels generated by
+ // ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND); the macro is defined
+ // elsewhere (presumably one BRANCH_COND variant per scalar type -- confirm).
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ // True when instr is the AMDIL::BRANCH instruction.
+ static bool isUncondBranch(MachineInstr *instr) {
+   return instr->getOpcode() == AMDIL::BRANCH;
+ }
+
+ // True when instr's opcode matches one of the case labels generated by
+ // ExpandCaseToAllTypes(AMDIL::MOVE); the macro is defined elsewhere
+ // (presumably one MOVE variant per register type -- confirm).
+ static bool isPhimove(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::MOVE);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ // Return the DebugLoc of the LAST instruction in blk that carries a known
+ // debug location, or a default-constructed DebugLoc when none does.
+ // The block is scanned backwards so the search can stop at the first hit;
+ // the result is the same as a full forward scan that keeps the latest match.
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+   for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+        iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+     if (!iter->getDebugLoc().isUnknown()) {
+       return iter->getDebugLoc();
+     }
+   }
+   return DebugLoc();
+ }
+
+ // Return the last instruction of blk if it is a conditional or unconditional
+ // branch, otherwise NULL. An empty block yields NULL; previously rbegin() was
+ // dereferenced without checking for an empty block.
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+   MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+   if (iter == blk->rend()) {
+     return NULL;
+   }
+
+   MachineInstr *instr = &*iter;
+   if (isCondBranch(instr) || isUncondBranch(instr)) {
+     return instr;
+   }
+   return NULL;
+ }
+
+ // A more accurate name would be getPossibleLoopendBlockBranchInstr.
+ //
+ // A block with a backward edge may have move instructions after its branch
+ // instruction; such moves "belong to" the loop backward edge. The block is
+ // therefore scanned from the end, skipping moves, and the first branch found
+ // is returned. Any other instruction stops the search with NULL.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+   MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+   MachineBasicBlock::reverse_iterator iterEnd = blk->rend();
+   while (iter != iterEnd) {
+     MachineInstr *instr = &*iter;
+     if (isCondBranch(instr) || isUncondBranch(instr)) {
+       return instr;
+     }
+     if (!isPhimove(instr)) {
+       return NULL;
+     }
+     ++iter;
+   }
+   return NULL;
+ }
+
+ // Return the last instruction of blk if it is AMDIL::RETURN; NULL otherwise
+ // (including for an empty block).
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+   MachineBasicBlock::reverse_iterator lastIt = blk->rbegin();
+   if (lastIt == blk->rend()) {
+     return NULL;
+   }
+
+   MachineInstr *lastInstr = &(*lastIt);
+   return (lastInstr->getOpcode() == AMDIL::RETURN) ? lastInstr : NULL;
+ }
+
+ // Return the last instruction of blk if it is AMDIL::CONTINUE; NULL otherwise
+ // (including for an empty block).
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+   MachineBasicBlock::reverse_iterator lastIt = blk->rbegin();
+   if (lastIt == blk->rend()) {
+     return NULL;
+   }
+
+   MachineInstr *lastInstr = &(*lastIt);
+   return (lastInstr->getOpcode() == AMDIL::CONTINUE) ? lastInstr : NULL;
+ }
+
+ // Return the first BREAK_LOGICALNZ_i32 or BREAK_LOGICALZ_i32 instruction in
+ // blk, scanning front to back; NULL when the block contains neither.
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+   MachineBasicBlock::iterator iter = blk->begin();
+   while (iter != blk->end()) {
+     MachineInstr *instr = &(*iter);
+     switch (instr->getOpcode()) {
+     case AMDIL::BREAK_LOGICALNZ_i32:
+     case AMDIL::BREAK_LOGICALZ_i32:
+       return instr;
+     default:
+       break;
+     }
+     ++iter;
+   }
+   return NULL;
+ }
+
+ // A block counts as a return block when it has no successors. A block that
+ // ends in a RETURN instruction is asserted to have none; a successor-less
+ // block without a RETURN is only reported in debug output.
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+   MachineInstr *retInstr = getReturnInstr(blk);
+   const bool noSuccessors = (blk->succ_size() == 0);
+
+   if (retInstr != NULL) {
+     assert(noSuccessors);
+     return noSuccessors;
+   }
+
+   if (noSuccessors) {
+     if (DEBUGME) {
+       errs() << "BB" << blk->getNumber()
+              <<" is return block without RETURN instr\n";
+     }
+   }
+
+   return noSuccessors;
+ }
+
+ // Return the iterator in blk that refers to instr. instr must belong to blk
+ // (asserted). The end test now runs before the dereference, so the search
+ // never dereferences the end iterator when instr is not found.
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+   assert(instr->getParent() == blk && "instruction doesn't belong to block");
+   MachineBasicBlock::iterator iter = blk->begin();
+   MachineBasicBlock::iterator iterEnd = blk->end();
+   while (iter != iterEnd && &(*iter) != instr) {
+     ++iter;
+   }
+
+   assert(iter != iterEnd);
+   return iter;
+ }//getInstrPos
+
+ // Convenience overload: same as the DebugLoc variant, using a
+ // default-constructed DebugLoc.
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                        AMDILCFGStructurizer *passRep) {
+   DebugLoc noLoc;
+   return insertInstrBefore(blk, newOpcode, passRep, noLoc);
+ } //insertInstrBefore
+
+ // Create an operand-less instruction with opcode newOpcode and debug location
+ // DL, and place it at the very beginning of blk. Returns the new instruction.
+ //
+ // Inserting at begin() also covers the empty block (begin() == end() there),
+ // so no separate push_back path is needed. The unused local iterator was
+ // removed.
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+                                        AMDILCFGStructurizer *passRep, DebugLoc DL) {
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+   MachineInstr *newInstr =
+     blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+   blk->insert(blk->begin(), newInstr);
+
+   SHOWNEWINSTR(newInstr);
+
+   return newInstr;
+ } //insertInstrBefore
+
+ // Convenience overload: same as the DebugLoc variant, using a
+ // default-constructed DebugLoc.
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                            AMDILCFGStructurizer *passRep) {
+   DebugLoc noLoc;
+   insertInstrEnd(blk, newOpcode, passRep, noLoc);
+ } //insertInstrEnd
+
+ // Create an operand-less instruction with opcode newOpcode and debug location
+ // DL, and append it to the end of blk.
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+                            AMDILCFGStructurizer *passRep, DebugLoc DL) {
+   MachineFunction *func = blk->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   //assume the instruction doesn't take any reg operand ...
+   MachineInstr *appended = func->CreateMachineInstr(tii->get(newOpcode), DL);
+   blk->push_back(appended);
+
+   SHOWNEWINSTR(appended);
+ } //insertInstrEnd
+
+ // Create an operand-less instruction with opcode newOpcode (default
+ // DebugLoc) and insert it directly in front of the instruction at instrPos.
+ // instrPos must refer to a real instruction, since its parent block is read.
+ // Returns the new instruction.
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+                                        int newOpcode,
+                                        AMDILCFGStructurizer *passRep) {
+   MachineBasicBlock *ownerBlk = instrPos->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   //assume the instruction doesn't take any reg operand ...
+   MachineInstr *inserted =
+     ownerBlk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+                                               DebugLoc());
+   ownerBlk->insert(instrPos, inserted);
+
+   SHOWNEWINSTR(inserted);
+   return inserted;
+ } //insertInstrBefore
+
+ // Insert a new instruction with opcode newOpcode in front of the instruction
+ // at instrPos and give it one register operand: the register found in
+ // operand 1 of the instruction at instrPos. The old instruction is left in
+ // place; erasing it is up to the caller.
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+                                    int newOpcode,
+                                    AMDILCFGStructurizer *passRep,
+                                    DebugLoc DL) {
+   MachineInstr *condSource = &(*instrPos);
+   MachineBasicBlock *ownerBlk = condSource->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *inserted =
+     ownerBlk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+   ownerBlk->insert(instrPos, inserted);
+
+   MachineInstrBuilder builder(inserted);
+   builder.addReg(condSource->getOperand(1).getReg(), false);
+
+   SHOWNEWINSTR(inserted);
+   //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ // Insert a new instruction with opcode newOpcode into blk in front of
+ // insertPos and give it the single register operand regNum.
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+                                    MachineBasicBlock::iterator insertPos,
+                                    int newOpcode,
+                                    AMDILCFGStructurizer *passRep,
+                                    RegiT regNum,
+                                    DebugLoc DL) {
+   MachineFunction *func = blk->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *inserted = func->CreateMachineInstr(tii->get(newOpcode), DL);
+
+   //insert before
+   blk->insert(insertPos, inserted);
+
+   MachineInstrBuilder builder(inserted);
+   builder.addReg(regNum, false);
+
+   SHOWNEWINSTR(inserted);
+ } //insertCondBranchBefore
+
+ // Append a new instruction with opcode newOpcode (default DebugLoc) to blk
+ // and give it the single register operand regNum.
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+                                 int newOpcode,
+                                 AMDILCFGStructurizer *passRep,
+                                 RegiT regNum) {
+   MachineFunction *func = blk->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *appended =
+     func->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+   blk->push_back(appended);
+
+   MachineInstrBuilder builder(appended);
+   builder.addReg(regNum, false);
+
+   SHOWNEWINSTR(appended);
+ } //insertCondBranchEnd
+
+
+ // Insert "regNum = LOADCONST_i32 regVal" directly in front of the instruction
+ // at instrPos. instrPos must refer to a real instruction, since its parent
+ // block is read.
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+                                     AMDILCFGStructurizer *passRep,
+                                     RegiT regNum, int regVal) {
+   MachineBasicBlock *ownerBlk = instrPos->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *loadConst =
+     ownerBlk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+                                               DebugLoc());
+   // Operands: destination register first, then the immediate value.
+   MachineInstrBuilder(loadConst)
+     .addReg(regNum, RegState::Define)
+     .addImm(regVal);
+
+   ownerBlk->insert(instrPos, loadConst);
+
+   SHOWNEWINSTR(loadConst);
+ } //insertAssignInstrBefore
+
+ // Place "regNum = LOADCONST_i32 regVal" at the very beginning of blk
+ // (appended when the block is empty).
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+                                     AMDILCFGStructurizer *passRep,
+                                     RegiT regNum, int regVal) {
+   MachineFunction *func = blk->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *loadConst =
+     func->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32), DebugLoc());
+   // Operands: destination register first, then the immediate value.
+   MachineInstrBuilder(loadConst)
+     .addReg(regNum, RegState::Define)
+     .addImm(regVal);
+
+   if (blk->begin() == blk->end()) {
+     blk->push_back(loadConst);
+   } else {
+     blk->insert(blk->begin(), loadConst);
+   }
+
+   SHOWNEWINSTR(loadConst);
+
+ } //insertAssignInstrBefore
+
+ // Insert "dstReg = IEQ src1Reg, src2Reg" into blk in front of instrPos.
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+                                      MachineBasicBlock::iterator instrPos,
+                                      AMDILCFGStructurizer *passRep,
+                                      RegiT dstReg, RegiT src1Reg,
+                                      RegiT src2Reg) {
+   MachineFunction *func = blk->getParent();
+   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+   MachineInstr *cmpInstr =
+     func->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+
+   // Operands: destination register, then the two sources.
+   MachineInstrBuilder(cmpInstr)
+     .addReg(dstReg, RegState::Define)
+     .addReg(src1Reg)
+     .addReg(src2Reg);
+
+   blk->insert(instrPos, cmpInstr);
+   SHOWNEWINSTR(cmpInstr);
+
+ } //insertCompareInstrBefore
+
+ // Add every successor of srcBlk as a successor of dstBlk.
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+                                MachineBasicBlock *srcBlk) {
+   MachineBasicBlock::succ_iterator succIt = srcBlk->succ_begin();
+   MachineBasicBlock::succ_iterator succEnd = srcBlk->succ_end();
+   while (succIt != succEnd) {
+     // *succIt's predecessor is also taken care of
+     dstBlk->addSuccessor(*succIt);
+     ++succIt;
+   }
+ } //cloneSuccessorList
+
+ // Creates a fresh basic block, appends it to srcBlk's function, and fills it
+ // with clones of srcBlk's instructions in their original order. Returns the
+ // new block. Successor edges are not copied here.
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+   MachineFunction *parentFunc = srcBlk->getParent();
+   MachineBasicBlock *copyBlk = parentFunc->CreateMachineBasicBlock();
+   parentFunc->push_back(copyBlk); //insert to function
+
+   MachineBasicBlock::const_iterator srcIt = srcBlk->begin();
+   MachineBasicBlock::const_iterator srcEnd = srcBlk->end();
+   while (srcIt != srcEnd) {
+     MachineInstr *copyInstr = parentFunc->CloneMachineInstr(&*srcIt);
+     // Workaround for LLVM bugzilla 8420: CloneMachineInstr does not carry
+     // over the AsmPrinterFlags, so copy them by hand.
+     copyInstr->setAsmPrinterFlag(
+         static_cast<llvm::MachineInstr::CommentFlag>(
+             srcIt->getAsmPrinterFlags()));
+     copyBlk->push_back(copyInstr);
+     ++srcIt;
+   }
+   return copyBlk;
+ } //clone
+
+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+ //the AMDIL instruction is not recognized as a terminator. TODO: fix that and
+ //retire this routine.
+ // Retargets srcBlk's loop-end branch instruction from oldBlk to newBlk.
+ // Nothing happens unless that instruction exists, is a conditional branch,
+ // and its explicit branch target is oldBlk.
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+                                        MachineBasicBlock *oldBlk,
+                                        MachineBasicBlock *newBlk) {
+   MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+   if (!branchInstr) {
+     return;
+   }
+   if (!isCondBranch(branchInstr)) {
+     return;
+   }
+   if (getExplicitBranch(branchInstr) != oldBlk) {
+     return;
+   }
+   setExplicitBranch(branchInstr, newBlk);
+ } //replaceInstrUseOfBlockWith
+
+ // Final cleanup of entryBlk: asserts that the function has no (non-empty)
+ // jump table, then deletes every CONTINUE instruction that is immediately
+ // followed by an ENDLOOP instruction inside entryBlk.
+ static void wrapup(MachineBasicBlock *entryBlk) {
+   assert((!entryBlk->getParent()->getJumpTableInfo()
+           || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+          && "found a jump table");
+
+   typedef SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> InstrList;
+
+   // Pass 1: collect each CONTINUE sitting right before an ENDLOOP. Erasing
+   // is deferred so the block is not modified while it is being walked.
+   // 'prev' trails 'cur' by one instruction (they coincide on the first
+   // iteration, where the two opcode tests cannot both hold).
+   InstrList deadContinues;
+   MachineBasicBlock::iterator prev = entryBlk->begin();
+   MachineBasicBlock::iterator blkEnd = entryBlk->end();
+   for (MachineBasicBlock::iterator cur = prev; cur != blkEnd;
+        prev = cur, ++cur) {
+     if (prev->getOpcode() == AMDIL::CONTINUE
+         && cur->getOpcode() == AMDIL::ENDLOOP) {
+       deadContinues.push_back(&*prev);
+     }
+   }
+
+   // Pass 2: delete the collected CONTINUE instructions.
+   for (InstrList::iterator it = deadContinues.begin(),
+        itEnd = deadContinues.end(); it != itEnd; ++it) {
+     (*it)->eraseFromParent();
+   }
+
+   // TODO: fix up the jump table so later phases won't be confused. When the
+   // jump table is not empty it needs to be cleaned, but there isn't such an
+   // interface yet. Alternatively, replace all the other blocks in the jump
+   // table with the entryBlk.
+
+ } //wrapup
+
+ // Returns the address of the MachineDominatorTree analysis result obtained
+ // from pass via getAnalysis.
+ static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+   MachineDominatorTree &domTree = pass.getAnalysis<MachineDominatorTree>();
+   return &domTree;
+ }
+
+ // Returns the address of the MachinePostDominatorTree analysis result
+ // obtained from pass via getAnalysis.
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDILCFGStructurizer &pass) {
+   MachinePostDominatorTree &postDomTree =
+       pass.getAnalysis<MachinePostDominatorTree>();
+   return &postDomTree;
+ }
+
+ // Returns the address of the MachineLoopInfo analysis result obtained from
+ // pass via getAnalysis.
+ static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+   MachineLoopInfo &loopInfo = pass.getAnalysis<MachineLoopInfo>();
+   return &loopInfo;
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDILCFGPreparationPass - Allocates a new AMDILCFGPrepare pass built
+// from tm and OptLevel and returns it as a FunctionPass pointer.
+FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm,
+                                                  CodeGenOpt::Level OptLevel) {
+  FunctionPass *preparePass = new AMDILCFGPrepare(tm, OptLevel);
+  return preparePass;
+}
+
+// Runs the prepare() step of a freshly constructed CFGStructurizer on func,
+// handing it this pass, and returns prepare()'s result.
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+  typedef llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer> StructurizerT;
+  return StructurizerT().prepare(func, *this);
+}
+
+// createAMDILCFGStructurizerPass - Allocates a new AMDILCFGPerform pass built
+// from tm and OptLevel and returns it as a FunctionPass pointer.
+FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm,
+                                                   CodeGenOpt::Level OptLevel) {
+  FunctionPass *performPass = new AMDILCFGPerform(tm, OptLevel);
+  return performPass;
+}
+
+// Runs the run() step of a freshly constructed CFGStructurizer on func,
+// handing it this pass, and returns run()'s result.
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
+  typedef llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer> StructurizerT;
+  return StructurizerT().run(func, *this);
+}
+
+//end of file newline goes below
+
diff --git a/src/gallium/drivers/radeon/AMDILCallingConv.td b/src/gallium/drivers/radeon/AMDILCallingConv.td
new file mode 100644
index 00000000000..cd9736da572
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCallingConv.td
@@ -0,0 +1,116 @@
+//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMDIL architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// AMDIL 32-bit C return-value convention.
+def RetCC_AMDIL32 : CallingConv<[
+ // Since IL has no return values, all values can be emulated on the stack.
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL.
+
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i1, i8, i16, i32, f32, f64, i64], CCAssignToReg<
+ [
+ R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element Short vector types get 16 byte alignment and size of 8 bytes
+ CCIfType<[v2i32, v2f32, v2i8, v4i8, v2i16, v4i16], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 4-element Short vector types get 16 byte alignment and size of 16 bytes
+ CCIfType<[v4i32, v4f32], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element 64-bit vector types get aligned to 16 bytes with a size of 16 bytes
+ CCIfType<[v2f64, v2i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >, CCAssignToStack<16, 16>
+]>;
+
+// AMDIL 32-bit C Calling convention.
+def CC_AMDIL32 : CallingConv<[
+ // Since IL has parameter values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i1, i8, i16, i32, f32, f64, i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element Short vector types get 16 byte alignment and size of 8 bytes
+ CCIfType<[v2i32, v2f32, v2i8, v4i8, v2i16, v4i16], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 4-element 32-bit vector types get 16 byte alignment and size of 16 bytes
+ CCIfType<[v4i32, v4f32], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >,
+
+ // 2-element 64-bit vector types get aligned to 16 bytes with a size of 16 bytes
+ CCIfType<[v2f64, v2i64], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204, R205, R206, R207, R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220, R221, R222, R223, R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236, R237, R238, R239, R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252, R253, R254, R255, R256, R257, R258, R259, R260, R261, R262, R263, R264, R265, R266, R267, R268, R269, R270, R271, R272, R273, R274, R275, R276, R277, R278, R279, R280, R281, R282, R283, R284, R285, R286, R287, R288, R289, R290, R291, R292, R293, R294, R295, R296, R297, R298, R299, R300, R301, R302, R303, R304, R305, R306, R307, R308, R309, R310, R311, R312, R313, R314, R315, R316, R317, R318, R319, R320, R321, R322, R323, R324, R325, R326, R327, R328, R329, R330, R331, R332, R333, R334, R335, R336, R337, R338, R339, R340, R341, R342, R343, R344, R345, R346, R347, R348, R349, R350, R351, 
R352, R353, R354, R355, R356, R357, R358, R359, R360, R361, R362, R363, R364, R365, R366, R367, R368, R369, R370, R371, R372, R373, R374, R375, R376, R377, R378, R379, R380, R381, R382, R383, R384, R385, R386, R387, R388, R389, R390, R391, R392, R393, R394, R395, R396, R397, R398, R399, R400, R401, R402, R403, R404, R405, R406, R407, R408, R409, R410, R411, R412, R413, R414, R415, R416, R417, R418, R419, R420, R421, R422, R423, R424, R425, R426, R427, R428, R429, R430, R431, R432, R433, R434, R435, R436, R437, R438, R439, R440, R441, R442, R443, R444, R445, R446, R447, R448, R449, R450, R451, R452, R453, R454, R455, R456, R457, R458, R459, R460, R461, R462, R463, R464, R465, R466, R467, R468, R469, R470, R471, R472, R473, R474, R475, R476, R477, R478, R479, R480, R481, R482, R483, R484, R485, R486, R487, R488, R489, R490, R491, R492, R493, R494, R495, R496, R497, R498, R499, R500, R501, R502, R503, R504, R505, R506, R507, R508, R509, R510, R511, R512, R513, R514, R515, R516, R517, R518, R519, R520, R521, R522, R523, R524, R525, R526, R527, R528, R529, R530, R531, R532, R533, R534, R535, R536, R537, R538, R539, R540, R541, R542, R543, R544, R545, R546, R547, R548, R549, R550, R551, R552, R553, R554, R555, R556, R557, R558, R559, R560, R561, R562, R563, R564, R565, R566, R567, R568, R569, R570, R571, R572, R573, R574, R575, R576, R577, R578, R579, R580, R581, R582, R583, R584, R585, R586, R587, R588, R589, R590, R591, R592, R593, R594, R595, R596, R597, R598, R599, R600, R601, R602, R603, R604, R605, R606, R607, R608, R609, R610, R611, R612, R613, R614, R615, R616, R617, R618, R619, R620, R621, R622, R623, R624, R625, R626, R627, R628, R629, R630, R631, R632, R633, R634, R635, R636, R637, R638, R639, R640, R641, R642, R643, R644, R645, R646, R647, R648, R649, R650, R651, R652, R653, R654, R655, R656, R657, R658, R659, R660, R661, R662, R663, R664, R665, R666, R667, R668, R669, R670, R671, R672, R673, R674, R675, R676, R677, R678, R679, R680, R681, R682, R683, R684, 
R685, R686, R687, R688, R689, R690, R691, R692, R693, R694, R695, R696, R697, R698, R699, R700, R701, R702, R703, R704, R705, R706, R707, R708, R709, R710, R711, R712, R713, R714, R715, R716, R717, R718, R719, R720, R721, R722, R723, R724, R725, R726, R727, R728, R729, R730, R731, R732, R733, R734, R735, R736, R737, R738, R739, R740, R741, R742, R743, R744, R745, R746, R747, R748, R749, R750, R751, R752, R753, R754, R755, R756, R757, R758, R759, R760, R761, R762, R763, R764, R765, R766, R767
+]> >, CCAssignToStack<16, 16>
+]>;
+
diff --git a/src/gallium/drivers/radeon/AMDILCompilerErrors.h b/src/gallium/drivers/radeon/AMDILCompilerErrors.h
new file mode 100644
index 00000000000..e7156870828
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCompilerErrors.h
@@ -0,0 +1,116 @@
+//==-----------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_COMPILER_ERRORS_H_
+#define _AMDIL_COMPILER_ERRORS_H_
+// Compiler errors generated by the backend that will cause
+// the runtime to abort compilation. These are mainly for
+// device constraint violations or invalid code.
+namespace amd { // only CompilerErrorMessage is scoped by this namespace; the #define codes below are macros and ignore it
+
+#define INVALID_COMPUTE 0 // each code is the index of its text in CompilerErrorMessage (0 -> "E000:...")
+#define GENERIC_ERROR 1
+#define INTERNAL_ERROR 2
+#define MISSING_FUNCTION_CALL 3
+#define RESERVED_FUNCTION 4
+#define BYTE_STORE_ERROR 5
+#define UNKNOWN_TYPE_NAME 6
+#define NO_IMAGE_SUPPORT 7
+#define NO_ATOMIC_32 8
+#define NO_ATOMIC_64 9
+#define IRREDUCIBLE_CF 10
+#define INSUFFICIENT_RESOURCES 11
+#define INSUFFICIENT_LOCAL_RESOURCES 12
+#define INSUFFICIENT_PRIVATE_RESOURCES 13
+#define INSUFFICIENT_IMAGE_RESOURCES 14 // NOTE(review): the E014 text says images are unsupported rather than out of resources -- confirm intent
+#define DOUBLE_NOT_SUPPORTED 15
+#define INVALID_CONSTANT_WRITE 16
+#define INSUFFICIENT_CONSTANT_RESOURCES 17
+#define INSUFFICIENT_COUNTER_RESOURCES 18
+#define INSUFFICIENT_REGION_RESOURCES 19
+#define REGION_MEMORY_ERROR 20
+#define MEMOP_NO_ALLOCATION 21
+#define RECURSIVE_FUNCTION 22
+#define INCORRECT_COUNTER_USAGE 23
+#define INVALID_INTRINSIC_USAGE 24
+#define NUM_ERROR_MESSAGES 25 // count of the codes above (0..24); also the length of CompilerErrorMessage
+
+
+ static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] = // 'static' in a header: each translation unit including it gets its own copy
+ { // one text per code, in code order; every text starts with "E" plus the three-digit code
+ "E000:Compute Shader Not Supported! ",
+ "E001:Generic Compiler Error Message! ",
+ "E002:Internal Compiler Error Message!",
+ "E003:Missing Function Call Detected! ",
+ "E004:Reserved Function Call Detected!",
+ "E005:Byte Addressable Stores Invalid!",
+ "E006:Kernel Arg Type Name Is Invalid!",
+ "E007:Image 1.0 Extension Unsupported!",
+ "E008:32bit Atomic Op are Unsupported!",
+ "E009:64bit Atomic Op are Unsupported!",
+ "E010:Irreducible ControlFlow Detected",
+ "E011:Insufficient Resources Detected!",
+ "E012:Insufficient Local Resources! ",
+ "E013:Insufficient Private Resources! ",
+ "E014:Images not currently supported! ",
+ "E015:Double precision not supported! ",
+ "E016:Invalid Constant Memory Write! ",
+ "E017:Max number Constant Ptr reached!",
+ "E018:Max number of Counters reached! ",
+ "E019:Insufficient Region Resources! ",
+ "E020:Region address space invalid! ",
+ "E021:MemOp with no memory allocated! ",
+ "E022:Recursive Function detected! ",
+ "E023:Illegal Inc+Dec to same counter!",
+ "E024:Illegal usage of intrinsic inst!"
+ };
+
+}
+
+#endif // _AMDIL_COMPILER_ERRORS_H_
diff --git a/src/gallium/drivers/radeon/AMDILCompilerWarnings.h b/src/gallium/drivers/radeon/AMDILCompilerWarnings.h
new file mode 100644
index 00000000000..268507bb51c
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILCompilerWarnings.h
@@ -0,0 +1,72 @@
+//==-----------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_COMPILER_WARNINGS_H_
+#define _AMDIL_COMPILER_WARNINGS_H_
+/// Compiler backend generated warnings that might cause
+/// issues with compilation. These warnings become errors if
+/// -Werror is specified on the command line.
+namespace amd { // only CompilerWarningMessage is scoped by this namespace; the #define codes below are macros and ignore it
+
+#define LIMIT_BARRIER 0 // each code is the index of its text in CompilerWarningMessage (0 -> "W000:...")
+#define BAD_BARRIER_OPT 1
+#define RECOVERABLE_ERROR 2
+#define NUM_WARN_MESSAGES 3 // count of the codes above (0..2); also the length of CompilerWarningMessage
+ /// All warnings must be prefixed with the W token or they might be
+ /// treated as errors.
+ static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] = // 'static' in a header: each translation unit including it gets its own copy
+ { // one text per code, in code order; every text starts with "W" plus the three-digit code
+ "W000:Barrier caused limited groupsize",
+ "W001:Dangerous Barrier Opt Detected! ",
+ "W002:Recoverable BE Error Detected! "
+
+ };
+}
+
+#endif // _AMDIL_COMPILER_WARNINGS_H_
diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td
new file mode 100644
index 00000000000..33aac5b4585
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILConversions.td
@@ -0,0 +1,1062 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+def actos_i16:Pat < (i16 (anyext GPRI8:$src)), // i8 -> i16 any-extend; same expansion as the zext pattern uctos_i16
+(IL_ASSHORT_i32 // the 32-bit shift result is passed through IL_ASSHORT_i32 to form the i16 result
+ (USHR_i32 // shift right by 24 (USHR: presumably the logical shift)
+ (SHL_i32 // shift left by 24 first
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctos_i16:Pat < (i16 (zext GPRI8:$src)), // i8 -> i16 zero-extend: (src << 24) >> 24 on a 32-bit value
+(IL_ASSHORT_i32
+ (USHR_i32 // USHR is presumably the logical shift, so the upper 24 bits end up 0
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctos_i16:Pat < (i16 (sext GPRI8:$src)), // i8 -> i16 sign-extend: (src << 24) >> 24 on a 32-bit value
+(IL_ASSHORT_i32
+ (SHR_i32 // SHR (not USHR) is presumably the arithmetic shift, replicating the i8 sign bit
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actoi_i32:Pat < (i32 (anyext GPRI8:$src)), // i8 -> i32 any-extend; same expansion as the zext pattern uctoi_i32
+(IL_ASINT_i32
+ (USHR_i32 // shift right by 24 (USHR: presumably the logical shift)
+ (SHL_i32 // shift left by 24 first
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def uctoi_i32:Pat < (i32 (zext GPRI8:$src)), // i8 -> i32 zero-extend: (src << 24) >> 24 on a 32-bit value
+(IL_ASINT_i32
+ (USHR_i32 // USHR is presumably the logical shift, so the upper 24 bits end up 0
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def sctoi_i32:Pat < (i32 (sext GPRI8:$src)), // i8 -> i32 sign-extend: (src << 24) >> 24 on a 32-bit value
+(IL_ASINT_i32
+ (SHR_i32 // SHR (not USHR) is presumably the arithmetic shift, replicating the i8 sign bit
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))) >;
+
+
+def actol_i64:Pat < (i64 (anyext GPRI8:$src)), // i8 -> i64 any-extend; same expansion as the zext pattern uctol_i64
+(LCREATE // builds the i64 from two 32-bit operands
+ (USHR_i32 // first operand: (src << 24) >> 24 with USHR
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >; // second operand (presumably the high word) is the constant 0
+
+
+def uctol_i64:Pat < (i64 (zext GPRI8:$src)), // i8 -> i64 zero-extend
+(LCREATE // builds the i64 from two 32-bit operands
+ (USHR_i32 // first operand: (src << 24) >> 24 with USHR (presumably logical, clearing the upper 24 bits)
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 0)) >; // second operand (presumably the high word) is the constant 0
+
+
+def sctol_i64:Pat < (i64 (sext GPRI8:$src)), // i8 -> i64 sign-extend
+(LCREATE // builds the i64 from two 32-bit operands
+ (SHR_i32 // first operand: (src << 24) >> 24 with SHR (presumably arithmetic), i.e. the sign-extended 32-bit value
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)),
+ (SHR_i32 // second operand: (src << 24) >> 31, which leaves only copies of the i8 sign bit if SHR is arithmetic
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 31))) >;
+
+
+def astoi_i32:Pat < (i32 (anyext GPRI16:$src)), // i16 -> i32 any-extend; same expansion as the zext pattern ustoi_i32
+(IL_ASINT_i32
+ (USHR_i32 // shift right by 16 (USHR: presumably the logical shift)
+ (SHL_i32 // shift left by 16 first
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def ustoi_i32:Pat < (i32 (zext GPRI16:$src)), // i16 -> i32 zero-extend: (src << 16) >> 16 on a 32-bit value
+(IL_ASINT_i32
+ (USHR_i32 // USHR is presumably the logical shift, so the upper 16 bits end up 0
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def sstoi_i32:Pat < (i32 (sext GPRI16:$src)), // i16 -> i32 sign-extend: (src << 16) >> 16 on a 32-bit value
+(IL_ASINT_i32
+ (SHR_i32 // SHR (not USHR) is presumably the arithmetic shift, replicating the i16 sign bit
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))) >;
+
+
+def astol_i64:Pat < (i64 (anyext GPRI16:$src)), // i16 -> i64 any-extend; same expansion as the zext pattern ustol_i64
+(LCREATE // builds the i64 from two 32-bit operands
+ (USHR_i32 // first operand: (src << 16) >> 16 with USHR
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >; // second operand (presumably the high word) is the constant 0
+
+
+def ustol_i64:Pat < (i64 (zext GPRI16:$src)), // i16 -> i64 zero-extend
+(LCREATE // builds the i64 from two 32-bit operands
+ (USHR_i32 // first operand: (src << 16) >> 16 with USHR (presumably logical, clearing the upper 16 bits)
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 0)) >; // second operand (presumably the high word) is the constant 0
+
+
+def sstol_i64:Pat < (i64 (sext GPRI16:$src)), // i16 -> i64 sign-extend
+(LCREATE // builds the i64 from two 32-bit operands
+ (SHR_i32 // first operand: (src << 16) >> 16 with SHR (presumably arithmetic), i.e. the sign-extended 32-bit value
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)),
+ (SHR_i32 // second operand: (src << 16) >> 31, which leaves only copies of the i16 sign bit if SHR is arithmetic
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 31))) >;
+
+
+// i32 -> i64 any-extend. The source already occupies a full 32-bit value, so
+// it is used directly as the first LCREATE operand; unlike the i8 and i16
+// patterns no shift pair is needed (shifting left and right by a constant 0
+// cannot change the value and would only add two instructions). The second
+// operand (presumably the high word) is the constant 0.
+def aitol_i64:Pat < (i64 (anyext GPRI32:$src)),
+(LCREATE GPRI32:$src,
+ (LOADCONST_i32 0)) >;
+
+
+// i32 -> i64 zero-extend. The source already occupies a full 32-bit value, so
+// it is used directly as the first LCREATE operand; unlike the i8 and i16
+// patterns no shift pair is needed (shifting left and right by a constant 0
+// cannot change the value and would only add two instructions). The second
+// operand (presumably the high word) is the constant 0.
+def uitol_i64:Pat < (i64 (zext GPRI32:$src)),
+(LCREATE GPRI32:$src,
+ (LOADCONST_i32 0)) >;
+
+
+// i32 -> i64 sign-extend.
+// First LCREATE operand: the i32 source itself. It already occupies a full
+// 32-bit value, so unlike the i8 and i16 patterns no shift pair is needed
+// (shifting by a constant 0 cannot change the value and would only add
+// instructions).
+// Second operand: the source shifted right by 31 with SHR_i32, the same
+// sign-fill expression the i8/i16 sext patterns use (presumably an
+// arithmetic shift, leaving only copies of the sign bit).
+def sitol_i64:Pat < (i64 (sext GPRI32:$src)),
+(LCREATE GPRI32:$src,
+ (SHR_i32 GPRI32:$src,
+ (LOADCONST_i32 31))) >;
+
+
+
+def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)), // signed i8 -> f32: widen to 32 bits, then convert with ITOF
+(f32
+ (ITOF
+ (SHR_i32 // (src << 24) >> 24 with SHR (presumably arithmetic) sign-extends the i8 before conversion
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)), // unsigned i8 -> f32: widen to 32 bits, then convert with UTOF
+(f32
+ (UTOF
+ (USHR_i32 // (src << 24) >> 24 with USHR (presumably logical) zero-extends the i8 before conversion
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24)))) >;
+
+
+def ftosc_i8:Pat < (i8 (fp_to_sint GPRF32:$src)), // f32 -> signed i8: FTOI to a 32-bit integer, then keep the low 8 bits
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32 // mask with 0xFF so only the low byte of the converted value is kept
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def ftouc_i8:Pat < (i8 (fp_to_uint GPRF32:$src)), // f32 -> unsigned i8: FTOU to a 32-bit integer, then keep the low 8 bits
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32 // mask with 0xFF so only the low byte of the converted value is kept
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)), // signed i8 -> f64, done in two steps via f32
+(f64 (FTOD // FTOD then widens the f32 result to f64
+ (ITOF // integer -> f32 first
+ (SHR_i32 // (src << 24) >> 24 with SHR (presumably arithmetic) sign-extends the i8
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)), // unsigned i8 -> f64, done in two steps via f32
+(f64 (FTOD // FTOD then widens the f32 result to f64
+ (UTOF // unsigned integer -> f32 first
+ (USHR_i32 // (src << 24) >> 24 with USHR (presumably logical) zero-extends the i8
+ (SHL_i32
+(IL_ASINT_i8 GPRI8:$src),
+ (LOADCONST_i32 24)),
+ (LOADCONST_i32 24))))) >;
+
+
+def dtosc_i8:Pat < (i8 (fp_to_sint GPRF64:$src)), // f64 -> signed i8: narrow to f32, FTOI, then keep the low 8 bits
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32 // mask with 0xFF so only the low byte of the converted value is kept
+(FTOI (DTOF GPRF64:$src)), // DTOF converts the f64 to f32 before the integer conversion
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def dtouc_i8:Pat < (i8 (fp_to_uint GPRF64:$src)), // f64 -> unsigned i8: narrow to f32, FTOU, then keep the low 8 bits
+(i8
+ (IL_ASCHAR_i32
+ (BINARY_AND_i32 // mask with 0xFF so only the low byte of the converted value is kept
+(FTOU (DTOF GPRF64:$src)), // DTOF converts the f64 to f32 before the integer conversion
+ (LOADCONST_i32 0x000000FF)))) >;
+
+
+def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)), // signed i16 -> f32: widen to 32 bits, then convert with ITOF
+(f32
+ (ITOF
+ (SHR_i32 // (src << 16) >> 16 with SHR (presumably arithmetic) sign-extends the i16 before conversion
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)), // unsigned i16 -> f32: widen to 32 bits, then convert with UTOF
+(f32
+ (UTOF
+ (USHR_i32 // (src << 16) >> 16 with USHR (presumably logical) zero-extends the i16 before conversion
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16)))) >;
+
+
+def ftoss_i16:Pat < (i16 (fp_to_sint GPRF32:$src)), // f32 -> signed i16: FTOI to a 32-bit integer, then keep the low 16 bits
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32 // mask with 0xFFFF so only the low 16 bits of the converted value are kept
+(FTOI GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def ftous_i16:Pat < (i16 (fp_to_uint GPRF32:$src)), // f32 -> unsigned i16: FTOU to a 32-bit integer, then keep the low 16 bits
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32 // mask with 0xFFFF so only the low 16 bits of the converted value are kept
+(FTOU GPRF32:$src),
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)), // signed i16 -> f64, done in two steps via f32
+(f64 (FTOD // FTOD then widens the f32 result to f64
+ (ITOF // integer -> f32 first
+ (SHR_i32 // (src << 16) >> 16 with SHR (presumably arithmetic) sign-extends the i16
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)), // unsigned i16 -> f64, done in two steps via f32
+(f64 (FTOD // FTOD then widens the f32 result to f64
+ (UTOF // unsigned integer -> f32 first
+ (USHR_i32 // (src << 16) >> 16 with USHR (presumably logical) zero-extends the i16
+ (SHL_i32
+(IL_ASINT_i16 GPRI16:$src),
+ (LOADCONST_i32 16)),
+ (LOADCONST_i32 16))))) >;
+
+
+def dtoss_i16:Pat < (i16 (fp_to_sint GPRF64:$src)), // f64 -> signed i16: narrow to f32, FTOI, then keep the low 16 bits
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32 // mask with 0xFFFF so only the low 16 bits of the converted value are kept
+(FTOI (DTOF GPRF64:$src)), // DTOF converts the f64 to f32 before the integer conversion
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+def dtous_i16:Pat < (i16 (fp_to_uint GPRF64:$src)), // f64 -> unsigned i16: narrow to f32, FTOU, then keep the low 16 bits
+(i16
+ (IL_ASSHORT_i32
+ (BINARY_AND_i32 // mask with 0xFFFF so only the low 16 bits of the converted value are kept
+(FTOU (DTOF GPRF64:$src)), // DTOF converts the f64 to f32 before the integer conversion
+ (LOADCONST_i32 0x0000FFFF)))) >;
+
+
+
+
+
+def stoc_i8:Pat < (i8 (trunc GPRI16:$src)), // i16 -> i8 truncate: keep the low 8 bits
+(IL_ASCHAR_i32
+ (IL_ASINT_i16 // the masked i16 goes through IL_ASINT_i16 and then IL_ASCHAR_i32 to form the i8 result
+(BINARY_AND_i16 GPRI16:$src, // the mask is applied with the 16-bit AND and a 16-bit constant
+ (LOADCONST_i16 0x000000FF)))
+ ) >;
+
+
+def itoc_i8:Pat < (i8 (trunc GPRI32:$src)), // i32 -> i8 truncate: keep the low 8 bits
+(IL_ASCHAR_i32
+ (IL_ASINT_i32 // the masked i32 goes through IL_ASINT_i32 and then IL_ASCHAR_i32 to form the i8 result
+(BINARY_AND_i32 GPRI32:$src, // mask with 0xFF
+ (LOADCONST_i32 0x000000FF)))
+ ) >;
+
+
+def itos_i16:Pat < (i16 (trunc GPRI32:$src)), // i32 -> i16 truncate: keep the low 16 bits
+(IL_ASSHORT_i32
+ (IL_ASINT_i32 // the masked i32 goes through IL_ASINT_i32 and then IL_ASSHORT_i32 to form the i16 result
+(BINARY_AND_i32 GPRI32:$src, // mask with 0xFFFF
+ (LOADCONST_i32 0x0000FFFF)))
+ ) >;
+
+
+def ltoc_i8:Pat < (i8 (trunc GPRI64:$src)), // i64 -> i8 truncate: take a 32-bit part of the source and keep its low 8 bits
+(IL_ASCHAR_i32
+ (BINARY_AND_i32 // mask with 0xFF
+(LLO GPRI64:$src), // LLO presumably extracts the low 32-bit word of the i64 -- defined elsewhere
+ (LOADCONST_i32 0x000000FF))
+ ) >;
+
+
+def ltos_i16:Pat < (i16 (trunc GPRI64:$src)), // i64 -> i16 truncate: take a 32-bit part of the source and keep its low 16 bits
+(IL_ASSHORT_i32
+ (BINARY_AND_i32 // mask with 0xFFFF
+(LLO GPRI64:$src), // LLO presumably extracts the low 32-bit word of the i64 -- defined elsewhere
+ (LOADCONST_i32 0x0000FFFF))
+ ) >;
+
+
+// i64 -> i32 truncate: take LLO of the source (presumably its low 32-bit
+// word, as in ltoc_i8 / ltos_i16). No mask is applied here: AND with
+// 0xFFFFFFFF keeps every bit of a 32-bit value, so it would only add an
+// instruction and a constant load to the selected code.
+def ltoi_i32:Pat < (i32 (trunc GPRI64:$src)),
+(IL_ASINT_i32 (LLO GPRI64:$src)) >;
+
+
+def actos_v2i16:Pat < (v2i16 (anyext GPRV2I8:$src)), // v2i8 -> v2i16 any-extend; same expansion as the zext pattern uctos_v2i16
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32 // shift right by 24 (USHRVEC: presumably the logical vector shift)
+ (SHLVEC_v2i32 // shift left by 24 first
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))), // VCREATE_v2i32 presumably replicates the scalar 24 into both lanes
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+
+def uctos_v2i16:Pat < (v2i16 (zext GPRV2I8:$src)), // v2i8 -> v2i16 zero-extend: shift left 24 then right 24 on v2i32
+(IL_ASV2SHORT_v2i32
+ (USHRVEC_v2i32 // USHRVEC is presumably the logical shift, so the upper 24 bits end up 0
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))), // VCREATE_v2i32 presumably replicates the scalar 24 into both lanes
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+// sext v2i8 -> v2i16: SHLVEC then SHRVEC by 24 on the v2i32 view, then IL_ASV2SHORT_v2i32.
+def sctos_v2i16:Pat < (v2i16 (sext GPRV2I8:$src)),
+(IL_ASV2SHORT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+// anyext v2i8 -> v2i32: SHLVEC then USHRVEC by 24 on the v2i32 view, then IL_ASV2INT_v2i32.
+def actoi_v2i32:Pat < (v2i32 (anyext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+// zext v2i8 -> v2i32: SHLVEC then USHRVEC by 24 on the v2i32 view, then IL_ASV2INT_v2i32.
+def uctoi_v2i32:Pat < (v2i32 (zext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+// sext v2i8 -> v2i32: SHLVEC then SHRVEC by 24 on the v2i32 view, then IL_ASV2INT_v2i32.
+def sctoi_v2i32:Pat < (v2i32 (sext GPRV2I8:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
+
+// anyext v2i8 -> v2i64: LCREATE_v2i64 of the SHLVEC/USHRVEC-by-24 value and a VCREATE'd 0.
+def actol_v2i64:Pat < (v2i64 (anyext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// zext v2i8 -> v2i64: LCREATE_v2i64 of the SHLVEC/USHRVEC-by-24 value and a VCREATE'd 0.
+def uctol_v2i64:Pat < (v2i64 (zext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// sext v2i8 -> v2i64: LCREATE_v2i64 of (SHLVEC 24, SHRVEC 24) and (SHLVEC 24, SHRVEC 31).
+def sctol_v2i64:Pat < (v2i64 (sext GPRV2I8:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+// anyext v2i16 -> v2i32: SHLVEC then USHRVEC by 16 on the v2i32 view, then IL_ASV2INT_v2i32.
+def astoi_v2i32:Pat < (v2i32 (anyext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+// zext v2i16 -> v2i32: SHLVEC then USHRVEC by 16 on the v2i32 view, then IL_ASV2INT_v2i32.
+def ustoi_v2i32:Pat < (v2i32 (zext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+// sext v2i16 -> v2i32: SHLVEC then SHRVEC by 16 on the v2i32 view, then IL_ASV2INT_v2i32.
+def sstoi_v2i32:Pat < (v2i32 (sext GPRV2I16:$src)),
+(IL_ASV2INT_v2i32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
+
+// anyext v2i16 -> v2i64: LCREATE_v2i64 of the SHLVEC/USHRVEC-by-16 value and a VCREATE'd 0.
+def astol_v2i64:Pat < (v2i64 (anyext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// zext v2i16 -> v2i64: LCREATE_v2i64 of the SHLVEC/USHRVEC-by-16 value and a VCREATE'd 0.
+def ustol_v2i64:Pat < (v2i64 (zext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// sext v2i16 -> v2i64: LCREATE_v2i64 of (SHLVEC 16, SHRVEC 16) and (SHLVEC 16, SHRVEC 31).
+def sstol_v2i64:Pat < (v2i64 (sext GPRV2I16:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+// anyext v2i32 -> v2i64: LCREATE_v2i64 of the source (both shift amounts are 0) and a 0 vector.
+def aitol_v2i64:Pat < (v2i64 (anyext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// zext v2i32 -> v2i64: LCREATE_v2i64 of the source (both shift amounts are 0) and a 0 vector.
+def uitol_v2i64:Pat < (v2i64 (zext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))) >;
+
+// sext v2i32 -> v2i64: LCREATE_v2i64 of (SHLVEC 0, SHRVEC 0) and (SHLVEC 0, SHRVEC 31).
+def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)),
+(LCREATE_v2i64
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i32 GPRV2I32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0))),
+ (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+
+
+// sint_to_fp v2i8 -> v2f32: SHLVEC then SHRVEC by 24 on the v2i32 view, then ITOF_v2f32.
+def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+// uint_to_fp v2i8 -> v2f32: SHLVEC then USHRVEC by 24 on the v2i32 view, then UTOF_v2f32.
+def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+
+// fp_to_sint v2f32 -> v2i8: FTOI_v2i32, BINARY_AND with 0x000000FF, then IL_ASV2CHAR_v2i32.
+def ftosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+// fp_to_uint v2f32 -> v2i8: FTOU_v2i32, BINARY_AND with 0x000000FF, then IL_ASV2CHAR_v2i32.
+def ftouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+// sint_to_fp v2i8 -> v2f64: ITOF_v2f32 of the SHL/SHR-by-24 value, FTOD per VEXTRACT, VINSERT.
+def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256)
+ ) >;
+// uint_to_fp v2i8 -> v2f64: UTOF_v2f32 of the SHL/USHR-by-24 value, FTOD per VEXTRACT, VINSERT.
+def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i8 GPRV2I8:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 24))),
+ (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+// fp_to_sint v2f64 -> v2i8: DTOF per VEXTRACT into a v2f32, FTOI_v2i32, AND 0xFF, IL_ASV2CHAR.
+def dtosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+// fp_to_uint v2f64 -> v2i8: DTOF per VEXTRACT into a v2f32, FTOU_v2i32, AND 0xFF, IL_ASV2CHAR.
+def dtouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)),
+(v2i8
+ (IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+
+// sint_to_fp v2i16 -> v2f32: SHLVEC then SHRVEC by 16 on the v2i32 view, then ITOF_v2f32.
+def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
+(v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+// uint_to_fp v2i16 -> v2f32: SHLVEC then USHRVEC by 16 on the v2i32 view, then UTOF_v2f32.
+def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
+(v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+
+// fp_to_sint v2f32 -> v2i16: FTOI_v2i32, BINARY_AND with 0x0000FFFF, then IL_ASV2SHORT_v2i32.
+def ftoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+// fp_to_uint v2f32 -> v2i16: FTOU_v2i32, BINARY_AND with 0x0000FFFF, then IL_ASV2SHORT_v2i32.
+def ftous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 GPRV2F32:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+// sint_to_fp v2i16 -> v2f64: ITOF_v2f32 of the SHL/SHR-by-16 value, FTOD per VEXTRACT, VINSERT.
+def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (ITOF_v2f32
+ (SHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256)
+ ) >;
+// uint_to_fp v2i16 -> v2f64: UTOF_v2f32 of the SHL/USHR-by-16 value, FTOD per VEXTRACT, VINSERT.
+def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
+(v2f64
+ (VINSERT_v2f64
+ (VCREATE_v2f64
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 1)
+ )),
+ (FTOD
+ (VEXTRACT_v2f32
+ (UTOF_v2f32
+ (USHRVEC_v2i32
+ (SHLVEC_v2i32
+(IL_ASV2INT_v2i16 GPRV2I16:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 16))),
+ (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ 2)
+ ), 1, 256)
+ ) >;
+
+// fp_to_sint v2f64 -> v2i16: DTOF per VEXTRACT into a v2f32, FTOI_v2i32, AND 0xFFFF, IL_ASV2SHORT.
+def dtoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOI_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+// fp_to_uint v2f64 -> v2i16: DTOF per VEXTRACT into a v2f32, FTOU_v2i32, AND 0xFFFF, IL_ASV2SHORT.
+def dtous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)),
+(v2i16
+ (IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(FTOU_v2i32 (VINSERT_v2f32
+ (VCREATE_v2f32
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
+ (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+// trunc v2i16 -> v2i8: BINARY_AND_v2i16 with 0x000000FF, IL_ASV2INT_v2i16, IL_ASV2CHAR_v2i32.
+def stoc_v2i8:Pat < (v2i8 (trunc GPRV2I16:$src)),
+(IL_ASV2CHAR_v2i32
+ (IL_ASV2INT_v2i16
+(BINARY_AND_v2i16 GPRV2I16:$src,
+ (VCREATE_v2i16 (LOADCONST_i16 0x000000FF))))
+ ) >;
+
+// trunc v2i32 -> v2i8: BINARY_AND_v2i32 with 0x000000FF, IL_ASV2INT_v2i32, IL_ASV2CHAR_v2i32.
+def itoc_v2i8:Pat < (v2i8 (trunc GPRV2I32:$src)),
+(IL_ASV2CHAR_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))
+ ) >;
+
+// trunc v2i32 -> v2i16: BINARY_AND_v2i32 with 0x0000FFFF, IL_ASV2INT_v2i32, IL_ASV2SHORT_v2i32.
+def itos_v2i16:Pat < (v2i16 (trunc GPRV2I32:$src)),
+(IL_ASV2SHORT_v2i32
+ (IL_ASV2INT_v2i32
+(BINARY_AND_v2i32 GPRV2I32:$src,
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))
+ ) >;
+
+// trunc v2i64 -> v2i8: LLO_v2i64, BINARY_AND_v2i32 with 0x000000FF, then IL_ASV2CHAR_v2i32.
+def ltoc_v2i8:Pat < (v2i8 (trunc GPRV2I64:$src)),
+(IL_ASV2CHAR_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x000000FF)))
+ ) >;
+
+// trunc v2i64 -> v2i16: LLO_v2i64, BINARY_AND_v2i32 with 0x0000FFFF, then IL_ASV2SHORT_v2i32.
+def ltos_v2i16:Pat < (v2i16 (trunc GPRV2I64:$src)),
+(IL_ASV2SHORT_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF)))
+ ) >;
+
+// trunc v2i64 -> v2i32: LLO_v2i64, BINARY_AND_v2i32 with 0xFFFFFFFF, then IL_ASV2INT_v2i32.
+def ltoi_v2i32:Pat < (v2i32 (trunc GPRV2I64:$src)),
+(IL_ASV2INT_v2i32
+ (BINARY_AND_v2i32
+(LLO_v2i64 GPRV2I64:$src),
+ (VCREATE_v2i32 (LOADCONST_i32 0xFFFFFFFF)))
+ ) >;
+
+
+
+// anyext v4i8 -> v4i16: SHLVEC then USHRVEC by 24 on the v4i32 view, then IL_ASV4SHORT_v4i32.
+def actos_v4i16:Pat < (v4i16 (anyext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// zext v4i8 -> v4i16: SHLVEC then USHRVEC by 24 on the v4i32 view, then IL_ASV4SHORT_v4i32.
+def uctos_v4i16:Pat < (v4i16 (zext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// sext v4i8 -> v4i16: SHLVEC then SHRVEC by 24 on the v4i32 view, then IL_ASV4SHORT_v4i32.
+def sctos_v4i16:Pat < (v4i16 (sext GPRV4I8:$src)),
+(IL_ASV4SHORT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// anyext v4i8 -> v4i32: SHLVEC then USHRVEC by 24 on the v4i32 view, then IL_ASV4INT_v4i32.
+def actoi_v4i32:Pat < (v4i32 (anyext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// zext v4i8 -> v4i32: SHLVEC then USHRVEC by 24 on the v4i32 view, then IL_ASV4INT_v4i32.
+def uctoi_v4i32:Pat < (v4i32 (zext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// sext v4i8 -> v4i32: SHLVEC then SHRVEC by 24 on the v4i32 view, then IL_ASV4INT_v4i32.
+def sctoi_v4i32:Pat < (v4i32 (sext GPRV4I8:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+
+// anyext v4i16 -> v4i32: SHLVEC then USHRVEC by 16 on the v4i32 view, then IL_ASV4INT_v4i32.
+def astoi_v4i32:Pat < (v4i32 (anyext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+// zext v4i16 -> v4i32: SHLVEC then USHRVEC by 16 on the v4i32 view, then IL_ASV4INT_v4i32.
+def ustoi_v4i32:Pat < (v4i32 (zext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+// sext v4i16 -> v4i32: SHLVEC then SHRVEC by 16 on the v4i32 view, then IL_ASV4INT_v4i32.
+def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)),
+(IL_ASV4INT_v4i32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+
+
+// sint_to_fp v4i8 -> v4f32: SHLVEC then SHRVEC by 24 on the v4i32 view, then ITOF_v4f32.
+def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+// uint_to_fp v4i8 -> v4f32: SHLVEC then USHRVEC by 24 on the v4i32 view, then UTOF_v4f32.
+def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i8 GPRV4I8:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 24))),
+ (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+
+// fp_to_sint v4f32 -> v4i8: FTOI_v4i32, BINARY_AND with 0x000000FF, then IL_ASV4CHAR_v4i32.
+def ftosc_v4i8:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+// fp_to_uint v4f32 -> v4i8: FTOU_v4i32, BINARY_AND with 0x000000FF, then IL_ASV4CHAR_v4i32.
+def ftouc_v4i8:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)),
+(v4i8
+ (IL_ASV4CHAR_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+
+// sint_to_fp v4i16 -> v4f32: SHLVEC then SHRVEC by 16 on the v4i32 view, then ITOF_v4f32.
+def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
+(v4f32
+ (ITOF_v4f32
+ (SHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+// uint_to_fp v4i16 -> v4f32: SHLVEC then USHRVEC by 16 on the v4i32 view, then UTOF_v4f32.
+def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
+(v4f32
+ (UTOF_v4f32
+ (USHRVEC_v4i32
+ (SHLVEC_v4i32
+(IL_ASV4INT_v4i16 GPRV4I16:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 16))),
+ (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+
+// fp_to_sint v4f32 -> v4i16: FTOI_v4i32, BINARY_AND with 0x0000FFFF, then IL_ASV4SHORT_v4i32.
+def ftoss_v4i16:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOI_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+// fp_to_uint v4f32 -> v4i16: FTOU_v4i32, BINARY_AND with 0x0000FFFF, then IL_ASV4SHORT_v4i32.
+def ftous_v4i16:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)),
+(v4i16
+ (IL_ASV4SHORT_v4i32
+ (BINARY_AND_v4i32
+(FTOU_v4i32 GPRV4F32:$src),
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+
+
+
+
+// trunc v4i16 -> v4i8: BINARY_AND_v4i16 with 0x000000FF, IL_ASV4INT_v4i16, IL_ASV4CHAR_v4i32.
+def stoc_v4i8:Pat < (v4i8 (trunc GPRV4I16:$src)),
+(IL_ASV4CHAR_v4i32
+ (IL_ASV4INT_v4i16
+(BINARY_AND_v4i16 GPRV4I16:$src,
+ (VCREATE_v4i16 (LOADCONST_i16 0x000000FF))))
+ ) >;
+
+// trunc v4i32 -> v4i8: BINARY_AND_v4i32 with 0x000000FF, IL_ASV4INT_v4i32, IL_ASV4CHAR_v4i32.
+def itoc_v4i8:Pat < (v4i8 (trunc GPRV4I32:$src)),
+(IL_ASV4CHAR_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))
+ ) >;
+
+// trunc v4i32 -> v4i16: BINARY_AND_v4i32 with 0x0000FFFF, IL_ASV4INT_v4i32, IL_ASV4SHORT_v4i32.
+def itos_v4i16:Pat < (v4i16 (trunc GPRV4I32:$src)),
+(IL_ASV4SHORT_v4i32
+ (IL_ASV4INT_v4i32
+(BINARY_AND_v4i32 GPRV4I32:$src,
+ (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))
+ ) >;
+
+
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp
new file mode 100644
index 00000000000..4508ce922db
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevice.cpp
@@ -0,0 +1,176 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+using namespace llvm;
+// Default implementation for all of the classes; the constructor sizes and fills the caps.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST)
+  : mSTM(ST), mDeviceFlag(OCL_DEVICE_ALL)
+{
+  mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+  mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+  setCaps(); // Inside this constructor the call resolves to AMDILDevice::setCaps().
+}
+// Destructor: empties both capability bit vectors.
+AMDILDevice::~AMDILDevice()
+{
+ mHWBits.clear();
+ mSWBits.clear();
+}
+// Base implementation: reports a maximum GDS size of 0.
+size_t AMDILDevice::getMaxGDSSize() const
+{
+ return 0;
+}
+// Returns mDeviceFlag, which the base constructor sets to OCL_DEVICE_ALL.
+uint32_t
+AMDILDevice::getDeviceFlag() const
+{
+ return mDeviceFlag;
+}
+// Number of hardware constant buffers: HW_MAX_NUM_CB, or 0 without hardware ConstantMem.
+size_t AMDILDevice::getMaxNumCBs() const
+{
+  // Constant memory that is not a hardware capability yields no hardware buffers.
+  if (!usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    return 0;
+  }
+  return HW_MAX_NUM_CB;
+}
+// Size limit of one hardware constant buffer: MAX_CB_SIZE, or 0 without hardware ConstantMem.
+size_t AMDILDevice::getMaxCBSize() const
+{
+  // Constant memory that is not a hardware capability has no hardware buffer size.
+  if (!usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    return 0;
+  }
+  return MAX_CB_SIZE;
+}
+// Base implementation: fixed scratch buffer limit of 65536 (bytes, per AMDILDevice.h).
+size_t AMDILDevice::getMaxScratchSize() const
+{
+ return 65536;
+}
+// Base implementation: fixed stack alignment of 16.
+uint32_t AMDILDevice::getStackAlignment() const
+{
+ return 16;
+}
+// Records the default capabilities in mSWBits/mHWBits, consulting subtarget overrides.
+void AMDILDevice::setCaps()
+{
+  // Capabilities that are always recorded as software.
+  mSWBits.set(AMDILDeviceInfo::HalfOps);
+  mSWBits.set(AMDILDeviceInfo::ByteOps);
+  mSWBits.set(AMDILDeviceInfo::ShortOps);
+  mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+  // Recorded only when the subtarget reports the matching override.
+  if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
+    mSWBits.set(AMDILDeviceInfo::NoInline);
+  }
+  if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
+    mSWBits.set(AMDILDeviceInfo::MacroDB);
+  }
+  // The Debug override selects the software vector for constant and private
+  // memory; otherwise both are recorded as hardware capabilities.
+  llvm::BitVector &ConstantBits =
+    mSTM->isOverride(AMDILDeviceInfo::Debug) ? mSWBits : mHWBits;
+  ConstantBits.set(AMDILDeviceInfo::ConstantMem);
+  llvm::BitVector &PrivateBits =
+    mSTM->isOverride(AMDILDeviceInfo::Debug) ? mSWBits : mHWBits;
+  PrivateBits.set(AMDILDeviceInfo::PrivateMem);
+  if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
+    mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+  }
+  mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+  mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+// Classifies a capability using the two bit vectors. The hardware vector is
+// consulted first, then the software vector; a capability found in neither
+// is reported as Unsupported.
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+  if (mHWBits[Caps]) {
+    // A capability flagged as hardware must not also be flagged as software.
+    assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+    return AMDILDeviceInfo::Hardware;
+  }
+
+  // mHWBits[Caps] is known to be clear here, so only the software vector is
+  // left to decide between Software and Unsupported.
+  return mSWBits[Caps] ? AMDILDeviceInfo::Software
+                       : AMDILDeviceInfo::Unsupported;
+}
+// True when getExecutionMode() reports anything other than Unsupported.
+bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
+}
+// True when getExecutionMode() reports Hardware for the capability.
+bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
+}
+// True when getExecutionMode() reports Software for the capability.
+bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
+}
+// Returns the default layout string (presumably an LLVM data layout spec -- confirm).
+std::string AMDILDevice::getDataLayout() const
+{
+  static const char Layout[] = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+    "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+    "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+    "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+    "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+    "-n8:16:32:64";
+  return std::string(Layout);
+}
diff --git a/src/gallium/drivers/radeon/AMDILDevice.h b/src/gallium/drivers/radeon/AMDILDevice.h
new file mode 100644
index 00000000000..42fa4115f01
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevice.h
@@ -0,0 +1,171 @@
+//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "AMDILLLVMPC.h"
+#include "llvm/ADT/BitVector.h"
+namespace llvm {
+ class AMDILSubtarget;
+ class AMDILAsmPrinter;
+ class AMDILIOExpansion;
+ class AMDILPointerManager;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDILDevice {
+public:
+ AMDILDevice(AMDILSubtarget *ST);
+ virtual ~AMDILDevice();
+ // Enum values for the various memory types. NOTE(review): "IO_TYPE_IDS" after the
+ // closing brace declares a data member, not an enum name -- confirm this is intended.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+ // Interface to get the IO Expansion pass for each device.
+ virtual FunctionPass*
+ getIOExpansion(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+ // Interface to get the Asm printer for each device.
+ virtual AsmPrinter*
+ getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const = 0;
+
+ // Interface to get the Pointer manager pass for each device.
+ virtual FunctionPass*
+ getPointerManager(TargetMachine&, CodeGenOpt::Level) const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returns true. If usesHardware returns true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the software. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it, possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+ bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const; // default layout string for the device
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps(); // fills mHWBits/mSWBits with the default capabilities
+ llvm::BitVector mHWBits; // capabilities reported as Hardware by getExecutionMode()
+ llvm::BitVector mSWBits; // capabilities reported as Software by getExecutionMode()
+ AMDILSubtarget *mSTM; // subtarget handed to the constructor; queried for overrides
+ uint32_t mDeviceFlag; // value returned by getDeviceFlag()
+private:
+ // Maps a capability to Hardware, Software or Unsupported from the two bit vectors.
+ AMDILDeviceInfo::ExecutionMode
+ getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
new file mode 100644
index 00000000000..7082b7b0751
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
@@ -0,0 +1,126 @@
+//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+#include <string>
+using namespace llvm;
+namespace llvm {
+//
+// Creates the AMDILDevice object that matches a device name.
+//
+// @param deviceName  name of the target device. A name whose third
+//                    character is '7' selects the 7XX family and its
+//                    fourth character picks the variant; any other name
+//                    is compared against the known code names.
+// @param ptr         subtarget handed to the device constructor
+// @param is64bit     only inspected by the DEBUG-build asserts
+// @param is64on32bit only inspected by the DEBUG-build asserts
+// @return a device allocated with new; an unrecognised name yields an
+//         AMDIL7XXDevice.
+//
+AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+  // Check the length before indexing: reading index 2 of an empty or
+  // one-character name is past the terminating NUL and therefore
+  // undefined behaviour.
+  if (deviceName.length() > 2 && deviceName[2] == '7') {
+    // For a three-character name c_str()[3] is the terminating NUL,
+    // which falls into the default case.
+    switch (deviceName.c_str()[3]) {
+      case '1':
+        return new AMDIL710Device(ptr);
+      case '7':
+        return new AMDIL770Device(ptr);
+      default:
+        return new AMDIL7XXDevice(ptr);
+    }
+  }
+
+  // Every remaining path, including the fallback, carries the same
+  // pointer-width restriction, so it is checked once here instead of
+  // being repeated in each branch.
+#if DEBUG
+  assert(!is64bit && "This device does not support 64bit pointers!");
+  assert(!is64on32bit && "This device does not support 64bit"
+      " on 32bit pointers!");
+#endif
+
+  if (deviceName == "cypress") {
+    return new AMDILCypressDevice(ptr);
+  }
+  if (deviceName == "juniper") {
+    return new AMDILEvergreenDevice(ptr);
+  }
+  if (deviceName == "redwood") {
+    return new AMDILRedwoodDevice(ptr);
+  }
+  if (deviceName == "cedar") {
+    return new AMDILCedarDevice(ptr);
+  }
+  if (deviceName == "barts"
+      || deviceName == "turks"
+      || deviceName == "caicos") {
+    return new AMDILNIDevice(ptr);
+  }
+  if (deviceName == "cayman") {
+    return new AMDILCaymanDevice(ptr);
+  }
+  // Unknown names fall back to the generic 7XX device.
+  return new AMDIL7XXDevice(ptr);
+}
+}
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
new file mode 100644
index 00000000000..f0b018d0ce1
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
@@ -0,0 +1,125 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+#include <string>
+namespace llvm
+{
+ class AMDILDevice;
+ class AMDILSubtarget;
+ namespace AMDILDeviceInfo
+ {
+ // How a capability is carried out on a given device: each capability
+ // can be executed using a hardware instruction, emulated with a
+ // sequence of software instructions, or not supported at all.
+ // Unsupported is explicitly 0; Software and Hardware follow as 1 and 2.
+ enum ExecutionMode {
+ Unsupported = 0, // Feature is unsupported on the card (default value).
+ Software, // Execution mode that is set if the feature is
+ // emulated in software.
+ Hardware // Execution mode that is set if the feature exists
+ // natively in hardware.
+ };
+
+ // Device capabilities. Any change to this list needs a corresponding
+ // update to the twiki page GPUMetadataABI.
+ // The values are consecutive ordinals (0x1, 0x2, 0x3, ...), not bit
+ // masks, and every one of them must stay below MaxNumberCapabilities.
+ // NOTE(review): presumably used as bit positions in the per-device
+ // capability bit vectors - confirm against AMDILDevice.
+ enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte (char) is supported or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available (!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, // Reserved flag.
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performed and that all memory accesses go through a single
+ // uav (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Reserved1 = 0x15, // Reserved flag.
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ // Upper bound on capability values. If more capabilities are
+ // required, then this number needs to be increased. All
+ // capabilities must come before this number.
+ MaxNumberCapabilities = 0x20
+ };
+ // Hardware generations. These have to stay in order, with the older
+ // generations having the lower enumeration values; numbering starts
+ // at HD4XXX = 0 and increases by one per entry.
+ enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN // Count of the generations listed above; not a device.
+ };
+
+
+ } // namespace AMDILDeviceInfo
+ // Returns a newly allocated device object selected by the device
+ // name; the definition lives in AMDILDeviceInfo.cpp. The subtarget
+ // pointer is passed on to the device constructor. Both 64bit flags
+ // default to false and are only examined by asserts that are compiled
+ // in when DEBUG is set.
+ llvm::AMDILDevice*
+ getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h
new file mode 100644
index 00000000000..3d0019e82a3
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILDevices.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDILDevice.h"
+#include "AMDIL7XXDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#endif // __AMDIL_DEVICES_H_
diff --git a/src/gallium/drivers/radeon/AMDILEGAsmPrinter.cpp b/src/gallium/drivers/radeon/AMDILEGAsmPrinter.cpp
new file mode 100644
index 00000000000..cd3454e681b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEGAsmPrinter.cpp
@@ -0,0 +1,198 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+
+//
+// Constructs the Evergreen asm printer by passing the
+// ASM_PRINTER_ARGUMENTS on to the AMDILAsmPrinter base class. The body
+// is empty; no state is initialised here.
+//
+// TODO: Add support for verbose.
+AMDILEGAsmPrinter::AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+: AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+//
+// Destructor. The body is empty: this class releases nothing itself.
+//
+AMDILEGAsmPrinter::~AMDILEGAsmPrinter()
+{
+}
+//
+// @param name function name, expected to look like
+//        "__OpenCL_<kernel>_kernel"
+// @brief strips the "__OpenCL_" prefix and the "_kernel" suffix from
+// the name and returns what lies between them. The name is returned
+// unchanged unless it starts with the prefix, ends with the suffix and
+// is long enough for the two not to overlap.
+//
+static
+std::string Strip(const std::string &name)
+{
+  static const char kPrefix[] = "__OpenCL_";
+  static const char kSuffix[] = "_kernel";
+  const size_t prefixLen = sizeof(kPrefix) - 1;
+  const size_t suffixLen = sizeof(kSuffix) - 1;
+
+  // Too short to hold both tokens side by side; this also keeps the
+  // length arithmetic below from wrapping around.
+  if (name.length() < prefixLen + suffixLen) {
+    return name;
+  }
+  // Anchor the tokens at the two ends of the name. Merely finding them
+  // somewhere inside would make the fixed-offset slice cut the wrong
+  // characters.
+  if (name.compare(0, prefixLen, kPrefix) != 0) {
+    return name;
+  }
+  if (name.compare(name.length() - suffixLen, suffixLen, kSuffix) != 0) {
+    return name;
+  }
+  return name.substr(prefixLen, name.length() - prefixLen - suffixLen);
+}
+//
+// @param MI Machine instruction to emit the macro call for
+// @param O  stream that receives the emitted text
+// @brief picks the macro name from operand 0 when it is a global
+// ("unknown" otherwise), switches a name that begins with "__fma_f32"
+// to "__hwfma_f32" when the device runs FMA in hardware, and hands the
+// result to emitMCallInst.
+//
+void
+AMDILEGAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+    OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+
+  // globalName stays alive until the end of the function so that the
+  // pointer taken from it remains valid.
+  // NOTE(review): strncmp below relies on data() being readable up to
+  // a NUL or nine characters - confirm for the names passed in.
+  llvm::StringRef globalName;
+  const char *macroName = "unknown";
+  if (MI->getOperand(0).isGlobal()) {
+    globalName = MI->getOperand(0).getGlobal()->getName();
+    macroName = globalName.data();
+  }
+
+  // Only the first nine characters are compared, so this is a prefix
+  // match; the device is queried only when the prefix matches.
+  const bool isFma32 = (::strncmp(macroName, "__fma_f32", 9) == 0);
+  if (isFma32
+      && subtarget->device()->usesHardware(AMDILDeviceInfo::FMA)) {
+    macroName = "__hwfma_f32";
+  }
+
+  emitMCallInst(MI, O, macroName);
+}
+
+//
+// @param lMF MachineFunction to print the assembly for
+// @brief records the function in the printer state, derives the two
+// name members from the function's name, then emits the function
+// header followed by the function body.
+// @return always false
+//
+ bool
+AMDILEGAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+ // Point the printer, the metadata helper and the cached function
+ // info at the function being processed before anything is emitted.
+ this->MF = &lMF;
+ mMeta->setMF(&lMF);
+ mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+ SetupMachineFunction(lMF);
+ std::string kernelName = MF->getFunction()->getName();
+ // mName holds the result of Strip(); mKernelName keeps the full,
+ // unmodified function name.
+ mName = Strip(kernelName);
+
+ mKernelName = kernelName;
+ EmitFunctionHeader();
+ EmitFunctionBody();
+ return false;
+}
+//
+// @param II Machine instruction to emit
+// @brief builds the text for one instruction in a local string and
+// passes it to the output streamer as raw text. Macro functions are
+// delegated to emitMacroFunc; macro calls get an mcall line before the
+// regular output; instructions flagged by useCompilerWrite are replaced
+// by a guarded UAV store sequence.
+//
+ void
+AMDILEGAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+ // All output is collected in FunStr through O and emitted in one
+ // piece at the end (or at the early return below).
+ std::string FunStr;
+ raw_string_ostream OFunStr(FunStr);
+ formatted_raw_ostream O(OFunStr);
+ const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+ // In debug mode the instruction itself is printed first, prefixed
+ // with ';'.
+ if (mDebugMode) {
+ O << ";" ;
+ II->print(O);
+ }
+ // Macro functions are handled completely by emitMacroFunc; nothing
+ // else is printed for them.
+ if (isMacroFunc(II)) {
+ emitMacroFunc(II, O);
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+ return;
+ }
+ if (isMacroCall(II)) {
+ // The macro name is the opcode name with its first five characters
+ // skipped.
+ // NOTE(review): presumably a fixed five-character opcode prefix -
+ // confirm against the instruction definitions.
+ const char *name = II->getDesc().getName() + 5;
+ // Prefix match on "__fma_f32": use the hardware FMA macro when the
+ // device runs FMA in hardware.
+ if (!::strncmp(name, "__fma_f32", 9)
+ && curTarget->device()->usesHardware(
+ AMDILDeviceInfo::FMA)) {
+ name = "__hwfma_f32";
+ }
+ //assert(0 &&
+ //"Found a macro that is still in use!");
+ int macronum = amd::MacroDBFindMacro(name);
+ O << "\t;"<< name<<"\n";
+ O << "\tmcall("<<macronum<<")";
+ // Record the macro id either in the printer's own set (MacroDB
+ // capability supported) or in the function info.
+ if (curTarget->device()->isSupported(
+ AMDILDeviceInfo::MacroDB)) {
+ mMacroIDs.insert(macronum);
+ } else {
+ mMFI->addCalledIntr(macronum);
+ }
+ }
+
+ // Print the assembly for the instruction. Note that this also runs
+ // after the macro-call branch above.
+ // We want to make sure that we do HW constants
+ // before we do arena segment
+ // TODO: This is a hack to get around some
+ // conformance failures.
+ if (mMeta->useCompilerWrite(II)) {
+ // The store is wrapped in if_logicalz cb0[0].x ... endif and goes
+ // to the raw UAV when the function uses RAW_UAV_ID, otherwise to
+ // the arena UAV.
+ O << "\tif_logicalz cb0[0].x\n";
+ if (mMFI->usesMem(AMDILDevice::RAW_UAV_ID)) {
+ O << "\tuav_raw_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ") ";
+ O << "mem0.x___, cb0[3].x, r0.0\n";
+ } else {
+ O << "\tuav_arena_store_id("
+ << curTarget->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)
+ << ")_size(dword) ";
+ O << "cb0[3].x, r0.0\n";
+ }
+ O << "\tendif\n";
+ mMFI->addMetadata(";memory:compilerwrite");
+ } else {
+ printInstruction(II, O);
+ }
+ // Flush O so that FunStr is complete before it is emitted.
+ O.flush();
+ OutStreamer.EmitRawText(StringRef(FunStr));
+}
diff --git a/src/gallium/drivers/radeon/AMDILEGAsmPrinter.h b/src/gallium/drivers/radeon/AMDILEGAsmPrinter.h
new file mode 100644
index 00000000000..9956f03ddd4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEGAsmPrinter.h
@@ -0,0 +1,101 @@
+//===----- AMDILEGAsmPrinter.h --- AMDIL EG Asm Printer class ---------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Asm Printer Class for Evergreen/N. Islands
+// generation of cards. This class handles all
+// of the items that are unique to these
+// devices that must be handled by the AsmPrinter.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_EG_ASM_PRINTER_H_
+#define _AMDIL_EG_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+ // Asm printer specialisation of AMDILAsmPrinter for the EG devices.
+ // It supplies its own EmitInstruction, runOnMachineFunction and
+ // emitMacroFunc.
+ class LLVM_LIBRARY_VISIBILITY AMDILEGAsmPrinter : public AMDILAsmPrinter
+ {
+ public:
+ //
+ // Constructor for the AMDIL EG specific AsmPrinter class.
+ // The argument list is defined by LLVM proper; refer to LLVM
+ // for more information. The arguments are passed on to the
+ // AMDILAsmPrinter base class.
+ //
+ AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+ //
+ // Destructor for the EG Asm Printer class. Its definition in
+ // AMDILEGAsmPrinter.cpp has an empty body.
+ //
+ virtual ~AMDILEGAsmPrinter();
+
+ //
+ // @param MI Machine instruction to emit
+ // @brief builds the text for one machine instruction and hands
+ // it to the output streamer as raw text
+ //
+ void
+ EmitInstruction(const MachineInstr *MI);
+
+ //
+ // @param F MachineFunction to print the assembly for
+ // @brief parse the specified machine function and print
+ // out the assembly for all the instructions in the function
+ // @return always false in the definition in AMDILEGAsmPrinter.cpp
+ //
+ bool
+ runOnMachineFunction(MachineFunction &F);
+
+ protected:
+ //
+ // @param MI Machine instruction to emit the macro code for
+ // @param O stream that receives the emitted text
+ //
+ // Emits a fully functional macro function that uses the argument
+ // registers as the macro arguments.
+ //
+ virtual void
+ emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+ }; // AMDILEGAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_EG_ASM_PRINTER_H_
diff --git a/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp
new file mode 100644
index 00000000000..e0ef88d2a60
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEGIOExpansion.cpp
@@ -0,0 +1,1126 @@
+//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDILEGIOExpansion.cpp
+// @details Implementation of IO expansion class for evergreen and NI devices.
+//
+#include "AMDILIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILCompilerWarnings.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+using namespace llvm;
+// Constructs the EG/NI IO expansion pass. Both arguments are forwarded
+// unchanged to the AMDILImageExpansion base class; the body adds nothing.
+AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm,
+                                       CodeGenOpt::Level OptLevel)
+  : AMDILImageExpansion(tm, OptLevel)
+{
+}
+
+// Destructor; this class has nothing of its own to release.
+AMDILEGIOExpansion::~AMDILEGIOExpansion()
+{
+}
+// @return the fixed, human-readable name of this pass.
+const char *
+AMDILEGIOExpansion::getPassName() const
+{
+  static const char *const passName = "AMDIL EG/NI IO Expansion Pass";
+  return passName;
+}
+// Reports whether MI names one of the AMDIL image built-ins.
+//
+// @param MI Machine instruction whose operand 0 is inspected.
+// @return true when operand 0 is a global whose name starts with
+//         "__amdil_" and continues with "sample_data", "write_image",
+//         "get_image2d_params" or "get_image3d_params"; false otherwise.
+bool
+AMDILEGIOExpansion::isImageIO(MachineInstr *MI)
+{
+  if (!MI->getOperand(0).isGlobal()) {
+    return false;
+  }
+  // A StringRef is a (pointer, length) pair with no guaranteed NUL
+  // terminator, so the comparisons go through its bounded API rather than
+  // strncmp on data(), which could read past the end of the name.
+  const llvm::StringRef prefix("__amdil_");
+  const llvm::StringRef name = MI->getOperand(0).getGlobal()->getName();
+  if (!name.startswith(prefix)) {
+    return false;
+  }
+  // A name that is exactly the prefix leaves an empty remainder, which
+  // matches none of the built-ins below.
+  const llvm::StringRef builtin = name.substr(prefix.size());
+  return builtin.startswith("sample_data")
+      || builtin.startswith("write_image")
+      || builtin.startswith("get_image2d_params")
+      || builtin.startswith("get_image3d_params");
+}
+// Decides whether MI has to be handled by the IO expansion pass.
+//
+// @param MI Machine instruction to classify; may be null.
+// @return false for a null MI, true for the 2D/3D image read, write and
+//         info opcodes, and otherwise whatever
+//         AMDILIOExpansion::isIOInstruction reports.
+bool
+AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI)
+{
+  if (!MI) {
+    return false;
+  }
+  const unsigned opcode = MI->getOpcode();
+  switch (opcode) {
+    case AMDIL::IMAGE2D_READ:
+    case AMDIL::IMAGE2D_READ_UNNORM:
+    case AMDIL::IMAGE2D_WRITE:
+    case AMDIL::IMAGE2D_INFO0:
+    case AMDIL::IMAGE2D_INFO1:
+    case AMDIL::IMAGE3D_READ:
+    case AMDIL::IMAGE3D_READ_UNNORM:
+    case AMDIL::IMAGE3D_WRITE:
+    case AMDIL::IMAGE3D_INFO0:
+    case AMDIL::IMAGE3D_INFO1:
+      return true;
+    default:
+      break;
+  }
+  // Everything that is not an image opcode is classified by the base class.
+  return AMDILIOExpansion::isIOInstruction(MI);
+}
+// Dispatches an IO instruction to the matching expansion routine.
+//
+// @param MI Instruction to expand; asserted to satisfy isIOInstruction().
+//
+// Image reads go to expandImageLoad, image writes to expandImageStore and
+// image info queries to expandImageParam; any other opcode is handed to
+// AMDILIOExpansion::expandIOInstruction.
+void
+AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI)
+{
+  assert(isIOInstruction(MI) && "Must be an IO instruction to "
+      "be passed to this function!");
+  const unsigned opcode = MI->getOpcode();
+  switch (opcode) {
+    case AMDIL::IMAGE2D_READ:
+    case AMDIL::IMAGE2D_READ_UNNORM:
+    case AMDIL::IMAGE3D_READ:
+    case AMDIL::IMAGE3D_READ_UNNORM:
+      expandImageLoad(mBB, MI);
+      return;
+    case AMDIL::IMAGE2D_WRITE:
+    case AMDIL::IMAGE3D_WRITE:
+      expandImageStore(mBB, MI);
+      return;
+    case AMDIL::IMAGE2D_INFO0:
+    case AMDIL::IMAGE2D_INFO1:
+    case AMDIL::IMAGE3D_INFO0:
+    case AMDIL::IMAGE3D_INFO1:
+      expandImageParam(mBB, MI);
+      return;
+    default:
+      AMDILIOExpansion::expandIOInstruction(MI);
+      return;
+  }
+}
+// Reports whether the memory access of MI may use the cached load path.
+//
+// @param MI Instruction whose asm-printer flags are decoded.
+// @return the CacheableRead flag when the resource ID is 11, false for any
+//         other resource ID.
+bool
+AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc resEnc;
+  getAsmPrinterFlags(MI, resEnc);
+  // We only support caching on UAV11 - JeffG
+  if (resEnc.bits.ResourceID != 11) {
+    return false;
+  }
+  return resEnc.bits.CacheableRead;
+}
+// Reports whether the memory access of MI targets an arena UAV.
+//
+// @param MI Instruction whose asm-printer flags are decoded.
+// @return true when the resource ID equals the device's ARENA_UAV_ID
+//         resource, or is at least ARENA_SEGMENT_RESERVED_UAVS.
+bool
+AMDILEGIOExpansion::isArenaOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc resEnc;
+  getAsmPrinterFlags(MI, resEnc);
+  if (resEnc.bits.ResourceID
+      == mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+    return true;
+  }
+  return resEnc.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS;
+}
+ // Emits the pack sequence for a packed-data access, built around the
+ // UBIT_INSERT instruction, in front of MI.
+ //
+ // @param MI Instruction being expanded. Nothing is emitted unless
+ // isPackedData(MI) is true.
+ //
+ // The PACK_* cases read and write R1011 and use R1012 as a scratch
+ // register. The UNPACK_* cases, and every packed access on CAL versions
+ // older than CAL_VERSION_SC_137, are delegated to
+ // AMDIL789IOExpansion::expandPackedData.
+ void
+AMDILEGIOExpansion::expandPackedData(MachineInstr *MI)
+{
+ if (!isPackedData(MI)) {
+ return;
+ }
+ // There is a bug in the CAL compiler that incorrectly
+ // errors on the UBIT_INSERT instruction (the original comment was
+ // truncated here - presumably "is used"), so older CAL versions fall
+ // back to the AMDIL789IOExpansion implementation.
+ if (mSTM->calVersion() < CAL_VERSION_SC_137) {
+ AMDIL789IOExpansion::expandPackedData(MI);
+ return;
+ }
+ DebugLoc DL;
+ // If we have packed data, then the shift size is no longer
+ // the same as the load size and we need to adjust accordingly
+ switch(getPackedID(MI)) {
+ default:
+ break;
+ case PACK_V2I8:
+ {
+ // One LHI + UBIT_INSERT_i32 step with literals 8 and 8.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I8:
+ {
+ // Two steps: a v2 insert with literal 8 per lane, then a scalar
+ // insert with literals 16 and 16.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32),
+ AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V2I16:
+ {
+ // One LHI + UBIT_INSERT_i32 step with literals 16 and 16.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case PACK_V4I16:
+ {
+ // One v2 step: LHI/LLO split followed by UBIT_INSERT_v2i32 with
+ // literal 16 per lane.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::R1012)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::R1011)
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
+ .addReg(AMDIL::R1012).addReg(AMDIL::R1011);
+ }
+ break;
+ case UNPACK_V2I8:
+ case UNPACK_V4I8:
+ case UNPACK_V2I16:
+ case UNPACK_V4I16:
+ // Unpacking has no EG-specific sequence; reuse the 789 implementation.
+ AMDIL789IOExpansion::expandPackedData(MI);
+ break;
+ };
+}
+
+ // Expands a global-memory load into explicit UAV load instructions that
+ // are inserted in front of MI.
+ //
+ // @param MI Load instruction being expanded.
+ //
+ // The access width comes from getMemorySize(MI): 1, 2, 4 and 8 bytes have
+ // dedicated paths and every other size takes the final v4i32 path. Each
+ // path emits UAVARENALOAD_* when isArenaOp(MI) is true, otherwise
+ // UAVRAWLOAD_* (UAVRAWLOADCACHED_* when isCacheableOp(MI) is true).
+ // The address is read from R1010 (or from components extracted out of it)
+ // and the loaded value ends up in R1011, which is finally moved to MI's
+ // original operand 0. An error message is recorded when the function is a
+ // kernel that uses neither the raw nor the arena UAV.
+ void
+AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+ bool usesArena = isArenaOp(MI);
+ bool cacheable = isCacheableOp(MI);
+ uint32_t ID = getPointerID(MI);
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+ && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ expandArenaSetup(MI);
+ DebugLoc DL;
+ if (getMemorySize(MI) == 1) {
+ if (usesArena) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ // Raw UAV byte load: R1008 receives the low two address bits and
+ // R1010 is masked with 0xFFFFFFFC before a 32-bit load is issued.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ // Replicate the offset and add (0, -1, -2, -3) so that comparing with
+ // zero identifies which byte was addressed; the CMOVLOGs then pick a
+ // shift amount out of 0, 8, 16 and 24.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(0))
+ .addImm(mMFI->addi32Literal(24));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1012)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008);
+ if (cacheable) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ // Shift the loaded dword by the amount selected in R1008.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else if (getMemorySize(MI) == 2) {
+ if (usesArena) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ // Raw UAV 16-bit load: ((address & 3) >> 1) selects a shift of 16 or 0
+ // that is applied to the dword loaded from the masked address.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ if (cacheable) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else if (getMemorySize(MI) == 4) {
+ // A dword load needs no address fix-up or shifting.
+ if (usesArena) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ } else if (getMemorySize(MI) == 8) {
+ if (usesArena) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ // Without hardware arena vectors the second dword is loaded through
+ // an address extracted from R1010 and merged with LCREATE.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ } else {
+ // Every remaining size is handled as a four-dword access.
+ if (usesArena) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_Y_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_Z_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_W_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(ID);
+ } else {
+ // Three further scalar loads through addresses extracted from R1010
+ // (indices 2, 3 and 4), merged pairwise with LCREATE and finally
+ // with LCREATE_v2i64.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(2);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(3);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+ .addReg(AMDIL::R1010)
+ .addImm(4);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::R1006)
+ .addReg(AMDIL::R1007)
+ .addImm(ID);
+ // NOTE(review): the index-4 value (R1006) is passed before the index-3
+ // value (R1008) here, the opposite of the first LCREATE above -
+ // confirm the operand order is intended.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1008)
+ .addReg(AMDIL::R1006)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ }
+ } else {
+ if (cacheable) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32),
+ AMDIL::R1011).addReg(AMDIL::R1010).addImm(ID);
+ }
+ }
+ }
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ // Move the result from R1011 into MI's original destination, then point
+ // MI's operand 0 at R1011.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+
+ // Expands a region-memory load into GDSLOAD instructions inserted in
+ // front of MI.
+ //
+ // @param MI Load instruction being expanded.
+ //
+ // Records REGION_MEMORY_ERROR and returns when the device does not support
+ // region memory. Falls back to expandGlobalLoad when the device does not
+ // use hardware region memory or MI is not a hardware region access.
+ // Otherwise the address is taken from R1010, the value is produced in
+ // R1011 and then moved to MI's original operand 0.
+ void
+AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[REGION_MEMORY_ERROR]);
+ return;
+ }
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ DebugLoc DL;
+ unsigned mulOp = 0;
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS load that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's default GDS
+ // resource ID and recording a recoverable-error message.
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Four-dword load: build per-component addresses by adding (0, 1, 2, 3)
+ // to the replicated address, then load each component separately.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD_Z), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD_W), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 1:
+ // Byte load: R1008 = (address & 3) * 8, the address is masked with
+ // 0xFFFFFFFC, a dword is loaded and IBIT_EXTRACT pulls out 8 bits.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ // NOTE(review): the original comment here said that everything created
+ // after this point "needs to go into the afterInst vector"; no such
+ // vector appears in this function and the extract below is inserted
+ // with the same BuildMI insertion point as the instructions above.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ break;
+ case 2:
+ // 16-bit load: same sequence as the byte case but extracting 16 bits.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ break;
+ case 4:
+ // Dword load: a single GDSLOAD is enough.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ case 8:
+ // Two-dword load: addresses (a, a + 1) followed by two component loads.
+ // NOTE(review): VCREATE_v2i32 is followed by ADD_v4i32 with a 64-bit
+ // literal - confirm a v4i32 add (rather than v2i32) is intended.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSLOAD_Y), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(gID);
+ break;
+ };
+
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ // Move the result from R1011 into MI's original destination, then point
+ // MI's operand 0 at R1011.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+ // Expands a local-memory load into LDSLOAD instructions inserted in front
+ // of MI.
+ //
+ // @param MI Load instruction being expanded.
+ //
+ // Falls back to expandGlobalLoad when the device does not use hardware
+ // local memory or MI is not a hardware local access. Otherwise the address
+ // is taken from R1010, the value is produced in R1011 and then moved to
+ // MI's original operand 0.
+ void
+AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalLoad(MI);
+ }
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS load that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's default LDS
+ // resource ID and recording a recoverable-error message.
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ unsigned mulOp = 0;
+ // These instructions are generated before the current MI.
+ expandLoadStartCode(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Any size other than 1, 2, 4 or 8 is loaded as a v4i32 vector.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // No byte LDS ops: load the containing dword from the masked address
+ // and extract 8 bits at bit offset (address & 3) * 8.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ } else {
+ // Byte LDS ops available: pick the i8 or u8 load depending on
+ // isSWSExtLoadInst(MI).
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ }
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // Same emulation as the byte case, but extracting 16 bits.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+ ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(0xFFFFFFFC));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1011);
+ } else {
+ // Pick the i16 or u16 load depending on isSWSExtLoadInst(MI).
+ if (isSWSExtLoadInst(MI)) {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::R1011)
+ .addReg(AMDIL::R1010)
+ .addImm(lID);
+ }
+ }
+ break;
+ }
+
+ // These instructions are generated after the current MI.
+ expandPackedData(MI);
+ expandExtendLoad(MI);
+ // Move the result from R1011 into MI's original destination, then point
+ // MI's operand 0 at R1011.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(),
+ mTII->get(getMoveInstFromID(
+ MI->getDesc().OpInfo[0].RegClass)))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDIL::R1011);
+ MI->getOperand(0).setReg(AMDIL::R1011);
+}
+// Expands a global-memory store into explicit UAV store instructions that
+// are inserted in front of MI.
+//
+// @param MI Store instruction being expanded.
+//
+// The access width comes from getMemorySize(MI): 1, 2, 4 and 8 bytes have
+// dedicated cases and every other size is stored as four dwords. Each case
+// emits UAVARENASTORE_* when isArenaOp(MI) is true and UAVRAWSTORE_*
+// otherwise. The address is taken from R1010 and the data from R1011. An
+// error message is recorded when the function is a kernel that uses neither
+// the raw nor the arena UAV.
+//
+// For arena stores the leading BuildMI register is the address and the
+// addReg operand is the data. On devices without hardware ArenaVectors the
+// per-component address is extracted into R1007 and the data into R1008.
+void
+AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+  bool usesArena = isArenaOp(MI);
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+      && !mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+      && mKM->isKernel()) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+  }
+  DebugLoc DL;
+  // These instructions are expanded before the current MI.
+  expandStoreSetupCode(MI);
+  expandArenaSetup(MI);
+  switch (getMemorySize(MI)) {
+    default:
+      if (usesArena) {
+        BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+        if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010)
+            .addReg(AMDIL::R1011)
+            .addImm(ID);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENASTORE_Z_i32), AMDIL::R1010)
+            .addReg(AMDIL::R1011)
+            .addImm(ID);
+          BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_W_i32), AMDIL::R1010)
+            .addReg(AMDIL::R1011)
+            .addImm(ID);
+        } else {
+          // Store the remaining three components one at a time. The
+          // address operand must be the extracted address (R1007), as in
+          // the 8-byte case below; using R1008 for both operands would
+          // store the data at an address equal to the data itself.
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+            .addReg(AMDIL::R1010)
+            .addImm(2);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+            .addReg(AMDIL::R1011)
+            .addImm(2);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007)
+            .addReg(AMDIL::R1008)
+            .addImm(ID);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+            .addReg(AMDIL::R1010)
+            .addImm(3);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+            .addReg(AMDIL::R1011)
+            .addImm(3);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007)
+            .addReg(AMDIL::R1008)
+            .addImm(ID);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+            .addReg(AMDIL::R1010)
+            .addImm(4);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+            .addReg(AMDIL::R1011)
+            .addImm(4);
+          BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007)
+            .addReg(AMDIL::R1008)
+            .addImm(ID);
+        }
+      } else {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+          .addReg(AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      }
+      break;
+    case 1:
+      if (usesArena) {
+        // Keep only the low byte of the data before the i8 arena store.
+        BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+          .addReg(AMDIL::R1011)
+          .addImm(mMFI->addi32Literal(0xFF));
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      } else {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+          .addReg(AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      }
+      break;
+    case 2:
+      if (usesArena) {
+        // Keep only the low 16 bits of the data before the i16 arena store.
+        BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+          .addReg(AMDIL::R1011)
+          .addImm(mMFI->addi32Literal(0xFFFF));
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      } else {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+          .addReg(AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      }
+      break;
+    case 4:
+      if (usesArena) {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      } else {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEM)
+          .addReg(AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      }
+      break;
+    case 8:
+      if (usesArena) {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+        if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
+          BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_Y_i32), AMDIL::R1010)
+            .addReg(AMDIL::R1011)
+            .addImm(ID);
+        } else {
+          // Second dword: extracted address in R1007, extracted data in
+          // R1008.
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1007)
+            .addReg(AMDIL::R1010)
+            .addImm(2);
+          BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::R1008)
+            .addReg(AMDIL::R1011)
+            .addImm(2);
+          BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::R1007)
+            .addReg(AMDIL::R1008)
+            .addImm(ID);
+        }
+      } else {
+        BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEM)
+          .addReg(AMDIL::R1010)
+          .addReg(AMDIL::R1011)
+          .addImm(ID);
+      }
+      break;
+  }
+}
+ // Expands a region-memory store into GDSSTORE / ATOM_R_MSKOR instructions
+ // inserted in front of MI.
+ //
+ // @param MI Store instruction being expanded.
+ //
+ // Falls back to expandGlobalStore when the device does not use hardware
+ // region memory or MI is not a hardware region access. Otherwise the
+ // address is taken from R1010 and the data from R1011.
+ //
+ // NOTE(review): unlike expandRegionLoad, this function does not check
+ // isSupported(RegionMem) before expanding - confirm that is intended.
+ void
+AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI)
+{
+ bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+ if (!HWRegion || !isHardwareRegion(MI)) {
+ return expandGlobalStore(MI);
+ }
+ mKM->setOutputInst();
+ if (!mMFI->usesMem(AMDILDevice::GDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t gID = getPointerID(MI);
+ assert(gID && "Found a GDS store that was incorrectly marked as zero ID!\n");
+ // With asserts compiled out, recover by using the device's default GDS
+ // resource ID and recording a recoverable-error message.
+ if (!gID) {
+ gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ DebugLoc DL;
+ // NOTE(review): both arms of this conditional are UMUL24_i32, so mulOp
+ // never depends on HWRegion (which is always true at this point anyway).
+ unsigned mulOp = HWRegion ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+ // These instructions are expanded before the current MI.
+ expandStoreSetupCode(MI);
+ expandArenaSetup(MI);
+ switch (getMemorySize(MI)) {
+ default:
+ // Four-dword store: build per-component addresses by adding (0, 1, 2, 3)
+ // to the replicated address, then store each component separately.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSSTORE_Z), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_W), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 1:
+ // Byte store: mask the data to 8 bits, then build the operands for
+ // ATOM_R_MSKOR from the low address bits.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ // NOTE(review): the (address & 3) result was written to R1012 just
+ // above, but this VCREATE reads R1008, which this case has not written
+ // yet - confirm whether R1012 was meant as the source.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ // NOTE(review): R1006 is written here but never read in this case.
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1006)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ // NOTE(review): the shift amount operand is R1007, which holds one of
+ // the mask constants selected above rather than a bit count - confirm.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 2:
+ // 16-bit store: mask the data to 16 bits, select a mask (R1012) and a
+ // shift of 16 or 0 (R1008) from ((address & 3) >> 1), shift the data
+ // and issue ATOM_R_MSKOR.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_R_MSKOR), AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 4:
+ // Dword store: a single GDSSTORE is enough.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ case 8:
+ // Two-dword store: addresses (a, a + 1) followed by two component stores.
+ // NOTE(review): VCREATE_v2i32 is followed by ADD_v4i32 with a 64-bit
+ // literal - confirm a v4i32 add (rather than v2i32) is intended.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(1ULL << 32));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::GDSSTORE_Y), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(gID);
+ break;
+ };
+
+}
+
+ // Expands the local-memory store MI into AMDIL LDS instructions that are
+ // inserted in front of it. When the device does not use hardware local
+ // memory, or MI is not a hardware-local access, the store is handed to
+ // expandGlobalStore() instead.
+ void
+AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI)
+{
+ bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+ if (!HWLocal || !isHardwareLocal(MI)) {
+ return expandGlobalStore(MI);
+ }
+ DebugLoc DL;
+ // A kernel that stores to LDS without LDS being marked as used gets an
+ // error message recorded; expansion still continues.
+ if (!mMFI->usesMem(AMDILDevice::LDS_ID)
+ && mKM->isKernel()) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
+ }
+ uint32_t lID = getPointerID(MI);
+ assert(lID && "Found a LDS store that was incorrectly marked as zero ID!\n");
+ // With assertions disabled, recover from a zero ID by falling back to the
+ // device's resource ID for LDS and recording a warning.
+ if (!lID) {
+ lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
+ mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
+ }
+ // HWLocal is always true past the early return above, so the multiply
+ // opcode needs no per-device selection.
+ unsigned mulOp = AMDIL::UMUL24_i32;
+ // These instructions are expanded before the current MI.
+ expandStoreSetupCode(MI);
+ // Dispatch on the size reported by getMemorySize(); R1010 and R1011 are the
+ // operands passed to every store emitted below.
+ switch (getMemorySize(MI)) {
+ default:
+ // Any size not listed below uses the four-component vector store.
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 8:
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEM)
+ .addReg(AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 4:
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ break;
+ case 1:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // No byte-sized LDS ops on this device: build a mask in R1012 and
+ // shifted data in R1011, then issue an LDS atomic instead of a store.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ // NOTE(review): R1008 is read here without being written earlier in this
+ // function, and the R1006 result of mulOp below is never read again here.
+ // Confirm whether R1008 is prepared by expandStoreSetupCode() or whether
+ // R1012 (computed just above) was the intended source.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008);
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
+ (0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
+ BuildMI(*mBB, *MI, DL, mTII->get(mulOp), AMDIL::R1006)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(8));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0xFFFFFF00))
+ .addImm(mMFI->addi32Literal(0x00FFFFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Y_i32), AMDIL::R1007)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFF00FFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_Z_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addReg(AMDIL::R1007)
+ .addImm(mMFI->addi32Literal(0xFFFF00FF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1007);
+ // CAL versions from SC_137 on use the single mask-or atomic; older
+ // versions get an atomic add followed by an atomic or.
+ if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addImm(lID);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ } else {
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i8), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ break;
+ case 2:
+ if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
+ // Same emulation as the byte case, with a 16-bit mask and a shift of
+ // either 16 or 0 selected from bit 1 of the address in R1010.
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0x0000FFFF));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi32Literal(3));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(1));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(0x0000FFFF))
+ .addImm(mMFI->addi32Literal(0xFFFF0000));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::R1008)
+ .addReg(AMDIL::R1008)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi32Literal(0));
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1008);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_MSKOR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1012)
+ .addImm(lID);
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::ATOM_L_OR_NORET),
+ AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ } else {
+ BuildMI(*mBB, *MI, MI->getDebugLoc(), mTII->get(AMDIL::LDSSTORE_i16), AMDIL::R1010)
+ .addReg(AMDIL::R1011)
+ .addImm(lID);
+ }
+ break;
+ }
+}
+
+
+ // Performs the store setup for MI by delegating, unchanged, to
+ // AMDIL789IOExpansion::expandStoreSetupCode().
+ void
+AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI)
+{
+ AMDIL789IOExpansion::expandStoreSetupCode(MI);
+}
+ // For arena operations, rewrites R1010 in front of MI according to the
+ // register class of MI's operand 0. Non-arena instructions are left alone.
+ void
+AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI)
+{
+ if (!isArenaOp(MI)) {
+ return;
+ }
+ DebugLoc NoLoc;
+ // The register class of operand 0 selects which expansion is emitted.
+ const unsigned short OperandClass = MI->getDesc().OpInfo[0].RegClass;
+ switch (OperandClass) {
+ case AMDIL::GPRI8RegClassID:
+ case AMDIL::GPRV2I8RegClassID:
+ case AMDIL::GPRI16RegClassID:
+ case AMDIL::GPRV2I16RegClassID:
+ case AMDIL::GPRV4I8RegClassID:
+ case AMDIL::GPRI32RegClassID:
+ case AMDIL::GPRF32RegClassID:
+ // These classes need no extra instructions.
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ case AMDIL::GPRI64RegClassID:
+ case AMDIL::GPRF64RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ case AMDIL::GPRV2F32RegClassID:
+ // Two-component form: VCREATE_v2i32 on R1010, then add the 64-bit literal.
+ BuildMI(*mBB, *MI, NoLoc, mTII->get(AMDIL::VCREATE_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, NoLoc, mTII->get(AMDIL::ADD_v2i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi64Literal(4ULL << 32));
+ break;
+ default:
+ // Every remaining class: VCREATE_v4i32 on R1010, then add the 128-bit
+ // literal.
+ BuildMI(*mBB, *MI, NoLoc, mTII->get(AMDIL::VCREATE_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010);
+ BuildMI(*mBB, *MI, NoLoc, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
+ .addReg(AMDIL::R1010)
+ .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+ break;
+ }
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp b/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp
new file mode 100644
index 00000000000..ee549e77c0a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILELFWriterInfo.cpp
@@ -0,0 +1,120 @@
+//===-- AMDILElfWriterInfo.cpp - Elf Writer Info for AMDIL ----------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the AMDIL backend.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDIL.h"
+#include "AMDILELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#if LLVM_VERSION >= 2500
+#include "llvm/Target/TargetELFWriterInfo.h"
+#endif
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the AMDILELFWriterInfo class
+//===----------------------------------------------------------------------===//
+// Constructs the writer info by forwarding its arguments to the
+// TargetELFWriterInfo base class. The signature depends on LLVM_VERSION:
+// 2500 and later take (is64bit, endian); earlier versions take the
+// TargetMachine.
+#if LLVM_VERSION >= 2500
+AMDILELFWriterInfo::AMDILELFWriterInfo(bool is64bit, bool endian)
+ : TargetELFWriterInfo(is64bit, endian)
+#else
+AMDILELFWriterInfo::AMDILELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM)
+#endif
+{
+}
+
+// Empty destructor: this class performs no cleanup of its own.
+AMDILELFWriterInfo::~AMDILELFWriterInfo() {
+}
+
+// Unimplemented stub: always asserts. MachineRelTy is ignored, and 0 is
+// returned when assertions are compiled out.
+unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented stub: always asserts, and returns false when assertions are
+// compiled out.
+bool AMDILELFWriterInfo::hasRelocationAddend() const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+// Unimplemented stub: always asserts. Both arguments are ignored, and 0 is
+// returned when assertions are compiled out.
+long int AMDILELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented stub: always asserts. RelTy is ignored, and 0 is returned
+// when assertions are compiled out.
+unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented stub: always asserts. RelTy is ignored, and false is
+// returned when assertions are compiled out.
+bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return false;
+}
+
+// Unimplemented stub: always asserts, and returns 0 when assertions are
+// compiled out.
+unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
+
+// Unimplemented stub: always asserts. All three arguments are ignored, and
+// 0 is returned when assertions are compiled out.
+long int AMDILELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 && "What do we do here? Lets assert an analyze");
+ return 0;
+}
diff --git a/src/gallium/drivers/radeon/AMDILELFWriterInfo.h b/src/gallium/drivers/radeon/AMDILELFWriterInfo.h
new file mode 100644
index 00000000000..acc04f14457
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILELFWriterInfo.h
@@ -0,0 +1,101 @@
+//===-- AMDILElfWriterInfo.h - Elf Writer Info for AMDIL ---------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===---------------------------------------------------------------------===//
+//
+// This file declares the ELF writer information class for the AMDIL backend.
+//
+//===---------------------------------------------------------------------===//
+#ifndef _AMDIL_ELF_WRITER_INFO_H_
+#define _AMDIL_ELF_WRITER_INFO_H_
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+ /// AMDILELFWriterInfo - AMDIL subclass of TargetELFWriterInfo that
+ /// overrides the relocation query methods declared below.
+ class AMDILELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ // The constructor signature follows the TargetELFWriterInfo base class,
+ // which differs by LLVM_VERSION.
+#if LLVM_VERSION >= 2500
+ AMDILELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+#else
+ AMDILELFWriterInfo(TargetMachine &TM);
+#endif
+ virtual ~AMDILELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const;
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelocationTySize - Returns the size of the relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+} // namespace llvm
+#endif // _AMDIL_ELF_WRITER_INFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
new file mode 100644
index 00000000000..4a57c154348
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
@@ -0,0 +1,569 @@
+//===--------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// ILEnumeratedTypes.td - The IL Enumerated Types - Micah Villmow - 9-3-2008
+//===--------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+//
+//
+//
+//===--------------------------------------------------------------------===//
+
+// Section 5.1 IL Shader
+// Each ILShader record stores its encoding in the 8-bit Value field.
+class ILShader<bits<8> val> {
+ bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+// Each ILRegType record stores its encoding in the 6-bit Value field.
+class ILRegType<bits<6> val> {
+ bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP : ILRegType<0>;
+def IL_REGTYPE_WINCOORD : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL : ILRegType<3>;
+def IL_REGTYPE_ITEMP : ILRegType<4>;
+def IL_REGTYPE_GLOBAL : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+// Pairs a 3-bit encoding (Value) with its textual form (Text).
+class ILComponentSelect<bits<3> val, string text> {
+ bits<3> Value = val;
+ string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+// Pairs a 2-bit encoding (Value) with its textual form (Text).
+// NOTE(review): the four WRITE records below share Value 1 and differ only
+// in Text, and their names mix upper case (_X) with lower case (_y/_z/_w).
+// Presumably a single "write" encoding printed per component; confirm
+// against the IL specification before relying on Value to distinguish them.
+class ILModDstComp<bits<2> val, string text> {
+ bits<2> Value = val;
+ string Text = text;
+}
+// Table 5-4
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0 : ILModDstComp<2, "0">;
+def IL_MODCOMP_1 : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+// Pairs a 1-bit encoding (Value) with the usage modifier text (Text).
+class ILImportUsage<bits<1> val, string usage> {
+ bits<1> Value = val;
+ string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 IL Shift Scale
+// Pairs a 4-bit encoding (Value) with the modifier suffix text (Text);
+// IL_SHIFT_NONE uses an empty suffix.
+class ILShiftScale<bits<4> val, string scale> {
+ bits<4> Value = val;
+ string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE : ILShiftScale<0, "">;
+def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+// Pairs a 3-bit encoding (Value) with the modifier text (Text).
+class ILDivComp<bits<3> val, string divcomp> {
+ bits<3> Value = val;
+ string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
+// The "unknown" entry (value 4) is deliberately left disabled here.
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+// Pairs a 3-bit encoding (Value) with the modifier text (Text).
+class ILRelOp<bits<3> val, string op> {
+ bits<3> Value = val;
+ string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+// Pairs a 3-bit encoding (Value) with the modifier text (Text).
+class ILZeroOp<bits<3> val, string behavior> {
+ bits<3> Value = val;
+ string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+// Pairs a 3-bit encoding (Value) with the constant's text (Text).
+class ILCmpValue<bits<3> val, string num> {
+ bits<3> Value = val;
+ string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+// Each ILAddressing record stores its encoding in the 3-bit Value field.
+class ILAddressing<bits<3> val> {
+ bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE : ILAddressing<0>;
+def IL_ADDR_RELATIVE : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+// Each ILElementFormat record stores its encoding in the 5-bit Value field.
+// NOTE(review): the section/table labels around this class repeat "5.11",
+// already used by IL Addressing; confirm the intended numbering.
+class ILElementFormat<bits<5> val> {
+ bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+// Pairs a 16-bit opcode number (Value) with the instruction text (Text).
+// NOTE(review): val carries a default of -1 while the following cmd
+// argument has none; every def visible in this file passes both, so
+// confirm whether the default is ever usable.
+class ILOpCode<bits<16> val = -1, string cmd> {
+ bits<16> Value = val;
+ string Text = cmd;
+}
+
+// Table 5-12
+def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS : ILOpCode<6, "abs">;
+def IL_OP_ADD : ILOpCode<7, "add">;
+def IL_OP_AND : ILOpCode<8, "iand">;
+def IL_OP_BREAK : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC : ILOpCode<12, "breakc">;
+def IL_OP_CALL : ILOpCode<13, "call">;
+def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
+def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
+def IL_OP_CASE : ILOpCode<16, "case">;
+def IL_OP_CLG : ILOpCode<17, "clg">;
+def IL_OP_CMOV : ILOpCode<18, "cmov">;
+def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
+def IL_OP_CMP : ILOpCode<20, "cmp">;
+def IL_OP_CONTINUE : ILOpCode<21, "continue">;
+def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
+def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
+def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
+def IL_OP_COS : ILOpCode<25, "cos">;
+def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
+def IL_OP_D_2_F : ILOpCode<27, "d2f">;
+def IL_OP_D_ADD : ILOpCode<28, "dadd">;
+def IL_OP_D_EQ : ILOpCode<29, "deq">;
+def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
+def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
+def IL_OP_D_GE : ILOpCode<32, "dge">;
+def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
+def IL_OP_D_LT : ILOpCode<34, "dlt">;
+def IL_OP_D_MAD : ILOpCode<35, "dmad">;
+def IL_OP_D_MUL : ILOpCode<36, "dmul">;
+def IL_OP_D_NE : ILOpCode<37, "dne">;
+def IL_OP_DEFAULT : ILOpCode<38, "default">;
+def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
+def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
+def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
+def IL_OP_DP2 : ILOpCode<42, "dp2">;
+def IL_OP_DP3 : ILOpCode<43, "dp3">;
+def IL_OP_DP4 : ILOpCode<44, "dp4">;
+def IL_OP_ELSE : ILOpCode<45, "else">;
+def IL_OP_END : ILOpCode<46, "end">;
+def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
+def IL_OP_ENDIF : ILOpCode<48, "endif">;
+def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
+def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
+def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
+def IL_OP_EQ : ILOpCode<52, "eq">;
+def IL_OP_EXP : ILOpCode<53, "exp">;
+def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
+def IL_OP_F_2_D : ILOpCode<55, "f2d">;
+def IL_OP_FLR : ILOpCode<56, "flr">;
+def IL_OP_FRC : ILOpCode<57, "frc">;
+def IL_OP_FTOI : ILOpCode<58, "ftoi">;
+def IL_OP_FTOU : ILOpCode<59, "ftou">;
+def IL_OP_FUNC : ILOpCode<60, "func">;
+def IL_OP_GE : ILOpCode<61, "ge">;
+def IL_OP_I_ADD : ILOpCode<62, "iadd">;
+def IL_OP_I_EQ : ILOpCode<63, "ieq">;
+def IL_OP_I_GE : ILOpCode<64, "ige">;
+def IL_OP_I_LT : ILOpCode<65, "ilt">;
+def IL_OP_I_MAD : ILOpCode<66, "imad">;
+def IL_OP_I_MAX : ILOpCode<67, "imax">;
+def IL_OP_I_MIN : ILOpCode<68, "imin">;
+def IL_OP_I_MUL : ILOpCode<69, "imul">;
+def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
+def IL_OP_I_NE : ILOpCode<71, "ine">;
+def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
+def IL_OP_I_NOT : ILOpCode<73, "inot">;
+def IL_OP_I_OR : ILOpCode<74, "ior">;
+def IL_OP_I_SHL : ILOpCode<75, "ishl">;
+def IL_OP_I_SHR : ILOpCode<76, "ishr">;
+def IL_OP_I_XOR : ILOpCode<77, "ixor">;
+def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
+def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
+// ILOpCode entries 80-128. Each def passes a numeric id and a quoted text to
+// ILOpCode; presumably the text is the IL mnemonic that gets printed (the
+// ILOpCode class itself is defined outside this excerpt - TODO confirm).
+def IL_OP_IFC : ILOpCode<80, "ifc">;
+def IL_OP_ITOF : ILOpCode<81, "itof">;
+def IL_OP_LN : ILOpCode<82, "ln">;
+def IL_OP_LOG : ILOpCode<83, "log">;
+def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
+def IL_OP_LOOP : ILOpCode<85, "loop">;
+def IL_OP_LT : ILOpCode<86, "lt">;
+// MAD/MAX/MIN/MOD carry "_ieee" texts although their def names have no _IEEE
+// suffix; MUL is the exception, with IL_OP_MUL_IEEE here and a plain IL_OP_MUL
+// defined later in the table.
+def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
+def IL_OP_MAX : ILOpCode<88, "max_ieee">;
+def IL_OP_MIN : ILOpCode<89, "min_ieee">;
+def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
+def IL_OP_MOV : ILOpCode<91, "mov">;
+def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
+def IL_OP_NE : ILOpCode<93, "ne">;
+def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
+def IL_OP_POW : ILOpCode<95, "pow">;
+def IL_OP_RCP : ILOpCode<96, "rcp">;
+def IL_OP_RET : ILOpCode<97, "ret">;
+def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
+def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
+def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
+def IL_OP_RND : ILOpCode<101, "rnd">;
+def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
+def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
+def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
+def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
+def IL_OP_RSQ : ILOpCode<106, "rsq">;
+def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
+def IL_OP_SAMPLE : ILOpCode<108, "sample">;
+def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
+def IL_OP_SET : ILOpCode<110, "set">;
+def IL_OP_SGN : ILOpCode<111, "sgn">;
+def IL_OP_SIN : ILOpCode<112, "sin">;
+def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
+def IL_OP_SUB : ILOpCode<114, "sub">;
+def IL_OP_SWITCH : ILOpCode<115, "switch">;
+def IL_OP_TRC : ILOpCode<116, "trc">;
+// Unsigned integer operations.
+def IL_OP_U_DIV : ILOpCode<117, "udiv">;
+def IL_OP_U_GE : ILOpCode<118, "uge">;
+def IL_OP_U_LT : ILOpCode<119, "ult">;
+def IL_OP_U_MAD : ILOpCode<120, "umad">;
+def IL_OP_U_MAX : ILOpCode<121, "umax">;
+def IL_OP_U_MIN : ILOpCode<122, "umin">;
+def IL_OP_U_MOD : ILOpCode<123, "umod">;
+def IL_OP_U_MUL : ILOpCode<124, "umul">;
+def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
+def IL_OP_U_SHR : ILOpCode<126, "ushr">;
+def IL_OP_UTOF : ILOpCode<127, "utof">;
+def IL_OP_WHILE : ILOpCode<128, "whileloop">;
+// SC IL instructions that are not in CAL IL
+def IL_OP_ACOS : ILOpCode<129, "acos">;
+def IL_OP_ASIN : ILOpCode<130, "asin">;
+def IL_OP_EXN : ILOpCode<131, "exn">;
+def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
+def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
+def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
+def IL_OP_SQRT : ILOpCode<135, "sqrt">;
+def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
+// NOTE(review): IL_OP_ATAN and IL_OP_TAN both use id 137 - confirm whether
+// ILOpCode ids are required to be unique before relying on them.
+def IL_OP_ATAN : ILOpCode<137, "atan">;
+def IL_OP_TAN : ILOpCode<137, "tan">;
+def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
+// IL_OP_F_NEG reuses the "mov" text; presumably the negation itself is
+// expressed elsewhere (e.g. as an operand modifier) - TODO confirm.
+def IL_OP_F_NEG : ILOpCode<139, "mov">;
+def IL_OP_GT : ILOpCode<140, "gt">;
+// NOTE(review): IL_OP_LE is given the text "lt", the same text as IL_OP_LT -
+// confirm this is intentional.
+def IL_OP_LE : ILOpCode<141, "lt">;
+def IL_OP_DIST : ILOpCode<142, "dist">;
+def IL_OP_LEN : ILOpCode<143, "len">;
+def IL_OP_MACRO : ILOpCode<144, "mcall">;
+def IL_OP_INTR : ILOpCode<145, "call">;
+def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
+def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
+// Barrier and fence variants; the texts differ in which of
+// threads/lds/memory they name.
+def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
+def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
+def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
+def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
+def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
+// The text starts with ';' - presumably so that it prints as a comment
+// rather than as an instruction (TODO confirm).
+def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
+// ILOpCode entries 155-184: unpack, bit/byte manipulation, SAD, carry/borrow,
+// 24-bit integer mul/mad, and assorted float/double helpers.
+def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
+def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
+def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
+def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
+def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
+def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
+def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
+def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
+def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
+def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
+def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
+def IL_OP_SAD : ILOpCode<166, "sad">;
+def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
+def IL_OP_SAD4 : ILOpCode<168, "sad4">;
+def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
+def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
+def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
+def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
+def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
+def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
+def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
+def IL_OP_CLAMP : ILOpCode<176, "clamp">;
+def IL_OP_LERP : ILOpCode<177, "lrp">;
+def IL_OP_FMA : ILOpCode<178, "fma">;
+def IL_OP_D_MIN : ILOpCode<179, "dmin">;
+def IL_OP_D_MAX : ILOpCode<180, "dmax">;
+def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
+def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
+def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
+def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
+// Thread/group id entries. Unlike the IL_OP_* entries, the text here is a
+// "v..." name rather than an instruction mnemonic. Every *_FLAT entry uses
+// the "...Flat" spelling of the corresponding non-flat name.
+def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
+def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
+// Was "vAbsTidFlag"; corrected to match the Flat/non-Flat naming pattern.
+def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlat">;
+def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
+def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
+def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
+// Double-precision reciprocal/rsqrt/move entries and nop (ids 191-195).
+def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
+def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
+def IL_OP_D_MOV : ILOpCode<193, "dmov">;
+def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
+def IL_OP_NOP : ILOpCode<195, "nop">;
+// UAV entries (ids 196-208) with "uav_<op>" texts.
+def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
+def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
+def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
+def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
+def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
+def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
+def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
+def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
+def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
+def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
+def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
+def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
+def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
+// "uav_read_<op>" counterparts (ids 209-222); this group additionally has
+// XCHG and CMPXCHG entries.
+def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
+def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
+def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
+def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
+def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
+def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
+def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
+def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
+def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
+def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
+def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
+def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
+def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
+def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
+// LDS entries (ids 223-235) with "lds_<op>" texts. The LDS MSKOR entries are
+// not part of this run; they are defined later in the table.
+def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
+def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
+def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
+def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
+def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
+def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
+def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
+def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
+def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
+def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
+def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
+def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
+def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
+// "lds_read_<op>" counterparts (ids 236-249), including XCHG and CMPXCHG.
+def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
+def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
+def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
+def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
+def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
+def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
+def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
+def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
+def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
+def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
+def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
+def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
+def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
+def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
+// GDS entries (ids 250-263) with "gds_<op>" texts; this group includes MSKOR
+// inline.
+def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
+def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
+def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
+def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
+def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
+def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
+def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
+def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
+def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
+def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
+def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
+def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
+def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
+def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
+// "gds_read_<op>" counterparts (ids 264-278), including XCHG and CMPXCHG.
+def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
+def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
+def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
+def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
+def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
+def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
+def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
+def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
+def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
+def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
+def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
+def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
+def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
+def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
+def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
+// Append-buffer entries.
+def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
+def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
+// 64-bit integer entries (ids 281-295); I64_* and U64_* names map to "i64..."
+// and "u64..." texts respectively. Further 64-bit entries (mul, sub, div,
+// mod) are defined later in the table.
+def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
+def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
+def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
+def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
+def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
+def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
+def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
+def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
+def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
+def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
+def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
+def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
+def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
+def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
+def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
+// High-half variants of the 24-bit multiplies.
+def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
+def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
+// Load/store entries for GDS, LDS and UAV (ids 298-322).
+def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
+def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
+def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
+def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
+def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
+def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
+def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
+def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
+def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
+def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
+def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
+def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
+def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
+def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
+def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
+def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
+// The LDS MSKOR pair sits here, apart from the other LDS atomic entries.
+def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
+def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
+def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
+def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
+def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
+def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
+// NOTE(review): IL_OP_UAV_BYTE_STORE and IL_OP_UAV_SHORT_STORE both use id
+// 320 - confirm whether ILOpCode ids are required to be unique.
+def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
+def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">;
+def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
+def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
+// Remaining entries (ids 323-398). The numbering is not contiguous in this
+// range: it jumps from 334 to 355, from 366 to 377, and from 377 to 394.
+def IL_OP_MUL : ILOpCode<323, "mul">;
+// Division variants that differ only in the zeroop(...) suffix of the text.
+def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
+def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
+def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
+def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
+def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
+def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
+def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
+def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
+def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
+def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
+def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
+def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
+def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
+def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
+def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
+def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
+def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
+def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
+// Declaration-style entries use an IL_DCL_ prefix instead of IL_OP_.
+def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
+def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
+def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
+def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
+def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
+def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
+def IL_OP_BFI : ILOpCode<394, "bfi">;
+def IL_OP_BFM : ILOpCode<395, "bfm">;
+// Debug entries use an IL_DBG_ prefix.
+def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
+def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
+def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
new file mode 100644
index 00000000000..c278a00b4da
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
@@ -0,0 +1,244 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+#include "AMDILEGAsmPrinter.h"
+#include "AMDILIOExpansion.h"
+#include "AMDILPointerManager.h"
+using namespace llvm;
+
+// Constructs the base Evergreen device: applies the Evergreen capability bits
+// and then picks the device flag from the subtarget's device name. Because
+// setCaps() is called from the constructor, it is this class's version that
+// runs here; derived classes call their own setCaps() from their constructors.
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+  : AMDILDevice(ST)
+{
+  setCaps();
+
+  const std::string devName = ST->getDeviceName();
+  // Any name other than cedar/redwood/cypress is treated as Juniper.
+  mDeviceFlag = (devName == "cedar")   ? OCL_DEVICE_CEDAR
+              : (devName == "redwood") ? OCL_DEVICE_REDWOOD
+              : (devName == "cypress") ? OCL_DEVICE_CYPRESS
+                                       : OCL_DEVICE_JUNIPER;
+}
+
+// Nothing to release.
+AMDILEvergreenDevice::~AMDILEvergreenDevice() {}
+
+// Returns MAX_LDS_SIZE_800 when LocalMem is handled in hardware, otherwise 0.
+size_t AMDILEvergreenDevice::getMaxLDSSize() const
+{
+  if (!usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return 0;
+  }
+  return MAX_LDS_SIZE_800;
+}
+// Returns the GDS size limit when RegionMem is handled in hardware, otherwise
+// 0.
+// NOTE(review): the limit returned is MAX_LDS_SIZE_800, i.e. the LDS constant;
+// confirm that reusing it for GDS is intended.
+size_t AMDILEvergreenDevice::getMaxGDSSize() const
+{
+  if (!usesHardware(AMDILDeviceInfo::RegionMem)) {
+    return 0;
+  }
+  return MAX_LDS_SIZE_800;
+}
+// Returns the fixed UAV limit reported for Evergreen devices.
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const
+{
+  const uint32_t maxUAVs = 12;
+  return maxUAVs;
+}
+
+// Maps a resource kind (one of the *_ID values handled below) to the resource
+// id used for it on Evergreen.
+//  - CONSTANT_ID / RAW_UAV_ID: the raw UAV id, chosen by CAL version.
+//  - GLOBAL_ID / ARENA_UAV_ID: the default arena UAV id.
+//  - LDS_ID / GDS_ID / SCRATCH_ID: the dedicated default id when the matching
+//    capability is handled in hardware, otherwise the default arena UAV id.
+// Any other value trips the assert and, if asserts are compiled out, yields 0.
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const
+{
+  switch (id) {
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+    return (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER)
+             ? GLOBAL_RETURN_RAW_UAV_ID
+             : DEFAULT_RAW_UAV_ID;
+
+  case GLOBAL_ID:
+  case ARENA_UAV_ID:
+    return DEFAULT_ARENA_UAV_ID;
+
+  case LDS_ID:
+    return usesHardware(AMDILDeviceInfo::LocalMem)
+             ? DEFAULT_LDS_ID
+             : DEFAULT_ARENA_UAV_ID;
+
+  case GDS_ID:
+    return usesHardware(AMDILDeviceInfo::RegionMem)
+             ? DEFAULT_GDS_ID
+             : DEFAULT_ARENA_UAV_ID;
+
+  case SCRATCH_ID:
+    return usesHardware(AMDILDeviceInfo::PrivateMem)
+             ? DEFAULT_SCRATCH_ID
+             : DEFAULT_ARENA_UAV_ID;
+
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    break;
+  }
+  return 0;
+}
+
+// The base Evergreen device reports AMDILDevice::WavefrontSize.
+size_t AMDILEvergreenDevice::getWavefrontSize() const
+{
+  const size_t waveSize = AMDILDevice::WavefrontSize;
+  return waveSize;
+}
+
+// Evergreen devices report the HD5XXX generation.
+uint32_t AMDILEvergreenDevice::getGeneration() const
+{
+  const uint32_t generation = AMDILDeviceInfo::HD5XXX;
+  return generation;
+}
+
+// Populates the capability bit sets for the base Evergreen device. mHWBits and
+// mSWBits are updated in sequence, driven by the CAL version reported by the
+// subtarget and by per-capability overrides (mSTM->isOverride). Presumably
+// mHWBits marks hardware-backed and mSWBits software-emulated capabilities -
+// TODO confirm against AMDILDevice.
+void AMDILEvergreenDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+ // From CAL_VERSION_SC_140 on, 64-bit div/mod moves from the SW set to the
+ // HW set.
+ if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+ }
+ mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDILDeviceInfo::ByteStores);
+ }
+ // With the Debug override, local and region memory go to the SW set instead
+ // of the HW set.
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+ mSWBits.set(AMDILDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::LocalMem);
+ mHWBits.set(AMDILDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDILDeviceInfo::Images);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDILDeviceInfo::NoAlias);
+ }
+ // NOTE(review): this uses a strict '>' against
+ // CAL_VERSION_GLOBAL_RETURN_BUFFER, whereas getResourceID() compares with
+ // '>=' - confirm the difference is intended.
+ if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDILDeviceInfo::MultiUAV);
+ }
+ // Above CAL_VERSION_SC_136: byte LDS ops and arena vectors go to the HW set;
+ // otherwise arena vectors are placed in the SW set.
+ if (mSTM->calVersion() > CAL_VERSION_SC_136) {
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ } else {
+ mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+ }
+ // Above CAL_VERSION_SC_137: LongOps moves from the SW set to the HW set.
+ if (mSTM->calVersion() > CAL_VERSION_SC_137) {
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
+ }
+ mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+// Creates the Evergreen I/O expansion pass. The pass is allocated with new and
+// handed to the caller.
+FunctionPass *AMDILEvergreenDevice::getIOExpansion(TargetMachine &TM,
+                                                   CodeGenOpt::Level OptLevel) const {
+  FunctionPass *ioExpansion = new AMDILEGIOExpansion(TM, OptLevel);
+  return ioExpansion;
+}
+
+// Creates the Evergreen assembly printer. The parameter list and the forwarded
+// arguments both come from the *_ASM_PRINTER_ARGUMENTS macros.
+AsmPrinter *AMDILEvergreenDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const {
+  AsmPrinter *printer = new AMDILEGAsmPrinter(ASM_PRINTER_ARGUMENTS);
+  return printer;
+}
+
+// Creates the Evergreen pointer manager pass. The pass is allocated with new
+// and handed to the caller.
+FunctionPass *AMDILEvergreenDevice::getPointerManager(TargetMachine &TM,
+                                                      CodeGenOpt::Level OptLevel) const {
+  FunctionPass *pointerManager = new AMDILEGPointerManager(TM, OptLevel);
+  return pointerManager;
+}
+
+// Cypress device. The AMDILEvergreenDevice constructor has already applied the
+// base capability set; the call below then runs AMDILCypressDevice::setCaps().
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice() {}
+
+// Adds DoubleOps and FMA to the hardware set, but only when the DoubleOps
+// override is active on the subtarget.
+void AMDILCypressDevice::setCaps()
+{
+  if (!mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+    return;
+  }
+  mHWBits.set(AMDILDeviceInfo::DoubleOps);
+  mHWBits.set(AMDILDeviceInfo::FMA);
+}
+
+
+// Cedar device. The AMDILEvergreenDevice constructor has already applied the
+// base capability set; the call below then runs AMDILCedarDevice::setCaps().
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice() {}
+
+// Puts FMA in the software capability set.
+void AMDILCedarDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+// Cedar reports AMDILDevice::QuarterWavefrontSize.
+size_t AMDILCedarDevice::getWavefrontSize() const
+{
+  return AMDILDevice::QuarterWavefrontSize;
+}
+
+// Redwood device. The AMDILEvergreenDevice constructor has already applied the
+// base capability set; the call below then runs AMDILRedwoodDevice::setCaps().
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice() {}
+
+// Puts FMA in the software capability set.
+void AMDILRedwoodDevice::setCaps()
+{
+  mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+// Redwood reports AMDILDevice::HalfWavefrontSize.
+size_t AMDILRedwoodDevice::getWavefrontSize() const
+{
+  return AMDILDevice::HalfWavefrontSize;
+}
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.h b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
new file mode 100644
index 00000000000..b56acca48ee
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.h
@@ -0,0 +1,133 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the Evergreen family of AMDIL device classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm {
+ class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. It holds the information needed to differentiate an
+// Evergreen device from the generic AMDILDevice. On its own it represents the
+// capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice {
+public:
+  AMDILEvergreenDevice(AMDILSubtarget *ST);
+  virtual ~AMDILEvergreenDevice();
+
+  // Size limits and identification queries.
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getMaxGDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getMaxNumUAVs() const;
+  virtual uint32_t getResourceID(uint32_t id) const;
+
+  // Factories for the Evergreen-specific passes and assembly printer.
+  virtual FunctionPass *getIOExpansion(TargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel) const;
+  virtual AsmPrinter *getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+  virtual FunctionPass *getPointerManager(TargetMachine &TM,
+                                          CodeGenOpt::Level OptLevel) const;
+
+protected:
+  // Sets up the capability bits for this device.
+  virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// AMDILCypressDevice is similar to AMDILEvergreenDevice, except that it has
+// support for double precision operations. It is used to represent both the
+// Cypress and Hemlock cards, which are commercially known as the HD58XX and
+// HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice {
+public:
+  AMDILCypressDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCypressDevice();
+
+private:
+  // Cypress-specific capability adjustments.
+  virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// AMDILCedarDevice represents all of the 'Cedar' based devices. It differs
+// from the base AMDILEvergreenDevice in that the device is roughly a quarter
+// of a 'Juniper'. These are commercially known as the HD54XX and HD53XX
+// series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice {
+public:
+  AMDILCedarDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCedarDevice();
+
+  // Overrides the base wavefront size.
+  virtual size_t getWavefrontSize() const;
+
+private:
+  // Cedar-specific capability adjustments.
+  virtual void setCaps();
+}; // AMDILCedarDevice
+
+// AMDILRedwoodDevice represents all of the 'Redwood' based devices. It differs
+// from the base class in that these devices are considered about half of a
+// 'Juniper' device. These are commercially known as the HD55XX and HD56XX
+// series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
+public:
+  AMDILRedwoodDevice(AMDILSubtarget *ST);
+  virtual ~AMDILRedwoodDevice();
+
+  // Overrides the base wavefront size.
+  virtual size_t getWavefrontSize() const;
+
+private:
+  // Redwood-specific capability adjustments.
+  virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILFixupKinds.h b/src/gallium/drivers/radeon/AMDILFixupKinds.h
new file mode 100644
index 00000000000..167bb18d2d5
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFixupKinds.h
@@ -0,0 +1,73 @@
+//===-- AMDIL/AMDILFixupKinds.h - AMDIL Specific Fixup Entries --------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AMDIL_AMDILFIXUPKINDS_H
+#define LLVM_AMDIL_AMDILFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace AMDIL {
+
+// Target-specific fixup kinds; numbering starts at FirstTargetFixupKind.
+// NOTE(review): "rip-relative" is x86 terminology - presumably these entries
+// were carried over from another target's fixup list; confirm they are used.
+enum Fixups {
+  // 32-bit pcrel, e.g. a branch.
+  reloc_pcrel_4byte = FirstTargetFixupKind,
+  // 32-bit rip-relative
+  reloc_riprel_4byte
+};
+
+} // namespace AMDIL
+} // namespace llvm
+
+#endif
+
diff --git a/src/gallium/drivers/radeon/AMDILFormats.td b/src/gallium/drivers/radeon/AMDILFormats.td
new file mode 100644
index 00000000000..2038f053868
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFormats.td
@@ -0,0 +1,492 @@
+//==- AMDILFormats.td - AMDIL Instruction Formats ---------*- tablegen -*-==//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===--------------------------------------------------------------------===//
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits the Instruction class. It
+// sets the namespace, the output and input operand dag lists, the pattern
+// to match, and the asm string (with a newline appended) for the printer.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+ field bits<32> Inst;
+
+ let Inst = 0xffffffff; // all 32 bits of Inst are set by default
+ let Namespace = "AMDIL";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ ILOpCode operation = op; // the IL opcode record passed by the subclass
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n"); // every asm string ends in '\n'
+ bit hasIEEEFlag = 0; // both flags default to off
+ bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// The base class for vector insert instructions. It is a single dest, quad
+// source instruction where the last two source operands must be 32bit
+// immediate values that encode the swizzle of the source registers.
+// The src2 and src3 operands must also be the inverse of each other such
+// that if src2 is 0x01000300 (x_z_), src3 must be 0x00020004 (_y_w). Each
+// value is a 32bit integer in which every 8 bits encode one swizzle value.
+// The encoding is as follows for 32bit register types:
+// 0x00 -> '_'
+// 0x01 -> 'x'
+// 0x02 -> 'y'
+// 0x03 -> 'z'
+// 0x04 -> 'w'
+// 0x05 -> 'x'
+// 0x06 -> 'y'
+// 0x07 -> 'z'
+// 0x08 -> 'w'
+// 0x09 -> '0'
+// The encoding is as follows for 64bit register types:
+// 0x00 -> "__"
+// 0x01 -> "xy"
+// 0x02 -> "zw"
+// 0x03 -> "xy"
+// 0x04 -> "zw"
+// 0x05 -> "00"
+//===--------------------------------------------------------------------===//
+class InsertVectorClass<ILOpCode op, RegisterClass DReg, RegisterClass SReg,
+ SDNode OpNode, string asmstr> :
+ ILFormat<op, (outs DReg:$dst),
+ (ins DReg:$src0, SReg:$src1, i32imm:$src2, i32imm:$src3),
+ !strconcat(asmstr, " $dst, $src0, $src1"), // $src2/$src3 are not printed
+ [(set DReg:$dst, (OpNode DReg:$src0, SReg:$src1,
+ timm:$src2, timm:$src3))]>;
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameter and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and it
+// handles the unary math operators.
+// It declares the binary tokens for the destination and for source 0,
+// including the ILRelAddr/ILSrc/ILSrcMod used by register relative addressing.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILDst dst_reg; // destination tokens
+ ILDstMod dst_mod;
+ ILRelAddr dst_rel;
+ ILSrc dst_reg_rel;
+ ILSrcMod dst_reg_rel_mod;
+ ILSrc src0_reg; // source 0 tokens
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// A simplified version of the OneInOneOut class where the pattern is standard
+// and does not need special cases. This requires that the pattern has
+// an SDNode and takes source and destination registers that are of type
+// RegisterClass. This is the standard unary op class.
+//===--------------------------------------------------------------------===//
+class UnaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dRegs, RegisterClass sRegs>
+ : OneInOneOut<op, (outs dRegs:$dst), (ins sRegs:$src),
+ !strconcat(op.Text, " $dst, $src"), // asm text comes from the opcode
+ [(set dRegs:$dst, (OpNode sRegs:$src))]>;
+
+//===--------------------------------------------------------------------===//
+// This class is similar to the UnaryOp class; however, there is no result
+// value to assign, so only the source 0 tokens are declared.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Class that has two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and it
+// handles the binary math operators and comparison operations.
+// On top of OneInOneOut it declares the source 1 tokens: ILSrc, ILSrcMod,
+// and the ILRelAddr/ILSrc/ILSrcMod used by register relative addressing.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+//===--------------------------------------------------------------------===//
+// A simplification of the TwoInOneOut pattern for Binary Operations.
+// This class is a helper class that assumes the simple pattern of
+// $dst = op $src0 $src1.
+// Other types of matching patterns need to use the TwoInOneOut class.
+//===--------------------------------------------------------------------===//
+class BinaryOp<ILOpCode op, SDNode OpNode, RegisterClass dReg,
+ RegisterClass sReg0, RegisterClass sReg1>
+ : TwoInOneOut<op, (outs dReg:$dst), (ins sReg0:$src0, sReg1:$src1),
+ !strconcat(op.Text, " $dst, $src0, $src1"), // asm text comes from the opcode
+ [(set dReg:$dst, (OpNode sReg0:$src0, sReg1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector extract instructions. The vector extract
+// instructions take as input a source register and a 32bit integer
+// with the same encoding as specified in InsertVectorClass and produce
+// a result with only the swizzled component in the destination register.
+//===--------------------------------------------------------------------===//
+class ExtractVectorClass<RegisterClass DReg, RegisterClass SReg, SDNode OpNode>
+: TwoInOneOut<IL_OP_MOV, (outs DReg:$dst), (ins SReg:$src0, i32imm:$src1),
+ "mov $dst, $src0", // the $src1 swizzle immediate is not printed
+ [(set DReg:$dst, (OpNode SReg:$src0, timm:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// The base class for vector concatenation. This class creates either a vec2
+// or a vec4 of 32bit data types or a vec2 of 64bit data types. This is done
+// by swizzling either the 'x' or 'xy' components of the source operands
+// into the destination register.
+//===--------------------------------------------------------------------===//
+class VectorConcatClass<RegisterClass Dst, RegisterClass Src, SDNode OpNode>
+ : TwoInOneOut<IL_OP_I_ADD, (outs Dst:$dst), (ins Src:$src0, Src:$src1),
+ "iadd $dst, $src0, $src1", // printed as an iadd of both sources
+ [(set Dst:$dst, (OpNode Src:$src0, Src:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Similar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on PC platform.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg; // source 1 tokens, added to UnaryOpNoRet's source 0 tokens
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Class that has three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and it
+// handles the mad and conditional mov instructions.
+// On top of TwoInOneOut it declares the source 2 tokens: ILSrc, ILSrcMod,
+// and the ILRelAddr/ILSrc/ILSrcMod used if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src2_reg;
+ ILSrcMod src2_mod;
+ ILRelAddr src2_rel;
+ ILSrc src2_reg_rel;
+ ILSrcMod src2_reg_rel_mod;
+ }
+
+//===--------------------------------------------------------------------===//
+// The g version of the Three Input pattern uses a standard pattern
+// but allows specification of the registers to further generalize the class.
+// This class is mainly used in the generic multiclasses in AMDILMultiClass.td
+//===--------------------------------------------------------------------===//
+class TernaryOp<ILOpCode op, SDNode OpNode,
+ RegisterClass dReg,
+ RegisterClass sReg0,
+ RegisterClass sReg1,
+ RegisterClass sReg2>
+ : ThreeInOneOut<op, (outs dReg:$dst),
+ (ins sReg0:$src0, sReg1:$src1, sReg2:$src2),
+ !strconcat(op.Text, " $dst, $src0, $src1, $src2"), // text from the opcode
+ [(set dReg:$dst,
+ (OpNode sReg0:$src0, sReg1:$src1, sReg2:$src2))]>;
+
+//===--------------------------------------------------------------------===//
+// Class that has four input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2, Src3".
+// It extends ThreeInOneOut with the binary tokens for source 3: ILSrc,
+// ILSrcMod, and the ILRelAddr/ILSrc/ILSrcMod used if the addressing is
+// register relative.
+// NOTE(review): no user of this class is visible in this part of the file.
+//===--------------------------------------------------------------------===//
+class FourInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ThreeInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src3_reg;
+ ILSrcMod src3_mod;
+ ILRelAddr src3_rel;
+ ILSrc src3_reg_rel;
+ ILSrcMod src3_reg_rel_mod;
+ }
+
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of OneInOneOut, fixed to IL_OP_MACRO, and
+// is tailored for macros only where all the register types are the same.
+//===--------------------------------------------------------------------===//
+class UnaryMacro<RegisterClass Dst, RegisterClass Src0, SDNode OpNode>
+: OneInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0:$src0),
+ "($dst),($src0)", // operand list only; no mnemonic is part of this string
+ [(set Dst:$dst, (OpNode Src0:$src0))]>;
+
+//===--------------------------------------------------------------------===//
+// The macro class is an extension of TwoInOneOut, fixed to IL_OP_MACRO, and
+// is tailored for macros only where all the register types are the same.
+//===--------------------------------------------------------------------===//
+class BinaryMacro<RegisterClass Dst,
+ RegisterClass Src0,
+ RegisterClass Src1,
+ SDNode OpNode>
+ : TwoInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ (ins Src0: $src0, Src1:$src1),
+ "($dst),($src0, $src1)", // operand list only; no mnemonic in this string
+ [(set Dst:$dst, (OpNode Src0:$src0, Src1:$src1))]>;
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions with 32bit pointers (MEMI32)
+//===--------------------------------------------------------------------===//
+class Append<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$id), // sole input; only $dst appears in the asm string
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+
+// TODO: Need to get this working without dst; currently identical to Append.
+class AppendNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR:$id))]>;
+
+class UniAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, i32imm:$id), // $id is matched but not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class UniAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI32:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, timm:$id))]>;
+
+class BinAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, i32imm:$id), // $id is not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class BinAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI32:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, timm:$id))]>;
+
+class TriAtom<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // $id not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // printed $src1, $src
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class TriAtomNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class CmpXChgNoRet<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // printed $src1, $src
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+
+//===--------------------------------------------------------------------===//
+// Classes for dealing with atomic instructions with 64bit pointers (MEMI64)
+//===--------------------------------------------------------------------===//
+class Append64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$id), // sole input; only $dst appears in the asm string
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+
+// TODO: Need to get this working without dst; currently identical to Append64.
+class AppendNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$id),
+ !strconcat(op.Text, !strconcat(idType," $dst")),
+ [(set GPRI32:$dst, (intr ADDR64:$id))]>;
+
+class UniAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, i32imm:$id), // $id is matched but not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class UniAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI64:$ptr, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, timm:$id))]>;
+
+class BinAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, i32imm:$id), // $id is not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class BinAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst), (ins MEMI64:$ptr, GPRI32:$src, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, timm:$id))]>;
+
+class TriAtom64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // $id not printed
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+class CmpXChg64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // printed $src1, $src
+ !strconcat(op.Text, !strconcat(idType," $dst, $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class TriAtomNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id),
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src, $src1")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+// TODO: Need to get this working without dst; $dst is declared, not printed.
+class CmpXChgNoRet64<ILOpCode op, string idType, SDNode intr>
+ : ILFormat<op, (outs GPRI32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$src, GPRI32:$src1, i32imm:$id), // printed $src1, $src
+ !strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
+ [(set GPRI32:$dst, (intr ADDR64:$ptr, GPRI32:$src, GPRI32:$src1, timm:$id))]>;
+
+//===--------------------------------------------------------------------===//
+// Intrinsic classes: signature templates for target specific intrinsics
+// (TargetPrefix "AMDIL") used in place of SDNode patterns. Every *Atomic*
+// class is marked IntrReadWriteArgMem; the rest carry no properties.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong : // no arguments, i64 result
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt : // no arguments, i32 result
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool : // result type is i32, same signature as VoidIntInt
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt : // LLVMMatchType<0> ties each argument to the result type
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>;
+ class ConvertIntFTOI : // any float argument, any int result
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>;
+ class ConvertIntITOF : // any int argument, any float result
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetInt : // NoRet classes have an empty result list
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetInt : // second argument must match the first
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>;
+ class UnaryAtomicInt : // atomics take a pointer followed by i32 operands
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt : // property list now matches the other atomic classes
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.cpp b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
new file mode 100644
index 00000000000..ccf688d46eb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFrameLowering.cpp
@@ -0,0 +1,104 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Implements the stack frame layout interface for an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+#if LLVM_VERSION >= 2500
+ : TargetFrameLowering(D, StackAl, LAO, TransAl)
+#else // LLVM_VERSION < 2500: the base class is TargetFrameInfo
+ : TargetFrameInfo(D, StackAl, LAO, TransAl)
+#endif // LLVM_VERSION >= 2500
+{ // All four arguments are forwarded unchanged to the base class.
+}
+
+AMDILFrameLowering::~AMDILFrameLowering() {
+ // Intentionally empty: this destructor performs no work of its own.
+}
+
+/// getFrameIndexOffset - Return the object offset that the function's
+/// MachineFrameInfo records for frame index FI.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ // Pure lookup: the stored offset is returned unmodified.
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
+
+#if LLVM_VERSION >= 2500
+const TargetFrameLowering::SpillSlot *
+#else // LLVM_VERSION < 2500
+const TargetFrameInfo::SpillSlot *
+#endif // only the return type's enclosing class differs between versions
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+ NumEntries = 0; // report zero callee-saved spill slots
+ return 0; // and no slot table (null pointer)
+}
+#if LLVM_VERSION >= 2500
+void AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+ // Intentionally empty: no prologue code is emitted and MF is left untouched.
+}
+void AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ // Intentionally empty: no epilogue code is emitted into MBB.
+}
+bool AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+ // Always answers false, whatever function is passed in.
+ return false;
+}
+#endif
diff --git a/src/gallium/drivers/radeon/AMDILFrameLowering.h b/src/gallium/drivers/radeon/AMDILFrameLowering.h
new file mode 100644
index 00000000000..840830ff57a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILFrameLowering.h
@@ -0,0 +1,96 @@
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+#include "AMDIL.h"
+#if LLVM_VERSION >= 2500
+#include "llvm/Target/TargetFrameLowering.h"
+#else
+#include "llvm/Target/TargetFrameInfo.h"
+#endif
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameLowering (TargetFrameInfo if LLVM_VERSION < 2500) for more.
+
+namespace llvm {
+#if LLVM_VERSION >= 2500
+ class AMDILFrameLowering : public TargetFrameLowering {
+#else // LLVM_VERSION < 2500
+ class AMDILFrameLowering : public TargetFrameInfo {
+#endif // LLVM_VERSION >= 2500
+ public:
+ AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1); // TransAl is optional and defaults to 1
+ virtual ~AMDILFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+#if LLVM_VERSION >= 2500
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+#endif // LLVM_VERSION >= 2500: these three exist only for the newer base class
+ }; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.cpp b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
new file mode 100644
index 00000000000..da459c28fd1
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILGlobalManager.cpp
@@ -0,0 +1,1386 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILGlobalManager.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILSubtarget.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+/// Construct a manager with every offset, counter and pointer cleared.
+/// \param debugMode  when true, print()/dump() produce output and
+///                   processModule() dumps each global it visits.
+AMDILGlobalManager::AMDILGlobalManager(bool debugMode) {
+  mOffset = 0;
+  mReservedBuffs = 0;
+  symTab = NULL;
+  // mSTM is otherwise only assigned in processModule(); clear it here so
+  // print() never streams an indeterminate pointer value.
+  mSTM = NULL;
+  mCurrentCPOffset = 0;
+  mDebugMode = debugMode;
+}
+
+/// Destructor; performs no explicit cleanup.
+AMDILGlobalManager::~AMDILGlobalManager() {}
+
+/// Write a textual dump of the manager state to \p O.
+/// Nothing is written unless the manager was constructed in debug mode.
+/// Every emitted line starts with ';'.
+void AMDILGlobalManager::print(llvm::raw_ostream &O) {
+  if (!mDebugMode) {
+    return;
+  }
+  typedef llvm::DenseMap<uint32_t, llvm::StringRef>::iterator ImageIter;
+  typedef std::set<llvm::StringRef>::iterator NameIter;
+  O << ";AMDIL Global Manager State Dump:\n";
+  O << ";\tSubtarget: " << mSTM << "\tSymbol Table: " << symTab
+    << "\n";
+  O << ";\tConstant Offset: " << mOffset << "\tCP Offset: "
+    << mCurrentCPOffset << "\tReserved Buffers: " << mReservedBuffs
+    << "\n";
+  // Image id -> image name mapping.
+  if (!mImageNameMap.empty()) {
+    O << ";\tGlobal Image Mapping: \n";
+    for (ImageIter img = mImageNameMap.begin(), imgEnd = mImageNameMap.end();
+         img != imgEnd; ++img) {
+      O << ";\t\tImage ID: " << img->first << "\tName: "
+        << img->second << "\n";
+    }
+  }
+  // Names recorded in the byte-store set.
+  if (!mByteStore.empty()) {
+    O << ";Byte Store Kernels: \n";
+    for (NameIter cur = mByteStore.begin(), last = mByteStore.end();
+         cur != last; ++cur) {
+      O << ";\t\t" << *cur << "\n";
+    }
+  }
+  // Names of globals that processModule() skips.
+  if (!mIgnoreStr.empty()) {
+    O << ";\tIgnored Data Strings: \n";
+    for (NameIter cur = mIgnoreStr.begin(), last = mIgnoreStr.end();
+         cur != last; ++cur) {
+      O << ";\t\t" << *cur << "\n";
+    }
+  }
+}
+
+void AMDILGlobalManager::dump() {
+ print(errs());
+}
+
+/// Find the constant-pointer record of \p krnl whose name matches \p arg.
+/// Names are compared with strcmp(), i.e. only up to the first NUL byte.
+/// \return pointer to the first matching record, or NULL when none matches.
+static const constPtr *getConstPtr(const kernel &krnl, const std::string &arg) {
+  typedef llvm::SmallVector<constPtr, DEFAULT_VEC_SLOTS>::const_iterator
+    ConstPtrIter;
+  const char *wanted = arg.c_str();
+  for (ConstPtrIter cur = krnl.constPtr.begin(), last = krnl.constPtr.end();
+       cur != last; ++cur) {
+    if (strcmp(cur->name.data(), wanted) == 0) {
+      return &(*cur);
+    }
+  }
+  return NULL;
+}
+// Disabled helper (compiled out by the #if 0 below).
+// Walks the elements of ST and returns true as soon as one element has a
+// non-zero GET_SCALAR_SIZE below 32; elements whose scalar size is 0 are
+// searched recursively when they are structs. Returns false otherwise.
+#if 0
+static bool structContainsSub32bitType(const StructType *ST) {
+ StructType::element_iterator eib, eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ Type *ptr = *eib;
+ uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
+ if (!size) {
+ // Note: this inner ST shadows the function parameter of the same name.
+ if (const StructType *ST = dyn_cast<StructType>(ptr)) {
+ if (structContainsSub32bitType(ST)) {
+ return true;
+ }
+ }
+ } else if (size < 32) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+// Scan every global of module M and record the information the backend needs.
+// Each global is dispatched on its name (first matching branch wins):
+//   "sgv..."                             -> parseSGV, stored in mKernelArgs
+//   "fgv..."                             -> ignored
+//   "lvgv..."                            -> parseLVGV, stored in mLocalArgs
+//   "llvm.image.annotations..."          -> parseImageAnnotate (kernels only)
+//   "llvm.global.annotations..."         -> parseGlobalAnnotate
+//   "llvm.constpointer.annotations..."   -> parseConstantPtrAnnotate (kernels)
+//   "llvm.readonlypointer.annotations.." -> ignored here
+//   address space 3                      -> parseAutoArray(GV, false)
+//   name contains "clregion"             -> parseAutoArray(GV, true)
+//   any other used global not in mIgnoreStr -> parseConstantPtr
+// Afterwards the collected constants are laid out by allocateGlobalCB() and
+// checkConstPtrsUseHW() is run over every function of the module.
+// Also sets mSTM from mTM and symTab to "NoSymTab".
+void AMDILGlobalManager::processModule(const Module &M,
+ const AMDILTargetMachine *mTM)
+{
+ Module::const_global_iterator GI;
+ Module::const_global_iterator GE;
+ symTab = "NoSymTab";
+ mSTM = mTM->getSubtargetImpl();
+ for (GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
+ const GlobalValue *GV = GI;
+ if (mDebugMode) {
+ GV->dump();
+ errs() << "\n";
+ }
+ llvm::StringRef GVName = GV->getName();
+ // NOTE(review): the str* calls below treat GVName.data() as a C string;
+ // this presumably relies on the name storage being NUL-terminated.
+ const char *name = GVName.data();
+ if (!strncmp(name, "sgv", 3)) {
+ mKernelArgs[GVName] = parseSGV(GV);
+ } else if (!strncmp(name, "fgv", 3)) {
+ // we can ignore this since we don't care about the filename
+ // string
+ } else if (!strncmp(name, "lvgv", 4)) {
+ mLocalArgs[GVName] = parseLVGV(GV);
+ } else if (!strncmp(name, "llvm.image.annotations", 22)) {
+ if (strstr(name, "__OpenCL")
+ && strstr(name, "_kernel")) {
+ // we only want to parse the image information if the
+ // image is a kernel, we might have to parse out the
+ // information if a function is found that is not
+ // inlined.
+ parseImageAnnotate(GV);
+ }
+ } else if (!strncmp(name, "llvm.global.annotations", 23)) {
+ parseGlobalAnnotate(GV);
+ } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
+ if (strstr(name, "__OpenCL")
+ && strstr(name, "_kernel")) {
+ // we only want to parse constant pointer information
+ // if it is a kernel
+ parseConstantPtrAnnotate(GV);
+ }
+ } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
+ // These are skipped as we handle them later in AMDILPointerManager.cpp
+ } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
+ parseAutoArray(GV, false);
+ } else if (strstr(name, "clregion")) {
+ parseAutoArray(GV, true);
+ } else if (!GV->use_empty()
+ && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
+ parseConstantPtr(GV);
+ }
+ }
+ // Lay out everything gathered in mConstMems before looking at the kernels.
+ allocateGlobalCB();
+
+ // Invoke checkConstPtrsUseHW(this, f) for each function f of the module.
+ safeForEach(M.begin(), M.end(),
+ std::bind1st(
+ std::mem_fun(&AMDILGlobalManager::checkConstPtrsUseHW),
+ this));
+}
+
+/// Assign a constant buffer number and offset to every entry of mConstMems.
+///
+/// An entry is placed in a hardware constant buffer when the device reports
+/// hardware constant memory, the entry fits within the maximum buffer size
+/// and a hardware buffer is still available; a new buffer is started when
+/// the entry would overflow the current one. All remaining entries are
+/// packed into buffer 0 at a separate running offset. Offsets advance in
+/// 16-byte aligned steps. If any constants exist, mReservedBuffs is set to
+/// the index of the last hardware buffer touched plus one.
+void AMDILGlobalManager::allocateGlobalCB(void) {
+  uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
+  uint32_t hwOffset = 0;
+  uint32_t hwBuffer = 0;
+  uint32_t swOffset = 0;
+  StringMap<constPtr>::iterator cur = mConstMems.begin();
+  StringMap<constPtr>::iterator last = mConstMems.end();
+  for (; cur != last; ++cur) {
+    constPtr &entry = cur->second;
+    bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+    entry.usesHardware = false;
+    // Only entries that fit in a single buffer are hardware candidates.
+    if (constHW && entry.size <= maxCBSize) {
+      if (hwOffset + entry.size > maxCBSize) {
+        // Does not fit behind the previous entries: open the next buffer.
+        hwOffset = 0;
+        hwBuffer++;
+      }
+      if (hwBuffer < mSTM->device()->getMaxNumCBs()) {
+        entry.cbNum = hwBuffer + CB_BASE_OFFSET;
+        entry.offset = hwOffset;
+        hwOffset += (entry.size + 15) & (~15);
+        entry.usesHardware = true;
+        continue;
+      }
+    }
+    // Fallback path: buffer 0 with its own running offset.
+    entry.cbNum = 0;
+    entry.offset = swOffset;
+    swOffset += (entry.size + 15) & (~15);
+  }
+  if (!mConstMems.empty()) {
+    mReservedBuffs = hwBuffer + 1;
+  }
+}
+
+// Decide, for one function of the module, which of its constant pointers are
+// backed by hardware constant buffers and record the per-buffer sizes.
+// Functions whose name lacks "__OpenCL" or "_kernel" are ignored.
+// Steps:
+//  1. On devices with hardware constant memory, add a constPtr record for
+//     every constant-address-space pointer argument that has none yet.
+//  2. If the kernel has more records than buffers left after mReservedBuffs,
+//     clear usesHardware on the smallest ones.
+//  3. Renumber the remaining hardware records consecutively.
+//  4. Append the module-level hardware constants from mConstMems.
+//  5. Grow krnl.constSizes[] to cover each record; records whose buffer
+//     number is out of range lose their hardware flag.
+// Always returns false.
+bool AMDILGlobalManager::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
+{
+ Function::const_arg_iterator AI, AE;
+ const Function *func = *FCI;
+ std::string name = func->getName();
+ if (!strstr(name.c_str(), "__OpenCL")
+ || !strstr(name.c_str(), "_kernel")) {
+ return false;
+ }
+ // operator[] creates the kernel entry if it does not exist yet.
+ kernel &krnl = mKernels[name];
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ for (AI = func->arg_begin(), AE = func->arg_end();
+ AI != AE; ++AI) {
+ const Argument *Arg = &(*AI);
+ const PointerType *P = dyn_cast<PointerType>(Arg->getType());
+ if (!P) {
+ continue;
+ }
+ if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
+ continue;
+ }
+ // Skip arguments that already have a record.
+ const constPtr *ptr = getConstPtr(krnl, Arg->getName());
+ if (ptr) {
+ continue;
+ }
+ // No size is known for the argument, so the maximum buffer size is used.
+ constPtr constAttr;
+ constAttr.name = Arg->getName();
+ constAttr.size = this->mSTM->device()->getMaxCBSize();
+ constAttr.base = Arg;
+ constAttr.isArgument = true;
+ constAttr.isArray = false;
+ constAttr.offset = 0;
+ constAttr.usesHardware =
+ mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+ if (constAttr.usesHardware) {
+ constAttr.cbNum = krnl.constPtr.size() + 2;
+ } else {
+ constAttr.cbNum = 0;
+ }
+ krnl.constPtr.push_back(constAttr);
+ }
+ }
+ // Now lets make sure that only the N largest buffers
+ // get allocated in hardware if we have too many buffers
+ uint32_t numPtrs = krnl.constPtr.size();
+ // NOTE(review): the subtraction is unsigned; this presumably assumes
+ // mReservedBuffs never exceeds getMaxNumCBs() -- confirm.
+ if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
+ // TODO: Change this routine so it sorts
+ // constPtr instead of pulling the sizes out
+ // and then grab the N largest and disable the rest
+ llvm::SmallVector<uint32_t, 16> sizes;
+ for (uint32_t x = 0; x < numPtrs; ++x) {
+ sizes.push_back(krnl.constPtr[x].size);
+ }
+ std::sort(sizes.begin(), sizes.end());
+ uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
+ mReservedBuffs);
+ // Largest size among the numToDisable smallest records.
+ uint32_t safeSize = sizes[numToDisable-1];
+ for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
+ if (krnl.constPtr[x].size <= safeSize) {
+ krnl.constPtr[x].usesHardware = false;
+ --numToDisable;
+ }
+ }
+ }
+ // Renumber all of the valid CB's so that
+ // they are linear increase
+ uint32_t CBid = 2 + mReservedBuffs;
+ for (uint32_t x = 0; x < numPtrs; ++x) {
+ if (krnl.constPtr[x].usesHardware) {
+ krnl.constPtr[x].cbNum = CBid++;
+ }
+ }
+ // Module-level constants that were given a hardware buffer by
+ // allocateGlobalCB() are visible to every kernel.
+ for (StringMap<constPtr>::iterator cpb = mConstMems.begin(),
+ cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+ if (cpb->second.usesHardware) {
+ krnl.constPtr.push_back(cpb->second);
+ }
+ }
+ // Record the 16-byte aligned extent needed in each hardware buffer.
+ for (uint32_t x = 0; x < krnl.constPtr.size(); ++x) {
+ constPtr &c = krnl.constPtr[x];
+ uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
+ if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
+ if ((c.size + c.offset) > krnl.constSizes[cbNum]) {
+ krnl.constSizes[cbNum] =
+ ((c.size + c.offset) + 15) & ~15;
+ }
+ } else {
+ krnl.constPtr[x].usesHardware = false;
+ }
+ }
+ return false;
+}
+
+/// \return the offset recorded for array \p a, or -1 when \p a is not a
+///         known array.
+int32_t AMDILGlobalManager::getArrayOffset(const llvm::StringRef &a) const {
+  StringMap<arraymem>::const_iterator entry = mArrayMems.find(a);
+  if (entry == mArrayMems.end()) {
+    return -1;
+  }
+  return entry->second.offset;
+}
+
+/// \return the offset recorded for module-level constant \p a, or -1 when
+///         \p a is not a known constant.
+int32_t AMDILGlobalManager::getConstOffset(const llvm::StringRef &a) const {
+  StringMap<constPtr>::const_iterator entry = mConstMems.find(a);
+  if (entry == mConstMems.end()) {
+    return -1;
+  }
+  return entry->second.offset;
+}
+
+/// \return the usesHardware flag of module-level constant \p name, or false
+///         when \p name is not a known constant.
+bool AMDILGlobalManager::getConstHWBit(const llvm::StringRef &name) const {
+  StringMap<constPtr>::const_iterator entry = mConstMems.find(name);
+  if (entry == mConstMems.end()) {
+    return false;
+  }
+  return entry->second.usesHardware;
+}
+
+// As of right now we only care about the required group size
+// so we can skip the variable encoding
+/// Build the kernelArg record described by an "sgv" global.
+///
+/// The record starts zeroed with both size triples set to the subtarget
+/// defaults. If \p G is a global variable with a string initializer, a
+/// triple following "RWG" overrides reqGroupSize and sets mHasRWG, and a
+/// triple following "RWR" overrides reqRegionSize and sets mHasRWR.
+/// \return the populated record (defaults only when nothing could be parsed).
+kernelArg AMDILGlobalManager::parseSGV(const GlobalValue *G) {
+  kernelArg result;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  memset(&result, 0, sizeof(result));
+  for (int dim = 0; dim < 3; ++dim) {
+    result.reqGroupSize[dim] = mSTM->getDefaultSize(dim);
+    result.reqRegionSize[dim] = mSTM->getDefaultSize(dim);
+  }
+  if (!GV || !GV->hasInitializer()) {
+    return result;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA || !CA->isString()) {
+    return result;
+  }
+  const std::string text = CA->getAsString();
+  // Required work-group size: "RWG<x>,<y>,<z>".
+  size_t found = text.find("RWG");
+  if (found != llvm::StringRef::npos) {
+    const std::string tail = text.substr(found + 3);
+    sscanf(tail.c_str(), "%d,%d,%d", &(result.reqGroupSize[0]),
+           &(result.reqGroupSize[1]),
+           &(result.reqGroupSize[2]));
+    result.mHasRWG = true;
+  }
+  // Required region size: "RWR<x>,<y>,<z>".
+  found = text.find("RWR");
+  if (found != llvm::StringRef::npos) {
+    const std::string tail = text.substr(found + 3);
+    sscanf(tail.c_str(), "%d,%d,%d", &(result.reqRegionSize[0]),
+           &(result.reqRegionSize[1]),
+           &(result.reqRegionSize[2]));
+    result.mHasRWR = true;
+  }
+  return result;
+}
+
+/// Build the localArg record described by an "lvgv" global.
+///
+/// Each operand of the array initializer that is a constant expression with
+/// at least one operand names a global; when that name is present in
+/// mArrayMems, a pointer to its arraymem entry is appended to the record.
+/// The record's name ends up as the last name visited ("" if none).
+localArg AMDILGlobalManager::parseLVGV(const GlobalValue *G) {
+  localArg result;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  result.name = "";
+  if (!GV || !GV->hasInitializer()) {
+    return result;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return result;
+  }
+  const size_t numOps = CA->getNumOperands();
+  for (size_t idx = 0; idx < numOps; ++idx) {
+    const Value *operand = CA->getOperand(idx);
+    const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(operand);
+    if (CE && CE->getNumOperands()) {
+      result.name = (*(CE->op_begin()))->getName();
+      if (mArrayMems.find(result.name) != mArrayMems.end()) {
+        result.local.push_back(&(mArrayMems[result.name]));
+      }
+    }
+  }
+  return result;
+}
+
+/// Parse an "llvm.constpointer.annotations.<kernel>" global.
+///
+/// The initializer is expected to be an array of {name, size} structs. For
+/// each well-formed entry a constPtr record (size rounded up to 16 bytes) is
+/// appended to the kernel named by the characters following the 30-character
+/// annotation prefix of \p G's name; the kernel entry is created on demand.
+/// The strings holding the argument names are added to mIgnoreStr (and
+/// removed from mConstMems if already recorded) so they are not emitted as
+/// constant data.
+///
+/// Globals without a usable initializer and malformed entries are skipped
+/// instead of being dereferenced.
+void AMDILGlobalManager::parseConstantPtrAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+  // getInitializer() must only be called on a variable that has one.
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t numOps = CA->getNumOperands();
+  for (uint32_t x = 0; x < numOps; ++x) {
+    const Value *V = CA->getOperand(x);
+    const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+    if (!CS) {
+      continue;
+    }
+    assert(CS->getNumOperands() == 2 && "There can only be 2"
+           " fields, a name and size");
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
+    const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
+    assert(nameField && "There must be a constant name field");
+    assert(sizeField && "There must be a constant size field");
+    // The asserts vanish in release builds; skip the entry rather than
+    // dereference a null pointer.
+    if (!nameField || !sizeField) {
+      continue;
+    }
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    const ConstantArray *nameArray = NULL;
+    if (nameGV && nameGV->hasInitializer()) {
+      nameArray = dyn_cast<ConstantArray>(nameGV->getInitializer());
+    }
+    if (!nameArray) {
+      continue;
+    }
+    // Lets add this string to the set of strings we should ignore processing
+    mIgnoreStr.insert(nameGV->getName());
+    if (mConstMems.find(nameGV->getName())
+        != mConstMems.end()) {
+      // If we already processed this string as a constant, lets remove it from
+      // the list of known constants. This way we don't process unneeded data
+      // and don't generate code/metadata for strings that are never used.
+      mConstMems.erase(mConstMems.find(nameGV->getName()));
+    } else {
+      mIgnoreStr.insert(CS->getOperand(0)->getName());
+    }
+    constPtr constAttr;
+    constAttr.name = nameArray->getAsString();
+    constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
+    constAttr.base = CS;
+    constAttr.isArgument = true;
+    constAttr.isArray = false;
+    constAttr.cbNum = 0;
+    constAttr.offset = 0;
+    constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
+    // Now that we have all our constant information,
+    // lets update the kernel
+    llvm::StringRef kernelName = G->getName().data() + 30;
+    kernel k;
+    if (mKernels.find(kernelName) != mKernels.end()) {
+      k = mKernels[kernelName];
+    } else {
+      k.curSize = 0;
+      k.curRSize = 0;
+      k.curHWSize = 0;
+      k.curHWRSize = 0;
+      k.constSize = 0;
+      k.lvgv = NULL;
+      k.sgv = NULL;
+      memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+    }
+    constAttr.cbNum = k.constPtr.size() + 2;
+    k.constPtr.push_back(constAttr);
+    mKernels[kernelName] = k;
+  }
+}
+
+/// Parse an "llvm.image.annotations.<kernel>" global.
+///
+/// The initializer is expected to be an array of two-field structs whose
+/// second field is an integer: 1 marks operand index i as a read-only image,
+/// 2 as a write-only image. The results are stored in the kernel named by
+/// the characters following the 23-character annotation prefix of \p G's
+/// name; the kernel entry is created on demand. The string referenced by
+/// the first field is added to mIgnoreStr, or removed from mConstMems if it
+/// was already recorded there.
+///
+/// Globals without a usable initializer are ignored, and entries whose
+/// second field is not a constant integer are skipped.
+void AMDILGlobalManager::parseImageAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  // getInitializer() must only be called on a variable that has one.
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  if (isa<GlobalValue>(CA)) {
+    return;
+  }
+  uint32_t e = CA->getNumOperands();
+  if (!e) {
+    return;
+  }
+  kernel k;
+  llvm::StringRef name = G->getName().data() + 23;
+  if (mKernels.find(name) != mKernels.end()) {
+    k = mKernels[name];
+  } else {
+    k.curSize = 0;
+    k.curRSize = 0;
+    k.curHWSize = 0;
+    k.curHWRSize = 0;
+    k.constSize = 0;
+    k.lvgv = NULL;
+    k.sgv = NULL;
+    memset(k.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+  }
+  for (uint32_t i = 0; i != e; ++i) {
+    const Value *V = CA->getOperand(i);
+    const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+    if (CS && CS->getNumOperands() == 2) {
+      if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
+          mConstMems.end()) {
+        // If we already processed this string as a constant, lets remove it
+        // from the list of known constants. This way we don't process unneeded
+        // data and don't generate code/metadata for strings that are never
+        // used.
+        mConstMems.erase(
+            mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
+      } else {
+        mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
+      }
+      const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
+      assert(CI && "Image type field must be a constant integer");
+      // The assert vanishes in release builds; skip instead of crashing.
+      if (!CI) {
+        continue;
+      }
+      uint32_t val = (uint32_t)CI->getZExtValue();
+      if (val == 1) {
+        k.readOnly.insert(i);
+      } else if (val == 2) {
+        k.writeOnly.insert(i);
+      } else {
+        assert(!"Unknown image type value!");
+      }
+    }
+  }
+  mKernels[name] = k;
+}
+
+/// Record \p GV in mArrayMems as a hardware-backed array at offset 0.
+/// \param GV        the global being recorded; its name is the map key.
+/// \param isRegion  stored verbatim in the entry's isRegion flag.
+/// The entry's vecSize is getTypeSize() of the variable's type (a NULL type
+/// is passed when \p GV is not a GlobalVariable).
+void AMDILGlobalManager::parseAutoArray(const GlobalValue *GV, bool isRegion) {
+  Type *Ty = NULL;
+  if (const GlobalVariable *G = dyn_cast<GlobalVariable>(GV)) {
+    Ty = G->getType();
+  }
+  arraymem entry;
+  entry.isHW = true;
+  entry.offset = 0;
+  entry.vecSize = getTypeSize(Ty, true);
+  entry.isRegion = isRegion;
+  mArrayMems[GV->getName()] = entry;
+}
+
+/// Record \p GV in mConstMems as a module-level constant array.
+/// The entry starts at offset 0 in buffer 0 with usesHardware cleared;
+/// allocateGlobalCB() assigns the final placement. Its size is
+/// getTypeSize() of the variable's type (a NULL type is passed when \p GV
+/// is not a GlobalVariable).
+void AMDILGlobalManager::parseConstantPtr(const GlobalValue *GV) {
+  const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+  Type *Ty = (G) ? G->getType() : NULL;
+  constPtr constAttr;
+  // Take the name from GV rather than G: G may be NULL (see the check
+  // above), while GV is the same object and is used for the map key below.
+  constAttr.name = GV->getName();
+  constAttr.size = getTypeSize(Ty, true);
+  constAttr.base = GV;
+  constAttr.isArgument = false;
+  constAttr.isArray = true;
+  constAttr.offset = 0;
+  constAttr.cbNum = 0;
+  constAttr.usesHardware = false;
+  mConstMems[GV->getName()] = constAttr;
+}
+
+/// Parse the "llvm.global.annotations" global: every operand of its array
+/// initializer is handed to parseKernelInformation().
+/// Does nothing when \p G is not a global variable, has no initializer, or
+/// the initializer is not a constant array.
+void AMDILGlobalManager::parseGlobalAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  // dyn_cast yields NULL for non-variables; check it like parseSGV() does.
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const Constant *CT = GV->getInitializer();
+  if (!CT || isa<GlobalValue>(CT)) {
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
+  if (!CA) {
+    return;
+  }
+  unsigned int nKernels = CA->getNumOperands();
+  for (unsigned int i = 0, e = nKernels; i != e; ++i) {
+    parseKernelInformation(CA->getOperand(i));
+  }
+}
+
+// Parse one entry of the global annotation array and store the result in
+// mKernels under the kernel's name.
+// V must be a ConstantStruct with exactly 5 operands, otherwise the call is
+// a no-op. Operand 0 names the kernel, operand 1 the SGV global, operand 3
+// the LVGV global; operands 2 and 4 are not read.
+// An existing mKernels entry is used as the starting point, so information
+// gathered earlier for the same kernel is preserved.
+// Side effect: the offset of every arraymem referenced by the kernel's LVGV
+// is assigned here.
+void AMDILGlobalManager::parseKernelInformation(const Value *V) {
+ if (isa<GlobalValue>(V)) {
+ return;
+ }
+ const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+ if (!CS) {
+ return;
+ }
+ uint32_t N = CS->getNumOperands();
+ if (N != 5) {
+ return;
+ }
+ kernel tmp;
+
+ // These four are assigned again below when the kernel is not yet known,
+ // and overwritten by the copy when it is.
+ tmp.curSize = 0;
+ tmp.curRSize = 0;
+ tmp.curHWSize = 0;
+ tmp.curHWRSize = 0;
+ // The first operand is always a pointer to the kernel.
+ const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
+ llvm::StringRef kernelName = "";
+ if (CV->getNumOperands()) {
+ kernelName = (*(CV->op_begin()))->getName();
+ }
+
+ // If we have images, then we have already created the kernel and we just need
+ // to get the kernel information.
+ if (mKernels.find(kernelName) != mKernels.end()) {
+ tmp = mKernels[kernelName];
+ } else {
+ tmp.curSize = 0;
+ tmp.curRSize = 0;
+ tmp.curHWSize = 0;
+ tmp.curHWRSize = 0;
+ tmp.constSize = 0;
+ tmp.lvgv = NULL;
+ tmp.sgv = NULL;
+ memset(tmp.constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+ }
+
+
+ // The second operand is SGV, there can only be one so we don't need to worry
+ // about parsing out multiple data points.
+ CV = dyn_cast<Constant>(CS->getOperand(1));
+
+ llvm::StringRef sgvName;
+ if (CV->getNumOperands()) {
+ sgvName = (*(CV->op_begin()))->getName();
+ }
+
+ // tmp.sgv points into mKernelArgs; it is only set when the SGV was parsed.
+ if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
+ tmp.sgv = &mKernelArgs[sgvName];
+ }
+ // The third operand is FGV, which is skipped
+ // The fourth operand is LVGV
+ // There can be multiple local arrays, so we
+ // need to handle each one separately
+ CV = dyn_cast<Constant>(CS->getOperand(3));
+ llvm::StringRef lvgvName = "";
+ if (CV->getNumOperands()) {
+ lvgvName = (*(CV->op_begin()))->getName();
+ }
+ if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
+ localArg *ptr = &mLocalArgs[lvgvName];
+ tmp.lvgv = ptr;
+ llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
+ // Place each array behind the previous one in the matching pool
+ // (region/local x hardware/software), advancing in 16-byte aligned steps.
+ for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
+ if ((*ib)->isRegion) {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp.curHWRSize;
+ tmp.curHWRSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp.curRSize;
+ tmp.curRSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ } else {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp.curHWSize;
+ tmp.curHWSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp.curSize;
+ tmp.curSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ }
+ }
+ }
+
+ // The fifth operand is NULL
+ mKernels[kernelName] = tmp;
+}
+
+/// \return the kernel record stored under \p name.
+/// \pre isKernel(name) holds; this is only checked by an assert.
+const kernel &AMDILGlobalManager::getKernel(const llvm::StringRef &name) const {
+  assert(isKernel(name) && "Must be a kernel to call getKernel");
+  return mKernels.find(name)->second;
+}
+
+/// \return true when a kernel record exists under \p name.
+bool AMDILGlobalManager::isKernel(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  return entry != mKernels.end();
+}
+
+/// \return true when kernel \p name exists and \p iID is in its write-only
+///         image set.
+bool AMDILGlobalManager::isWriteOnlyImage(const llvm::StringRef &name,
+                                          uint32_t iID) const {
+  const StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  return entry != mKernels.end() && entry->second.writeOnly.count(iID);
+}
+
+/// \return the number of write-only images of kernel \p name (0 when the
+///         kernel is unknown). When the environment variable
+///         GPU_DISABLE_RAW_UAV starts with '1', 8 is returned unconditionally.
+uint32_t
+AMDILGlobalManager::getNumWriteImages(const llvm::StringRef &name) const {
+  const char *disableRawUAV = getenv("GPU_DISABLE_RAW_UAV");
+  if (disableRawUAV && disableRawUAV[0] == '1') {
+    return 8;
+  }
+  const StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    return entry->second.writeOnly.size();
+  }
+  return 0;
+}
+
+/// \return true when kernel \p name exists and \p iID is in its read-only
+///         image set.
+bool AMDILGlobalManager::isReadOnlyImage(const llvm::StringRef &name,
+                                         uint32_t iID) const {
+  const StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  return entry != mKernels.end() && entry->second.readOnly.count(iID);
+}
+
+/// \return the mHasRWG flag of the SGV attached to kernel \p name; false
+///         when the kernel is unknown or has no SGV.
+bool AMDILGlobalManager::hasRWG(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return false;
+  }
+  const kernelArg *sgv = entry->second.sgv;
+  return sgv && sgv->mHasRWG;
+}
+
+/// \return the mHasRWR flag of the SGV attached to kernel \p name; false
+///         when the kernel is unknown or has no SGV.
+bool AMDILGlobalManager::hasRWR(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return false;
+  }
+  const kernelArg *sgv = entry->second.sgv;
+  return sgv && sgv->mHasRWR;
+}
+
+/// \return the product of the three required group sizes from the SGV of
+///         kernel \p name, or the product of the three subtarget default
+///         sizes when the kernel is unknown or has no SGV.
+uint32_t
+AMDILGlobalManager::getMaxGroupSize(const llvm::StringRef &name) const {
+  const kernelArg *sgv = NULL;
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    sgv = entry->second.sgv;
+  }
+  if (sgv) {
+    return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+  }
+  return mSTM->getDefaultSize(0) *
+         mSTM->getDefaultSize(1) *
+         mSTM->getDefaultSize(2);
+}
+
+/// \return the product of the three required region sizes from the SGV of
+///         kernel \p name, or the product of the three subtarget default
+///         sizes when the kernel is unknown or has no SGV.
+uint32_t
+AMDILGlobalManager::getMaxRegionSize(const llvm::StringRef &name) const {
+  const kernelArg *sgv = NULL;
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    sgv = entry->second.sgv;
+  }
+  if (sgv) {
+    return sgv->reqRegionSize[0] *
+           sgv->reqRegionSize[1] *
+           sgv->reqRegionSize[2];
+  }
+  return mSTM->getDefaultSize(0) *
+         mSTM->getDefaultSize(1) *
+         mSTM->getDefaultSize(2);
+}
+
+/// \return curRSize of kernel \p name, or 0 when the kernel is unknown.
+uint32_t AMDILGlobalManager::getRegionSize(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return 0;
+  }
+  return entry->second.curRSize;
+}
+
+/// \return curSize of kernel \p name, or 0 when the kernel is unknown.
+uint32_t AMDILGlobalManager::getLocalSize(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return 0;
+  }
+  return entry->second.curSize;
+}
+
+/// \return constSize of kernel \p name, or 0 when the kernel is unknown.
+uint32_t AMDILGlobalManager::getConstSize(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return 0;
+  }
+  return entry->second.constSize;
+}
+
+/// \return curHWRSize of kernel \p name, or 0 when the kernel is unknown.
+uint32_t
+AMDILGlobalManager::getHWRegionSize(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return 0;
+  }
+  return entry->second.curHWRSize;
+}
+
+/// \return curHWSize of kernel \p name, or 0 when the kernel is unknown.
+uint32_t AMDILGlobalManager::getHWLocalSize(const llvm::StringRef &name) const {
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry == mKernels.end()) {
+    return 0;
+  }
+  return entry->second.curHWSize;
+}
+
+/// \return the id stored for \p arg in mArgIDMap, or -1 when \p arg has no
+///         entry.
+int32_t AMDILGlobalManager::getArgID(const Argument *arg) {
+  DenseMap<const Argument *, int32_t>::iterator entry = mArgIDMap.find(arg);
+  if (entry == mArgIDMap.end()) {
+    return -1;
+  }
+  return entry->second;
+}
+
+
+/// Query the work-group size of kernel \p name.
+/// \param dim  0, 1 or 2 selects one dimension; 3 selects the product of all
+///             three; any other value yields 1.
+/// Values come from the kernel's SGV (reqGroupSize) when it has one,
+/// otherwise from the subtarget defaults.
+uint32_t
+AMDILGlobalManager::getLocal(const llvm::StringRef &name, uint32_t dim) const {
+  const kernelArg *sgv = NULL;
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    sgv = entry->second.sgv;
+  }
+  if (dim < 3) {
+    if (sgv) {
+      return sgv->reqGroupSize[dim];
+    }
+    return mSTM->getDefaultSize(dim);
+  }
+  if (dim == 3) {
+    if (sgv) {
+      return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+    }
+    return mSTM->getDefaultSize(0) *
+           mSTM->getDefaultSize(1) *
+           mSTM->getDefaultSize(2);
+  }
+  return 1;
+}
+
+/// Query the region size of kernel \p name.
+/// \param dim  0, 1 or 2 selects one dimension; 3 selects the product of all
+///             three; any other value yields 1.
+/// Values come from the kernel's SGV (reqRegionSize) when it has one,
+/// otherwise from the subtarget defaults.
+uint32_t
+AMDILGlobalManager::getRegion(const llvm::StringRef &name, uint32_t dim) const {
+  const kernelArg *sgv = NULL;
+  StringMap<kernel>::const_iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    sgv = entry->second.sgv;
+  }
+  if (dim < 3) {
+    if (sgv) {
+      return sgv->reqRegionSize[dim];
+    }
+    return mSTM->getDefaultSize(dim);
+  }
+  if (dim == 3) {
+    if (sgv) {
+      return sgv->reqRegionSize[0] *
+             sgv->reqRegionSize[1] *
+             sgv->reqRegionSize[2];
+    }
+    return mSTM->getDefaultSize(0) *
+           mSTM->getDefaultSize(1) *
+           mSTM->getDefaultSize(2);
+  }
+  return 1;
+}
+
+/// \return iterator to the first module-level constant in mConstMems.
+StringMap<constPtr>::iterator AMDILGlobalManager::consts_begin() {
+  StringMap<constPtr>::iterator first = mConstMems.begin();
+  return first;
+}
+
+
+/// \return the past-the-end iterator of mConstMems.
+StringMap<constPtr>::iterator AMDILGlobalManager::consts_end() {
+  StringMap<constPtr>::iterator last = mConstMems.end();
+  return last;
+}
+
+/// \return true when \p S is present in the byte-store set.
+bool AMDILGlobalManager::byteStoreExists(StringRef S) const {
+  const bool missing = (mByteStore.find(S) == mByteStore.end());
+  return !missing;
+}
+
+/// \return the usesHardware flag of constant pointer \p arg in \p krnl, or
+///         false when the kernel has no such constant pointer.
+bool AMDILGlobalManager::usesHWConstant(const kernel &krnl,
+                                        const llvm::StringRef &arg) {
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return false;
+  }
+  return record->usesHardware;
+}
+
+/// \return the size of constant pointer \p arg in \p krnl, or 0 when the
+///         kernel has no such constant pointer.
+uint32_t AMDILGlobalManager::getConstPtrSize(const kernel &krnl,
+                                             const llvm::StringRef &arg)
+{
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return 0;
+  }
+  return record->size;
+}
+
+/// \return the offset of constant pointer \p arg in \p krnl, or 0 when the
+///         kernel has no such constant pointer.
+uint32_t AMDILGlobalManager::getConstPtrOff(const kernel &krnl,
+                                            const llvm::StringRef &arg)
+{
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return 0;
+  }
+  return record->offset;
+}
+
+/// \return the constant buffer number of constant pointer \p arg in
+///         \p krnl, or 0 when the kernel has no such constant pointer.
+uint32_t AMDILGlobalManager::getConstPtrCB(const kernel &krnl,
+                                           const llvm::StringRef &arg)
+{
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return 0;
+  }
+  return record->cbNum;
+}
+
+/// Assign an offset to every entry of \p MF's constant pool.
+///
+/// For each pool entry an (offset, constant) pair is appended to
+/// \p krnl.CPOffsets and the manager-wide running offset mCurrentCPOffset is
+/// advanced by the entry's type size rounded up to a multiple of 16 bytes.
+/// Does nothing when the function has no constant pool.
+void AMDILGlobalManager::calculateCPOffsets(const MachineFunction *MF,
+                                            kernel &krnl)
+{
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  if (!MCP) {
+    return;
+  }
+  // Bind by reference: the pool's vector is only read, so copying it is
+  // wasted work.
+  const std::vector<MachineConstantPoolEntry> &consts = MCP->getConstants();
+  for (size_t x = 0, numConsts = consts.size(); x < numConsts; ++x) {
+    const Constant *value = consts[x].Val.ConstVal;
+    // Let make_pair deduce its types; spelling out the template arguments
+    // does not compile for lvalue arguments under C++11.
+    krnl.CPOffsets.push_back(
+        std::make_pair(static_cast<uint32_t>(mCurrentCPOffset), value));
+    size_t curSize = getTypeSize(value->getType(), true);
+    // Align the size to the vector boundary
+    curSize = (curSize + 15) & (~15);
+    mCurrentCPOffset += curSize;
+  }
+}
+
+/// \return the isArray flag of constant pointer \p arg in \p krnl, or false
+///         when the kernel has no such constant pointer.
+bool AMDILGlobalManager::isConstPtrArray(const kernel &krnl,
+                                         const llvm::StringRef &arg) {
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return false;
+  }
+  return record->isArray;
+}
+
+/// \return the isArgument flag of constant pointer \p arg in \p krnl, or
+///         false when the kernel has no such constant pointer.
+bool AMDILGlobalManager::isConstPtrArgument(const kernel &krnl,
+                                            const llvm::StringRef &arg)
+{
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return false;
+  }
+  return record->isArgument;
+}
+
+/// \return the base value of constant pointer \p arg in \p krnl, or NULL
+///         when the kernel has no such constant pointer.
+const Value *AMDILGlobalManager::getConstPtrValue(const kernel &krnl,
+                                                  const llvm::StringRef &arg) {
+  const constPtr *record = getConstPtr(krnl, arg);
+  if (!record) {
+    return NULL;
+  }
+  return record->base;
+}
+
+static void
+dumpZeroElements(const StructType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(const IntegerType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(const ArrayType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(const VectorType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(const Type * const T, OSTREAM_TYPE &O, bool asBytes);
+
+// Emit the zero-initializer text for a value of type T to O.
+// With asBytes set, one ":0" is written per byte; otherwise one ":0" per
+// scalar element. Integer, struct, array and vector types are forwarded to
+// the type-specific overloads. A NULL type produces no output.
+void dumpZeroElements(const Type * const T, OSTREAM_TYPE &O, bool asBytes) {
+ if (!T) {
+ return;
+ }
+ switch(T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ // No break: when asserts are compiled out, control falls through and
+ // these types are emitted like a double.
+ default:
+ // Any type id without its own case is also treated as 8 bytes wide.
+ case Type::DoubleTyID:
+ if (asBytes) {
+ O << ":0:0:0:0:0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ break;
+ // Emitted as 4 bytes wide.
+ case Type::FloatTyID:
+ case Type::PointerTyID:
+ case Type::FunctionTyID:
+ if (asBytes) {
+ O << ":0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ break;
+ case Type::IntegerTyID:
+ dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
+ break;
+ case Type::StructTyID:
+ {
+ const StructType *ST = cast<StructType>(T);
+ if (!ST->isOpaque()) {
+ dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
+ } else { // A pre-LLVM 3.0 opaque type
+ if (asBytes) {
+ O << ":0:0:0:0";
+ } else {
+ O << ":0";
+ }
+ }
+ }
+ break;
+ case Type::ArrayTyID:
+ dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
+ break;
+ case Type::VectorTyID:
+ dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
+ break;
+ };
+}
+
+/// Emit zero-initializer text for every element of struct type \p ST, in
+/// declaration order. A NULL type produces no output.
+void
+dumpZeroElements(const StructType * const ST, OSTREAM_TYPE &O, bool asBytes) {
+  if (!ST) {
+    return;
+  }
+  for (StructType::element_iterator elem = ST->element_begin(),
+       elemEnd = ST->element_end(); elem != elemEnd; ++elem) {
+    Type *elemTy = *elem;
+    dumpZeroElements(elemTy, O, asBytes);
+  }
+}
+
+/// Write one ":0" per whole byte of integer type \p IT to \p O.
+/// Nothing is written when \p asBytes is false or when the type is narrower
+/// than eight bits.
+void
+dumpZeroElements(const IntegerType * const IT, OSTREAM_TYPE &O, bool asBytes) {
+  if (!asBytes) {
+    return;
+  }
+  unsigned bytesLeft = IT->getBitWidth() / 8;
+  while (bytesLeft != 0) {
+    O << ":0";
+    --bytesLeft;
+  }
+}
+
+/// Write the zero initializer of the element type of \p AT once per array
+/// element.
+void
+dumpZeroElements(const ArrayType * const AT, OSTREAM_TYPE &O, bool asBytes) {
+  const Type * const elemTy = AT->getElementType();
+  for (size_t remaining = AT->getNumElements(); remaining != 0; --remaining) {
+    dumpZeroElements(elemTy, O, asBytes);
+  }
+}
+
+/// Write the zero initializer of the element type of \p VT once per vector
+/// element.
+void
+dumpZeroElements(const VectorType * const VT, OSTREAM_TYPE &O, bool asBytes) {
+  const Type * const elemTy = VT->getElementType();
+  for (size_t remaining = VT->getNumElements(); remaining != 0; --remaining) {
+    dumpZeroElements(elemTy, O, asBytes);
+  }
+}
+
+/// Write \p count bytes starting at \p bytes to \p O, each as ":" followed by
+/// the byte value in hexadecimal, in the order the bytes are stored in memory.
+static void writeColonHexBytes(OSTREAM_TYPE &O, const char *bytes,
+    unsigned count) {
+  for (unsigned idx = 0; idx < count; ++idx) {
+    O << ":";
+    O.write_hex((unsigned)bytes[idx] & 0xFF);
+  }
+}
+
+/// Write the value of constant \p CAval to \p O as ":"-prefixed hexadecimal
+/// fields.
+///
+/// With \p asBytes false each scalar becomes one field holding its bit
+/// pattern; with \p asBytes true each scalar becomes one field per byte, read
+/// from the host representation of the value.  Vectors, structs and arrays
+/// are printed operand by operand.  Zero aggregates print zeros for their
+/// type.  Null pointers, constant expressions and undef values print ":0".
+/// Any other kind of constant trips the assertion at the end.
+void AMDILGlobalManager::printConstantValue(const Constant *CAval,
+    OSTREAM_TYPE &O, bool asBytes) {
+  if (const ConstantFP *fpConst = dyn_cast<ConstantFP>(CAval)) {
+    const APFloat &apf = fpConst->getValueAPF();
+    if (&apf.getSemantics() == &APFloat::IEEEdouble) {
+      union {
+        double d;
+        uint64_t l;
+        char c[8];
+      } bits;
+      bits.d = apf.convertToDouble();
+      if (asBytes) {
+        writeColonHexBytes(O, bits.c, 8);
+      } else {
+        O << ":";
+        O.write_hex(bits.l);
+      }
+    } else {
+      // Everything that is not an IEEE double is converted as a float.
+      union {
+        float f;
+        uint32_t u;
+        char c[4];
+      } bits;
+      bits.f = apf.convertToFloat();
+      if (asBytes) {
+        writeColonHexBytes(O, bits.c, 4);
+      } else {
+        O << ":";
+        O.write_hex(bits.u);
+      }
+    }
+    return;
+  }
+
+  if (const ConstantInt *intConst = dyn_cast<ConstantInt>(CAval)) {
+    const uint64_t zVal = intConst->getValue().getZExtValue();
+    if (!asBytes) {
+      O << ":";
+      O.write_hex(zVal);
+      return;
+    }
+    switch (intConst->getBitWidth()) {
+      case 8:
+        O << ":";
+        O.write_hex(zVal & 0xFF);
+        break;
+      case 16:
+        {
+          union {
+            uint16_t s;
+            char c[2];
+          } bits;
+          bits.s = (uint16_t)zVal;
+          writeColonHexBytes(O, bits.c, 2);
+        }
+        break;
+      case 32:
+        {
+          union {
+            uint32_t i;
+            char c[4];
+          } bits;
+          bits.i = (uint32_t)zVal;
+          writeColonHexBytes(O, bits.c, 4);
+        }
+        break;
+      default:
+        {
+          // Every other width is printed as eight bytes.
+          union {
+            uint64_t l;
+            char c[8];
+          } bits;
+          bits.l = zVal;
+          writeColonHexBytes(O, bits.c, 8);
+        }
+        break;
+    }
+    return;
+  }
+
+  if (const ConstantVector *vecConst = dyn_cast<ConstantVector>(CAval)) {
+    const int lastIdx = vecConst->getNumOperands() - 1;
+    int idx = 0;
+    while (idx < lastIdx) {
+      printConstantValue(vecConst->getOperand(idx), O, asBytes);
+      ++idx;
+    }
+    // The final operand is printed outside the loop.
+    printConstantValue(vecConst->getOperand(idx), O, asBytes);
+    return;
+  }
+
+  if (const ConstantStruct *structConst = dyn_cast<ConstantStruct>(CAval)) {
+    const int numOps = structConst->getNumOperands();
+    for (int idx = 0; idx < numOps; ++idx) {
+      printConstantValue(structConst->getOperand(idx), O, asBytes);
+    }
+    return;
+  }
+
+  if (const ConstantAggregateZero *zeroConst
+      = dyn_cast<ConstantAggregateZero>(CAval)) {
+    const int numOps = zeroConst->getNumOperands();
+    if (numOps > 0) {
+      for (int idx = 0; idx < numOps; ++idx) {
+        printConstantValue((llvm::Constant *)zeroConst->getOperand(idx),
+            O, asBytes);
+      }
+    } else if (asBytes) {
+      dumpZeroElements(CAval->getType(), O, asBytes);
+    } else {
+      // One ":0" per element, and at least one even for an empty type.
+      int remaining = getNumElements(CAval->getType())-1;
+      do {
+        O << ":0";
+      } while (remaining-- > 0);
+    }
+    return;
+  }
+
+  if (const ConstantArray *arrConst = dyn_cast<ConstantArray>(CAval)) {
+    const int numOps = arrConst->getNumOperands();
+    for (int idx = 0; idx < numOps; ++idx) {
+      printConstantValue(arrConst->getOperand(idx), O, asBytes);
+    }
+    return;
+  }
+
+  if (isa<ConstantPointerNull>(CAval)
+      || isa<ConstantExpr>(CAval)
+      || isa<UndefValue>(CAval)) {
+    O << ":0";
+    return;
+  }
+
+  assert(0 && "Hit condition which was not expected");
+}
+
+/// Return true when \p T is a struct type, or a pointer, array or vector type
+/// that leads to a struct type through its pointee/element types.  A null
+/// type yields false.
+static bool isStruct(Type * const T)
+{
+  Type *cur = T;
+  while (cur) {
+    switch (cur->getTypeID()) {
+      case Type::StructTyID:
+        return true;
+      case Type::PointerTyID:
+        // Look through the pointer at the type it points to.
+        cur = cur->getContainedType(0);
+        break;
+      case Type::ArrayTyID:
+      case Type::VectorTyID:
+        cur = cast<SequentialType>(cur)->getElementType();
+        break;
+      default:
+        return false;
+    };
+  }
+  return false;
+}
+
+/// Write the data section for constant buffer \p id to \p O.
+///
+/// The section opens with ";#DATASTART" and closes with ";#DATAEND".  For
+/// buffer 0 the header carries the total size (the constants assigned to the
+/// buffer plus mCurrentCPOffset), and the constant-pool entries of every
+/// kernel are written first.  For any other buffer the header and footer
+/// also carry the buffer id.  Each entry is one line of the form
+/// ";#<type name>:<offset>:<size><values>".
+///
+/// \param O  stream that receives the text.
+/// \param km kernel manager used to obtain the type names.
+/// \param id constant buffer number to dump.
+void AMDILGlobalManager::dumpDataToCB(OSTREAM_TYPE &O, AMDILKernelManager *km,
+    uint32_t id) {
+  // Sum the sizes of the constants in this buffer, each rounded up to a
+  // multiple of 16.
+  uint32_t size = 0;
+  for (StringMap<constPtr>::iterator cmb = consts_begin(),
+      cme = consts_end(); cmb != cme; ++cmb) {
+    if (id == cmb->second.cbNum) {
+      size += (cmb->second.size + 15) & (~15);
+    }
+  }
+  if (id == 0) {
+    O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
+    if (mCurrentCPOffset) {
+      for (StringMap<kernel>::iterator kcpb = mKernels.begin(),
+          kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
+        const kernel& k = kcpb->second;
+        size_t numConsts = k.CPOffsets.size();
+        for (size_t x = 0; x < numConsts; ++x) {
+          size_t offset = k.CPOffsets[x].first;
+          const Constant *C = k.CPOffsets[x].second;
+          Type *Ty = C->getType();
+          // Struct data is sized and printed byte-wise, the rest per element.
+          const bool asBytes = isStruct(Ty);
+          size_t entrySize = (asBytes ? getTypeSize(Ty, true)
+              : getNumElements(Ty));
+          O << ";#" << km->getTypeName(Ty, symTab) << ":";
+          O << offset << ":" << entrySize ;
+          printConstantValue(C, O, asBytes);
+          O << "\n";
+        }
+      }
+    }
+  } else {
+    O << ";#DATASTART:" << id << ":" << size << "\n";
+  }
+
+  for (StringMap<constPtr>::iterator cmb = consts_begin(), cme = consts_end();
+      cmb != cme; ++cmb) {
+    if (cmb->second.cbNum != id) {
+      continue;
+    }
+    const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
+    if (!G) {
+      // The type and initializer both come from the global variable, so an
+      // entry with any other kind of base cannot be printed.  Skip it
+      // instead of dereferencing a null pointer.
+      assert(0 && "Constant pointer base is not a global variable!");
+      continue;
+    }
+    Type *Ty = G->getType();
+    size_t offset = cmb->second.offset;
+    // getInitializer() must only be called when an initializer exists.
+    const Constant *C = G->hasInitializer() ? G->getInitializer() : NULL;
+    const bool asBytes = isStruct(Ty);
+    size_t entrySize = (asBytes
+        ? getTypeSize(Ty, true)
+        : getNumElements(Ty));
+    O << ";#" << km->getTypeName(Ty, symTab) << ":";
+    if (!id) {
+      // Buffer 0 entries are placed after the constant pool data.
+      O << (offset + mCurrentCPOffset) << ":" << entrySize;
+    } else {
+      O << offset << ":" << entrySize;
+    }
+    if (C) {
+      printConstantValue(C, O, asBytes);
+    } else {
+      assert(0 && "Cannot have a constant pointer"
+          " without an initializer!");
+    }
+    O <<"\n";
+  }
+  if (id == 0) {
+    O << ";#DATAEND\n";
+  } else {
+    O << ";#DATAEND:" << id << "\n";
+  }
+}
+
+/// Write one data section per constant buffer in use to \p O.
+///
+/// The buffer numbers are collected from the global constants; buffer 0 is
+/// added whenever mCurrentCPOffset is non-zero.  Nothing is written when
+/// mConstMems is empty and mCurrentCPOffset is zero.
+void
+AMDILGlobalManager::dumpDataSection(OSTREAM_TYPE &O, AMDILKernelManager *km) {
+  if (mConstMems.empty() && !mCurrentCPOffset) {
+    return;
+  }
+  typedef llvm::DenseSet<uint32_t> BufferIdSet;
+  BufferIdSet bufferIds;
+  for (StringMap<constPtr>::iterator it = consts_begin(), end = consts_end();
+      it != end; ++it) {
+    bufferIds.insert(it->second.cbNum);
+  }
+  if (mCurrentCPOffset) {
+    bufferIds.insert(0);
+  }
+  for (BufferIdSet::iterator it = bufferIds.begin(), end = bufferIds.end();
+      it != end; ++it) {
+    dumpDataToCB(O, km, *it);
+  }
+}
+
+/// Return the ID of \p func, creating one on first use.
+/// A function with a name is handled by the name-based overload.  An unnamed
+/// one is keyed by its pointer and numbered after the reserved IDs and every
+/// ID handed out so far by either overload.
+uint32_t AMDILGlobalManager::getOrCreateFunctionID(const GlobalValue* func) {
+  if (func->getName().size()) {
+    return getOrCreateFunctionID(func->getName());
+  }
+  llvm::DenseMap<const GlobalValue*, uint32_t>::iterator known
+    = mFuncPtrNames.find(func);
+  if (known != mFuncPtrNames.end()) {
+    return known->second;
+  }
+  const uint32_t freshId
+    = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
+  mFuncPtrNames[func] = freshId;
+  return freshId;
+}
+/// Return the ID registered for the function name \p func, creating one on
+/// first use.  New IDs are numbered after the reserved IDs and every ID
+/// handed out so far by either overload.
+uint32_t AMDILGlobalManager::getOrCreateFunctionID(const std::string &func) {
+  llvm::StringMap<uint32_t>::iterator known = mFuncNames.find(func);
+  if (known != mFuncNames.end()) {
+    return known->second;
+  }
+  const uint32_t freshId
+    = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
+  mFuncNames[func] = freshId;
+  return freshId;
+}
diff --git a/src/gallium/drivers/radeon/AMDILGlobalManager.h b/src/gallium/drivers/radeon/AMDILGlobalManager.h
new file mode 100644
index 00000000000..2d12ed2d5de
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILGlobalManager.h
@@ -0,0 +1,294 @@
+// ==-----------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+// ==-----------------------------------------------------------------------===//
+// @file AMDILGlobalManager.h
+// Class that handles parsing and storing global variables that are relevant to
+// the compilation of the module.
+
+
+#ifndef _AMDILGLOBALMANAGER_H_
+#define _AMDILGLOBALMANAGER_H_
+
+#include "AMDIL.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <set>
+#define CB_BASE_OFFSET 2
+
+namespace llvm {
+
+class PointerType;
+class AMDILKernelManager;
+class AMDILSubtarget;
+class TypeSymbolTable;
+class Argument;
+class GlobalValue;
+class MachineFunction;
+
+/// Structure that holds information for a single local/region address array.
+typedef struct _arrayMemRec {
+ uint32_t vecSize; // size of each vector
+ uint32_t offset; // offset into the memory section
+ bool isHW; // flag to specify if HW is used or SW is used
+ bool isRegion; // flag to specify if GDS is used or not
+} arraymem;
+
+/// Structure that holds information for all local/region address
+/// arrays in the kernel.
+typedef struct _localArgRec {
+ llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS> local; // pointers to the per-array records
+ std::string name; // Kernel Name
+} localArg;
+
+/// Structure that holds information about a constant address
+/// space pointer that is a kernel argument.
+typedef struct _constPtrRec {
+ const Value *base; // Value the pointer originates from
+ uint32_t size; // size of the data; rounded up to a multiple of 16 when a buffer is sized
+ uint32_t offset; // offset written to the data section
+ uint32_t cbNum; // value of 0 means that it does not use hw CB
+ bool isArray;
+ bool isArgument;
+ bool usesHardware;
+ std::string name;
+} constPtr;
+
+/// Structure that holds information for each kernel argument.
+typedef struct _kernelArgRec {
+ uint32_t reqGroupSize[3]; // presumably one entry per dimension -- TODO confirm
+ uint32_t reqRegionSize[3]; // presumably one entry per dimension -- TODO confirm
+ llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo;
+ bool mHasRWG; // NOTE(review): looks like the flag behind hasRWG() -- confirm
+ bool mHasRWR; // NOTE(review): looks like the flag behind hasRWR() -- confirm
+} kernelArg;
+
+/// Structure that holds information for each kernel.
+typedef struct _kernelRec {
+ mutable uint32_t curSize; // the four cur* counters are mutable, so they can change on a const kernel
+ mutable uint32_t curRSize;
+ mutable uint32_t curHWSize;
+ mutable uint32_t curHWRSize;
+ uint32_t constSize;
+ kernelArg *sgv;
+ localArg *lvgv;
+ llvm::SmallVector<struct _constPtrRec, DEFAULT_VEC_SLOTS> constPtr; // constant pointer records of this kernel
+ uint32_t constSizes[HW_MAX_NUM_CB]; // presumably one size per hardware constant buffer -- TODO confirm
+ llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly;
+ llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly;
+ llvm::SmallVector<std::pair<uint32_t, const Constant *>,
+ DEFAULT_VEC_SLOTS> CPOffsets; // (offset, constant) pairs written to the data section of buffer 0
+} kernel;
+
+class AMDILGlobalManager { // Parses and stores the global variable information of a module.
+public:
+ AMDILGlobalManager(bool debugMode = false);
+ ~AMDILGlobalManager();
+
+ /// Process the given module and parse out the global variable metadata passed
+ /// down from the frontend-compiler
+ void processModule(const Module &MF, const AMDILTargetMachine* mTM);
+
+ /// Returns whether the current name is the name of a kernel function or a
+ /// normal function
+ bool isKernel(const llvm::StringRef &name) const;
+
+ /// Returns true if the image ID corresponds to a read only image.
+ bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Returns true if the image ID corresponds to a write only image.
+ bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Returns the number of write only images for the kernel.
+ uint32_t getNumWriteImages(const llvm::StringRef &name) const;
+
+ /// Gets the group size of the kernel for the given dimension.
+ uint32_t getLocal(const llvm::StringRef &name, uint32_t dim) const;
+
+ /// Gets the region size of the kernel for the given dimension.
+ uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
+
+ /// Get the region memory size in 1d for the given function/kernel.
+ uint32_t getRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the local memory size in 1d for the given function/kernel.
+ uint32_t getLocalSize(const llvm::StringRef &name) const;
+
+ /// Get the max group size in 1D for the given function/kernel.
+ uint32_t getMaxGroupSize(const llvm::StringRef &name) const;
+
+ /// Get the max region size in 1D for the given function/kernel.
+ uint32_t getMaxRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the constant memory size in 1d for the given function/kernel.
+ uint32_t getConstSize(const llvm::StringRef &name) const;
+
+ /// Get the HW local size in 1d for the given function/kernel. We need to
+ /// separate SW local and HW local for the case where some local memory is
+ /// emulated in global and some is using the hardware features. The main
+ /// problem is that in OpenCL 1.0/1.1 cl_khr_byte_addressable_store allows
+ /// these actions to happen on all memory spaces, but the hardware can only
+ /// write byte address stores to UAV and LDS, not GDS or Stack.
+ uint32_t getHWLocalSize(const llvm::StringRef &name) const;
+ uint32_t getHWRegionSize(const llvm::StringRef &name) const;
+
+ /// Get the offset of the array for the kernel.
+ int32_t getArrayOffset(const llvm::StringRef &name) const;
+
+ /// Get the offset of the const memory for the kernel.
+ int32_t getConstOffset(const llvm::StringRef &name) const;
+
+ /// Get the boolean value if this particular constant uses HW or not.
+ bool getConstHWBit(const llvm::StringRef &name) const;
+
+ /// Get a reference to the kernel metadata information for the given function
+ /// name.
+ const kernel &getKernel(const llvm::StringRef &name) const;
+
+ /// Returns whether a reqd_workgroup_size attribute has been used or not.
+ bool hasRWG(const llvm::StringRef &name) const;
+
+ /// Returns whether a reqd_workregion_size attribute has been used or not.
+ bool hasRWR(const llvm::StringRef &name) const;
+
+
+ /// Dump the data sections of all constant buffers in use to the output
+ /// stream; the kernel manager supplies the type names.
+ void dumpDataSection(OSTREAM_TYPE &O, AMDILKernelManager *km);
+
+ /// Iterate through the constants that are global to the compilation unit.
+ StringMap<constPtr>::iterator consts_begin();
+ StringMap<constPtr>::iterator consts_end();
+
+ /// Query if the kernel has a byte store.
+ bool byteStoreExists(llvm::StringRef S) const;
+
+ /// Query if the kernel and argument uses hardware constant memory.
+ bool usesHWConstant(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query if the constant pointer is an argument.
+ bool isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query if the constant pointer is an array that is globally scoped.
+ bool isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the size of the constant pointer.
+ uint32_t getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the offset of the constant pointer.
+ uint32_t getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the constant buffer number for a constant pointer.
+ uint32_t getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Query the Value* that the constant pointer originates from; NULL is
+ /// returned when the pointer is not known for the kernel.
+ const Value *getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg);
+
+ /// Get the ID of the argument.
+ int32_t getArgID(const Argument *arg);
+
+ /// Get the unique function ID for the specific function name and create a new
+ /// unique ID if it is not found.
+ uint32_t getOrCreateFunctionID(const GlobalValue* func);
+ uint32_t getOrCreateFunctionID(const std::string& func);
+
+ /// Calculate the offsets of the constant pool for the given kernel and
+ /// machine function.
+ void calculateCPOffsets(const MachineFunction *MF, kernel &krnl);
+
+ /// Print the global manager to the output stream.
+ void print(llvm::raw_ostream& O);
+
+ /// Dump the global manager to the output stream - debug use.
+ void dump();
+
+private:
+ /// Various functions that parse global value information and store them in
+ /// the global manager. This approach is used instead of dynamic parsing as it
+ /// might require more space, but should allow caching of data that gets
+ /// requested multiple times.
+ kernelArg parseSGV(const GlobalValue *GV);
+ localArg parseLVGV(const GlobalValue *GV);
+ void parseGlobalAnnotate(const GlobalValue *G);
+ void parseImageAnnotate(const GlobalValue *G);
+ void parseConstantPtrAnnotate(const GlobalValue *G);
+ void printConstantValue(const Constant *CAval,
+ OSTREAM_TYPE& O,
+ bool asByte); // asByte: print one hexadecimal field per byte instead of one per value
+ void parseKernelInformation(const Value *V);
+ void parseAutoArray(const GlobalValue *G, bool isRegion);
+ void parseConstantPtr(const GlobalValue *G);
+ void allocateGlobalCB();
+ void dumpDataToCB(OSTREAM_TYPE &O, AMDILKernelManager *km, uint32_t id); // writes the data section of constant buffer id
+ bool checkConstPtrsUseHW(Module::const_iterator *F);
+
+ llvm::StringMap<arraymem> mArrayMems;
+ llvm::StringMap<localArg> mLocalArgs;
+ llvm::StringMap<kernelArg> mKernelArgs;
+ llvm::StringMap<kernel> mKernels; // per-kernel records; their CPOffsets are written by dumpDataToCB
+ llvm::StringMap<constPtr> mConstMems; // when empty and mCurrentCPOffset is 0, dumpDataSection writes nothing
+ llvm::StringMap<uint32_t> mFuncNames; // function name -> ID, filled by getOrCreateFunctionID
+ llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames; // unnamed function -> ID, filled by getOrCreateFunctionID
+ llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;
+ std::set<llvm::StringRef> mByteStore;
+ std::set<llvm::StringRef> mIgnoreStr;
+ llvm::DenseMap<const Argument *, int32_t> mArgIDMap;
+ const char *symTab; // passed to AMDILKernelManager::getTypeName when dumping data
+ const AMDILSubtarget *mSTM;
+ size_t mOffset;
+ uint32_t mReservedBuffs;
+ uint32_t mCurrentCPOffset; // added to the size and offsets of buffer 0; presumably the constant pool size -- TODO confirm
+ bool mDebugMode;
+};
+} // namespace llvm
+#endif // _AMDILGLOBALMANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.cpp b/src/gallium/drivers/radeon/AMDILIOExpansion.cpp
new file mode 100644
index 00000000000..061145f73e8
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIOExpansion.cpp
@@ -0,0 +1,1216 @@
+//===----------- AMDILIOExpansion.cpp - IO Expansion Pass -----------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are
+// not expanded earlier in the pass pipeline because any pass that runs before
+// this one may assume that it is able to generate a load/store instruction.
+// A pass may therefore only run after this one if it generates no load/store
+// instructions.
+//===----------------------------------------------------------------------===//
+#include "AMDILIOExpansion.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+char AMDILIOExpansion::ID = 0;
+namespace llvm {
+  /// Create the IO expansion pass by asking the device of the AMDIL subtarget
+  /// of \p TM for it, forwarding \p TM and \p OptLevel.
+  FunctionPass*
+  createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+  {
+    const AMDILSubtarget &subtarget = TM.getSubtarget<AMDILSubtarget>();
+    return subtarget.device()->getIOExpansion(TM, OptLevel);
+  }
+}
+
+AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm, // Builds the pass for target machine tm.
+ CodeGenOpt::Level OptLevel) : // OptLevel is not used by this constructor.
+#if LLVM_VERSION >= 2500
+ MachineFunctionPass(ID), TM(tm)
+#else
+ MachineFunctionPass((intptr_t)&ID), TM(tm)
+#endif
+{
+ mSTM = &tm.getSubtarget<AMDILSubtarget>(); // cache the AMDIL subtarget of tm
+ mDebug = DEBUGME;
+ mTII = tm.getInstrInfo(); // cache the instruction info of tm
+ mKM = NULL; // assigned at the start of runOnMachineFunction
+}
+
+AMDILIOExpansion::~AMDILIOExpansion() // Empty destructor: nothing is released here.
+{
+}
+/// Expand every IO instruction in \p MF.
+///
+/// Each instruction accepted by isIOInstruction() is handed to
+/// expandIOInstruction().  The original instruction is then erased unless the
+/// expansion set saveInst.
+///
+/// \returns true when at least one IO instruction was processed, so the pass
+/// manager learns that the function may have been modified.
+ bool
+AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF)
+{
+  bool changed = false;
+  mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+      MFI != MFE; ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    MachineBasicBlock::iterator MBI = MBB->begin();
+    while (MBI != MBB->end()) {
+      MachineInstr *MI = MBI;
+      if (!isIOInstruction(MI)) {
+        ++MBI;
+        continue;
+      }
+      mBB = MBB;
+      saveInst = false;
+      expandIOInstruction(MI);
+      changed = true;
+      if (saveInst) {
+        ++MBI;
+      } else {
+        // erase returns the instruction after the erased one, which is the
+        // next one to visit.  Continuing from it directly avoids stepping
+        // the iterator back, which is invalid at the start of the block.
+        MBI = MBB->erase(MI);
+      }
+    }
+  }
+  return changed;
+}
+const char *AMDILIOExpansion::getPassName() const // Returns the fixed display name of this pass.
+{
+ return "AMDIL Generic IO Expansion Pass";
+}
+ bool
+AMDILIOExpansion::isIOInstruction(MachineInstr *MI) // True when MI is one of the load/store pseudo opcodes listed below.
+{
+ if (!MI) { // a null instruction is never an IO instruction
+ return false;
+ }
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD) // the macros presumably expand to one case label per type variant -- confirm
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ return true; // every opcode listed above is an IO instruction
+ };
+ return false; // not reached: every switch path above returns
+}
+void
+AMDILIOExpansion::expandIOInstruction(MachineInstr *MI) // Dispatches MI to the expand* routine for its address space and direction.
+{
+ assert(isIOInstruction(MI) && "Must be an IO instruction to "
+ "be passed to this function!");
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Not an IO Instruction!"); // NOTE(review): no break follows, so without assertions control presumably continues into the global load cases -- confirm
+ ExpandCaseToAllTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD);
+ expandGlobalLoad(MI); // global loads, plain and extending
+ break;
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD);
+ expandRegionLoad(MI); // region loads
+ break;
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
+ expandLocalLoad(MI); // local loads
+ break;
+ ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD);
+ expandConstantLoad(MI); // constant loads
+ break;
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD);
+ expandPrivateLoad(MI); // private loads
+ break;
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+ expandConstantPoolLoad(MI); // constant pool loads
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ ExpandCaseToAllTypes(AMDIL::GLOBALSTORE);
+ expandGlobalStore(MI); // global stores, plain and truncating
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
+ expandPrivateStore(MI); // private stores
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE);
+ expandRegionStore(MI); // region stores
+ break;
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
+ expandLocalStore(MI); // local stores
+ break;
+ }
+}
+/// Decide whether the IO instruction \p MI is one of the cases reported as an
+/// address-calculation instruction:
+///  - private loads: private memory is implemented in software and the load
+///    is not based on a global variable;
+///  - constant pool loads: operand 1 is a register;
+///  - private stores: private memory is implemented in software;
+///  - local loads and stores: local memory is implemented in software.
+/// Every other opcode yields false.
+ bool
+AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI)
+{
+  switch(MI->getOpcode()) {
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+      {
+        // This section of code is a workaround for the problem of
+        // globally scoped constant address variables. The problem
+        // is that although they are declared in the constant
+        // address space, all variables must be allocated in the
+        // private address space. So when there is a load from
+        // the global address, it automatically goes into the private
+        // address space. However, the data section is placed in the
+        // constant address space so we need to check to see if our
+        // load base address is a global variable or not. Only if it
+        // is not a global variable can we do the address calculation
+        // into the private memory ring.
+
+        // Without a memory operand there is no base value to inspect.
+        // This is treated like an operand that has no value attached.
+        if (MI->memoperands_empty()) {
+          return false;
+        }
+        MachineMemOperand& memOp = (**MI->memoperands_begin());
+        const Value *V = memOp.getValue();
+        if (!V) {
+          return false;
+        }
+        return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)
+          && !isa<GlobalVariable>(V);
+      }
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+      return MI->getOperand(1).isReg();
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
+      return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem);
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
+      return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem);
+    default:
+      break;
+  };
+  return false;
+
+}
+/// Return true when \p MI is a sign-, zero- or any-extending load, or a
+/// software sign-extending load.
+ bool
+AMDILIOExpansion::isExtendLoad(MachineInstr *MI)
+{
+  if (isSExtLoadInst(MI)) {
+    return true;
+  }
+  if (isZExtLoadInst(MI)) {
+    return true;
+  }
+  if (isAExtLoadInst(MI)) {
+    return true;
+  }
+  return isSWSExtLoadInst(MI);
+}
+
+ bool
+AMDILIOExpansion::isHardwareRegion(MachineInstr *MI)
+{
+  // Returns true when MI is one of the REGION load/store opcodes listed
+  // below and the device reports hardware support for region memory.
+  // Every other opcode yields false.
+  switch (MI->getOpcode()) {
+    ExpandCaseToAllTypes(AMDIL::REGIONLOAD)
+    ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::REGIONSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE)
+      return mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+    default:
+      break;
+  }
+  return false;
+}
+ bool
+AMDILIOExpansion::isHardwareLocal(MachineInstr *MI)
+{
+  // Returns true when MI is one of the LOCAL load/store opcodes listed
+  // below and the device reports hardware support for local memory.
+  // Every other opcode yields false.
+  switch (MI->getOpcode()) {
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+      return mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+    default:
+      break;
+  }
+  return false;
+}
+// Returns true when MI moves data that is treated as packed.
+//
+// Two paths lead to a true result:
+//  * the opcode is one of the case labels generated by the
+//    ExpandCaseToPackedTypes / ExpandCaseToAllTruncTypes macros below, or
+//  * the opcode is none of those (default label), the instruction is a
+//    truncating store, and the register class of operand 0 combined with
+//    the memory size matches one of the combinations checked below.
+// Everything else returns false.
+ bool
+AMDILIOExpansion::isPackedData(MachineInstr *MI)
+{
+ switch(MI->getOpcode()) {
+ default:
+ // Opcodes not listed explicitly: only truncating stores can qualify.
+ if (isTruncStoreInst(MI)) {
+ switch (MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ // Two-component source: packed when 2 or 4 bytes are written.
+ switch (getMemorySize(MI)) {
+ case 2:
+ case 4:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ // Four-component source: packed when 4 or 8 bytes are written.
+ switch (getMemorySize(MI)) {
+ case 4:
+ case 8:
+ return true;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ break;
+ // All labels generated below share the single 'return true' at the end
+ // of the list.
+ ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD);
+ ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD);
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE);
+ ExpandCaseToPackedTypes(AMDIL::LOCALSTORE);
+ ExpandCaseToPackedTypes(AMDIL::REGIONSTORE);
+ return true;
+ }
+ return false;
+}
+
+ bool
+AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI)
+{
+  // Only the constant-pool load opcodes can be static CP loads; anything
+  // else is rejected immediately.
+  switch (MI->getOpcode()) {
+    default:
+      return false;
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+      break;
+  }
+  // The load is static when at least one operand is a constant pool index.
+  const uint32_t operandCount = MI->getNumOperands();
+  for (uint32_t idx = 0; idx != operandCount; ++idx) {
+    if (MI->getOperand(idx).isCPI()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Checks whether mType has a width of nBits.
+//
+//  mType    - type to inspect; NULL yields false.  Pointer types are looked
+//             through and the pointee is tested instead.
+//  nBits    - the width, in bits, to compare against.
+//  isScalar - for vector and array types: when true the total width
+//             (element count * element width) is compared with nBits, when
+//             false only the width of a single element is compared.
+//
+// Struct (and, on old LLVM versions, union) types compare getTypeSize()
+// against nBits.  Any other sized type compares its scalar size in bits.
+// Unsized, unrecognised types trip an assert and return false.
+ bool
+AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
+{
+  if (!mType) {
+    return false;
+  }
+  if (PointerType *PT = dyn_cast<PointerType>(mType)) {
+    // Forward isScalar explicitly: the two-argument call used previously
+    // fell back to the parameter's default and silently discarded the
+    // comparison mode requested by the caller.
+    return isNbitType(PT->getElementType(), nBits, isScalar);
+  } else if (dyn_cast<StructType>(mType)) {
+    return getTypeSize(mType) == nBits;
+#if LLVM_VERSION < 2500
+  } else if (dyn_cast<UnionType>(mType)) {
+    return getTypeSize(mType) == nBits;
+#endif
+  } else if (VectorType *VT = dyn_cast<VectorType>(mType)) {
+    size_t size = VT->getScalarSizeInBits();
+    return (isScalar ?
+        VT->getNumElements() * size == nBits : size == nBits);
+  } else if (ArrayType *AT = dyn_cast<ArrayType>(mType)) {
+    size_t size = AT->getScalarSizeInBits();
+    return (isScalar ?
+        AT->getNumElements() * size == nBits : size == nBits);
+  } else if (mType->isSized()) {
+    return mType->getScalarSizeInBits() == nBits;
+  } else {
+    assert(0 && "Found a type that we don't know how to handle!");
+    return false;
+  }
+}
+
+ bool
+AMDILIOExpansion::isHardwareInst(MachineInstr *MI)
+{
+  // Reinterpret the instruction's asm-printer flags through the resource
+  // encoding union and report its HardwareInst bit.
+  AMDILAS::InstrResEnc resFlags;
+  resFlags.u16all = MI->getAsmPrinterFlags();
+  return resFlags.bits.HardwareInst;
+}
+
+// Maps the opcode of MI to the packing mode it needs.
+//
+// Stores (plain and truncating) of v2i8/v4i8/v2i16/v4i16 data map to the
+// matching PACK_* value; loads (plain and extending) of the same element
+// layouts map to the matching UNPACK_* value.  Every opcode not listed
+// returns NO_PACKING.
+REG_PACKED_TYPE
+AMDILIOExpansion::getPackedID(MachineInstr *MI)
+{
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ // Stores whose memory layout is two 8-bit elements.
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALSTORE_v2i8:
+ case AMDIL::LOCALSTORE_v2i8:
+ case AMDIL::REGIONSTORE_v2i8:
+ case AMDIL::PRIVATESTORE_v2i8:
+ return PACK_V2I8;
+ // Stores whose memory layout is four 8-bit elements.
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALSTORE_v4i8:
+ case AMDIL::LOCALSTORE_v4i8:
+ case AMDIL::REGIONSTORE_v4i8:
+ case AMDIL::PRIVATESTORE_v4i8:
+ return PACK_V4I8;
+ // Stores whose memory layout is two 16-bit elements.
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALSTORE_v2i16:
+ case AMDIL::LOCALSTORE_v2i16:
+ case AMDIL::REGIONSTORE_v2i16:
+ case AMDIL::PRIVATESTORE_v2i16:
+ return PACK_V2I16;
+ // Stores whose memory layout is four 16-bit elements.
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ case AMDIL::GLOBALSTORE_v4i16:
+ case AMDIL::LOCALSTORE_v4i16:
+ case AMDIL::REGIONSTORE_v4i16:
+ case AMDIL::PRIVATESTORE_v4i16:
+ return PACK_V4I16;
+ // Loads of two 8-bit elements.
+ case AMDIL::GLOBALLOAD_v2i8:
+ case AMDIL::GLOBALSEXTLOAD_v2i8:
+ case AMDIL::GLOBALAEXTLOAD_v2i8:
+ case AMDIL::GLOBALZEXTLOAD_v2i8:
+ case AMDIL::LOCALLOAD_v2i8:
+ case AMDIL::LOCALSEXTLOAD_v2i8:
+ case AMDIL::LOCALAEXTLOAD_v2i8:
+ case AMDIL::LOCALZEXTLOAD_v2i8:
+ case AMDIL::REGIONLOAD_v2i8:
+ case AMDIL::REGIONSEXTLOAD_v2i8:
+ case AMDIL::REGIONAEXTLOAD_v2i8:
+ case AMDIL::REGIONZEXTLOAD_v2i8:
+ case AMDIL::PRIVATELOAD_v2i8:
+ case AMDIL::PRIVATESEXTLOAD_v2i8:
+ case AMDIL::PRIVATEAEXTLOAD_v2i8:
+ case AMDIL::PRIVATEZEXTLOAD_v2i8:
+ case AMDIL::CONSTANTLOAD_v2i8:
+ case AMDIL::CONSTANTSEXTLOAD_v2i8:
+ case AMDIL::CONSTANTAEXTLOAD_v2i8:
+ case AMDIL::CONSTANTZEXTLOAD_v2i8:
+ return UNPACK_V2I8;
+ // Loads of four 8-bit elements.
+ case AMDIL::GLOBALLOAD_v4i8:
+ case AMDIL::GLOBALSEXTLOAD_v4i8:
+ case AMDIL::GLOBALAEXTLOAD_v4i8:
+ case AMDIL::GLOBALZEXTLOAD_v4i8:
+ case AMDIL::LOCALLOAD_v4i8:
+ case AMDIL::LOCALSEXTLOAD_v4i8:
+ case AMDIL::LOCALAEXTLOAD_v4i8:
+ case AMDIL::LOCALZEXTLOAD_v4i8:
+ case AMDIL::REGIONLOAD_v4i8:
+ case AMDIL::REGIONSEXTLOAD_v4i8:
+ case AMDIL::REGIONAEXTLOAD_v4i8:
+ case AMDIL::REGIONZEXTLOAD_v4i8:
+ case AMDIL::PRIVATELOAD_v4i8:
+ case AMDIL::PRIVATESEXTLOAD_v4i8:
+ case AMDIL::PRIVATEAEXTLOAD_v4i8:
+ case AMDIL::PRIVATEZEXTLOAD_v4i8:
+ case AMDIL::CONSTANTLOAD_v4i8:
+ case AMDIL::CONSTANTSEXTLOAD_v4i8:
+ case AMDIL::CONSTANTAEXTLOAD_v4i8:
+ case AMDIL::CONSTANTZEXTLOAD_v4i8:
+ return UNPACK_V4I8;
+ // Loads of two 16-bit elements.
+ case AMDIL::GLOBALLOAD_v2i16:
+ case AMDIL::GLOBALSEXTLOAD_v2i16:
+ case AMDIL::GLOBALAEXTLOAD_v2i16:
+ case AMDIL::GLOBALZEXTLOAD_v2i16:
+ case AMDIL::LOCALLOAD_v2i16:
+ case AMDIL::LOCALSEXTLOAD_v2i16:
+ case AMDIL::LOCALAEXTLOAD_v2i16:
+ case AMDIL::LOCALZEXTLOAD_v2i16:
+ case AMDIL::REGIONLOAD_v2i16:
+ case AMDIL::REGIONSEXTLOAD_v2i16:
+ case AMDIL::REGIONAEXTLOAD_v2i16:
+ case AMDIL::REGIONZEXTLOAD_v2i16:
+ case AMDIL::PRIVATELOAD_v2i16:
+ case AMDIL::PRIVATESEXTLOAD_v2i16:
+ case AMDIL::PRIVATEAEXTLOAD_v2i16:
+ case AMDIL::PRIVATEZEXTLOAD_v2i16:
+ case AMDIL::CONSTANTLOAD_v2i16:
+ case AMDIL::CONSTANTSEXTLOAD_v2i16:
+ case AMDIL::CONSTANTAEXTLOAD_v2i16:
+ case AMDIL::CONSTANTZEXTLOAD_v2i16:
+ return UNPACK_V2I16;
+ // Loads of four 16-bit elements.
+ case AMDIL::GLOBALLOAD_v4i16:
+ case AMDIL::GLOBALSEXTLOAD_v4i16:
+ case AMDIL::GLOBALAEXTLOAD_v4i16:
+ case AMDIL::GLOBALZEXTLOAD_v4i16:
+ case AMDIL::LOCALLOAD_v4i16:
+ case AMDIL::LOCALSEXTLOAD_v4i16:
+ case AMDIL::LOCALAEXTLOAD_v4i16:
+ case AMDIL::LOCALZEXTLOAD_v4i16:
+ case AMDIL::REGIONLOAD_v4i16:
+ case AMDIL::REGIONSEXTLOAD_v4i16:
+ case AMDIL::REGIONAEXTLOAD_v4i16:
+ case AMDIL::REGIONZEXTLOAD_v4i16:
+ case AMDIL::PRIVATELOAD_v4i16:
+ case AMDIL::PRIVATESEXTLOAD_v4i16:
+ case AMDIL::PRIVATEAEXTLOAD_v4i16:
+ case AMDIL::PRIVATEZEXTLOAD_v4i16:
+ case AMDIL::CONSTANTLOAD_v4i16:
+ case AMDIL::CONSTANTSEXTLOAD_v4i16:
+ case AMDIL::CONSTANTAEXTLOAD_v4i16:
+ case AMDIL::CONSTANTZEXTLOAD_v4i16:
+ return UNPACK_V4I16;
+ };
+ // Opcode is not a packed load or store.
+ return NO_PACKING;
+}
+
+ uint32_t
+AMDILIOExpansion::getPointerID(MachineInstr *MI)
+{
+  // Decode the instruction's asm-printer flags and hand back the
+  // resource ID stored in them.
+  AMDILAS::InstrResEnc resFlags;
+  getAsmPrinterFlags(MI, resFlags);
+  return resFlags.bits.ResourceID;
+}
+
+ uint32_t
+AMDILIOExpansion::getShiftSize(MachineInstr *MI)
+{
+  // Returns 1 for the 8-bit packed/unpacked modes, 2 for the 16-bit
+  // ones and 0 when the instruction needs no packing.
+  uint32_t shift = 0;
+  switch (getPackedID(MI)) {
+    case PACK_V2I8:
+    case PACK_V4I8:
+    case UNPACK_V2I8:
+    case UNPACK_V4I8:
+      shift = 1;
+      break;
+    case PACK_V2I16:
+    case PACK_V4I16:
+    case UNPACK_V2I16:
+    case UNPACK_V4I16:
+      shift = 2;
+      break;
+    default:
+      break;
+  }
+  return shift;
+}
+ uint32_t
+AMDILIOExpansion::getMemorySize(MachineInstr *MI)
+{
+  // Use the size recorded on the first memory operand when there is one;
+  // instructions without memory operands are reported as size 4.
+  if (!MI->memoperands_empty()) {
+    return (uint32_t)((*MI->memoperands_begin())->getSize());
+  }
+  return 4;
+}
+
+// Emits, before MI, the instructions that widen the value held in R1011
+// to 64-bit element(s).
+//
+//  numComps    - number of components; only 1 and 2 are handled.
+//  size        - source element width in bits; only 8, 16 and 32 are
+//                handled.
+//  signedShift - forwarded to expandLongExtendSub32 for 8/16-bit sources;
+//                for 32-bit sources it selects between building the high
+//                half from a shift-by-31 of the value (true) or from the
+//                literal 0 (false).
+//
+// Unsupported combinations trip an assert.  R1012 is used as a scratch
+// register in the signed 32-bit paths.
+ void
+AMDILIOExpansion::expandLongExtend(MachineInstr *MI,
+ uint32_t numComps, uint32_t size, bool signedShift)
+{
+ DebugLoc DL = MI->getDebugLoc();
+ switch(size) {
+ default:
+ assert(0 && "Found a case we don't handle!");
+ break;
+ case 8:
+ // 8-bit source: shift amounts of 24; the 64-bit immediate carries 24
+ // in its low word and 31 in its high word.
+ if (numComps == 1) {
+ expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32,
+ AMDIL::USHRVEC_i8,
+ 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE, signedShift);
+ } else if (numComps == 2) {
+ expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32,
+ AMDIL::USHRVEC_v2i8,
+ 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE_v2i64, signedShift);
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ break;
+ case 16:
+ // 16-bit source: same scheme with shift amounts of 16.
+ if (numComps == 1) {
+ expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32,
+ AMDIL::USHRVEC_i16,
+ 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE, signedShift);
+ } else if (numComps == 2) {
+ expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32,
+ AMDIL::USHRVEC_v2i16,
+ 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE_v2i64, signedShift);
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ break;
+ case 32:
+ // 32-bit source: no pre-shift is needed; only the high half has to be
+ // produced before the LCREATE combines the two halves.
+ if (numComps == 1) {
+ if (signedShift) {
+ // High half = value shifted right by 31, placed in R1012.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(31));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012);
+ } else {
+ // High half = literal 0.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ }
+ } else if (numComps == 2) {
+ if (signedShift) {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::R1012)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(31));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addReg(AMDIL::R1012);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0));
+ }
+ } else {
+ assert(0 && "Found a case we don't handle!");
+ }
+ };
+}
+ void
+AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI,
+    unsigned SHLop, unsigned SHRop, unsigned USHRop,
+    unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+    unsigned LCRop, bool signedShift)
+{
+  // Widens a sub-32-bit value held in R1011 to 64 bits, emitting all
+  // instructions before MI.  The value is first shifted left by SHLimm.
+  const DebugLoc dbgLoc = MI->getDebugLoc();
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(SHLop), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi32Literal(SHLimm));
+
+  if (!signedShift) {
+    // Unsigned path: shift back down with USHRop, then build the long
+    // with a literal 0 as the second LCRop input.
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(USHRop), AMDIL::R1011)
+      .addReg(AMDIL::R1011)
+      .addImm(mMFI->addi32Literal(USHRimm));
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(LCRop), AMDIL::R1011)
+      .addReg(AMDIL::R1011)
+      .addImm(mMFI->addi32Literal(0));
+    return;
+  }
+
+  // Signed path: duplicate the shifted value into both LCRop inputs, then
+  // apply SHRop with the 64-bit immediate SHRimm.
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(LCRop), AMDIL::R1011)
+    .addReg(AMDIL::R1011).addReg(AMDIL::R1011);
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(SHRop), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(mMFI->addi64Literal(SHRimm));
+}
+
+ void
+AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop,
+    unsigned SHRop, unsigned offset)
+{
+  // Extends the value in R1011 in place with a shift-left / shift-right
+  // pair; both shifts use the same literal, registered once from offset.
+  const unsigned shiftLit = mMFI->addi32Literal(offset);
+  const DebugLoc dbgLoc = MI->getDebugLoc();
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(SHLop), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(shiftLit);
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(SHRop), AMDIL::R1011)
+    .addReg(AMDIL::R1011)
+    .addImm(shiftLit);
+}
+// Emits, before MI, the instructions that extend the freshly loaded value
+// in R1011 to the width of MI's destination register class.  Does nothing
+// when MI is not an extending load.
+//
+// The extension is chosen from the register class of operand 0 together
+// with the width of the loaded data, which is taken from the type of the
+// memory operand's Value when available and from the memory operand size
+// otherwise.
+ void
+AMDILIOExpansion::expandExtendLoad(MachineInstr *MI)
+{
+ if (!isExtendLoad(MI)) {
+ return;
+ }
+ // Type of the IR value behind the first memory operand, or NULL when
+ // there is no memory operand / no value attached.
+ Type *mType = NULL;
+ if (!MI->memoperands_empty()) {
+ MachineMemOperand *memOp = (*MI->memoperands_begin());
+ const Value *moVal = (memOp) ? memOp->getValue() : NULL;
+ mType = (moVal) ? moVal->getType() : NULL;
+ }
+ unsigned opcode = 0;
+ DebugLoc DL = MI->getDebugLoc();
+ if (isZExtLoadInst(MI) || isAExtLoadInst(MI) || isSExtLoadInst(MI)) {
+ // For the integer cases, sign-extending loads use SHRVEC_* and all
+ // other extending loads use USHRVEC_* as the second shift.
+ switch(MI->getDesc().OpInfo[0].RegClass) {
+ default:
+ assert(0 && "Found an extending load that we don't handle!");
+ break;
+ case AMDIL::GPRI16RegClassID:
+ // Skipped for hardware local loads unless byte LDS ops are done in
+ // software.
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16;
+ expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24);
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24);
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24);
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24);
+ break;
+ case AMDIL::GPRI32RegClassID:
+ // We can be a i8 or i16 bit sign extended value
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24);
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
+ expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ // We can be a v2i8 or v2i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ // We can be a v4i8 or v4i16 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24);
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) {
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
+ expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16);
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRI64RegClassID:
+ // We can be a i8, i16 or i32 bit sign extended value
+ if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
+ expandLongExtend(MI, 1, 8, isSExtLoadInst(MI));
+ } else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
+ expandLongExtend(MI, 1, 16, isSExtLoadInst(MI));
+ } else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) {
+ expandLongExtend(MI, 1, 32, isSExtLoadInst(MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ // We can be a v2i8, v2i16 or v2i32 bit sign extended value
+ if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
+ expandLongExtend(MI, 2, 8, isSExtLoadInst(MI));
+ } else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
+ expandLongExtend(MI, 2, 16, isSExtLoadInst(MI));
+ } else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) {
+ expandLongExtend(MI, 2, 32, isSExtLoadInst(MI));
+ } else {
+ assert(0 && "Found an extending load that we don't handle!");
+ }
+ break;
+ // Floating-point destinations: convert R1011 in place with HTOF_* (f32
+ // classes) or FTOD (f64 classes).
+ case AMDIL::GPRF32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v2f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRF64RegClassID:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ // Extract one component into R1012, convert R1011 and R1012 separately
+ // with FTOD, then recombine them into R1011 with VINSERT_v2f64.
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v2f32),
+ AMDIL::R1012).addReg(AMDIL::R1011).addImm(2);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTOD), AMDIL::R1012)
+ .addReg(AMDIL::R1012);
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::VINSERT_v2f64), AMDIL::R1011)
+ .addReg(AMDIL::R1011).addReg(AMDIL::R1012)
+ .addImm(1 << 8).addImm(1 << 8);
+ break;
+ };
+ } else if (isSWSExtLoadInst(MI)) {
+ // Software sign-extending loads: always use the SHRVEC_* shift; 8-bit
+ // classes shift by 24 and 16-bit classes by 16.  Register classes not
+ // listed here are left untouched (there is no default label).
+ switch(MI->getDesc().OpInfo[0].RegClass) {
+ case AMDIL::GPRI8RegClassID:
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ expandIntegerExtend(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_i8, 24);
+ }
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v2i8, 24);
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v4i8, AMDIL::SHRVEC_v4i8, 24);
+ break;
+ case AMDIL::GPRI16RegClassID:
+ if (!isHardwareLocal(MI)
+ || mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
+ expandIntegerExtend(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_i16, 16);
+ }
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v2i16, 16);
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ expandIntegerExtend(MI, AMDIL::SHL_v4i16, AMDIL::SHRVEC_v4i16, 16);
+ break;
+
+ };
+ }
+}
+
+// Emits, before MI, the instructions that narrow the value in R1011 to the
+// width a truncating store will write.  Does nothing when MI is not a
+// truncating store; an unrecognised truncating-store opcode is dumped and
+// trips an assert.
+//
+// Note that two case groups below deliberately have no 'break': the
+// 64-bit sources first take the low half with LLO_v2i64 and then fall
+// through into the masking step shared with the 16/32-bit sources.
+ void
+AMDILIOExpansion::expandTruncData(MachineInstr *MI)
+{
+ if (!isTruncStoreInst(MI)) {
+ return;
+ }
+ DebugLoc DL = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default:
+ MI->dump();
+ assert(!"Found a trunc store instructions we don't handle!");
+ break;
+ // 64-bit source truncated to 8 bits: take the low half first ...
+ case AMDIL::GLOBALTRUNCSTORE_i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::LOCALTRUNCSTORE_i64i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i8:
+ case AMDIL::REGIONTRUNCSTORE_i64i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_i64i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ // ... then intentionally fall through to the 8-bit mask below.
+ case AMDIL::GLOBALTRUNCSTORE_i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
+ case AMDIL::LOCALTRUNCSTORE_i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i16i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i16i8:
+ case AMDIL::REGIONTRUNCSTORE_i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i16i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
+ case AMDIL::LOCALTRUNCSTORE_i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i8:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i8:
+ case AMDIL::REGIONTRUNCSTORE_i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i8:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ // Keep only the low 8 bits of each component.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFF));
+ break;
+ // 64-bit source truncated to 16 bits: take the low half first ...
+ case AMDIL::GLOBALTRUNCSTORE_i64i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
+ case AMDIL::LOCALTRUNCSTORE_i64i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i16:
+ case AMDIL::REGIONTRUNCSTORE_i64i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_i64i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ // ... then intentionally fall through to the 16-bit mask below.
+ case AMDIL::GLOBALTRUNCSTORE_i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
+ case AMDIL::LOCALTRUNCSTORE_i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v2i32i16:
+ case AMDIL::LOCALTRUNCSTORE_v4i32i16:
+ case AMDIL::REGIONTRUNCSTORE_i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v2i32i16:
+ case AMDIL::REGIONTRUNCSTORE_v4i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
+ case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ // Keep only the low 16 bits of each component.
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
+ .addReg(AMDIL::R1011)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ break;
+ // Scalar i64 -> i32: the low half is the result, no masking needed.
+ case AMDIL::GLOBALTRUNCSTORE_i64i32:
+ case AMDIL::LOCALTRUNCSTORE_i64i32:
+ case AMDIL::REGIONTRUNCSTORE_i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_i64i32:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ // v2i64 -> v2i32: vector form of the low-half extraction.
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i32:
+ case AMDIL::LOCALTRUNCSTORE_v2i64i32:
+ case AMDIL::REGIONTRUNCSTORE_v2i64i32:
+ case AMDIL::PRIVATETRUNCSTORE_v2i64i32:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLO_v2i64), AMDIL::R1011)
+ .addReg(AMDIL::R1011);
+ break;
+ // Scalar f64 -> f32 conversion in place.
+ case AMDIL::GLOBALTRUNCSTORE_f64f32:
+ case AMDIL::LOCALTRUNCSTORE_f64f32:
+ case AMDIL::REGIONTRUNCSTORE_f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_f64f32:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1011).addReg(AMDIL::R1011);
+ break;
+ // v2f64 -> v2f32: extract one component into R1012, convert R1011 and
+ // R1012 separately with DTOF, then recombine with VINSERT_v2f32.
+ case AMDIL::GLOBALTRUNCSTORE_v2f64f32:
+ case AMDIL::LOCALTRUNCSTORE_v2f64f32:
+ case AMDIL::REGIONTRUNCSTORE_v2f64f32:
+ case AMDIL::PRIVATETRUNCSTORE_v2f64f32:
+ BuildMI(*mBB, *MI, DL, mTII->get(AMDIL::VEXTRACT_v2f64),
+ AMDIL::R1012).addReg(AMDIL::R1011).addImm(2);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1011).addReg(AMDIL::R1011);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
+ AMDIL::R1012).addReg(AMDIL::R1012);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v2f32),
+ AMDIL::R1011).addReg(AMDIL::R1011).addReg(AMDIL::R1012)
+ .addImm(1 << 8).addImm(1 << 8);
+ break;
+ }
+}
+ void
+AMDILIOExpansion::expandAddressCalc(MachineInstr *MI)
+{
+  // Adds the base register of the accessed memory kind to the address in
+  // R1010.  Nothing is emitted unless isAddrCalcInstr() accepts MI.
+  if (!isAddrCalcInstr(MI)) {
+    return;
+  }
+
+  // Pick the base register by opcode family: T1 for private accesses, T2
+  // for local accesses and SDP for constant-pool loads.
+  unsigned baseReg = 0;
+  switch (MI->getOpcode()) {
+    default:
+      return;
+    ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
+    ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
+      baseReg = AMDIL::T1;
+      break;
+    ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
+    ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
+      baseReg = AMDIL::T2;
+      break;
+    ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
+    ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
+      baseReg = AMDIL::SDP;
+      break;
+  }
+
+  // R1010 = R1010 + base, inserted before MI.
+  BuildMI(*mBB, MI, MI->getDebugLoc(), mTII->get(AMDIL::ADD_i32),
+      AMDIL::R1010).addReg(AMDIL::R1010).addReg(baseReg);
+}
+ void
+AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI)
+{
+  // Materialises the load address in R1010: operand 1 plus operand 2 when
+  // operand 2 is a register, otherwise just a copy of operand 1.  Operand 1
+  // is then redirected to R1010 and the address-space base is applied.
+  const DebugLoc dbgLoc = MI->getDebugLoc();
+  const bool hasRegOffset = MI->getOperand(2).isReg();
+  if (!hasRegOffset) {
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(AMDIL::MOVE_i32), AMDIL::R1010)
+      .addReg(MI->getOperand(1).getReg());
+  } else {
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(AMDIL::ADD_i32), AMDIL::R1010)
+      .addReg(MI->getOperand(1).getReg())
+      .addReg(MI->getOperand(2).getReg());
+  }
+  MI->getOperand(1).setReg(AMDIL::R1010);
+  expandAddressCalc(MI);
+}
+ void
+AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle,
+    int id, bool ExtFPLoad)
+{
+  // Emits the load of literal 'id' for component 'swizzle' of MI's
+  // destination register.  DTOF is used instead of MOVE_i32 when
+  // ExtFPLoad is set.
+  const DebugLoc dbgLoc = MI->getDebugLoc();
+  const bool insertsIntoVector = (swizzle >= 1 && swizzle <= 3);
+
+  if (!insertsIntoVector) {
+    // Any other swizzle value writes the literal straight into the
+    // destination register.
+    BuildMI(*mBB, MI, dbgLoc, mTII->get(ExtFPLoad
+          ? AMDIL::DTOF : AMDIL::MOVE_i32),
+        MI->getOperand(0).getReg())
+      .addImm(id);
+    return;
+  }
+
+  // Swizzle 1..3: stage the literal in R1001, then insert it into the
+  // destination with VINSERT_v4i32.
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(ExtFPLoad
+        ? AMDIL::DTOF : AMDIL::MOVE_i32), AMDIL::R1001)
+    .addImm(id);
+  BuildMI(*mBB, MI, dbgLoc, mTII->get(AMDIL::VINSERT_v4i32),
+      MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(0).getReg())
+    .addReg(AMDIL::R1001)
+    .addImm(swizzle + 1);
+}
+// Emits the instructions that load constant C for the static constant-pool
+// load MI.
+//
+//  MI        - the constant-pool load being expanded.
+//  C         - the constant to materialise.
+//  KM        - kernel manager; only forwarded to recursive calls.
+//  swizzle   - destination component for a scalar constant.
+//  ExtFPLoad - forwarded to emitStaticCPLoad to select the load opcode.
+//
+// Scalar FP and integer constants are registered as literals and emitted
+// through emitStaticCPLoad.  Aggregate constants (array, struct, union on
+// old LLVM versions, vector) are expanded element by element, element x
+// going to component x; at most 4 elements are supported.  Unsupported
+// constant kinds trip an assert.
+ void
+AMDILIOExpansion::emitCPInst(MachineInstr* MI,
+    const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad)
+{
+  if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) {
+    if (CFP->getType()->isFloatTy()) {
+      uint32_t val = (uint32_t)(CFP->getValueAPF().bitcastToAPInt()
+          .getZExtValue());
+      uint32_t id = mMFI->addi32Literal(val);
+      if (!id) {
+        // No 32-bit literal id was returned: register the value as a
+        // 64-bit literal holding the double representation instead.
+        const APFloat &APF = CFP->getValueAPF();
+        union dtol_union {
+          double d;
+          uint64_t ul;
+        } conv;
+        if (&APF.getSemantics()
+            == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+          float fval = APF.convertToFloat();
+          conv.d = (double)fval;
+        } else {
+          conv.d = APF.convertToDouble();
+        }
+        id = mMFI->addi64Literal(conv.ul);
+      }
+      emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+    } else {
+      const APFloat &APF = CFP->getValueAPF();
+      union ftol_union {
+        double d;
+        uint64_t ul;
+      } conv;
+      if (&APF.getSemantics()
+          == (const llvm::fltSemantics*)&APFloat::IEEEsingle) {
+        float fval = APF.convertToFloat();
+        conv.d = (double)fval;
+      } else {
+        conv.d = APF.convertToDouble();
+      }
+      uint32_t id = mMFI->getLongLits(conv.ul);
+      if (!id) {
+        id = mMFI->getIntLits((uint32_t)conv.ul);
+      }
+      emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+    }
+  } else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) {
+    // CI is known to be non-null inside this branch.
+    int64_t val = CI->getSExtValue();
+    if (CI->getBitWidth() == 64) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad);
+    } else {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad);
+    }
+  } else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) {
+    uint32_t size = CA->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    // Element x goes to component x.  (Operand 0 used to be emitted for
+    // every component, replicating the first element.)
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CA->getOperand(x), KM, x, ExtFPLoad);
+    }
+  } else if (const ConstantAggregateZero* CAZ
+      = dyn_cast<ConstantAggregateZero>(C)) {
+    if (CAZ->isNullValue()) {
+      emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad);
+    }
+  } else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) {
+    uint32_t size = CS->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CS->getOperand(x), KM, x, ExtFPLoad);
+    }
+#if LLVM_VERSION < 2500
+  } else if (const ConstantUnion* CU = dyn_cast<ConstantUnion>(C)) {
+    uint32_t size = CU->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CU->getOperand(x), KM, x, ExtFPLoad);
+    }
+#endif
+  } else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) {
+    // TODO: Make this handle vectors natively up to the correct
+    // size
+    uint32_t size = CV->getNumOperands();
+    assert(size < 5 && "Cannot handle a constant array where size > 4");
+    if (size > 4) {
+      size = 4;
+    }
+    for (uint32_t x = 0; x < size; ++x) {
+      emitCPInst(MI, CV->getOperand(x), KM, x, ExtFPLoad);
+    }
+  } else {
+    // TODO: Do we really need to handle ConstantPointerNull?
+    // What about BlockAddress, ConstantExpr and Undef?
+    // How would these even be generated by a valid CL program?
+    assert(0 && "Found a constant type that I don't know how to handle");
+  }
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILIOExpansion.h b/src/gallium/drivers/radeon/AMDILIOExpansion.h
new file mode 100644
index 00000000000..54179e9aa8e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIOExpansion.h
@@ -0,0 +1,360 @@
+//===----------- AMDILIOExpansion.h - IO Expansion Pass -------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL IO Expansion class expands pseudo IO instructions into a sequence
+// of instructions that produces the correct results. These instructions are
+// not expanded earlier in the backend because any pass that runs before this
+// one is allowed to generate load/store instructions. Consequently, a pass may
+// only be scheduled after this one if it never generates load/store
+// instructions.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILIOEXPANSION_H_
+#define _AMDILIOEXPANSION_H_
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "IOExpansion"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "AMDIL.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MachineFunction;
+ class AMDILKernelManager;
+ class AMDILMachineFunctionInfo;
+ class AMDILSubtarget;
+ class MachineInstr;
+ class Constant;
+ class TargetInstrInfo;
+ typedef enum { // Packing mode; this is the return type of AMDILIOExpansion::getPackedID().
+ NO_PACKING = 0, // Explicitly zero; the enumerators below count up from here.
+ PACK_V2I8, // NOTE(review): names suggest vector width/element type (e.g. 2 x i8) -- confirm against getPackedID().
+ PACK_V4I8,
+ PACK_V2I16,
+ PACK_V4I16,
+ UNPACK_V2I8,
+ UNPACK_V4I8,
+ UNPACK_V2I16,
+ UNPACK_V4I16,
+ UNPACK_LAST // NOTE(review): looks like an end-of-range marker rather than a real mode -- confirm.
+ } REG_PACKED_TYPE;
+ class AMDILIOExpansion : public MachineFunctionPass // Abstract base pass: most expand* hooks below are pure virtual.
+ {
+ public:
+ virtual ~AMDILIOExpansion();
+ virtual const char* getPassName() const;
+ bool runOnMachineFunction(MachineFunction &MF);
+ static char ID;
+ protected:
+ AMDILIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel); // Protected: only derived classes can construct this pass.
+ //
+ // @param MI Machine instruction to check.
+ // @brief checks to see if the machine instruction
+ // is an I/O instruction or not.
+ //
+ // @return true if I/O, false otherwise.
+ //
+ virtual bool
+ isIOInstruction(MachineInstr *MI);
+ // Wrapper function that calls the appropriate I/O
+ // expansion function based on the instruction type.
+ virtual void
+ expandIOInstruction(MachineInstr *MI);
+ virtual void
+ expandGlobalStore(MachineInstr *MI) = 0; // Pure virtual: one store hook per memory kind (global/local/region/private).
+ virtual void
+ expandLocalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionStore(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateStore(MachineInstr *MI) = 0;
+ virtual void
+ expandGlobalLoad(MachineInstr *MI) = 0; // Pure virtual: load hooks, including constant and constant-pool loads.
+ virtual void
+ expandRegionLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandConstantLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandConstantPoolLoad(MachineInstr *MI) = 0;
+ bool
+ isAddrCalcInstr(MachineInstr *MI); // Non-virtual predicates and queries on a single instruction follow.
+ bool
+ isExtendLoad(MachineInstr *MI);
+ bool
+ isHardwareRegion(MachineInstr *MI);
+ bool
+ isHardwareLocal(MachineInstr *MI);
+ bool
+ isPackedData(MachineInstr *MI);
+ bool
+ isStaticCPLoad(MachineInstr *MI);
+ bool
+ isNbitType(Type *MI, uint32_t nBits, bool isScalar = true); // Note: takes a Type*, although the parameter is named MI.
+ bool
+ isHardwareInst(MachineInstr *MI);
+ uint32_t
+ getMemorySize(MachineInstr *MI);
+ REG_PACKED_TYPE
+ getPackedID(MachineInstr *MI);
+ uint32_t
+ getShiftSize(MachineInstr *MI);
+ uint32_t
+ getPointerID(MachineInstr *MI);
+ void
+ expandTruncData(MachineInstr *MI);
+ void
+ expandLoadStartCode(MachineInstr *MI);
+ virtual void
+ expandStoreSetupCode(MachineInstr *MI) = 0;
+ void
+ expandAddressCalc(MachineInstr *MI);
+ void
+ expandLongExtend(MachineInstr *MI,
+ uint32_t numComponents, uint32_t size, bool signedShift);
+ void
+ expandLongExtendSub32(MachineInstr *MI,
+ unsigned SHLop, unsigned SHRop, unsigned USHRop,
+ unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
+ unsigned LCRop, bool signedShift);
+ void
+ expandIntegerExtend(MachineInstr *MI, unsigned, unsigned, unsigned);
+ void
+ expandExtendLoad(MachineInstr *MI);
+ virtual void
+ expandPackedData(MachineInstr *MI) = 0;
+ void
+ emitCPInst(MachineInstr* MI, const Constant* C,
+ AMDILKernelManager* KM, int swizzle, bool ExtFPLoad);
+
+ bool mDebug; // State shared with derived classes; where each field is populated is not visible in this header.
+ const AMDILSubtarget *mSTM;
+ AMDILKernelManager *mKM;
+ MachineBasicBlock *mBB;
+ AMDILMachineFunctionInfo *mMFI;
+ const TargetInstrInfo *mTII;
+ bool saveInst;
+ private:
+ void
+ emitStaticCPLoad(MachineInstr* MI, int swizzle, int id,
+ bool ExtFPLoad); // Private helper; not accessible from derived classes.
+ TargetMachine &TM; // Reference member, so it must be bound in the constructor's initializer list.
+ }; // class AMDILIOExpansion
+
+ // Intermediate class that holds I/O code expansion that is common to the
+ // 7XX, Evergreen and Northern Island family of chips.
+ class AMDIL789IOExpansion : public AMDILIOExpansion { // Still abstract: six hooks are re-declared pure below.
+ public:
+ virtual ~AMDIL789IOExpansion();
+ virtual const char* getPassName() const;
+ protected:
+ AMDIL789IOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel); // Protected: constructed only through derived classes.
+ virtual void
+ expandGlobalStore(MachineInstr *MI) = 0; // Global/local/region stores and loads stay pure for the derived classes.
+ virtual void
+ expandLocalStore(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionStore(MachineInstr *MI) = 0;
+ virtual void
+ expandGlobalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandRegionLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandLocalLoad(MachineInstr *MI) = 0;
+ virtual void
+ expandPrivateStore(MachineInstr *MI); // From here on the hooks are declared non-pure, i.e. this class supplies them.
+ virtual void
+ expandConstantLoad(MachineInstr *MI);
+ virtual void
+ expandPrivateLoad(MachineInstr *MI) ;
+ virtual void
+ expandConstantPoolLoad(MachineInstr *MI);
+ void
+ expandStoreSetupCode(MachineInstr *MI);
+ virtual void
+ expandPackedData(MachineInstr *MI);
+ private:
+ void emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
+ bool needsSelect); // Private emit* helpers; their definitions are not part of this header.
+ void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit);
+ void emitComponentExtract(MachineInstr *MI, unsigned flag, unsigned src,
+ unsigned dst, bool beforeInst);
+ void emitDataLoadSelect(MachineInstr *MI);
+ }; // class AMDIL789IOExpansion
+ // Class that handles I/O emission for the 7XX family of devices.
+ class AMDIL7XXIOExpansion : public AMDIL789IOExpansion { // Declares every hook that AMDIL789IOExpansion leaves pure.
+ public:
+ AMDIL7XXIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel); // Public constructor, unlike the two base classes.
+
+ ~AMDIL7XXIOExpansion(); // Implicitly virtual because the base class destructor is virtual.
+ const char* getPassName() const;
+ protected:
+ void
+ expandGlobalStore(MachineInstr *MI); // Overrides of the six global/local/region store and load hooks.
+ void
+ expandLocalStore(MachineInstr *MI);
+ void
+ expandRegionStore(MachineInstr *MI);
+ void
+ expandGlobalLoad(MachineInstr *MI);
+ void
+ expandRegionLoad(MachineInstr *MI);
+ void
+ expandLocalLoad(MachineInstr *MI);
+ }; // class AMDIL7XXIOExpansion
+
+ // Class that handles image functions to expand them into the
+ // correct set of I/O instructions.
+ class AMDILImageExpansion : public AMDIL789IOExpansion { // Adds image hooks; declares none of the hooks left pure by the base.
+ public:
+ AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+ virtual ~AMDILImageExpansion();
+ protected:
+ //
+ // @param MI Instruction iterator that has the sample instruction
+ // that needs to be taken care of.
+ // @brief transforms the __amdil_sample_data function call into a
+ // sample instruction in IL.
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+ //
+ // @param MI Instruction iterator that has the write instruction that
+ // needs to be taken care of.
+ // @brief transforms the __amdil_write_data function call into a
+ // simple UAV write instruction in IL.
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageStore(MachineBasicBlock *BB, MachineInstr *MI);
+ //
+ // @param MI Instruction iterator that has the image parameter
+ // instruction
+ // @brief transforms the __amdil_get_image_params function call into
+ // a copy of data from a specific constant buffer to the register
+ //
+ // @warning This function only works correctly if all functions get
+ // inlined
+ //
+ virtual void
+ expandImageParam(MachineBasicBlock *BB, MachineInstr *MI);
+
+ //
+ // @param MI Instruction that points to the image
+ // @brief transforms __amdil_sample_data into a sequence of
+ // if/else that selects the correct sample instruction.
+ //
+ // @warning This function is inefficient and works with no
+ // inlining.
+ //
+ virtual void
+ expandInefficientImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
+ private:
+ AMDILImageExpansion(); // Do not implement.
+
+ }; // class AMDILImageExpansion
+
+ // Class that expands IO instructions for Evergreen and Northern
+ // Island family of devices.
+ class AMDILEGIOExpansion : public AMDILImageExpansion { // Declares the load/store hooks inherited as pure from AMDIL789IOExpansion.
+ public:
+ AMDILEGIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+
+ virtual ~AMDILEGIOExpansion();
+ const char* getPassName() const;
+ protected:
+ virtual bool
+ isIOInstruction(MachineInstr *MI); // Overrides the base-class classifier declared in AMDILIOExpansion.
+ virtual void
+ expandIOInstruction(MachineInstr *MI); // Overrides the base-class dispatcher declared in AMDILIOExpansion.
+ bool
+ isImageIO(MachineInstr *MI);
+ virtual void
+ expandGlobalStore(MachineInstr *MI);
+ void
+ expandLocalStore(MachineInstr *MI);
+ void
+ expandRegionStore(MachineInstr *MI);
+ virtual void
+ expandGlobalLoad(MachineInstr *MI);
+ void
+ expandRegionLoad(MachineInstr *MI);
+ void
+ expandLocalLoad(MachineInstr *MI);
+ virtual bool
+ isCacheableOp(MachineInstr *MI); // New virtual introduced at this level (not declared in the visible base classes).
+ void
+ expandStoreSetupCode(MachineInstr *MI);
+ void
+ expandPackedData(MachineInstr *MI);
+ private:
+ bool
+ isArenaOp(MachineInstr *MI); // Private helpers; definitions are outside this header.
+ void
+ expandArenaSetup(MachineInstr *MI);
+ }; // class AMDILEGIOExpansion
+} // namespace llvm
+#endif // _AMDILIOEXPANSION_H_
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
new file mode 100644
index 00000000000..6534e3a1311
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,506 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
+// for SelectionDAG operations.
+//
+namespace {
+class AMDILDAGToDAGISel : public SelectionDAGISel { // Instances are created by llvm::createAMDILISelDag().
+ // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDILSubtarget &Subtarget; // Held by reference; bound in the constructor's initializer list.
+public:
+ AMDILDAGToDAGISel(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel);
+ virtual ~AMDILDAGToDAGISel();
+ inline SDValue getSmallIPtrImm(unsigned Imm); // Returns Imm as an i32 target constant.
+
+ SDNode *Select(SDNode *N); // Handles FrameIndex and atomic nodes, then defers to SelectCode().
+ // Complex pattern selectors
+ bool SelectADDR(
+#if LLVM_VERSION < 2500
+ SDNode *Op,
+#endif
+ SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(
+#if LLVM_VERSION < 2500
+ SDNode *Op,
+#endif
+ SDValue N, SDValue &R1, SDValue &R2);
+ static bool isGlobalStore(const StoreSDNode *N); // Store/load classifiers below are all based on check_type() of the source value.
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID); // NOTE: cbID is accepted but not read by the definition in this file.
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ virtual const char *getPassName() const;
+private:
+ SDNode *xformAtomicInst(SDNode *N); // Rebuilds atomic nodes with extra trailing operands; called from Select().
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDILGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into an AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM,
+                                       llvm::CodeGenOpt::Level OptLevel) {
+  // Allocate a fresh AMDIL instruction selector and return it through the
+  // generic FunctionPass interface.
+  FunctionPass *ISel = new AMDILDAGToDAGISel(TM, OptLevel);
+  return ISel;
+}
+
+AMDILDAGToDAGISel::AMDILDAGToDAGISel(AMDILTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel), Subtarget(TM.getSubtarget<AMDILSubtarget>())
+{ // All setup happens in the initializer list: forward to the base class and bind the subtarget reference.
+}
+
+AMDILDAGToDAGISel::~AMDILDAGToDAGISel() { // Empty body: nothing is released here.
+}
+
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+  // Materialize Imm as an i32 target constant in the DAG being selected.
+  SDValue ImmNode = CurDAG->getTargetConstant(Imm, MVT::i32);
+  return ImmNode;
+}
+
+bool AMDILDAGToDAGISel::SelectADDR(
+#if LLVM_VERSION < 2500
+    SDNode *N,
+#endif
+    SDValue Addr, SDValue& R1, SDValue& R2) {
+  // Splits Addr into two operands, R1 and R2, using i32 for any constant
+  // created here. Returns false for target external symbols and target global
+  // addresses, true for everything else.
+  switch (Addr.getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetGlobalAddress:
+    return false;
+
+  case ISD::ADD:
+    // An ADD already provides both operands.
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+
+  case ISD::FrameIndex:
+    // Frame indices are rewritten to their target form when the node really
+    // is a FrameIndexSDNode; otherwise the address is passed through as-is.
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    } else {
+      R1 = Addr;
+    }
+    break;
+
+  default:
+    R1 = Addr;
+    break;
+  }
+  // Every path that reaches this point pairs R1 with a zero second operand.
+  R2 = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+bool AMDILDAGToDAGISel::SelectADDR64(
+#if LLVM_VERSION < 2500
+    SDNode *N,
+#endif
+    SDValue Addr, SDValue& R1, SDValue& R2) {
+  // Same splitting as SelectADDR, but any frame index or constant created here
+  // uses i64. Returns false for target external symbols and target global
+  // addresses, true for everything else.
+  switch (Addr.getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetGlobalAddress:
+    return false;
+
+  case ISD::ADD:
+    // An ADD already provides both operands.
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+
+  case ISD::FrameIndex:
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+    } else {
+      R1 = Addr;
+    }
+    break;
+
+  default:
+    R1 = Addr;
+    break;
+  }
+  // Every path that reaches this point pairs R1 with a zero second operand.
+  R2 = CurDAG->getTargetConstant(0, MVT::i64);
+  return true;
+}
+
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
+  const unsigned int Opc = N->getOpcode();
+
+  // A node that already has a machine opcode has been selected.
+  if (N->isMachineOpcode()) {
+    return NULL;
+  }
+
+  // Frame indices are turned into a MOVE_i32 of the target frame index.
+  if (Opc == ISD::FrameIndex) {
+    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+      unsigned int FI = FIN->getIndex();
+      EVT OpVT = N->getValueType(0);
+      unsigned int NewOpc = AMDIL::MOVE_i32;
+      SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+    }
+  }
+
+  // For all atomic instructions, we need to add a constant
+  // operand that stores the resource ID in the instruction.
+  // The range test is exclusive on both ends.
+  const bool InAtomicRange =
+      Opc > AMDILISD::ADDADDR && Opc < AMDILISD::APPEND_ALLOC;
+  if (InAtomicRange) {
+    N = xformAtomicInst(N);
+  }
+
+  // Everything else goes through the generated matcher.
+  return SelectCode(N);
+}
+
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+  // True when check_type() matches the store's source value against the
+  // global address space.
+  const bool InGlobalAS = check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+  return InGlobalAS;
+}
+
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+  // Private is the fallback: a store is private when check_type() matches
+  // none of the local, global or region address spaces (tested in that order).
+  return !(check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+           || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+           || check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+}
+
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+  // True when check_type() matches the store's source value against the
+  // local address space.
+  const bool InLocalAS = check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+  return InLocalAS;
+}
+
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+  // True when check_type() matches the store's source value against the
+  // region address space.
+  const bool InRegionAS = check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  return InRegionAS;
+}
+
+// Returns true when the load N should be treated as a constant load:
+//  - its source value is in the constant address space, or
+//  - its memory operand refers to a GlobalValue (directly, or through the
+//    base pointer found by getBasePointerValue()) and its source value is in
+//    the private address space.
+// cbID is part of the interface but is not consulted here.
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+  if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+    return true;
+  }
+
+  // Validate the memory operand and its value before touching them; the
+  // previous code called MMO->getValue() ahead of its own null check.
+  const MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO) {
+    return false;
+  }
+  const Value *V = MMO->getValue();
+  if (!V) {
+    return false;
+  }
+
+  // isa<> is the idiomatic boolean type test; the cast result was never used.
+  const Value *BV = getBasePointerValue(V);
+  const bool BackedByGlobal =
+      isa<GlobalValue>(V) || (BV && isa<GlobalValue>(BV));
+  if (!BackedByGlobal) {
+    return false;
+  }
+  return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+  // True when check_type() matches the load's source value against the
+  // global address space.
+  const bool InGlobalAS = check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+  return InGlobalAS;
+}
+
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+  // True when check_type() matches the load's source value against the
+  // local address space.
+  const bool InLocalAS = check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+  return InLocalAS;
+}
+
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+  // True when check_type() matches the load's source value against the
+  // region address space.
+  const bool InRegionAS = check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  return InRegionAS;
+}
+
+// Returns true when N is a private-address-space load whose memory operand
+// value is the constant-pool pseudo source value.
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+  if (!check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+    return false;
+  }
+  const MachineMemOperand *MMO = N->getMemOperand();
+  if (!MMO) {
+    return false;
+  }
+  // The memory operand's value may be null; plain dyn_cast<> asserts on a
+  // null pointer, so use the null-tolerant form.
+  const PseudoSourceValue *PSV =
+      dyn_cast_or_null<PseudoSourceValue>(MMO->getValue());
+  return PSV && PSV == PseudoSourceValue::getConstantPool();
+}
+
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+  // A load tagged private is still rejected if it is really a constant pool
+  // load or a constant load that was marked as private.
+  const bool TaggedPrivate =
+      check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+  if (TaggedPrivate && (isCPLoad(N) || isConstantLoad(N, -1))) {
+    return false;
+  }
+
+  // Otherwise the load is private exactly when it belongs to none of the
+  // local, global, region or constant address spaces.
+  const bool InOtherSpace =
+      check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+      || check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+      || check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+      || check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS);
+  return !InOtherSpace;
+}
+
+const char *AMDILDAGToDAGISel::getPassName() const {
+  // Human-readable name of this pass.
+  static const char PassName[] = "AMDIL DAG->DAG Pattern Instruction Selection";
+  return PassName;
+}
+
+// Rebuilds an atomic node so that it carries the extra trailing operands the
+// rest of selection expects.
+//
+// - Opcodes that are not listed in the switch below are returned unchanged.
+// - Every listed atomic gets a trailing i32 target constant 0 appended.
+// - INC/DEC atomics additionally get one value operand appended before that
+//   constant. When the subtarget's calVersion() is at least
+//   CAL_VERSION_SC_136 the opcode is kept and the operand is 0xFFFFFFFF
+//   (NOTE(review): presumably the hardware inc/dec limit -- confirm);
+//   otherwise the node is rewritten to the matching ADD/SUB with operand 1.
+//
+// Returns the newly created memory-intrinsic node.
+SDNode*
+AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
+{
+  uint32_t addVal = 1;
+  bool addOne = false;
+  unsigned opc = N->getOpcode();
+  // ADD/SUB opcode an INC/DEC is lowered to on older CAL versions.
+  unsigned fallbackOpc = opc;
+  switch (opc) {
+    default: return N;
+    case AMDILISD::ATOM_G_ADD:
+    case AMDILISD::ATOM_G_AND:
+    case AMDILISD::ATOM_G_MAX:
+    case AMDILISD::ATOM_G_UMAX:
+    case AMDILISD::ATOM_G_MIN:
+    case AMDILISD::ATOM_G_UMIN:
+    case AMDILISD::ATOM_G_OR:
+    case AMDILISD::ATOM_G_SUB:
+    case AMDILISD::ATOM_G_RSUB:
+    case AMDILISD::ATOM_G_XCHG:
+    case AMDILISD::ATOM_G_XOR:
+    case AMDILISD::ATOM_G_ADD_NORET:
+    case AMDILISD::ATOM_G_AND_NORET:
+    case AMDILISD::ATOM_G_MAX_NORET:
+    case AMDILISD::ATOM_G_UMAX_NORET:
+    case AMDILISD::ATOM_G_MIN_NORET:
+    case AMDILISD::ATOM_G_UMIN_NORET:
+    case AMDILISD::ATOM_G_OR_NORET:
+    case AMDILISD::ATOM_G_SUB_NORET:
+    case AMDILISD::ATOM_G_RSUB_NORET:
+    case AMDILISD::ATOM_G_XCHG_NORET:
+    case AMDILISD::ATOM_G_XOR_NORET:
+    case AMDILISD::ATOM_L_ADD:
+    case AMDILISD::ATOM_L_AND:
+    case AMDILISD::ATOM_L_MAX:
+    case AMDILISD::ATOM_L_UMAX:
+    case AMDILISD::ATOM_L_MIN:
+    case AMDILISD::ATOM_L_UMIN:
+    case AMDILISD::ATOM_L_OR:
+    case AMDILISD::ATOM_L_SUB:
+    case AMDILISD::ATOM_L_RSUB:
+    case AMDILISD::ATOM_L_XCHG:
+    case AMDILISD::ATOM_L_XOR:
+    case AMDILISD::ATOM_L_ADD_NORET:
+    case AMDILISD::ATOM_L_AND_NORET:
+    case AMDILISD::ATOM_L_MAX_NORET:
+    case AMDILISD::ATOM_L_UMAX_NORET:
+    case AMDILISD::ATOM_L_MIN_NORET:
+    case AMDILISD::ATOM_L_UMIN_NORET:
+    case AMDILISD::ATOM_L_OR_NORET:
+    case AMDILISD::ATOM_L_SUB_NORET:
+    case AMDILISD::ATOM_L_RSUB_NORET:
+    case AMDILISD::ATOM_L_XCHG_NORET:
+    case AMDILISD::ATOM_L_XOR_NORET:
+    case AMDILISD::ATOM_R_ADD:
+    case AMDILISD::ATOM_R_AND:
+    case AMDILISD::ATOM_R_MAX:
+    case AMDILISD::ATOM_R_UMAX:
+    case AMDILISD::ATOM_R_MIN:
+    case AMDILISD::ATOM_R_UMIN:
+    case AMDILISD::ATOM_R_OR:
+    case AMDILISD::ATOM_R_SUB:
+    case AMDILISD::ATOM_R_RSUB:
+    case AMDILISD::ATOM_R_XCHG:
+    case AMDILISD::ATOM_R_XOR:
+    case AMDILISD::ATOM_R_ADD_NORET:
+    case AMDILISD::ATOM_R_AND_NORET:
+    case AMDILISD::ATOM_R_MAX_NORET:
+    case AMDILISD::ATOM_R_UMAX_NORET:
+    case AMDILISD::ATOM_R_MIN_NORET:
+    case AMDILISD::ATOM_R_UMIN_NORET:
+    case AMDILISD::ATOM_R_OR_NORET:
+    case AMDILISD::ATOM_R_SUB_NORET:
+    case AMDILISD::ATOM_R_RSUB_NORET:
+    case AMDILISD::ATOM_R_XCHG_NORET:
+    case AMDILISD::ATOM_R_XOR_NORET:
+    case AMDILISD::ATOM_G_CMPXCHG:
+    case AMDILISD::ATOM_G_CMPXCHG_NORET:
+    case AMDILISD::ATOM_L_CMPXCHG:
+    case AMDILISD::ATOM_L_CMPXCHG_NORET:
+    case AMDILISD::ATOM_R_CMPXCHG:
+    case AMDILISD::ATOM_R_CMPXCHG_NORET:
+      break;
+    case AMDILISD::ATOM_G_DEC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_G_SUB;
+      break;
+    case AMDILISD::ATOM_G_INC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_G_ADD;
+      break;
+    case AMDILISD::ATOM_G_DEC_NORET:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_G_SUB_NORET;
+      break;
+    case AMDILISD::ATOM_G_INC_NORET:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_G_ADD_NORET;
+      break;
+    case AMDILISD::ATOM_L_DEC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_L_SUB;
+      break;
+    case AMDILISD::ATOM_L_INC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_L_ADD;
+      break;
+    case AMDILISD::ATOM_L_DEC_NORET:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_L_SUB_NORET;
+      break;
+    case AMDILISD::ATOM_L_INC_NORET:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_L_ADD_NORET;
+      break;
+    case AMDILISD::ATOM_R_DEC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_R_SUB;
+      break;
+    case AMDILISD::ATOM_R_INC:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_R_ADD;
+      break;
+    case AMDILISD::ATOM_R_DEC_NORET:
+      addOne = true;
+      // Must stay a no-return opcode; this used to fall back to the
+      // value-returning ATOM_R_SUB, unlike every other *_NORET case.
+      fallbackOpc = AMDILISD::ATOM_R_SUB_NORET;
+      break;
+    case AMDILISD::ATOM_R_INC_NORET:
+      addOne = true;
+      fallbackOpc = AMDILISD::ATOM_R_ADD_NORET;
+      break;
+  }
+  if (addOne) {
+    // The version test is identical for all INC/DEC opcodes, so it is done
+    // once here instead of in each case.
+    if (Subtarget.calVersion() >= CAL_VERSION_SC_136) {
+      addVal = (uint32_t)-1;
+    } else {
+      opc = fallbackOpc;
+    }
+  }
+  // The largest we can have is a cmpxchg w/ a return value and an output chain.
+  // The cmpxchg function has 3 inputs and a single output along with an
+  // output chain and a target constant, giving a total of 6.
+  SDValue Ops[12];
+  unsigned x = 0;
+  unsigned y = N->getNumOperands();
+  // Up to two extra operands are appended below; make sure they fit.
+  assert(y + 2 <= sizeof(Ops) / sizeof(Ops[0])
+         && "Atomic node has too many operands for the operand buffer!");
+  for (x = 0; x < y; ++x) {
+    Ops[x] = N->getOperand(x);
+  }
+  if (addOne) {
+    Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0);
+  }
+  Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32);
+  SDVTList Tys = N->getVTList();
+  MemSDNode *MemNode = dyn_cast<MemSDNode>(N);
+  assert(MemNode && "Atomic should be of MemSDNode type!");
+  N = CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x,
+      MemNode->getMemoryVT(), MemNode->getMemOperand()).getNode();
+  return N;
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
new file mode 100644
index 00000000000..ffa23a54fde
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -0,0 +1,5754 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILLLVMPC.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+#if LLVM_VERSION >= 2500
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+#else
+#define ISDBITCAST ISD::BIT_CONVERT
+#define MVTGLUE MVT::Flag
+#endif
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDILGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+// getConversionNode - Build the DAG node(s) that convert Src to the scalar
+// type of Dst, and return the converted value.
+//
+//   DAG    - selection DAG the new nodes are created in.
+//   Src    - value to convert; it is passed by reference and is overwritten
+//            with the converted value.
+//   Dst    - only its value type is consulted.
+//   asType - when true, int<->fp conversions are emitted as a bitcast
+//            instead of a SINT_TO_FP / FP_TO_SINT value conversion. It has
+//            no effect on fp->fp or int->int conversions.
+static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+  DebugLoc DL = Src.getDebugLoc();
+  EVT svt = Src.getValueType().getScalarType();
+  EVT dvt = Dst.getValueType().getScalarType();
+  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+    // fp -> fp: widen or narrow; equal widths need no node at all.
+    if (dvt.bitsGT(svt)) {
+      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+    } else if (dvt.bitsLT(svt)) {
+      // The destination is narrower than the source. (This used to compare
+      // svt against itself, which is never true, so no FP_ROUND was ever
+      // generated.)
+      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+          DAG.getConstant(1, MVT::i32));
+    }
+  } else if (svt.isInteger() && dvt.isInteger()) {
+    // int -> int: sign-extend/truncate, or a plain move for equal widths.
+    if (!svt.bitsEq(dvt)) {
+      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+    } else {
+      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
+    }
+  } else if (svt.isInteger()) {
+    // int -> fp: first bring the integer to the width of the fp type.
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+    if (!svt.bitsEq(dvt)) {
+      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+      } else {
+        assert(0 && "We only support 32 and 64bit fp types");
+      }
+    }
+    Src = DAG.getNode(opcode, DL, dvt, Src);
+  } else if (dvt.isInteger()) {
+    // fp -> int: convert to the same-width integer, then resize it.
+    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+    } else {
+      assert(0 && "We only support 32 and 64bit fp types");
+    }
+    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+  }
+  return Src;
+}
+// selectCondCodeForType - Pick the AMDIL condition code that matches the
+// operand type: i1/i8/i16/i32 use intCC, i64 uses longCC, f32 uses fltCC and
+// f64 uses dblCC. Passing AMDILCC::COND_ERROR for a slot marks that type as
+// invalid for the comparison being translated; such a type, like any type
+// not listed here, trips the assert and yields AMDILCC::COND_ERROR.
+static AMDILCC::CondCodes
+selectCondCodeForType(const MVT::SimpleValueType& type,
+    AMDILCC::CondCodes intCC, AMDILCC::CondCodes longCC,
+    AMDILCC::CondCodes fltCC, AMDILCC::CondCodes dblCC)
+{
+  AMDILCC::CondCodes result = AMDILCC::COND_ERROR;
+  switch (type) {
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      result = intCC;
+      break;
+    case MVT::i64:
+      result = longCC;
+      break;
+    case MVT::f32:
+      result = fltCC;
+      break;
+    case MVT::f64:
+      result = dblCC;
+      break;
+    default:
+      break;
+  }
+  assert(result != AMDILCC::COND_ERROR
+      && "Opcode combination not generated correctly!");
+  return result;
+}
+
+// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC condition.
+//
+//   CC   - the ISD condition code to translate.
+//   type - simple value type of the compared operands; it selects between
+//          the integer, 64-bit integer, float and double variants.
+//
+// Returns the matching AMDILCC condition code. An unknown condition code or
+// an unsupported code/type pairing asserts and returns AMDILCC::COND_ERROR.
+static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+  // Placeholder for "no variant of this comparison exists for that type".
+  const AMDILCC::CondCodes NA = AMDILCC::COND_ERROR;
+  switch (CC) {
+    default:
+      errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+      assert(0 && "Unknown condition code!");
+      // Without this return a build with asserts disabled would fall
+      // through into the SETO case below.
+      return AMDILCC::COND_ERROR;
+    // Ordered / unordered tests: floating point only.
+    case ISD::SETO:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_O, AMDILCC::IL_CC_D_O);
+    case ISD::SETUO:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_UO, AMDILCC::IL_CC_D_UO);
+    // Comparisons with integer, 64-bit integer, float and double variants.
+    case ISD::SETGT:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_GT, AMDILCC::IL_CC_L_GT,
+          AMDILCC::IL_CC_F_GT, AMDILCC::IL_CC_D_GT);
+    case ISD::SETGE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_GE, AMDILCC::IL_CC_L_GE,
+          AMDILCC::IL_CC_F_GE, AMDILCC::IL_CC_D_GE);
+    case ISD::SETLT:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_LT, AMDILCC::IL_CC_L_LT,
+          AMDILCC::IL_CC_F_LT, AMDILCC::IL_CC_D_LT);
+    case ISD::SETLE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_LE, AMDILCC::IL_CC_L_LE,
+          AMDILCC::IL_CC_F_LE, AMDILCC::IL_CC_D_LE);
+    case ISD::SETNE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_NE, AMDILCC::IL_CC_L_NE,
+          AMDILCC::IL_CC_F_NE, AMDILCC::IL_CC_D_NE);
+    case ISD::SETEQ:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_I_EQ, AMDILCC::IL_CC_L_EQ,
+          AMDILCC::IL_CC_F_EQ, AMDILCC::IL_CC_D_EQ);
+    case ISD::SETUGT:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_GT, AMDILCC::IL_CC_UL_GT,
+          AMDILCC::IL_CC_F_UGT, AMDILCC::IL_CC_D_UGT);
+    case ISD::SETUGE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_GE, AMDILCC::IL_CC_UL_GE,
+          AMDILCC::IL_CC_F_UGE, AMDILCC::IL_CC_D_UGE);
+    case ISD::SETULT:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_LT, AMDILCC::IL_CC_UL_LT,
+          AMDILCC::IL_CC_F_ULT, AMDILCC::IL_CC_D_ULT);
+    case ISD::SETULE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_LE, AMDILCC::IL_CC_UL_LE,
+          AMDILCC::IL_CC_F_ULE, AMDILCC::IL_CC_D_ULE);
+    case ISD::SETUNE:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_NE, AMDILCC::IL_CC_UL_NE,
+          AMDILCC::IL_CC_F_UNE, AMDILCC::IL_CC_D_UNE);
+    case ISD::SETUEQ:
+      return selectCondCodeForType(type,
+          AMDILCC::IL_CC_U_EQ, AMDILCC::IL_CC_UL_EQ,
+          AMDILCC::IL_CC_F_UEQ, AMDILCC::IL_CC_D_UEQ);
+    // Explicitly ordered comparisons: floating point only.
+    case ISD::SETOGT:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_OGT, AMDILCC::IL_CC_D_OGT);
+    case ISD::SETOGE:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_OGE, AMDILCC::IL_CC_D_OGE);
+    case ISD::SETOLT:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_OLT, AMDILCC::IL_CC_D_OLT);
+    case ISD::SETOLE:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_OLE, AMDILCC::IL_CC_D_OLE);
+    case ISD::SETONE:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_ONE, AMDILCC::IL_CC_D_ONE);
+    case ISD::SETOEQ:
+      return selectCondCodeForType(type, NA, NA,
+          AMDILCC::IL_CC_F_OEQ, AMDILCC::IL_CC_D_OEQ);
+  };
+}
+
+// selectIntCmpOpcode - Choose between the scalar, 2-wide and 4-wide form of
+// a 32-bit integer compare opcode based on the register class. The i8 and
+// i16 classes of each width use the same opcode as the i32 class of that
+// width. An unrecognised register class asserts and returns 0.
+static unsigned int
+selectIntCmpOpcode(unsigned int regClass, unsigned int scalarOp,
+    unsigned int v2Op, unsigned int v4Op)
+{
+  if (regClass == AMDIL::GPRI32RegClassID
+      || regClass == AMDIL::GPRI8RegClassID
+      || regClass == AMDIL::GPRI16RegClassID) {
+    return scalarOp;
+  }
+  if (regClass == AMDIL::GPRV2I32RegClassID
+      || regClass == AMDIL::GPRV2I8RegClassID
+      || regClass == AMDIL::GPRV2I16RegClassID) {
+    return v2Op;
+  }
+  if (regClass == AMDIL::GPRV4I32RegClassID
+      || regClass == AMDIL::GPRV4I8RegClassID
+      || regClass == AMDIL::GPRV4I16RegClassID) {
+    return v4Op;
+  }
+  assert(!"Unknown reg class!");
+  return 0;
+}
+
+// translateToOpcode - Map an AMDILCC condition code and a register class ID
+// to the AMDIL compare opcode to emit.
+//
+//   CCCode   - an AMDILCC::IL_CC_* condition code.
+//   regClass - register class ID of the compare; it selects the vector form
+//              of the opcode where one is listed.
+//
+// Several mirrored condition codes share one opcode (for example LE and GE
+// both map to the GE opcode). The codes in the final group of the switch
+// return 0. An unknown condition code prints a diagnostic, asserts and
+// returns 0.
+static unsigned int
+translateToOpcode(uint64_t CCCode, unsigned int regClass)
+{
+  switch (CCCode) {
+    // ---- double ----
+    case AMDILCC::IL_CC_D_EQ:
+    case AMDILCC::IL_CC_D_OEQ:
+      if (regClass == AMDIL::GPRV2F64RegClassID) {
+        return (unsigned int)AMDIL::DEQ_v2f64;
+      }
+      return (unsigned int)AMDIL::DEQ;
+    case AMDILCC::IL_CC_D_LE:
+    case AMDILCC::IL_CC_D_OLE:
+    case AMDILCC::IL_CC_D_ULE:
+    case AMDILCC::IL_CC_D_GE:
+    case AMDILCC::IL_CC_D_OGE:
+    case AMDILCC::IL_CC_D_UGE:
+      return (unsigned int)AMDIL::DGE;
+    case AMDILCC::IL_CC_D_LT:
+    case AMDILCC::IL_CC_D_OLT:
+    case AMDILCC::IL_CC_D_ULT:
+    case AMDILCC::IL_CC_D_GT:
+    case AMDILCC::IL_CC_D_OGT:
+    case AMDILCC::IL_CC_D_UGT:
+      return (unsigned int)AMDIL::DLT;
+    case AMDILCC::IL_CC_D_NE:
+    case AMDILCC::IL_CC_D_UNE:
+      return (unsigned int)AMDIL::DNE;
+    // ---- float ----
+    case AMDILCC::IL_CC_F_EQ:
+    case AMDILCC::IL_CC_F_OEQ:
+      return (unsigned int)AMDIL::FEQ;
+    case AMDILCC::IL_CC_F_LE:
+    case AMDILCC::IL_CC_F_ULE:
+    case AMDILCC::IL_CC_F_OLE:
+    case AMDILCC::IL_CC_F_GE:
+    case AMDILCC::IL_CC_F_UGE:
+    case AMDILCC::IL_CC_F_OGE:
+      return (unsigned int)AMDIL::FGE;
+    case AMDILCC::IL_CC_F_LT:
+    case AMDILCC::IL_CC_F_OLT:
+    case AMDILCC::IL_CC_F_ULT:
+    case AMDILCC::IL_CC_F_GT:
+    case AMDILCC::IL_CC_F_OGT:
+    case AMDILCC::IL_CC_F_UGT:
+      if (regClass == AMDIL::GPRV2F32RegClassID) {
+        return (unsigned int)AMDIL::FLT_v2f32;
+      }
+      if (regClass == AMDIL::GPRV4F32RegClassID) {
+        return (unsigned int)AMDIL::FLT_v4f32;
+      }
+      return (unsigned int)AMDIL::FLT;
+    case AMDILCC::IL_CC_F_NE:
+    case AMDILCC::IL_CC_F_UNE:
+      return (unsigned int)AMDIL::FNE;
+    // ---- 32-bit (and narrower) integers, scalar or vector ----
+    case AMDILCC::IL_CC_I_EQ:
+    case AMDILCC::IL_CC_U_EQ:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::IEQ, AMDIL::IEQ_v2i32, AMDIL::IEQ_v4i32);
+    case AMDILCC::IL_CC_I_GE:
+    case AMDILCC::IL_CC_I_LE:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::IGE, AMDIL::IGE_v2i32, AMDIL::IGE_v4i32);
+    case AMDILCC::IL_CC_I_LT:
+    case AMDILCC::IL_CC_I_GT:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::ILT, AMDIL::ILT_v2i32, AMDIL::ILT_v4i32);
+    case AMDILCC::IL_CC_I_NE:
+    case AMDILCC::IL_CC_U_NE:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::INE, AMDIL::INE_v2i32, AMDIL::INE_v4i32);
+    case AMDILCC::IL_CC_U_GE:
+    case AMDILCC::IL_CC_U_LE:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::UGE, AMDIL::UGE_v2i32, AMDIL::UGE_v4i32);
+    case AMDILCC::IL_CC_U_LT:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::ULT, AMDIL::ULT_v2i32, AMDIL::ULT_v4i32);
+    case AMDILCC::IL_CC_U_GT:
+      return selectIntCmpOpcode(regClass,
+          AMDIL::UGT, AMDIL::UGT_v2i32, AMDIL::UGT_v4i32);
+    // ---- 64-bit integers ----
+    case AMDILCC::IL_CC_L_EQ:
+    case AMDILCC::IL_CC_UL_EQ:
+      return (unsigned int)AMDIL::LEQ;
+    case AMDILCC::IL_CC_L_NE:
+    case AMDILCC::IL_CC_UL_NE:
+      return (unsigned int)AMDIL::LNE;
+    case AMDILCC::IL_CC_L_GE:
+      return (unsigned int)AMDIL::LGE;
+    case AMDILCC::IL_CC_L_LE:
+      return (unsigned int)AMDIL::LLE;
+    case AMDILCC::IL_CC_L_LT:
+      return (unsigned int)AMDIL::LLT;
+    case AMDILCC::IL_CC_L_GT:
+      return (unsigned int)AMDIL::LGT;
+    case AMDILCC::IL_CC_UL_GE:
+      return (unsigned int)AMDIL::ULGE;
+    case AMDILCC::IL_CC_UL_LE:
+      return (unsigned int)AMDIL::ULLE;
+    case AMDILCC::IL_CC_UL_LT:
+      return (unsigned int)AMDIL::ULLT;
+    case AMDILCC::IL_CC_UL_GT:
+      return (unsigned int)AMDIL::ULGT;
+    // ---- codes for which no single opcode is selected here ----
+    case AMDILCC::IL_CC_F_UEQ:
+    case AMDILCC::IL_CC_D_UEQ:
+    case AMDILCC::IL_CC_F_ONE:
+    case AMDILCC::IL_CC_D_ONE:
+    case AMDILCC::IL_CC_F_O:
+    case AMDILCC::IL_CC_F_UO:
+    case AMDILCC::IL_CC_D_O:
+    case AMDILCC::IL_CC_D_UO:
+      // we don't care
+      return 0;
+
+  }
+  errs()<<"Opcode: "<<CCCode<<"\n";
+  assert(0 && "Unknown opcode retrieved");
+  return 0;
+}
+// LowerMemArgument - Create the fixed stack object for incoming argument
+// number i and return either its address (byval arguments) or a load of its
+// value from that slot.
+//
+//   Chain    - chain the load is attached to.
+//   CallConv - calling convention of the function being lowered.
+//   Ins      - incoming argument descriptions; only Ins[i].Flags is read.
+//   dl       - debug location for the load.
+//   DAG      - selection DAG the nodes are created in.
+//   VA       - location assignment of the argument (value type and offset).
+//   MFI      - frame info that receives the fixed object.
+//   i        - index of the argument within Ins.
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+    SDValue Chain,
+    CallingConv::ID CallConv,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl, SelectionDAG &DAG,
+    const CCValAssign &VA,
+    MachineFrameInfo *MFI,
+    unsigned i) const
+{
+  ISD::ArgFlagsTy argFlags = Ins[i].Flags;
+  const bool passedByVal = argFlags.isByVal();
+
+  // In case of tail call optimization mark all arguments mutable, since they
+  // could be overwritten by lowering of arguments in case of a tail call.
+  const bool forceMutable =
+    GuaranteedTailCallOpt && (CallConv == CallingConv::Fast);
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This can
+  // be changed with more analysis.
+  const bool slotIsImmutable = !(forceMutable || passedByVal);
+
+  // Reserve a fixed slot that is as many bytes wide as the argument's value
+  // type, located at the offset the calling convention assigned to it.
+  const unsigned slotBytes = VA.getValVT().getSizeInBits() / 8;
+  int frameIdx = MFI->CreateFixedObject(slotBytes,
+      VA.getLocMemOffset(), slotIsImmutable
+#if LLVM_VERSION < 2500
+      , false
+#endif
+      );
+  SDValue slotAddr = DAG.getFrameIndex(frameIdx, getPointerTy());
+
+  // A byval argument is represented by the address of its slot.
+  if (passedByVal) {
+    return slotAddr;
+  }
+
+  // Everything else is loaded from the slot.
+  return DAG.getLoad(VA.getValVT(), dl, Chain, slotAddr,
+      MachinePointerInfo::getFixedStack(frameIdx),
+      false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction generation functions
+//===----------------------------------------------------------------------===//
+// addExtensionInstructions - For the 8-bit and 16-bit integer register
+// classes, emit a shift-left followed by a shift-right of `reg` by 24
+// (8-bit classes) or 16 (16-bit classes) and return the virtual register
+// holding the result.
+//
+//   reg         - register holding the value to extend.
+//   signedShift - selects the SHR_* opcode for the right shift when true and
+//                 the USHR_* opcode when false.
+//   simpleVT    - despite the name, this is compared against register class
+//                 IDs (AMDIL::GPR*RegClassID).
+//
+// For any other register class no instruction is emitted and `reg` is
+// returned unchanged.
+uint32_t
+AMDILTargetLowering::addExtensionInstructions(
+    uint32_t reg, bool signedShift,
+    unsigned int simpleVT) const
+{
+  int shiftBits = 0;
+  uint32_t shlOpc = 0;
+  uint32_t shrOpc = 0;
+  switch (simpleVT) {
+    case AMDIL::GPRI8RegClassID:
+      shiftBits = 24;
+      shlOpc = AMDIL::SHL_i8;
+      shrOpc = signedShift ? AMDIL::SHR_i8 : AMDIL::USHR_i8;
+      break;
+    case AMDIL::GPRV2I8RegClassID:
+      shiftBits = 24;
+      shlOpc = AMDIL::SHL_v2i8;
+      shrOpc = signedShift ? AMDIL::SHR_v2i8 : AMDIL::USHR_v2i8;
+      break;
+    case AMDIL::GPRV4I8RegClassID:
+      shiftBits = 24;
+      shlOpc = AMDIL::SHL_v4i8;
+      shrOpc = signedShift ? AMDIL::SHR_v4i8 : AMDIL::USHR_v4i8;
+      break;
+    case AMDIL::GPRI16RegClassID:
+      shiftBits = 16;
+      shlOpc = AMDIL::SHL_i16;
+      shrOpc = signedShift ? AMDIL::SHR_i16 : AMDIL::USHR_i16;
+      break;
+    case AMDIL::GPRV2I16RegClassID:
+      shiftBits = 16;
+      shlOpc = AMDIL::SHL_v2i16;
+      shrOpc = signedShift ? AMDIL::SHR_v2i16 : AMDIL::USHR_v2i16;
+      break;
+    case AMDIL::GPRV4I16RegClassID:
+      shiftBits = 16;
+      shlOpc = AMDIL::SHL_v4i16;
+      shrOpc = signedShift ? AMDIL::SHR_v4i16 : AMDIL::USHR_v4i16;
+      break;
+    default:
+      // Not one of the 8/16-bit classes handled above.
+      return reg;
+  }
+
+  // Three fresh registers of the same class: the shift amount, the value
+  // after the left shift, and the final value after the right shift.
+  uint32_t shiftAmtReg = genVReg(simpleVT);
+  uint32_t shiftedUpReg = genVReg(simpleVT);
+  uint32_t extendedReg = genVReg(simpleVT);
+  generateMachineInst(AMDIL::LOADCONST_i32, shiftAmtReg).addImm(shiftBits);
+  generateMachineInst(shlOpc, shiftedUpReg, reg, shiftAmtReg);
+  generateMachineInst(shrOpc, extendedReg, shiftedUpReg, shiftAmtReg);
+  return extendedReg;
+}
+
+// convertToReg - Return a register form of the given machine operand.
+//
+//   * A register operand is returned as is.
+//   * An integer or floating-point immediate is materialised with a
+//     LOADCONST_i32 / LOADCONST_f32 into a new virtual register, whose class
+//     is that of operand 0 of the operand's parent instruction; the operand
+//     is then changed to that register.
+//   * Basic-block, frame-index, constant-pool, jump-table, global and
+//     external-symbol operands are changed to register 0.
+//   * Any other operand kind is returned unmodified.
+//
+// The operand is taken by value, so the caller's operand is not touched.
+MachineOperand
+AMDILTargetLowering::convertToReg(MachineOperand op) const
+{
+  if (op.isReg()) {
+    return op;
+  }
+
+  if (op.isImm()) {
+    uint32_t constReg
+      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_i32, constReg)
+      .addImm(op.getImm());
+    op.ChangeToRegister(constReg, false);
+    return op;
+  }
+
+  if (op.isFPImm()) {
+    uint32_t constReg
+      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
+    generateMachineInst(AMDIL::LOADCONST_f32, constReg)
+      .addFPImm(op.getFPImm());
+    op.ChangeToRegister(constReg, false);
+    return op;
+  }
+
+  // Operand kinds that carry no loadable value are replaced by register 0.
+  // Metadata operands are not part of this list.
+  if (op.isMBB() || op.isFI() || op.isCPI()
+      || op.isJTI() || op.isGlobal() || op.isSymbol()) {
+    op.ChangeToRegister(0, false);
+  }
+  return op;
+}
+
+void
+AMDILTargetLowering::generateCMPInstr(
+ MachineInstr *MI,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo& TII)
+const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand CC = MI->getOperand(1);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ int64_t ccCode = CC.getImm();
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ unsigned int opCode = translateToOpcode(ccCode, simpleVT);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator BBI = MI;
+ setPrivateData(BB, BBI, &DL, &TII);
+ if (!LHS.isReg()) {
+ LHS = convertToReg(LHS);
+ }
+ if (!RHS.isReg()) {
+ RHS = convertToReg(RHS);
+ }
+ switch (ccCode) {
+ case AMDILCC::IL_CC_I_EQ:
+ case AMDILCC::IL_CC_I_NE:
+ case AMDILCC::IL_CC_I_GE:
+ case AMDILCC::IL_CC_I_LT:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_EQ:
+ case AMDILCC::IL_CC_U_NE:
+ case AMDILCC::IL_CC_U_GE:
+ case AMDILCC::IL_CC_U_LT:
+ case AMDILCC::IL_CC_D_EQ:
+ case AMDILCC::IL_CC_F_EQ:
+ case AMDILCC::IL_CC_F_OEQ:
+ case AMDILCC::IL_CC_D_OEQ:
+ case AMDILCC::IL_CC_D_NE:
+ case AMDILCC::IL_CC_F_NE:
+ case AMDILCC::IL_CC_F_UNE:
+ case AMDILCC::IL_CC_D_UNE:
+ case AMDILCC::IL_CC_D_GE:
+ case AMDILCC::IL_CC_F_GE:
+ case AMDILCC::IL_CC_D_OGE:
+ case AMDILCC::IL_CC_F_OGE:
+ case AMDILCC::IL_CC_D_LT:
+ case AMDILCC::IL_CC_F_LT:
+ case AMDILCC::IL_CC_F_OLT:
+ case AMDILCC::IL_CC_D_OLT:
+ generateMachineInst(opCode, DST.getReg(),
+ LHS.getReg(), RHS.getReg());
+ break;
+ case AMDILCC::IL_CC_I_GT:
+ case AMDILCC::IL_CC_I_LE:
+ {
+ uint32_t lhsreg = addExtensionInstructions(
+ LHS.getReg(), true, simpleVT);
+ uint32_t rhsreg = addExtensionInstructions(
+ RHS.getReg(), true, simpleVT);
+ generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
+ }
+ break;
+ case AMDILCC::IL_CC_U_GT:
+ case AMDILCC::IL_CC_U_LE:
+ case AMDILCC::IL_CC_F_GT:
+ case AMDILCC::IL_CC_D_GT:
+ case AMDILCC::IL_CC_F_OGT:
+ case AMDILCC::IL_CC_D_OGT:
+ case AMDILCC::IL_CC_F_LE:
+ case AMDILCC::IL_CC_D_LE:
+ case AMDILCC::IL_CC_D_OLE:
+ case AMDILCC::IL_CC_F_OLE:
+ generateMachineInst(opCode, DST.getReg(),
+ RHS.getReg(), LHS.getReg());
+ break;
+ case AMDILCC::IL_CC_F_UGT:
+ case AMDILCC::IL_CC_F_ULE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ULT:
+ case AMDILCC::IL_CC_F_UGE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGT:
+ case AMDILCC::IL_CC_D_ULE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ RHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UGE:
+ case AMDILCC::IL_CC_D_ULT:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(opCode, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UEQ:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_ONE:
+ {
+ uint32_t VReg[4] = {
+ genVReg(simpleVT), genVReg(simpleVT),
+ genVReg(simpleVT), genVReg(simpleVT)
+ };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UEQ:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_D_ONE:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[4] = {
+ genVReg(regID), genVReg(regID),
+ genVReg(regID), genVReg(regID)
+ };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ LHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[2],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ VReg[3], VReg[0], VReg[1]);
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[2], VReg[3]);
+
+ }
+ break;
+ case AMDILCC::IL_CC_F_O:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_O:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DEQ, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DEQ, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_AND_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_F_UO:
+ {
+ uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
+ generateMachineInst(AMDIL::FNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::FNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_D_UO:
+ {
+ uint32_t regID = AMDIL::GPRF64RegClassID;
+ uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
+ // The result of a double comparison is a 32bit result
+ generateMachineInst(AMDIL::DNE, VReg[0],
+ RHS.getReg(), RHS.getReg());
+ generateMachineInst(AMDIL::DNE, VReg[1],
+ LHS.getReg(), LHS.getReg());
+ generateMachineInst(AMDIL::BINARY_OR_f32,
+ DST.getReg(), VReg[0], VReg[1]);
+ }
+ break;
+ case AMDILCC::IL_CC_L_LE:
+ case AMDILCC::IL_CC_L_GE:
+ case AMDILCC::IL_CC_L_EQ:
+ case AMDILCC::IL_CC_L_NE:
+ case AMDILCC::IL_CC_L_LT:
+ case AMDILCC::IL_CC_L_GT:
+ case AMDILCC::IL_CC_UL_LE:
+ case AMDILCC::IL_CC_UL_GE:
+ case AMDILCC::IL_CC_UL_EQ:
+ case AMDILCC::IL_CC_UL_NE:
+ case AMDILCC::IL_CC_UL_LT:
+ case AMDILCC::IL_CC_UL_GT:
+ {
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
+ generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
+ } else {
+ generateLongRelational(MI, opCode);
+ }
+ }
+ break;
+ case AMDILCC::COND_ERROR:
+ assert(0 && "Invalid CC code");
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+// Builds the AMDIL TargetLowering: registers the register class backing each
+// supported value type and records, per (operation, type) pair, whether the
+// operation is Legal, Custom-lowered or Expanded.
+//
+// Several (operation, type) pairs are configured more than once below (for
+// example FP_ROUND is marked Expand for every type and then Custom for
+// f32/f64, and SETCC/SELECT/SELECT_CC go from Custom to Expand for the vector
+// types). Later calls are presumably meant to override earlier ones, so keep
+// the statement order intact when editing.
+ AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+: TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+  // Every value type that receives the common set of actions in the first
+  // loop below.
+  int types[] =
+  {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::f32,
+    (int)MVT::f64,
+    (int)MVT::i64,
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+
+  // Scalar integer types.
+  int IntTypes[] =
+  {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::i64
+  };
+
+  // Scalar floating point types.
+  int FloatTypes[] =
+  {
+    (int)MVT::f32,
+    (int)MVT::f64
+  };
+
+  // Vector types.
+  int VectorTypes[] =
+  {
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+  // Element counts of the tables above.
+  size_t numTypes = sizeof(types) / sizeof(*types);
+  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  // The subtarget's device() is queried below for optional feature support.
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+      &this->getTargetMachine())->getSubtargetImpl();
+
+  // These are the current register classes that are supported. The f64,
+  // 8-bit, 16-bit and 64-bit integer classes are only registered when the
+  // device reports support for them.
+  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
+  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
+
+  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
+    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
+  }
+  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
+    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
+    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
+    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
+    setOperationAction(ISD::Constant, MVT::i8, Legal);
+  }
+  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
+    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
+    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
+    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
+    setOperationAction(ISD::Constant, MVT::i16, Legal);
+  }
+  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
+  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
+  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
+  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
+  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
+    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
+  }
+
+  // Actions shared by every type in types[].
+  for (unsigned int x = 0; x < numTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+    // We cannot sextinreg, expand to shifts
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+    setOperationAction(ISD::OR, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Expand);
+    setOperationAction(ISD::SUBC, VT, Expand);
+    setOperationAction(ISD::ADD, VT, Custom);
+    setOperationAction(ISD::ADDE, VT, Expand);
+    setOperationAction(ISD::ADDC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::BRCOND, VT, Custom);
+    setOperationAction(ISD::BR_CC, VT, Custom);
+    setOperationAction(ISD::BR_JT, VT, Expand);
+    setOperationAction(ISD::BRIND, VT, Expand);
+    // TODO: Implement custom UREM/SREM routines
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+    setOperationAction(ISDBITCAST, VT, Custom);
+    setOperationAction(ISD::GlobalAddress, VT, Custom);
+    setOperationAction(ISD::JumpTable, VT, Custom);
+    setOperationAction(ISD::ConstantPool, VT, Custom);
+    setOperationAction(ISD::SELECT_CC, VT, Custom);
+    setOperationAction(ISD::SELECT, VT, Custom);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    // i64 and v2i64 are left out of the custom SDIV/UDIV lowering.
+    if (VT != MVT::i64 && VT != MVT::v2i64) {
+      setOperationAction(ISD::SDIV, VT, Custom);
+      setOperationAction(ISD::UDIV, VT, Custom);
+    }
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+  }
+
+  // Actions specific to the scalar floating point types.
+  for (unsigned int x = 0; x < numFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+    // IL does not have these operations for floating point types
+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+    setOperationAction(ISD::FP_ROUND, VT, Custom);
+    // NOTE(review): the ISD::SETO*/SETU* names below look like condition
+    // codes rather than node opcodes; upstream LLVM offers
+    // setCondCodeAction() for those. Confirm setOperationAction() is really
+    // what is intended before relying on these entries.
+    setOperationAction(ISD::SETOLT, VT, Expand);
+    setOperationAction(ISD::SETOGE, VT, Expand);
+    setOperationAction(ISD::SETOGT, VT, Expand);
+    setOperationAction(ISD::SETOLE, VT, Expand);
+    setOperationAction(ISD::SETULT, VT, Expand);
+    setOperationAction(ISD::SETUGE, VT, Expand);
+    setOperationAction(ISD::SETUGT, VT, Expand);
+    setOperationAction(ISD::SETULE, VT, Expand);
+  }
+
+  // Actions specific to the scalar integer types.
+  for (unsigned int x = 0; x < numIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // GPU also does not have divrem function for signed or unsigned
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+
+    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+    // GPU doesn't have a rotl, rotr, or byteswap instruction
+    setOperationAction(ISD::ROTR, VT, Expand);
+    setOperationAction(ISD::ROTL, VT, Expand);
+    setOperationAction(ISD::BSWAP, VT, Expand);
+
+    // GPU doesn't have any counting operators
+    setOperationAction(ISD::CTPOP, VT, Expand);
+    setOperationAction(ISD::CTTZ, VT, Expand);
+    setOperationAction(ISD::CTLZ, VT, Expand);
+  }
+
+  // Actions specific to the vector types; SETCC, SELECT_CC and SELECT are
+  // switched from the Custom setting of the first loop to Expand here.
+  for (unsigned int ii = 0; ii < numVectorTypes; ++ii)
+  {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+    setOperationAction(ISD::FP_ROUND, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+    setOperationAction(ISD::SELECT, VT, Expand);
+
+  }
+  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
+
+  // 64-bit integer configuration, only when the device supports LongOps.
+  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
+    // i64 MUL is custom lowered only for CAL versions older than SC_139 or
+    // on HD4XXX generation devices.
+    if (stm->calVersion() < CAL_VERSION_SC_139
+        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+      setOperationAction(ISD::MUL, MVT::i64, Custom);
+    }
+    setOperationAction(ISD::SUB, MVT::i64, Custom);
+    setOperationAction(ISD::ADD, MVT::i64, Custom);
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant, MVT::i64, Legal);
+    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  }
+
+  // Double precision configuration, only when the device supports DoubleOps.
+  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+    // we support loading/storing v2f64 but not operations on the type
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+    // We want to expand vector conversions into their scalar
+    // counterparts.
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+  }
+  // TODO: Fix the UDIV24 algorithm so it works for these
+  // types correctly. This needs vector comparisons
+  // for this to work correctly.
+  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SUBC, MVT::Other, Expand);
+  setOperationAction(ISD::ADDE, MVT::Other, Expand);
+  setOperationAction(ISD::ADDC, MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::SETCC, MVT::Other, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+  setOperationAction(ISD::FDIV, MVT::f32, Custom);
+  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
+  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
+
+  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+  // Use the default implementation.
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::Constant, MVT::i32, Legal);
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+  setStackPointerRegisterToSaveRestore(AMDIL::SP);
+  // LLVM_VERSION selects between two spellings of the scheduling preference;
+  // the older branch additionally sets the if-conversion block size limits
+  // and the shift amount type.
+#if LLVM_VERSION >= 2500
+  setSchedulingPreference(Sched::RegPressure);
+#else
+  setSchedulingPreference(SchedulingForRegPressure);
+  setIfCvtBlockSizeLimit(16);
+  setIfCvtDupBlockSizeLimit(16);
+  setShiftAmountType(MVT::i32);
+#endif
+  setPow2DivIsCheap(false);
+  setPrefLoopAlignment(16);
+  setSelectIsExpensive(true);
+  setJumpIsExpensive(true);
+  computeRegisterProperties();
+
+  maxStoresPerMemcpy = 4096;
+  maxStoresPerMemmove = 4096;
+  maxStoresPerMemset = 4096;
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
+ case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
+ case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
+ case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
+ case AMDILISD::CMOV: return "AMDILISD::CMOV";
+ case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
+ case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
+ case AMDILISD::MAD: return "AMDILISD::MAD";
+ case AMDILISD::UMAD: return "AMDILISD::UMAD";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::RET: return "AMDILISD::RET";
+ case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
+ case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
+ case AMDILISD::ADD: return "AMDILISD::ADD";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::AND: return "AMDILISD::AND";
+ case AMDILISD::OR: return "AMDILISD::OR";
+ case AMDILISD::NOT: return "AMDILISD::NOT";
+ case AMDILISD::XOR: return "AMDILISD::XOR";
+ case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
+ case AMDILISD::SMAX: return "AMDILISD::SMAX";
+ case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
+ case AMDILISD::MOVE: return "AMDILISD::MOVE";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
+ case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
+ case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
+ case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
+ case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
+ case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
+ case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
+ case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
+ case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
+ case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
+ case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
+ case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
+ case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
+ case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
+ case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
+ case AMDILISD::CMP: return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+ case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
+ case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
+ case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
+ case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
+ case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
+ case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
+ case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
+ case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
+ case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
+ case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
+ case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
+ case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
+ case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
+ case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
+ case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
+ case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
+ case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
+ case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
+ case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
+ case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
+ case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+ case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
+ case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
+ case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
+ case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
+ case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
+ case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
+ case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
+ case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
+ case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
+ case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
+ case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
+ case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
+ case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
+ case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
+ case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
+ case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
+ case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
+ case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
+ case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
+ case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
+ case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
+ case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
+ case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
+ case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
+ case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
+ case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
+ case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
+ case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+ case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
+ case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
+ case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
+ case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
+ case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
+ case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
+ case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
+ case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
+ case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
+ case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
+ case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
+ case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
+ case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
+ case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
+ case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
+ case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
+ case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
+ case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
+ case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
+ case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
+ case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
+ case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
+ case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
+ case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
+ case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
+ case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
+ case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
+ case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+ case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
+ case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
+ case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
+ case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
+ case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
+ case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
+ case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
+ case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
+ case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
+ case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
+ case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
+ case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
+ case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
+ case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
+ case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
+ case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
+ case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
+ case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
+ case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
+ case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
+ case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
+ case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
+ case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
+ case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
+
+ };
+}
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ if (Intrinsic <= AMDILIntrinsic::last_non_AMDIL_intrinsic
+ || Intrinsic > AMDILIntrinsic::num_AMDIL_intrinsics) {
+ return false;
+ }
+ bool bitCastToInt = false;
+ unsigned IntNo;
+ bool isRet = true;
+ const AMDILSubtarget *STM = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ switch (Intrinsic) {
+ default: return false; // Don't custom lower most intrinsics.
+ case AMDILIntrinsic::AMDIL_atomic_add_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu32:
+ IntNo = AMDILISD::ATOM_G_ADD; break;
+ case AMDILIntrinsic::AMDIL_atomic_add_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_add_lu32:
+ case AMDILIntrinsic::AMDIL_atomic_add_li32:
+ IntNo = AMDILISD::ATOM_L_ADD; break;
+ case AMDILIntrinsic::AMDIL_atomic_add_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_add_ru32:
+ case AMDILIntrinsic::AMDIL_atomic_add_ri32:
+ IntNo = AMDILISD::ATOM_R_ADD; break;
+ case AMDILIntrinsic::AMDIL_atomic_add_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_add_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu32:
+ IntNo = AMDILISD::ATOM_G_AND; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_AND_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_li32:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu32:
+ IntNo = AMDILISD::ATOM_L_AND; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_AND_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru32:
+ IntNo = AMDILISD::ATOM_R_AND; break;
+ case AMDILIntrinsic::AMDIL_atomic_and_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_and_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_AND_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32:
+ IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32:
+ IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32:
+ IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_li32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_dec_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_dec_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_DEC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_SUB_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_gu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_G_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_G_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_li32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_lu32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_L_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_L_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru32:
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_inc_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_inc_ru32_noret:
+ isRet = false;
+ if (STM->calVersion() >= CAL_VERSION_SC_136) {
+ IntNo = AMDILISD::ATOM_R_INC_NORET;
+ } else {
+ IntNo = AMDILISD::ATOM_R_ADD_NORET;
+ }
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gi32:
+ IntNo = AMDILISD::ATOM_G_MAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gu32:
+ IntNo = AMDILISD::ATOM_G_UMAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_li32:
+ IntNo = AMDILISD::ATOM_L_MAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_lu32:
+ IntNo = AMDILISD::ATOM_L_UMAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ri32:
+ IntNo = AMDILISD::ATOM_R_MAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ru32:
+ IntNo = AMDILISD::ATOM_R_UMAX; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_max_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gi32:
+ IntNo = AMDILISD::ATOM_G_MIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gu32:
+ IntNo = AMDILISD::ATOM_G_UMIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gi32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_li32:
+ IntNo = AMDILISD::ATOM_L_MIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_lu32:
+ IntNo = AMDILISD::ATOM_L_UMIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_li32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ri32:
+ IntNo = AMDILISD::ATOM_R_MIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ru32:
+ IntNo = AMDILISD::ATOM_R_UMIN; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ri32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_min_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu32:
+ IntNo = AMDILISD::ATOM_G_OR; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_OR_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_li32:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu32:
+ IntNo = AMDILISD::ATOM_L_OR; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_OR_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru32:
+ IntNo = AMDILISD::ATOM_R_OR; break;
+ case AMDILIntrinsic::AMDIL_atomic_or_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_or_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_OR_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu32:
+ IntNo = AMDILISD::ATOM_G_SUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_li32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu32:
+ IntNo = AMDILISD::ATOM_L_SUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru32:
+ IntNo = AMDILISD::ATOM_R_SUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_sub_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_sub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu32:
+ IntNo = AMDILISD::ATOM_G_RSUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu32:
+ IntNo = AMDILISD::ATOM_L_RSUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru32:
+ IntNo = AMDILISD::ATOM_R_RSUB; break;
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_rsub_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu32:
+ IntNo = AMDILISD::ATOM_G_XCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu32:
+ IntNo = AMDILISD::ATOM_L_XCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_rf32:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru32:
+ IntNo = AMDILISD::ATOM_R_XCHG; break;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_rf32_noret:
+ bitCastToInt = true;
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xchg_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu32:
+ IntNo = AMDILISD::ATOM_G_XOR; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_gi32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_gu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_li32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu32:
+ IntNo = AMDILISD::ATOM_L_XOR; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_li32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_lu32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri32:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru32:
+ IntNo = AMDILISD::ATOM_R_XOR; break;
+ case AMDILIntrinsic::AMDIL_atomic_xor_ri32_noret:
+ case AMDILIntrinsic::AMDIL_atomic_xor_ru32_noret:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
+ case AMDILIntrinsic::AMDIL_append_alloc_i32:
+ IntNo = AMDILISD::APPEND_ALLOC; break;
+ case AMDILIntrinsic::AMDIL_append_alloc_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
+ case AMDILIntrinsic::AMDIL_append_consume_i32:
+ IntNo = AMDILISD::APPEND_CONSUME; break;
+ case AMDILIntrinsic::AMDIL_append_consume_i32_noret:
+ isRet = false;
+ IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
+ };
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
+ stm->getKernelManager());
+ KM->setOutputInst();
+
+ Info.opc = IntNo;
+ Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
+#if LLVM_VERSION >= 2500
+ Info.ptrVal = I.getOperand(0);
+#else
+ Info.ptrVal = I.getOperand(1);
+#endif
+ Info.offset = 0;
+ Info.align = 4;
+ Info.vol = true;
+ Info.readMem = isRet;
+ Info.writeMem = true;
+ return true;
+}
+// Report whether a floating point immediate of type VT can be used directly.
+// Only the scalar element type of VT is inspected: f32 and f64 are accepted,
+// every other type is rejected.  The immediate value (Imm) is not consulted.
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+  switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+    case MVT::f32:
+    case MVT::f64:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Report whether a floating point constant of type VT should be shrunk.
+// Returns false when the scalar element type of VT is f32 or f64 and true
+// for every other type.
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+  const EVT ScalarVT = VT.getScalarType();
+  const bool IsF32 = ScalarVT.getSimpleVT().SimpleTy == MVT::f32;
+  const bool IsF64 = ScalarVT.getSimpleVT().SimpleTy == MVT::f64;
+  return !(IsF32 || IsF64);
+}
+
+
+// computeMaskedBitsForTargetNode - Determine which bits of the target
+// specific node Op are known to be zero or one and report them through
+// KnownZero / KnownOne.  Used by the DAG combiner.
+//
+// Only AMDILISD::SELECT_CC is analysed: a bit is reported as known only when
+// it is known, with the same value, for both operand 1 and operand 0.  For
+// every other opcode both outputs are returned cleared (nothing known).
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+    const SDValue Op,
+    const APInt &Mask,
+    APInt &KnownZero,
+    APInt &KnownOne,
+    const SelectionDAG &DAG,
+    unsigned Depth) const
+{
+  APInt KnownZero2;
+  APInt KnownOne2;
+  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything
+  switch (Op.getOpcode()) {
+    default: break;
+    case AMDILISD::SELECT_CC:
+      // Both recursive queries carry Depth + 1 so that the caller's
+      // recursion depth keeps being tracked for each operand; previously the
+      // second query omitted the argument and restarted at the default depth.
+      DAG.ComputeMaskedBits(
+          Op.getOperand(1),
+          Mask,
+          KnownZero,
+          KnownOne,
+          Depth + 1
+          );
+      DAG.ComputeMaskedBits(
+          Op.getOperand(0),
+          Mask,
+          KnownZero2,
+          KnownOne2,
+          Depth + 1
+          );
+      assert((KnownZero & KnownOne) == 0
+          && "Bits known to be one AND zero?");
+      assert((KnownZero2 & KnownOne2) == 0
+          && "Bits known to be one AND zero?");
+      // Only known if known in both the LHS and RHS
+      KnownOne &= KnownOne2;
+      KnownZero &= KnownZero2;
+      break;
+  };
+}
+
+// Select the calling convention assignment function for a node.  There is
+// currently a single calling convention, so the Op argument is ignored and
+// CC_AMDIL32 is returned unconditionally.
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+  CCAssignFn *AssignFn = CC_AMDIL32;
+  return AssignFn;
+}
+
+// LowerCallResult - Copy the values returned by a call out of the physical
+// registers assigned by RetCC_AMDIL32.  Chain and InFlag are the incoming
+// chain and flag.  Every result that lives in a register is appended to
+// InVals; results with any other kind of location are skipped.  Returns the
+// updated chain.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+    SDValue Chain,
+    SDValue InFlag,
+    CallingConv::ID CallConv,
+    bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl,
+    SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const
+{
+  // Work out where each returned value lives.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+      getTargetMachine(), RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+  for (unsigned Idx = 0; Idx != RVLocs.size(); ++Idx) {
+    CCValAssign &Loc = RVLocs[Idx];
+    if (!Loc.isRegLoc()) {
+      continue;
+    }
+    // The copy node yields (value, chain, flag) as results 0, 1 and 2.
+    SDValue Copy = DAG.getCopyFromReg(Chain, dl, Loc.getLocReg(),
+        Loc.getValVT(), InFlag);
+    Chain = Copy.getValue(1);
+    InFlag = Copy.getValue(2);
+    InVals.push_back(Copy.getValue(0));
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+// Custom-insert hook: expand MI inside BB for the opcodes selected by
+// ExpandCaseToAllTypes(AMDIL::CMP) by calling generateCMPInstr and then
+// removing MI.  Any other opcode is left untouched.  Always returns BB.
+// NOTE(review): ExpandCaseToAllTypes presumably expands to a list of case
+// labels for the per-type CMP opcodes - confirm against its definition.
+MachineBasicBlock *
+AMDILTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr *MI, MachineBasicBlock *BB) const
+{
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::CMP);
+ generateCMPInstr(MI, BB, TII);
+ // MI is deleted once generateCMPInstr has run; the API used to delete it
+ // depends on the LLVM version being built against.
+#if LLVM_VERSION >= 2500
+ MI->eraseFromParent();
+#else
+ BB->getParent()->DeleteMachineInstr(MI);
+#endif
+ break;
+ default:
+ break;
+ }
+ return BB;
+}
+
+// Give New, and recursively each of its operands, the ordering value
+// `order` when they do not have one yet.  A node that already has a non-zero
+// ordering is left alone and its operands are not visited; nothing is done
+// at all when `order` is 0.  Always returns New.
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+  SDNode *Node = New.getNode();
+  if (order == 0 || DAG.GetOrdering(Node) != 0) {
+    return New;
+  }
+  DAG.AssignOrdering(Node, order);
+  const unsigned NumOps = New.getNumOperands();
+  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+    Ordered(DAG, order, New.getOperand(Idx));
+  }
+  return New;
+}
+
+// LOWER(A) expands to a case label for ISD::A that calls LowerA(Op, DAG) and
+// passes the result through Ordered() with the ordering of the original node.
+#define LOWER(A) \
+  case ISD:: A: \
+    return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+// LowerOperation - Dispatch a node that needs custom lowering to the matching
+// Lower<Opcode> member function.  For an opcode that has no entry below the
+// node is dumped and an assertion fires; when assertions are compiled out the
+// original Op is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) {
+    default:
+      Op.getNode()->dump();
+      // The two literals are concatenated, so the first one must end in a
+      // space to keep the message readable.
+      assert(0 && "Custom lowering code for this "
+          "instruction is not implemented yet!");
+      break;
+      LOWER(GlobalAddress);
+      LOWER(JumpTable);
+      LOWER(ConstantPool);
+      LOWER(ExternalSymbol);
+      LOWER(FP_TO_SINT);
+      LOWER(FP_TO_UINT);
+      LOWER(SINT_TO_FP);
+      LOWER(UINT_TO_FP);
+      LOWER(ADD);
+      LOWER(MUL);
+      LOWER(SUB);
+      LOWER(FDIV);
+      LOWER(SDIV);
+      LOWER(SREM);
+      LOWER(UDIV);
+      LOWER(UREM);
+      LOWER(BUILD_VECTOR);
+      LOWER(INSERT_VECTOR_ELT);
+      LOWER(EXTRACT_VECTOR_ELT);
+      LOWER(EXTRACT_SUBVECTOR);
+      LOWER(SCALAR_TO_VECTOR);
+      LOWER(CONCAT_VECTORS);
+      LOWER(AND);
+      LOWER(OR);
+      LOWER(SELECT);
+      LOWER(SELECT_CC);
+      LOWER(SETCC);
+      LOWER(SIGN_EXTEND_INREG);
+#if LLVM_VERSION >= 2500
+      LOWER(BITCAST);
+#else
+      LOWER(BIT_CONVERT);
+#endif
+      LOWER(DYNAMIC_STACKALLOC);
+      LOWER(BRCOND);
+      LOWER(BR_CC);
+      LOWER(FP_ROUND);
+  }
+  return Op;
+}
+
+// Accessor: returns the current value of the VarArgsFrameOffset member.
+int
+AMDILTargetLowering::getVarArgsFrameOffset() const
+{
+  return this->VarArgsFrameOffset;
+}
+#undef LOWER
+
+// LowerGlobalAddress - Lower a GlobalAddress node.
+//
+// The global's name is looked up in the subtarget's global manager:
+//  * if it has an array offset, the result is (arrayoffset + node offset);
+//  * else if it has a constant offset, the result is either
+//    (constoffset + node offset) when getConstHWBit() is set for the name, or
+//    an ADDADDR of the target global address and (SDP + node offset);
+//  * otherwise a GlobalVariable with a ConstantInt, ConstantFP or
+//    ConstantAggregateZero initializer is replaced by that constant, and
+//    anything else becomes a plain target global address.
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST = Op;
+  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *G = GADN->getGlobal();
+  const AMDILSubtarget *stm = &this->getTargetMachine()
+    .getSubtarget<AMDILSubtarget>();
+  const AMDILGlobalManager *GM = stm->getGlobalManager();
+  DebugLoc DL = Op.getDebugLoc();
+  int64_t base_offset = GADN->getOffset();
+  int32_t arrayoffset = GM->getArrayOffset(G->getNameStr());
+  int32_t constoffset = GM->getConstOffset(G->getNameStr());
+  if (arrayoffset != -1) {
+    DST = DAG.getConstant(arrayoffset, MVT::i32);
+    DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
+        DST, DAG.getConstant(base_offset, MVT::i32));
+  } else if (constoffset != -1) {
+    if (GM->getConstHWBit(G->getNameStr())) {
+      DST = DAG.getConstant(constoffset, MVT::i32);
+      DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
+          DST, DAG.getConstant(base_offset, MVT::i32));
+    } else {
+#if LLVM_VERSION >= 2500
+      SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
+#else
+      SDValue addr = DAG.getTargetGlobalAddress(G, MVT::i32);
+#endif
+      SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
+      DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
+          DAG.getConstant(base_offset, MVT::i32));
+      DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
+    }
+  } else {
+    const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+    if (!GV) {
+      // G is not a GlobalVariable, so GV is null here.  Build the target
+      // address from G itself rather than handing a null pointer to the DAG.
+#if LLVM_VERSION >= 2500
+      DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
+#else
+      DST = DAG.getTargetGlobalAddress(G, MVT::i32);
+#endif
+    } else {
+      if (GV->hasInitializer()) {
+        const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+        if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+          DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+
+        } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+          DST = DAG.getConstantFP(CF->getValueAPF(),
+              Op.getValueType());
+        } else if (dyn_cast<ConstantAggregateZero>(C)) {
+          EVT VT = Op.getValueType();
+          if (VT.isInteger()) {
+            DST = DAG.getConstant(0, VT);
+          } else {
+            DST = DAG.getConstantFP(0, VT);
+          }
+        } else {
+          // Unsupported initializer kind: assert in debug builds, otherwise
+          // dump the constant and fall back to the plain target address.
+          assert(!"lowering this type of Global Address "
+              "not implemented yet!");
+          C->dump();
+#if LLVM_VERSION >= 2500
+          DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+#else
+          DST = DAG.getTargetGlobalAddress(GV, MVT::i32);
+#endif
+        }
+      } else {
+#if LLVM_VERSION >= 2500
+        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+#else
+        DST = DAG.getTargetGlobalAddress(GV, MVT::i32);
+#endif
+      }
+    }
+  }
+  return DST;
+}
+
+// LowerJumpTable - Replace a JumpTable node by the target jump table node
+// with the same index, typed as i32.
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+  const int TableIndex = cast<JumpTableSDNode>(Op)->getIndex();
+  return DAG.getTargetJumpTable(TableIndex, MVT::i32);
+}
+// LowerConstantPool - Replace a ConstantPool node by the equivalent target
+// constant pool node, keeping its value type, alignment, offset and target
+// flags.  Machine constant pool entries and ordinary constants are forwarded
+// through their respective accessors.
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+  ConstantPoolSDNode *Node = cast<ConstantPoolSDNode>(Op);
+  EVT PtrVT = Op.getValueType();
+  if (!Node->isMachineConstantPoolEntry()) {
+    return DAG.getTargetConstantPool(Node->getConstVal(), PtrVT,
+        Node->getAlignment(), Node->getOffset(), Node->getTargetFlags());
+  }
+  return DAG.getTargetConstantPool(Node->getMachineCPVal(), PtrVT,
+      Node->getAlignment(), Node->getOffset(), Node->getTargetFlags());
+}
+
+// LowerExternalSymbol - Replace an ExternalSymbol node by the target external
+// symbol node for the same symbol name, typed as i32.
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+  ExternalSymbolSDNode *SymNode = cast<ExternalSymbolSDNode>(Op);
+  return DAG.getTargetExternalSymbol(SymNode->getSymbol(), MVT::i32);
+}
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.  One value per argument location is
+/// appended to InVals, the stack size computed by the calling convention
+/// analysis is recorded as the bytes to pop on return, and the incoming
+/// Chain is returned unchanged.
+/// TODO: isVarArg, hasStructRet, isMemReg
+SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+    CallingConv::ID CallConv,
+    bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl,
+    SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals)
+const
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+  AMDILMachineFunctionInfo *FuncInfo
+    = MF.getInfo<AMDILMachineFunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // The calling convention is read from the function being lowered.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CallingConv::ID CC = MF.getFunction()->getCallingConv();
+  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+      getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  // When more calling conventions are added, they need to be chosen here
+  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+
+  const unsigned int NumLocs = ArgLocs.size();
+  for (unsigned int Idx = 0; Idx != NumLocs; ++Idx) {
+    CCValAssign &VA = ArgLocs[Idx];
+
+    if (!VA.isRegLoc()) {
+      if (VA.isMemLoc()) {
+        InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+              dl, DAG, VA, MFI, Idx));
+      } else {
+        assert(0 && "found a Value Assign that is "
+            "neither a register or a memory location");
+      }
+      continue;
+    }
+
+    // Register argument: mark the physical register live-in and copy it
+    // into a virtual register of the matching class.
+    EVT RegVT = VA.getLocVT();
+    const TargetRegisterClass *RC = getRegClassFromType(
+        RegVT.getSimpleVT().SimpleTy);
+    unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+    SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+    // If this is an 8 or 16-bit value, it is really passed
+    // promoted to 32 bits. Insert an assert[sz]ext to capture
+    // this, then truncate to the right size.
+    switch (VA.getLocInfo()) {
+      case CCValAssign::SExt:
+        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+            DAG.getValueType(VA.getValVT()));
+        break;
+      case CCValAssign::ZExt:
+        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+            DAG.getValueType(VA.getValVT()));
+        break;
+      default:
+        break;
+    }
+    if (VA.getLocInfo() != CCValAssign::Full) {
+      ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+    }
+
+    // Add the value to the list of arguments
+    // to be passed in registers
+    InVals.push_back(ArgValue);
+    if (isVarArg) {
+      assert(0 && "Variable arguments are not yet supported");
+      // See MipsISelLowering.cpp for ideas on how to implement
+    }
+  }
+
+  unsigned int StackSize = CCInfo.getNextStackOffset();
+  if (isVarArg) {
+    assert(0 && "Variable arguments are not yet supported");
+    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+  }
+  // This needs to be changed to non-zero if the return function needs
+  // to pop bytes
+  FuncInfo->setBytesToPopOnReturn(StackSize);
+  return Chain;
+}
+/// CreateCopyOfByValArgument - Emit a memcpy node that copies the aggregate
+/// at address "Src" to address "Dst".  The size and alignment of the copy are
+/// taken from the byval information in Flags.  The copy will be passed as a
+/// byval function parameter.  An assertion currently fires on entry because
+/// the memcpy lowering is not available yet.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+  assert(0 && "MemCopy does not exist yet");
+  const unsigned ByValSize = Flags.getByValSize();
+  const unsigned ByValAlign = Flags.getByValAlign();
+  SDValue SizeNode = DAG.getConstant(ByValSize, MVT::i32);
+  DebugLoc CopyLoc = Src.getDebugLoc();
+
+#if LLVM_VERSION >= 2500
+  return DAG.getMemcpy(Chain, CopyLoc, Dst, Src, SizeNode, ByValAlign,
+      /*IsVol=*/false, /*AlwaysInline=*/true,
+      MachinePointerInfo(), MachinePointerInfo());
+#else
+  return DAG.getMemcpy(Chain, CopyLoc, Dst, Src, SizeNode, ByValAlign,
+      /*IsVol=*/false, /*AlwaysInline=*/true,
+      NULL, 0, NULL, 0);
+#endif
+}
+
+// LowerMemOpCallTo - Place an outgoing call argument in memory at
+// StackPtr + VA.getLocMemOffset().  Byval arguments are copied with
+// CreateCopyOfByValArgument; every other argument is written with a store.
+// Returns the node produced by that copy or store.
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+    SDValue StackPtr, SDValue Arg,
+    DebugLoc dl, SelectionDAG &DAG,
+    const CCValAssign &VA,
+    ISD::ArgFlagsTy Flags) const
+{
+  const unsigned int LocMemOffset = VA.getLocMemOffset();
+  SDValue OffsetNode = DAG.getIntPtrConstant(LocMemOffset);
+  SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+      StackPtr, OffsetNode);
+
+  if (Flags.isByVal()) {
+    return CreateCopyOfByValArgument(Arg, Addr, Chain, Flags, DAG);
+  }
+
+#if LLVM_VERSION >= 2500
+  return DAG.getStore(Chain, dl, Arg, Addr,
+      MachinePointerInfo::getStack(LocMemOffset),
+      false, false, 0);
+#else
+  return DAG.getStore(Chain, dl, Arg, Addr,
+      PseudoSourceValue::getStack(), LocMemOffset,
+      false, false, 0);
+#endif
+}
+/// LowerCall - function arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted, and the call results are copied out through
+/// LowerCallResult, whose chain is returned.
+/// isTailCall is always reset to false: tail calls are not implemented and
+/// every tail call path below only asserts.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+    CallingConv::ID CallConv, bool isVarArg, bool& isTailCall,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+#if LLVM_VERSION >= 2500
+    const SmallVectorImpl<SDValue> &OutVals,
+#endif
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals)
+const
+{
+  isTailCall = false;
+  MachineFunction& MF = DAG.getMachineFunction();
+  // FIXME: DO we need to handle fast calling conventions and tail call
+  // optimizations?? X86/PPC ISelLowering
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Analyze operands of the call, assigning locations to each operand
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+      getTargetMachine(), ArgLocs, *DAG.getContext());
+  // Analyze the calling operands, but need to change
+  // if we have more than one calling convention
+  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+  unsigned int NumBytes = CCInfo.getNextStackOffset();
+  if (isTailCall) {
+    // Was assert(isTailCall && ...), which can never fire inside this
+    // branch; use the same form as the other tail call guards below.
+    assert(0 && "Tail Call not handled yet!");
+    // See X86/PPC ISelLowering
+  }
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // Walk the register/memloc assignments, insert copies/loads
+  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    // Arguments start after the 5 first operands of ISD::CALL
+#if LLVM_VERSION >= 2500
+    SDValue Arg = OutVals[i];
+#else
+    SDValue Arg = Outs[i].Val;
+#endif
+    // Promote the value if needed
+    switch(VA.getLocInfo()) {
+      // With assertions compiled out an unknown kind falls through to
+      // Full and the argument is passed unmodified.
+      default: assert(0 && "Unknown loc info!");
+      case CCValAssign::Full:
+               break;
+      case CCValAssign::SExt:
+               Arg = DAG.getNode(ISD::SIGN_EXTEND,
+                   dl,
+                   VA.getLocVT(), Arg);
+               break;
+      case CCValAssign::ZExt:
+               Arg = DAG.getNode(ISD::ZERO_EXTEND,
+                   dl,
+                   VA.getLocVT(), Arg);
+               break;
+      case CCValAssign::AExt:
+               Arg = DAG.getNode(ISD::ANY_EXTEND,
+                   dl,
+                   VA.getLocVT(), Arg);
+               break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else if (VA.isMemLoc()) {
+      // Create the frame index object for this incoming parameter
+      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+          VA.getLocMemOffset(), true
+#if LLVM_VERSION < 2500
+          , false
+#endif
+          );
+      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+      // emit ISD::STORE which stores the
+      // parameter value to a stack Location
+#if LLVM_VERSION >= 2500
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+            MachinePointerInfo::getFixedStack(FI),
+            false, false, 0));
+#else
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+            NULL, 0, false, false, 0));
+
+#endif
+    } else {
+      assert(0 && "Not a Reg/Mem Loc, major error!");
+    }
+  }
+  // Join all argument stores into a single chain.
+  if (!MemOpChains.empty()) {
+    Chain = DAG.getNode(ISD::TokenFactor,
+        dl,
+        MVT::Other,
+        &MemOpChains[0],
+        MemOpChains.size());
+  }
+  // Copy register arguments into their physical registers, threading the
+  // flag through so the copies stay together.
+  SDValue InFlag;
+  if (!isTailCall) {
+    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain,
+          dl,
+          RegsToPass[i].first,
+          RegsToPass[i].second,
+          InFlag);
+      InFlag = Chain.getValue(1);
+    }
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+  // every direct call is) turn it into a TargetGlobalAddress/
+  // TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+#if LLVM_VERSION >= 2500
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+#else
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+#endif
+  }
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+  }
+  else if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1708
+  }
+
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
+  SmallVector<SDValue, 8> Ops;
+
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1721
+  }
+  // If this is a direct call, pass the chain and the callee
+  if (Callee.getNode()) {
+    Ops.push_back(Chain);
+    Ops.push_back(Callee);
+  }
+
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1739
+  }
+
+  // Add argument registers to the end of the list so that they are known
+  // live into the call
+  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Ops.push_back(DAG.getRegister(
+          RegsToPass[i].first,
+          RegsToPass[i].second.getValueType()));
+  }
+  if (InFlag.getNode()) {
+    Ops.push_back(InFlag);
+  }
+
+  // Emit Tail Call
+  if (isTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // see X86 ISelLowering for ideas on implementation: 1762
+  }
+
+  Chain = DAG.getNode(AMDILISD::CALL,
+      dl,
+      NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node
+  Chain = DAG.getCALLSEQ_END(
+      Chain,
+      DAG.getIntPtrConstant(NumBytes, true),
+      DAG.getIntPtrConstant(0, true),
+      InFlag);
+  InFlag = Chain.getValue(1);
+  // Handle result values, copying them out of physregs into vregs that
+  // we return
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+      InVals);
+}
+// checkMADType - Decide whether the ADD node Op may be turned into a 24 bit
+// or a 32 bit MAD, reporting the answer through is24bitMAD / is32bitMAD.
+//
+// As written the function clears both flags and returns immediately, so it
+// always answers "neither"; everything after the early return is unreachable.
+// The unreachable part walks the users of Op, gives up unless every user is a
+// load/store that uses Op as something other than the stored value, and
+// selects the 32 bit form when any such access is treated as a global access
+// (otherwise the 24 bit form).
+static void checkMADType(
+ SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
+{
+ bool globalLoadStore = false;
+ is24bitMAD = false;
+ is32bitMAD = false;
+ // Early exit: the analysis below is never executed.
+ return;
+ assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
+ "this to work correctly!");
+ if (Op.getNode()->use_empty()) {
+ return;
+ }
+ for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
+ nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
+ SDNode *ptr = *nBegin;
+ const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
+ // If we are not a LSBaseSDNode then we don't do this
+ // optimization.
+ // If we are a LSBaseSDNode, but the op is not the offset
+ // or base pointer, then we don't do this optimization
+ // (i.e. we are the value being stored)
+ if (!lsNode ||
+ (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
+ return;
+ }
+ // NOTE(review): the dyn_cast result is dereferenced without a null
+ // check - confirm the source value always has pointer type.
+ const PointerType *PT =
+ dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
+ unsigned as = PT->getAddressSpace();
+ switch(as) {
+ // The default label has no break, so it falls through into the
+ // PRIVATE_ADDRESS case after setting the flag.
+ default:
+ globalLoadStore = true;
+ case AMDILAS::PRIVATE_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::CONSTANT_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::LOCAL_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ case AMDILAS::REGION_ADDRESS:
+ if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+ globalLoadStore = true;
+ }
+ break;
+ }
+ }
+ if (globalLoadStore) {
+ is32bitMAD = true;
+ } else {
+ is24bitMAD = true;
+ }
+}
+
+// LowerADD - Custom lowering of ISD::ADD.
+//
+//  * i64 / v2i64: a scalar i64 add is emitted directly as AMDILISD::ADD when
+//    the device reports hardware LongOps.  Otherwise both operands are split
+//    into low and high 32 bit halves, the halves are added separately, a
+//    value derived from the unsigned compare (low sum < low RHS) is added to
+//    the high half, and the halves are recombined.
+//  * an add with a FrameIndex operand becomes AMDILISD::ADDADDR.
+//  * everything else becomes AMDILISD::ADD, unless the MAD matching code
+//    produced a node first.
+SDValue
+AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  const AMDILSubtarget *stm = &this->getTargetMachine()
+    .getSubtarget<AMDILSubtarget>();
+  bool isVec = OVT.isVector();
+  if (OVT.getScalarType() == MVT::i64) {
+    MVT INTTY = MVT::i32;
+    if (OVT == MVT::v2i64) {
+      INTTY = MVT::v2i32;
+    }
+    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
+        && INTTY == MVT::i32) {
+      DST = DAG.getNode(AMDILISD::ADD,
+          DL,
+          OVT,
+          LHS, RHS);
+    } else {
+      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
+      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
+      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
+      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
+      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
+      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
+      // Carry out of the low half: the low sum is (unsigned) smaller than
+      // one of its inputs exactly when the addition wrapped.
+      SDValue cmp;
+      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+          INTLO, RHSLO);
+      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
+      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
+      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
+          INTLO, INTHI);
+    }
+  } else {
+    if (LHS.getOpcode() == ISD::FrameIndex ||
+        RHS.getOpcode() == ISD::FrameIndex) {
+      DST = DAG.getNode(AMDILISD::ADDADDR,
+          DL,
+          OVT,
+          LHS, RHS);
+    } else {
+      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+          && LHS.getNumOperands()
+          && RHS.getNumOperands()) {
+        bool is24bitMAD = false;
+        bool is32bitMAD = false;
+        const ConstantSDNode *LHSConstOpCode =
+          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
+        const ConstantSDNode *RHSConstOpCode =
+          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
+        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
+            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
+            || LHS.getOpcode() == ISD::MUL
+            || RHS.getOpcode() == ISD::MUL) {
+          SDValue Op1, Op2, Op3;
+          // FIXME: Fix this so that it works for unsigned 24bit ops.
+          // signedOnly is computed here but not consulted anywhere below.
+          bool signedOnly = true;
+          if (LHS.getOpcode() == ISD::MUL) {
+            Op1 = LHS.getOperand(0);
+            Op2 = LHS.getOperand(1);
+            Op3 = RHS;
+          } else if (RHS.getOpcode() == ISD::MUL) {
+            Op1 = RHS.getOperand(0);
+            Op2 = RHS.getOperand(1);
+            Op3 = LHS;
+          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
+            // A shift left by a constant is treated as a multiply by 1 << n.
+            Op1 = LHS.getOperand(0);
+            Op2 = DAG.getConstant(
+                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+            Op3 = RHS;
+            signedOnly = LHSConstOpCode->getSExtValue() < 0;
+          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
+            Op1 = RHS.getOperand(0);
+            Op2 = DAG.getConstant(
+                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+            Op3 = LHS;
+            signedOnly = RHSConstOpCode->getSExtValue() < 0;
+          }
+          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
+          // We can possibly do a MAD transform!
+          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+            uint32_t opcode = AMDILIntrinsic::AMDIL_mad24_i32;
+            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+                Op1, Op2, Op3);
+          } else if(is32bitMAD) {
+            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+                DL, Tys, DAG.getEntryNode(),
+                DAG.getConstant(
+                  AMDILIntrinsic::AMDIL_mad_i32, MVT::i32),
+                Op1, Op2, Op3);
+          }
+        }
+      }
+      // Fall back to a plain add only when no MAD node was built above;
+      // previously this assignment unconditionally discarded the MAD result.
+      // NOTE(review): the MAD nodes use INTRINSIC_W_CHAIN with a value list
+      // that has no chain type - verify before relying on that path.
+      if (!DST.getNode()) {
+        DST = DAG.getNode(AMDILISD::ADD,
+            DL,
+            OVT,
+            LHS, RHS);
+      }
+    }
+  }
+  return DST;
+}
+// genCLZuN - Build the DAG for a count-leading-zeros of a `bits` wide
+// unsigned value held in a 32 bit integer (scalar or vector).  The value is
+// OR-ed into the bit pattern 0x3f800000, 1.0f is subtracted from the
+// resulting float, and bits 23..30 of the difference are subtracted from
+// (103 + bits).  A zero input yields `bits`.
+SDValue
+AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
+    uint32_t bits) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT INTTY = Op.getValueType();
+  // Floating point type with the same number of lanes as the input.
+  EVT FPTY = INTTY.isVector()
+    ? EVT(MVT::getVectorVT(MVT::f32, INTTY.getVectorNumElements()))
+    : EVT(MVT::f32);
+  /* static inline uint
+     __clz_Nbit(uint x)
+     {
+     int xor = 0x3f800000U | x;
+     float tp = as_float(xor);
+     float t = tp + -1.0f;
+     uint tint = as_uint(t);
+     int cmp = (x != 0);
+     uint tsrc = tint >> 23;
+     uint tmask = tsrc & 0xffU;
+     uint cst = (103 + N)U - tmask;
+     return cmp ? cst : N;
+     }
+     */
+  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
+      && "genCLZu16 only works on 32bit types");
+
+  // 0x3f800000 | x
+  SDValue Input = Op;
+  SDValue OrBits = DAG.getNode(ISD::OR, DL, INTTY,
+      DAG.getConstant(0x3f800000, INTTY), Input);
+  // reinterpret as float and add -1.0f
+  SDValue AsFloat = DAG.getNode(ISDBITCAST, DL, FPTY, OrBits);
+  SDValue Diff = DAG.getNode(ISD::FADD, DL, FPTY, AsFloat,
+      DAG.getConstantFP(-1.0f, FPTY));
+  // reinterpret the difference as an unsigned integer
+  SDValue DiffBits = DAG.getNode(ISDBITCAST, DL, INTTY, Diff);
+  // x != 0
+  SDValue NonZero = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), Input,
+      DAG.getConstant(0, INTTY));
+  // (DiffBits >> 23) & 0xFF
+  SDValue Shifted = DAG.getNode(ISD::SRL, DL, INTTY, DiffBits,
+      DAG.getConstant(23, INTTY));
+  SDValue Exponent = DAG.getNode(ISD::AND, DL, INTTY, Shifted,
+      DAG.getConstant(0xFFU, INTTY));
+  // (103 + bits) - exponent
+  SDValue Count = DAG.getNode(ISD::SUB, DL, INTTY,
+      DAG.getConstant((103U + bits), INTTY), Exponent);
+  // x != 0 ? count : bits
+  return DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, NonZero, Count,
+      DAG.getConstant(bits, INTTY));
+}
+
+// Emits DAG nodes that count the leading zeros of a 32-bit value (scalar or
+// vector, as given by Op's type).
+//
+// HD5XXX and newer use the IFFB_HI node; HD4XXX combines two 16-bit counts
+// produced by genCLZuN.  Any other generation trips an assert and yields an
+// empty SDValue.
+SDValue
+AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc dl = Op.getDebugLoc();
+  EVT intTy = Op.getValueType();
+  const AMDILSubtarget *subtarget = reinterpret_cast<const AMDILTargetMachine*>(
+      &this->getTargetMachine())->getSubtargetImpl();
+
+  if (subtarget->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+    // uint __clz_32bit(uint u)
+    // {
+    //   int z = __amdil_ffb_hi(u);
+    //   return z < 0 ? 32 : z;
+    // }
+    SDValue firstBit = DAG.getNode(AMDILISD::IFFB_HI, dl, intTy, Op);
+    // A negative result means no bit was found.
+    SDValue noBitSet = DAG.getNode(AMDILISD::CMP, dl, intTy,
+        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+        firstBit, DAG.getConstant(0, intTy));
+    SDValue thirtyTwo = DAG.getConstant(32, intTy);
+    return DAG.getNode(AMDILISD::CMOVLOG, dl, intTy, noBitSet,
+        thirtyTwo, firstBit);
+  }
+
+  if (subtarget->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // uint __clz_32bit(uint x)
+    // {
+    //   uint zh = __clz_16bit(x >> 16);
+    //   uint zl = __clz_16bit(x & 0xffffU);
+    //   return zh == 16U ? 16U + zl : zh;
+    // }
+    SDValue shift16 = DAG.getConstant(16, intTy);
+    SDValue upperHalf = DAG.getNode(ISD::SRL, dl, intTy, Op, shift16);
+    SDValue upperCount = genCLZuN(upperHalf, DAG, 16);
+    SDValue lowMask = DAG.getConstant(0xFFFFU, intTy);
+    SDValue lowerHalf = DAG.getNode(ISD::AND, dl, intTy, Op, lowMask);
+    SDValue lowerCount = genCLZuN(lowerHalf, DAG, 16);
+    // The upper half is all zeros exactly when its count equals 16.
+    SDValue upperEmpty = DAG.getNode(AMDILISD::CMP, dl, intTy,
+        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+        upperCount, DAG.getConstant(16U, intTy));
+    SDValue sixteen = DAG.getConstant(16, intTy);
+    SDValue lowerPlus16 = DAG.getNode(ISD::ADD, dl, intTy,
+        sixteen, lowerCount);
+    return DAG.getNode(AMDILISD::CMOVLOG, dl, intTy,
+        upperEmpty, lowerPlus16, upperCount);
+  }
+
+  assert(0 && "Attempting to generate a CLZ function with an"
+      " unknown graphics card");
+  return SDValue();
+}
+SDValue
+AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
+{ /* Emits DAG nodes that count the leading zeros of the 64-bit value Op
+ * (scalar or vector).  The result is produced in the matching 32-bit
+ * integer type (INTTY below), not in Op's own type.
+ *
+ * HD5XXX and newer combine two 32-bit counts from genCLZu32; HD4XXX
+ * combines three 23-bit counts from genCLZuN.  For any other generation
+ * the assert at the bottom fires and an empty SDValue is returned.
+ */
+ SDValue DST = SDValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT INTTY;
+ EVT LONGTY = Op.getValueType();
+ bool isVec = LONGTY.isVector();
+ if (isVec) {
+ INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
+ .getVectorNumElements()));
+ } else {
+ INTTY = EVT(MVT::i32);
+ }
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ // Evergreen:
+ // static inline uint
+ // __clz_u64(ulong x)
+ // {
+ //   uint zhi = __clz_32bit((uint)(x >> 32));
+ //   uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
+ //   return zhi == 32U ? 32U + zlo : zhi;
+ // }
+ // ulong x = op
+ SDValue x = Op;
+ // uint xlo = low 32-bit component of x (LCOMPLO)
+ SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xhi = high 32-bit component of x (LCOMPHI)
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+ // uint zhi = __clz_32bit(xhi)
+ SDValue zhi = genCLZu32(xhi, DAG);
+ // uint zlo = __clz_32bit(xlo)
+ SDValue zlo = genCLZu32(xlo, DAG);
+ // uint cmp = zhi == 32
+ SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhi, DAG.getConstant(32U, INTTY));
+ // uint zlop32 = 32 + zlo
+ SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
+ DAG.getConstant(32U, INTTY), zlo);
+ // return cmp ? zlop32 : zhi
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+ } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // HD4XXX:
+ // static inline uint
+ // __clz_64bit(ulong x)
+ // {
+ //   uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
+ //   uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
+ //   uint zl = __clz_23bit((uint)x & 0x7fffffU);
+ //   uint r = zh == 18U ? 18U + zm : zh;
+ //   return zh + zm == 41U ? 41U + zl : r;
+ // }
+ // ulong x = Op
+ SDValue x = Op;
+ // ulong xs46 = x >> 46
+ SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(46, LONGTY));
+ // uint ixs46 = (uint)xs46
+ SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
+ // ulong xs23 = x >> 23
+ SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
+ DAG.getConstant(23, LONGTY));
+ // uint ixs23 = (uint)xs23
+ SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
+ // uint xs23m23 = ixs23 & 0x7FFFFF
+ SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint ix = (uint)x, taken as the low 32-bit component
+ SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ // uint xm23 = ix & 0x7FFFFF
+ SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
+ DAG.getConstant(0x7fffffU, INTTY));
+ // uint zh = __clz_23bit(ixs46)
+ SDValue zh = genCLZuN(ixs46, DAG, 23);
+ // uint zm = __clz_23bit(xs23m23)
+ SDValue zm = genCLZuN(xs23m23, DAG, 23);
+ // uint zl = __clz_23bit(xm23)
+ SDValue zl = genCLZuN(xm23, DAG, 23);
+ // uint zhm5 = zh - 5 (emitted as an ADD of the constant -5U)
+ SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
+ DAG.getConstant(-5U, INTTY));
+ SDValue const18 = DAG.getConstant(18, INTTY);
+ SDValue const41 = DAG.getConstant(41, INTTY);
+ // uint cmp1 = zhm5 == 18
+ SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5, const18);
+ // uint zhm5zm = zhm5 + zm
+ SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
+ // uint cmp2 = zhm5zm == 41
+ SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ zhm5zm, const41);
+ // uint zmp18 = zm + 18
+ SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
+ // uint zlp41 = zl + 41
+ SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
+ // uint r = cmp1 ? zmp18 : zhm5
+ SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
+ cmp1, zmp18, zhm5);
+ // return cmp2 ? zlp41 : r
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+ } else {
+ assert(0 && "Attempting to generate a CLZ function with an"
+ " unknown graphics card");
+ }
+ return DST;
+}
+// Emits DAG nodes converting a double (or vector of doubles) to a 64-bit
+// integer with the same number of elements.
+//
+// RHS          the f64 value to convert.
+// DAG          the selection DAG the new nodes are added to.
+// includeSign  true to honour the sign of the input (signed conversion),
+//              false for an unsigned conversion.
+//
+// Returns the converted value as i64 (or a vector of i64).
+//
+// Devices newer than HD6XXX split the value with floating-point arithmetic;
+// all other devices decode the IEEE-754 bit pattern with 32-bit integer
+// operations.
+SDValue
+AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
+                                 bool includeSign) const
+{
+  EVT INTVT;
+  EVT LONGVT;
+  SDValue DST;
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT RHSVT = RHS.getValueType();
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+                                  RHSVT.getVectorNumElements()));
+    INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                 RHSVT.getVectorNumElements()));
+  } else {
+    LONGVT = EVT(MVT::i64);
+    INTVT = EVT(MVT::i32);
+  }
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+      &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // unsigned version:
+    // uint uhi = (uint)(d * 0x1.0p-32);
+    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
+    // return as_ulong2((uint2)(ulo, uhi));
+    //
+    // signed version:
+    // double ad = fabs(d);
+    // long l = unsigned_version(ad);
+    // long nl = -l;
+    // return d == ad ? l : nl;
+    SDValue d = RHS;
+    if (includeSign) {
+      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
+    }
+    // getConstantFP takes the numeric value, not a bit pattern.  The
+    // previous arguments 0x2f800000 / 0xcf800000 are the single-precision
+    // encodings of 2^-32 and -2^32 and were interpreted as the values
+    // 796917760.0 and 3481272320.0.
+    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
+        DAG.getConstantFP(1.0 / 4294967296.0, RHSVT));  // 0x1.0p-32
+    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
+    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
+    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
+        DAG.getConstantFP(-4294967296.0, RHSVT), d);    // -0x1.0p+32
+    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
+    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+    if (includeSign) {
+      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
+      // d == fabs(d) holds exactly when the input was not negative.
+      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
+          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
+          RHS, d);
+      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+    }
+    DST = l;
+  } else {
+    /*
+       Reference implementation.  Note that the DAG built below shifts by
+       sr directly (without the "& 31") and does not emit the optional
+       saturate-on-overflow step.
+
+       __attribute__((always_inline)) long
+       cast_f64_to_i64(double d)
+       {
+         // Convert d in to 32-bit components
+         long x = as_long(d);
+         xhi = LCOMPHI(x);
+         xlo = LCOMPLO(x);
+
+         // Generate 'normalized' mantissa
+         mhi = xhi | 0x00100000; // hidden bit
+         mhi <<= 11;
+         temp = xlo >> (32 - 11);
+         mhi |= temp
+         mlo = xlo << 11;
+
+         // Compute shift right count from exponent
+         e = (xhi >> (52-32)) & 0x7ff;
+         sr = 1023 + 63 - e;
+         srge64 = sr >= 64;
+         srge32 = sr >= 32;
+
+         // Compute result for 0 <= sr < 32
+         rhi0 = mhi >> (sr &31);
+         rlo0 = mlo >> (sr &31);
+         temp = mhi << (32 - sr);
+         temp |= rlo0;
+         rlo0 = sr ? temp : rlo0;
+
+         // Compute result for 32 <= sr
+         rhi1 = 0;
+         rlo1 = srge64 ? 0 : rhi0;
+
+         // Pick between the 2 results
+         rhi = srge32 ? rhi1 : rhi0;
+         rlo = srge32 ? rlo1 : rlo0;
+
+         // Optional saturate on overflow
+         srlt0 = sr < 0;
+         rhi = srlt0 ? MAXVALUE : rhi;
+         rlo = srlt0 ? MAXVALUE : rlo;
+
+         // Create long
+         res = LCREATE( rlo, rhi );
+
+         // Deal with sign bit (ignoring whether result is signed or unsigned value)
+         if (includeSign) {
+           sign = ((signed int) xhi) >> 31; // fill with sign bit
+           sign = LCREATE( sign, sign );
+           res += sign;
+           res ^= sign;
+         }
+
+         return res;
+       }
+     */
+    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+    SDValue c32 = DAG.getConstant( 32, INTVT );
+
+    // Convert d in to 32-bit components
+    SDValue d = RHS;
+    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+    // Generate 'normalized' mantissa
+    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+        xhi, DAG.getConstant( 0x00100000, INTVT ) );
+    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );
+
+    // Compute shift right count from exponent
+    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+        xhi, DAG.getConstant( 52-32, INTVT ) );
+    e = DAG.getNode( ISD::AND, DL, INTVT,
+        e, DAG.getConstant( 0x7ff, INTVT ) );
+    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+        DAG.getConstant( 1023 + 63, INTVT ), e );
+    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+        sr, DAG.getConstant(64, INTVT));
+    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+        sr, DAG.getConstant(32, INTVT));
+
+    // Compute result for 0 <= sr < 32
+    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
+    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
+    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
+    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
+    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
+
+    // Compute result for 32 <= sr
+    SDValue rhi1 = DAG.getConstant( 0, INTVT );
+    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        srge64, rhi1, rhi0 );
+
+    // Pick between the 2 results
+    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        srge32, rhi1, rhi0 );
+    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        srge32, rlo1, rlo0 );
+
+    // Create long
+    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+
+    // Deal with sign bit: sign is 0 or -1, so (res + sign) ^ sign negates
+    // res when the input was negative and leaves it unchanged otherwise.
+    if (includeSign) {
+      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+          xhi, DAG.getConstant( 31, INTVT ) );
+      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
+      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
+    }
+    DST = res;
+  }
+  return DST;
+}
+SDValue
+AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ EVT INTVT;
+ EVT LONGVT;
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ if (isVec) {
+ LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+ RHSVT.getVectorNumElements()));
+ INTVT = EVT(MVT::getVectorVT(MVT::i32,
+ RHSVT.getVectorNumElements()));
+ } else {
+ LONGVT = EVT(MVT::i64);
+ INTVT = EVT(MVT::i32);
+ }
+ /*
+ Emits DAG nodes converting the double RHS (scalar or vector) to a 32-bit
+ integer of the same element count using only 32-bit integer operations
+ on the IEEE-754 bit pattern.  includeSign selects whether the sign bit of
+ the input is applied to the result.
+
+ Reference implementation.  Note that the DAG built below shifts by sr
+ directly (without the "& 31") and does not emit the optional
+ saturate-on-overflow step.
+
+ __attribute__((always_inline)) int
+ cast_f64_to_[u|i]32(double d)
+ {
+ // Convert d in to 32-bit components
+ long x = as_long(d);
+ xhi = LCOMPHI(x);
+ xlo = LCOMPLO(x);
+
+ // Generate 'normalized' mantissa
+ mhi = xhi | 0x00100000; // hidden bit
+ mhi <<= 11;
+ temp = xlo >> (32 - 11);
+ mhi |= temp
+
+ // Compute shift right count from exponent
+ e = (xhi >> (52-32)) & 0x7ff;
+ sr = 1023 + 31 - e;
+ srge32 = sr >= 32;
+
+ // Compute result for 0 <= sr < 32
+ res = mhi >> (sr &31);
+ res = srge32 ? 0 : res;
+
+ // Optional saturate on overflow
+ srlt0 = sr < 0;
+ res = srlt0 ? MAXVALUE : res;
+
+ // Deal with sign bit (ignoring whether result is signed or unsigned value)
+ if (includeSign) {
+ sign = ((signed int) xhi) >> 31; // fill with sign bit
+ res += sign;
+ res ^= sign;
+ }
+
+ return res;
+ }
+ */
+ SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
+ SDValue c32 = DAG.getConstant( 32, INTVT ); // not referenced again in this function
+
+ // Convert d in to 32-bit components
+ SDValue d = RHS;
+ SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+ // Generate 'normalized' mantissa: set the hidden bit, then shift left by
+ // 11 and pull in the top 11 bits of the low word
+ SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
+ xhi, DAG.getConstant( 0x00100000, INTVT ) );
+ mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
+ SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
+ xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
+ mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
+
+ // Compute shift right count from exponent: sr = (1023 + 31) - e
+ SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
+ xhi, DAG.getConstant( 52-32, INTVT ) );
+ e = DAG.getNode( ISD::AND, DL, INTVT,
+ e, DAG.getConstant( 0x7ff, INTVT ) );
+ SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
+ DAG.getConstant( 1023 + 31, INTVT ), e );
+ SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ sr, DAG.getConstant(32, INTVT));
+
+ // Compute result for 0 <= sr < 32; the result is forced to 0 when sr >= 32
+ SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
+ res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ srge32, DAG.getConstant(0,INTVT), res );
+
+ // Deal with sign bit: sign is 0 or -1, so (res + sign) ^ sign negates res
+ // for a negative input and leaves it unchanged otherwise
+ if (includeSign) {
+ SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
+ xhi, DAG.getConstant( 31, INTVT ) );
+ res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
+ res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
+ }
+ return res;
+}
+// Custom lowering for a floating-point to signed-integer conversion node.
+//
+// * Vector f64 sources on devices newer than HD6XXX are split into one
+//   scalar FP_TO_SINT per element and reassembled into a vector.
+// * f64 -> i32 and f64 -> i8/i16 are expanded through genf64toi32 on
+//   HD6XXX and older; newer devices keep the node (plus a TRUNCATE for the
+//   narrow types).
+// * f64 -> i64 always goes through genf64toi64.
+// * Everything else is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue src = Op.getOperand(0);
+  EVT srcVT = src.getValueType();
+  MVT srcElt = srcVT.getScalarType().getSimpleVT();
+  EVT dstVT = Op.getValueType();
+  MVT dstElt = dstVT.getScalarType().getSimpleVT();
+  DebugLoc dl = Op.getDebugLoc();
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (srcElt == MVT::f64 && srcVT.isVector()
+      && subtarget->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // We dont support vector 64bit floating point convertions.
+    SDValue vec;
+    const unsigned numElts = srcVT.getVectorNumElements();
+    for (unsigned idx = 0; idx < numElts; ++idx) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          dl, srcElt, src, DAG.getTargetConstant(idx, MVT::i32));
+      elt = DAG.getNode(ISD::FP_TO_SINT, dl, dstElt, elt);
+      if (idx == 0) {
+        // The first element seeds the result vector.
+        vec = DAG.getNode(AMDILISD::VBUILD, dl, dstVT, elt);
+      } else {
+        vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, dstVT,
+            vec, elt, DAG.getTargetConstant(idx, MVT::i32));
+      }
+    }
+    return vec;
+  }
+
+  // Only conversions from f64 need any further work.
+  if (!(srcElt == MVT::f64)) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  switch (dstElt.SimpleTy) {
+  case MVT::i32:
+    if (subtarget->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      return SDValue(Op.getNode(), 0);
+    }
+    return genf64toi32(src, DAG, true);
+  case MVT::i64:
+    return genf64toi64(src, DAG, true);
+  case MVT::i8:
+  case MVT::i16: {
+    if (subtarget->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      return DAG.getNode(ISD::TRUNCATE, dl, dstVT, SDValue(Op.getNode(), 0));
+    }
+    SDValue asInt = genf64toi32(src, DAG, true);
+    return DAG.getNode(ISD::TRUNCATE, dl, dstVT, asInt);
+  }
+  default:
+    return SDValue(Op.getNode(), 0);
+  }
+}
+
+// Custom lowering for a floating-point to unsigned-integer conversion node.
+//
+// * Vector f64 sources on devices newer than HD6XXX are split into one
+//   scalar FP_TO_UINT per element and reassembled into a vector.
+// * f64 -> i32 and f64 -> i8/i16 are expanded through genf64toi32 (without
+//   sign handling) on HD6XXX and older; newer devices keep the node (plus a
+//   TRUNCATE for the narrow types).
+// * f64 -> i64 always goes through genf64toi64 (without sign handling).
+// * Everything else is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST;
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  DebugLoc DL = Op.getDebugLoc();
+  const AMDILTargetMachine*
+    amdtm = reinterpret_cast<const AMDILTargetMachine*>
+    (&this->getTargetMachine());
+  const AMDILSubtarget*
+    stm = dynamic_cast<const AMDILSubtarget*>(
+        amdtm->getSubtargetImpl());
+  if (RST == MVT::f64 && RHSVT.isVector()
+      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // We dont support vector 64bit floating point convertions.
+    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      // Each element must use the unsigned conversion; this previously
+      // emitted FP_TO_SINT, copied from the signed lowering.
+      op = DAG.getNode(ISD::FP_TO_UINT, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
+            DST, op, DAG.getTargetConstant(x, MVT::i32));
+      }
+    }
+  } else {
+    if (RST == MVT::f64
+        && LST == MVT::i32) {
+      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+        DST = SDValue(Op.getNode(), 0);
+      } else {
+        DST = genf64toi32(RHS, DAG, false);
+      }
+    } else if (RST == MVT::f64
+        && LST == MVT::i64) {
+      DST = genf64toi64(RHS, DAG, false);
+    } else if (RST == MVT::f64
+        && (LST == MVT::i8 || LST == MVT::i16)) {
+      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
+      } else {
+        SDValue ToInt = genf64toi32(RHS, DAG, false);
+        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
+      }
+    } else {
+      DST = SDValue(Op.getNode(), 0);
+    }
+  }
+  return DST;
+}
+// Emits DAG nodes converting an unsigned 32-bit integer (scalar or vector)
+// to the double type LHSVT.
+//
+// With CAL version SC_135 or later the integer is placed in the low word of
+// the bit pattern of 0x1.0p+52 and 0x1.0p+52 is subtracted again.  Older
+// versions assemble the exponent and fraction fields by hand from the
+// leading-zero count.
+SDValue
+AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
+                                 SelectionDAG &DAG) const
+{
+  EVT srcVT = RHS.getValueType();
+  DebugLoc dl = RHS.getDebugLoc();
+  const bool vec = srcVT.isVector();
+  EVT i32Ty = EVT(MVT::i32);
+  EVT i64Ty = EVT(MVT::i64);
+  if (vec) {
+    i64Ty = EVT(MVT::getVectorVT(MVT::i64, srcVT.getVectorNumElements()));
+    i32Ty = EVT(MVT::getVectorVT(MVT::i32, srcVT.getVectorNumElements()));
+  }
+  // Opcode that glues a (lo, hi) pair of 32-bit values into a 64-bit value.
+  const unsigned makeLong = vec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE;
+
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (subtarget->calVersion() >= CAL_VERSION_SC_135) {
+    // unsigned x = RHS;
+    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+    // double d = as_double( xd );
+    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
+    SDValue magicHi = DAG.getConstant( 0x43300000, i32Ty );
+    SDValue packed = DAG.getNode( makeLong, dl, i64Ty, RHS, magicHi );
+    SDValue asDouble = DAG.getNode( ISDBITCAST, dl, LHSVT, packed );
+    SDValue magicBits = DAG.getConstant( 0x4330000000000000ULL, i64Ty );
+    SDValue magic = DAG.getNode( ISDBITCAST, dl, LHSVT, magicBits );
+    return DAG.getNode( ISD::FSUB, dl, LHSVT, asDouble, magic );
+  }
+
+  SDValue lz = genCLZu32(RHS, DAG);
+
+  // Biased exponent: 1023 is the bias, 31 - lz the actual power of two.
+  // A zero input selects RHS itself (i.e. zero) as the exponent.
+  SDValue expBase = DAG.getConstant( (1023+31), i32Ty );
+  SDValue biasedExp = DAG.getNode( ISD::SUB, dl, i32Ty, expBase, lz );
+  biasedExp = DAG.getNode( AMDILISD::CMOVLOG, dl, i32Ty, RHS, biasedExp, RHS );
+
+  // Normalize the fraction so the leading one sits in bit 31 ...
+  SDValue fracHi = DAG.getNode( ISD::SHL, dl, i32Ty, RHS, lz );
+
+  // ... and drop that leading (hidden) bit.
+  SDValue hiddenMask = DAG.getConstant( 0x7fffffff, i32Ty );
+  fracHi = DAG.getNode( ISD::AND, dl, i32Ty, fracHi, hiddenMask );
+
+  // Spread the fraction over the two result words and merge the exponent.
+  SDValue shl21 = DAG.getConstant( (32 - 11), i32Ty );
+  SDValue fracLo = DAG.getNode( ISD::SHL, dl, i32Ty, fracHi, shl21 );
+  SDValue shr11 = DAG.getConstant( 11, i32Ty );
+  fracHi = DAG.getNode( ISD::SRL, dl, i32Ty, fracHi, shr11 );
+  SDValue shl20 = DAG.getConstant( 20, i32Ty );
+  biasedExp = DAG.getNode( ISD::SHL, dl, i32Ty, biasedExp, shl20 );
+  fracHi = DAG.getNode( ISD::OR, dl, i32Ty, fracHi, biasedExp );
+
+  // Convert 2 x 32 in to 1 x 64, then to double precision float type
+  SDValue bits = DAG.getNode( makeLong, dl, i64Ty, fracLo, fracHi );
+  return DAG.getNode(ISDBITCAST, dl, LHSVT, bits);
+}
+// Emits DAG nodes converting an unsigned 64-bit integer (scalar or vector)
+// to the double type LHSVT.
+//
+// RHS    the i64 (or vector of i64) value to convert.
+// LHSVT  the floating-point result type.
+// DAG    the selection DAG the new nodes are added to.
+//
+// Three strategies are used:
+// * devices newer than HD6XXX: convert both 32-bit halves and combine them
+//   with mad(hi, 2^32, lo);
+// * CAL version SC_135 or later: place each half in the mantissa of a
+//   suitably scaled constant and subtract the constants again;
+// * otherwise: assemble exponent and fraction by hand from the
+//   leading-zero count, including an explicit rounding step.
+SDValue
+AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
+                                 SelectionDAG &DAG) const
+{
+  EVT RHSVT = RHS.getValueType();
+  DebugLoc DL = RHS.getDebugLoc();
+  EVT INTVT;
+  EVT LONGVT;
+  bool isVec = RHSVT.isVector();
+  if (isVec) {
+    INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                 RHSVT.getVectorNumElements()));
+  } else {
+    INTVT = EVT(MVT::i32);
+  }
+  LONGVT = RHSVT;
+  SDValue x = RHS;
+  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+      &this->getTargetMachine())->getSubtargetImpl();
+  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // double dhi = (double)(as_uint2(x).y);
+    // double dlo = (double)(as_uint2(x).x);
+    // return mad(dhi, 0x1.0p+32, dlo)
+    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
+    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
+    // getConstantFP takes the numeric value, not a bit pattern.  The
+    // previous argument 0x4f800000 is the single-precision encoding of 2^32
+    // and was interpreted as the value 1333788672.0.
+    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
+        DAG.getConstantFP(4294967296.0, LHSVT), dlo);  // 0x1.0p+32
+  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
+    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
+    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
+    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
+    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
+    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
+    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
+    // 0x4530000000100000 is the bit pattern of 0x1.0p+84 + 0x1.0p+52.
+    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
+        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
+    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
+    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
+
+  } else {
+    SDValue clz = genCLZu64(x, DAG);
+    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
+    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+
+    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
+    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
+        DAG.getConstant( (1023+63), INTVT), clz );
+    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
+    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        mash, exp, mash ); // exp = exp, or 0 if input was 0
+
+    // Normalize frac: shift the 64-bit value left by clz, done on the two
+    // 32-bit halves.  clz31 is the shift within a word, clz32 tells whether
+    // a whole word has to be skipped.
+    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
+        clz, DAG.getConstant( 31, INTVT ) );
+    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
+        DAG.getConstant( 32, INTVT ), clz31 );
+    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
+    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
+    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
+    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
+    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
+    SDValue rlo2 = DAG.getConstant( 0, INTVT );
+    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
+        clz, DAG.getConstant( 32, INTVT ) );
+    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        clz32, rhi2, rhi1 );
+    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+        clz32, rlo2, rlo1 );
+
+    // Eliminate hidden bit
+    rhi = DAG.getNode( ISD::AND, DL, INTVT,
+        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
+
+    // Save bits needed to round properly
+    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
+        rlo, DAG.getConstant( 0x7ff, INTVT ) );
+
+    // Pack exponent and frac
+    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
+        rlo, DAG.getConstant( 11, INTVT ) );
+    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
+        rhi, DAG.getConstant( (32 - 11), INTVT ) );
+    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
+    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
+        rhi, DAG.getConstant( 11, INTVT ) );
+    exp = DAG.getNode( ISD::SHL, DL, INTVT,
+        exp, DAG.getConstant( 20, INTVT ) );
+    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
+
+    // Compute rounding bit
+    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
+        rlo, DAG.getConstant( 1, INTVT ) );
+    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
+        round, DAG.getConstant( 0x3ff, INTVT ) );
+    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
+        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
+        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
+    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
+    round = DAG.getNode( ISD::SRL, DL, INTVT,
+        round, DAG.getConstant( 10, INTVT ) );
+    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
+
+    // Add rounding bit
+    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
+        round, DAG.getConstant( 0, INTVT ) );
+    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
+    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
+  }
+}
+// Custom lowering for an unsigned-integer to floating-point conversion node.
+//
+// * Vector f64 results on devices newer than HD6XXX are split into one
+//   scalar UINT_TO_FP per element and reassembled into a vector.
+// * i32 -> f64 is expanded through genu32tof64 on HD6XXX and older, and
+//   left unchanged on newer devices.
+// * i64 -> f64 always goes through genu64tof64.
+// * Everything else is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue src = Op.getOperand(0);
+  EVT srcVT = src.getValueType();
+  MVT srcElt = srcVT.getScalarType().getSimpleVT();
+  EVT dstVT = Op.getValueType();
+  MVT dstElt = dstVT.getScalarType().getSimpleVT();
+  DebugLoc dl = Op.getDebugLoc();
+  const AMDILTargetMachine *tm =
+    reinterpret_cast<const AMDILTargetMachine*>(&this->getTargetMachine());
+  const AMDILSubtarget *subtarget =
+    dynamic_cast<const AMDILSubtarget*>(tm->getSubtargetImpl());
+
+  if (dstElt == MVT::f64 && dstVT.isVector()
+      && subtarget->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // We dont support vector 64bit floating point convertions.
+    SDValue vec = Op;
+    const unsigned numElts = dstVT.getVectorNumElements();
+    for (unsigned idx = 0; idx < numElts; ++idx) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          dl, srcElt, src, DAG.getTargetConstant(idx, MVT::i32));
+      elt = DAG.getNode(ISD::UINT_TO_FP, dl, dstElt, elt);
+      if (idx == 0) {
+        // The first element seeds the result vector.
+        vec = DAG.getNode(AMDILISD::VBUILD, dl, dstVT, elt);
+      } else {
+        vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, dstVT, vec,
+            elt, DAG.getTargetConstant(idx, MVT::i32));
+      }
+    }
+    return vec;
+  }
+
+  if (srcElt == MVT::i32 && dstElt == MVT::f64) {
+    if (subtarget->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      return SDValue(Op.getNode(), 0);
+    }
+    return genu32tof64(src, dstVT, DAG);
+  }
+
+  if (srcElt == MVT::i64 && dstElt == MVT::f64) {
+    return genu64tof64(src, dstVT, DAG);
+  }
+
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for a signed-integer to floating-point conversion node.
+//
+// * Vector f64 results on devices newer than HD6XXX are split into one
+//   scalar SINT_TO_FP per element and reassembled into a vector.
+// * i32 -> f64 is left unchanged on devices newer than HD6XXX.
+// * Remaining i32/i64 -> f64 conversions take the absolute value, convert
+//   it with genu32tof64 / genu64tof64 and OR the sign bit back into the
+//   high word of the result.
+// * Everything else is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue RHS = Op.getOperand(0);
+  EVT RHSVT = RHS.getValueType();
+  MVT RST = RHSVT.getScalarType().getSimpleVT();
+  EVT INTVT;
+  EVT LONGVT;
+  SDValue DST;
+  bool isVec = RHSVT.isVector();
+  DebugLoc DL = Op.getDebugLoc();
+  EVT LHSVT = Op.getValueType();
+  MVT LST = LHSVT.getScalarType().getSimpleVT();
+  const AMDILTargetMachine*
+    amdtm = reinterpret_cast<const AMDILTargetMachine*>
+    (&this->getTargetMachine());
+  const AMDILSubtarget*
+    stm = dynamic_cast<const AMDILSubtarget*>(
+        amdtm->getSubtargetImpl());
+  if (LST == MVT::f64 && LHSVT.isVector()
+      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    // We dont support vector 64bit floating point convertions.
+    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
+      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
+      // Each element must use the signed conversion; this previously
+      // emitted UINT_TO_FP, copied from the unsigned lowering.
+      op = DAG.getNode(ISD::SINT_TO_FP, DL, LST, op);
+      if (!x) {
+        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
+      } else {
+        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
+            op, DAG.getTargetConstant(x, MVT::i32));
+      }
+    }
+  } else {
+    if (isVec) {
+      LONGVT = EVT(MVT::getVectorVT(MVT::i64,
+                                    RHSVT.getVectorNumElements()));
+      INTVT = EVT(MVT::getVectorVT(MVT::i32,
+                                   RHSVT.getVectorNumElements()));
+    } else {
+      LONGVT = EVT(MVT::i64);
+      INTVT = EVT(MVT::i32);
+    }
+    if ((RST == MVT::i32 || RST == MVT::i64)
+        && LST == MVT::f64) {
+      if (RST == MVT::i32) {
+        if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+          DST = SDValue(Op.getNode(), 0);
+          return DST;
+        }
+      }
+      SDValue c31 = DAG.getConstant( 31, INTVT );
+      SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
+
+      SDValue S; // Sign, as 0 or -1
+      SDValue Sbit; // Sign bit, as one bit, MSB only.
+      if (RST == MVT::i32) {
+        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
+        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
+      } else { // 64-bit case... SRA of 64-bit values is slow
+        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
+        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
+        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
+        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
+      }
+
+      // get abs() of input value, given sign as S (0 or -1)
+      // SpI = RHS + S
+      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
+      // SpIxS = SpI ^ S
+      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
+
+      // Convert unsigned value to double precision
+      SDValue R;
+      if (RST == MVT::i32) {
+        // r = cast_u32_to_f64(SpIxS)
+        R = genu32tof64(SpIxS, LHSVT, DAG);
+      } else {
+        // r = cast_u64_to_f64(SpIxS)
+        R = genu64tof64(SpIxS, LHSVT, DAG);
+      }
+
+      // drop in the sign bit
+      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
+      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
+      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
+      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
+      t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
+      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
+    } else {
+      DST = SDValue(Op.getNode(), 0);
+    }
+  }
+  return DST;
+}
+// Custom lowering for ISD::SUB.
+//
+// Only i64 / v2i64 subtracts are expanded here; every other type is handed
+// back untouched.  The 64-bit value is split into 32-bit halves, the halves
+// are subtracted independently, and the result of the unsigned compare
+// lo(LHS) < lo(RHS) is added to the high half before the halves are glued
+// back together.
+// NOTE(review): adding the compare result only works as a borrow if
+// AMDILISD::CMP yields all-ones (-1) for "true" - confirm.
+SDValue
+AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  if (OVT.getScalarType() != MVT::i64) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+  const bool isVec = RHS.getValueType().isVector();
+  const unsigned loOpc = isVec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO;
+  const unsigned hiOpc = isVec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI;
+  const unsigned joinOpc = isVec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE;
+
+  // Type of one 32-bit half (per lane for v2i64).
+  MVT halfVT = MVT::i32;
+  if (OVT == MVT::v2i64) {
+    halfVT = MVT::v2i32;
+  }
+
+  // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
+  SDValue lhsLo = DAG.getNode(loOpc, DL, halfVT, LHS);
+  SDValue rhsLo = DAG.getNode(loOpc, DL, halfVT, RHS);
+  SDValue lhsHi = DAG.getNode(hiOpc, DL, halfVT, LHS);
+  SDValue rhsHi = DAG.getNode(hiOpc, DL, halfVT, RHS);
+  SDValue diffLo = DAG.getNode(ISD::SUB, DL, halfVT, lhsLo, rhsLo);
+  SDValue diffHi = DAG.getNode(ISD::SUB, DL, halfVT, lhsHi, rhsHi);
+
+  // TODO: need to use IBORROW on HD5XXX and later hardware
+  SDValue borrow;
+  if (OVT == MVT::i64) {
+    borrow = DAG.getNode(AMDILISD::CMP, DL, halfVT,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLo, rhsLo);
+  } else {
+    // Vector case: compare the low halves lane by lane and rebuild a v2i32.
+    SDValue lhsLane0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, MVT::i32, lhsLo, DAG.getTargetConstant(0, MVT::i32));
+    SDValue lhsLane1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, MVT::i32, lhsLo, DAG.getTargetConstant(1, MVT::i32));
+    SDValue rhsLane0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, MVT::i32, rhsLo, DAG.getTargetConstant(0, MVT::i32));
+    SDValue rhsLane1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, MVT::i32, rhsLo, DAG.getTargetConstant(1, MVT::i32));
+    SDValue borrow0 = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLane0, rhsLane0);
+    SDValue borrow1 = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
+        DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
+        lhsLane1, rhsLane1);
+    borrow = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, borrow0);
+    borrow = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
+        borrow, borrow1, DAG.getTargetConstant(1, MVT::i32));
+  }
+
+  diffHi = DAG.getNode(ISD::ADD, DL, halfVT, diffHi, borrow);
+  return DAG.getNode(joinOpc, DL, OVT, diffLo, diffHi);
+}
+// Custom lowering for ISD::FDIV: dispatches on the scalar element type to
+// the f64 or f32 expansion; any other type is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  const EVT eltVT = Op.getValueType().getScalarType();
+  if (eltVT == MVT::f64) {
+    return LowerFDIV64(Op, DAG);
+  }
+  if (eltVT == MVT::f32) {
+    return LowerFDIV32(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::SDIV: dispatches on the scalar element type.
+// i64 and i32 have dedicated expansions, i16 and i8 share the 24-bit one,
+// and any other type is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  const EVT eltVT = Op.getValueType().getScalarType();
+  if (eltVT == MVT::i64) {
+    return LowerSDIV64(Op, DAG);
+  }
+  if (eltVT == MVT::i32) {
+    return LowerSDIV32(Op, DAG);
+  }
+  if (eltVT == MVT::i16 || eltVT == MVT::i8) {
+    return LowerSDIV24(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::UDIV: dispatches on the scalar element type.
+// i64 and i32 have dedicated expansions, i16 and i8 share the 24-bit one,
+// and any other type is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  const EVT eltVT = Op.getValueType().getScalarType();
+  if (eltVT == MVT::i64) {
+    return LowerUDIV64(Op, DAG);
+  }
+  if (eltVT == MVT::i32) {
+    return LowerUDIV32(Op, DAG);
+  }
+  if (eltVT == MVT::i16 || eltVT == MVT::i8) {
+    return LowerUDIV24(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::SREM: dispatches on the scalar element type to
+// the i64 / i32 / i16 / i8 expansion; any other type is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+  const EVT eltVT = Op.getValueType().getScalarType();
+  if (eltVT == MVT::i64) {
+    return LowerSREM64(Op, DAG);
+  }
+  if (eltVT == MVT::i32) {
+    return LowerSREM32(Op, DAG);
+  }
+  if (eltVT == MVT::i16) {
+    return LowerSREM16(Op, DAG);
+  }
+  if (eltVT == MVT::i8) {
+    return LowerSREM8(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::UREM: dispatches on the scalar element type to
+// the i64 / i32 / i16 / i8 expansion; any other type is returned unchanged.
+SDValue
+AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
+{
+  const EVT eltVT = Op.getValueType().getScalarType();
+  if (eltVT == MVT::i64) {
+    return LowerUREM64(Op, DAG);
+  }
+  if (eltVT == MVT::i32) {
+    return LowerUREM32(Op, DAG);
+  }
+  if (eltVT == MVT::i16) {
+    return LowerUREM16(Op, DAG);
+  }
+  if (eltVT == MVT::i8) {
+    return LowerUREM8(Op, DAG);
+  }
+  return SDValue(Op.getNode(), 0);
+}
+
+// Custom lowering for ISD::MUL.
+//
+// Only i64 / v2i64 multiplies are expanded; every other type is returned
+// unchanged.  With each operand split into 32-bit halves (h, l) the result
+// is assembled as
+//   low  = UMUL(l_lhs, l_rhs)
+//   high = UMUL(h_rhs, l_lhs) + UMUL(l_rhs, h_lhs) + MULHU(l_rhs, l_lhs)
+// and the two halves are recombined with LCREATE / LCREATE2.
+SDValue
+AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  if (OVT.getScalarType() != MVT::i64) {
+    return SDValue(Op.getNode(), 0);
+  }
+
+  // TODO: This needs to be turned into a tablegen pattern
+  DebugLoc DL = Op.getDebugLoc();
+  const bool isVec = OVT.isVector();
+  const unsigned loOpc = isVec ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO;
+  const unsigned hiOpc = isVec ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI;
+  const unsigned joinOpc = isVec ? AMDILISD::LCREATE2 : AMDILISD::LCREATE;
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // Type of one 32-bit half (per lane for v2i64).
+  MVT halfVT = MVT::i32;
+  if (OVT == MVT::v2i64) {
+    halfVT = MVT::v2i32;
+  }
+
+  // mul64(h1, l1, h0, l0)
+  SDValue lhsLo = DAG.getNode(loOpc, DL, halfVT, LHS);
+  SDValue lhsHi = DAG.getNode(hiOpc, DL, halfVT, LHS);
+  SDValue rhsLo = DAG.getNode(loOpc, DL, halfVT, RHS);
+  SDValue rhsHi = DAG.getNode(hiOpc, DL, halfVT, RHS);
+
+  // MULLO_UINT_1 r1, h0, l1
+  SDValue crossA = DAG.getNode(AMDILISD::UMUL, DL, halfVT, rhsHi, lhsLo);
+  // MULLO_UINT_1 r2, h1, l0
+  SDValue crossB = DAG.getNode(AMDILISD::UMUL, DL, halfVT, rhsLo, lhsHi);
+  // ADD_INT hr, r1, r2
+  SDValue crossSum = DAG.getNode(ISD::ADD, DL, halfVT, crossA, crossB);
+  // MULHI_UINT_1 r3, l1, l0
+  SDValue carry = DAG.getNode(ISD::MULHU, DL, halfVT, rhsLo, lhsLo);
+  // ADD_INT hr, hr, r3
+  SDValue high = DAG.getNode(ISD::ADD, DL, halfVT, crossSum, carry);
+  // MULLO_UINT_1 l3, l1, l0
+  SDValue low = DAG.getNode(AMDILISD::UMUL, DL, halfVT, lhsLo, rhsLo);
+
+  return DAG.getNode(joinOpc, DL, OVT, low, high);
+}
+// Custom lowering for ISD::BUILD_VECTOR.
+//
+// The vector starts as a VBUILD of operand 0.  If every operand is the same
+// value that node is the result.  Otherwise, for 2 to 4 operands, the
+// remaining non-UNDEF operands are inserted one by one, last operand first.
+// Any other operand count returns the plain VBUILD of operand 0.
+// NOTE(review): the insert index is (lane + 4), i.e. 5, 6 and 7, rather
+// than the lane itself.  The reason is not visible in this function -
+// confirm against LowerINSERT_VECTOR_ELT before changing it.
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+  const unsigned numOps = Op.getNumOperands();
+
+  SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, VT, Op.getOperand(0));
+
+  // Is this a splat of operand 0?
+  unsigned firstDiff = 1;
+  while (firstDiff < numOps && Op.getOperand(0) == Op.getOperand(firstDiff)) {
+    ++firstDiff;
+  }
+  if (firstDiff >= numOps) {
+    return vec;
+  }
+
+  // Only 2-, 3- and 4-operand vectors get their remaining lanes inserted.
+  if (numOps < 2 || numOps > 4) {
+    return vec;
+  }
+  for (unsigned lane = numOps - 1; lane >= 1; --lane) {
+    SDValue elt = Op.getOperand(lane);
+    if (elt.getOpcode() == ISD::UNDEF) {
+      continue;
+    }
+    vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+        vec, elt, DAG.getConstant(lane + 4, MVT::i32));
+  }
+  return vec;
+}
+
+// Custom lowering for ISD::INSERT_VECTOR_ELT to AMDILISD::VINSERT.
+//
+// Operand 0 is the vector, operand 1 the element and operand 2 the index.
+// A non-vector result type simply returns operand 0.  If the element is
+// UNDEF, operand 0 is passed as the value to insert.
+//
+// VINSERT takes two byte-per-lane masks: mask2 is 0x04030201 with the byte
+// of the target lane cleared, mask3 is 0x01 in the byte of the target lane.
+//
+//  * Constant index: a single VINSERT.  The lane is taken modulo 4 because
+//    the masks only have four byte slots.  LowerBUILD_VECTOR in this file
+//    emits indices 5..7, which previously shifted a 32-bit value by 32 bits
+//    or more (undefined behaviour); the modulo gives the result a shifter
+//    that masks the count to 5 bits (e.g. x86) would have produced.
+//  * Dynamic index: a VINSERT is built for every lane and the right one is
+//    chosen with a chain of CMP(index == lane) / CMOVLOG, lane 0 being the
+//    default.
+SDValue
+AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  const SDValue *ptr = NULL;
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+  uint32_t swizzleNum = 0;
+  SDValue DST;
+  if (!VT.isVector()) {
+    SDValue Res = Op.getOperand(0);
+    return Res;
+  }
+
+  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
+    ptr = &Op.getOperand(1);
+  } else {
+    ptr = &Op.getOperand(0);
+  }
+  if (CSDN) {
+    swizzleNum = (uint32_t)CSDN->getZExtValue();
+    // Keep the shift inside the 32-bit mask (see header comment); the mask
+    // is unsigned so that shifting into bit 31 is well defined.
+    const uint32_t shift = (swizzleNum & 3) * 8;
+    uint32_t mask2 = 0x04030201 & ~(0xFFu << shift);
+    uint32_t mask3 = 0x01010101 & (0xFFu << shift);
+    DST = DAG.getNode(AMDILISD::VINSERT,
+        DL,
+        VT,
+        Op.getOperand(0),
+        *ptr,
+        DAG.getTargetConstant(mask2, MVT::i32),
+        DAG.getTargetConstant(mask3, MVT::i32));
+  } else {
+    // Default candidate: insertion into lane 0.
+    uint32_t mask2 = 0x04030201 & ~(0xFFu << (swizzleNum * 8));
+    uint32_t mask3 = 0x01010101 & (0xFFu << (swizzleNum * 8));
+    SDValue res = DAG.getNode(AMDILISD::VINSERT,
+        DL, VT, Op.getOperand(0), *ptr,
+        DAG.getTargetConstant(mask2, MVT::i32),
+        DAG.getTargetConstant(mask3, MVT::i32));
+    // NOTE(review): assumes at most 4 lanes, otherwise x * 8 would exceed
+    // the mask width - confirm the widest vector type that reaches here.
+    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
+      mask2 = 0x04030201 & ~(0xFFu << (x * 8));
+      mask3 = 0x01010101 & (0xFFu << (x * 8));
+      SDValue t = DAG.getNode(AMDILISD::VINSERT,
+          DL, VT, Op.getOperand(0), *ptr,
+          DAG.getTargetConstant(mask2, MVT::i32),
+          DAG.getTargetConstant(mask3, MVT::i32));
+      // c = (index == x), splatted to the vector type for the select.
+      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
+          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
+      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
+      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
+    }
+    DST = res;
+  }
+  return DST;
+}
+
+// Custom lowering for ISD::EXTRACT_VECTOR_ELT to AMDILISD::VEXTRACT.
+//
+// Operand 0 is the vector and operand 1 the (zero-based) element index.
+// VEXTRACT is given a one-based swizzle, so element i is selected with
+// swizzle i + 1.  If operand 0 is not a vector it is returned as is.
+//
+//  * Constant index: a single VEXTRACT with swizzle index + 1.
+//  * Dynamic index: every element is extracted and the right one is chosen
+//    with a chain of CMP / CMOVLOG, element 0 being the default.  The
+//    compare uses the zero-based element number (swizzle - 1); the previous
+//    code compared the index against the one-based swizzle itself and so
+//    selected the element before the requested one.
+SDValue
+AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  EVT VT = Op.getValueType();
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  uint64_t swizzleNum = 0;
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Res;
+  if (!Op.getOperand(0).getValueType().isVector()) {
+    Res = Op.getOperand(0);
+    return Res;
+  }
+  if (CSDN) {
+    // Static vector extraction
+    swizzleNum = CSDN->getZExtValue() + 1;
+    Res = DAG.getNode(AMDILISD::VEXTRACT,
+        DL, VT,
+        Op.getOperand(0),
+        DAG.getTargetConstant(swizzleNum, MVT::i32));
+  } else {
+    SDValue Op1 = Op.getOperand(1);
+    uint32_t vecSize = 4;
+    SDValue Op0 = Op.getOperand(0);
+    // Default candidate: element 0 (swizzle 1).
+    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
+        DL, VT, Op0,
+        DAG.getTargetConstant(1, MVT::i32));
+    if (Op0.getValueType().isVector()) {
+      vecSize = Op0.getValueType().getVectorNumElements();
+    }
+    for (uint32_t x = 2; x <= vecSize; ++x) {
+      // t = element (x - 1), addressed by one-based swizzle x.
+      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
+          DL, VT, Op0,
+          DAG.getTargetConstant(x, MVT::i32));
+      // c = (index == x - 1)
+      SDValue c = DAG.getNode(AMDILISD::CMP,
+          DL, Op1.getValueType(),
+          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
+          Op1, DAG.getConstant(x - 1, MVT::i32));
+      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
+          VT, c, t, res);
+    }
+    Res = res;
+  }
+  return Res;
+}
+
+// Custom lowering for ISD::EXTRACT_SUBVECTOR.
+//
+// Operand 0 is the source vector and operand 1 the index of the first
+// element to copy.  The result is built by extracting the first element,
+// splatting it with VBUILD, and then inserting source element
+// (start + x) into result lane x for every remaining lane.
+//
+// Fixes relative to the previous version, dynamic-index path only:
+//  * the running index is incremented in its own integer type instead of
+//    the vector's element type (which need not be an integer type);
+//  * elements are inserted at result lane x, as in the constant-index path,
+//    instead of at the running source index.
+SDValue
+AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  uint32_t vecSize = Op.getValueType().getVectorNumElements();
+  SDValue src = Op.getOperand(0);
+  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  uint64_t offset = 0;
+  EVT vecType = Op.getValueType().getVectorElementType();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Result;
+  if (CSDN) {
+    offset = CSDN->getZExtValue();
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
+    Result = DAG.getNode(AMDILISD::VBUILD, DL,
+        Op.getValueType(), Result);
+    for (uint32_t x = 1; x < vecSize; ++x) {
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+          src, DAG.getConstant(offset + x, MVT::i32));
+      if (elt.getOpcode() != ISD::UNDEF) {
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+            Op.getValueType(), Result, elt,
+            DAG.getConstant(x, MVT::i32));
+      }
+    }
+  } else {
+    SDValue idx = Op.getOperand(1);
+    const EVT idxVT = idx.getValueType();
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, vecType, src, idx);
+    Result = DAG.getNode(AMDILISD::VBUILD, DL,
+        Op.getValueType(), Result);
+    for (uint32_t x = 1; x < vecSize; ++x) {
+      // Advance the source index by one element.
+      idx = DAG.getNode(ISD::ADD, DL, idxVT,
+          idx, DAG.getConstant(1, idxVT));
+      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
+          src, idx);
+      if (elt.getOpcode() != ISD::UNDEF) {
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+            Op.getValueType(), Result, elt,
+            DAG.getConstant(x, MVT::i32));
+      }
+    }
+  }
+  return Result;
+}
+// Custom lowering for ISD::SCALAR_TO_VECTOR: emits an AMDILISD::VBUILD of
+// the scalar operand with the node's vector result type.
+SDValue
+AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  return DAG.getNode(AMDILISD::VBUILD, DL, VT, Op.getOperand(0));
+}
+// Custom lowering for ISD::AND: re-emits the node as AMDILISD::AND with the
+// same result type and the same two operands.
+SDValue
+AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  return DAG.getNode(AMDILISD::AND, DL, VT,
+      Op.getOperand(0), Op.getOperand(1));
+}
+// Custom lowering for ISD::OR: re-emits the node as AMDILISD::OR with the
+// same result type and the same two operands.
+SDValue
+AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  return DAG.getNode(AMDILISD::OR, DL, VT,
+      Op.getOperand(0), Op.getOperand(1));
+}
+// Custom lowering for ISD::SELECT (cond, true-value, false-value).
+// The condition is first passed through getConversionNode() and the select
+// itself becomes an AMDILISD::CMOVLOG of the node's result type.
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue ifTrue = Op.getOperand(1);
+  SDValue ifFalse = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue pred = getConversionNode(DAG, Op.getOperand(0), Op, true);
+  return DAG.getNode(AMDILISD::CMOVLOG, DL, Op.getValueType(),
+      pred, ifTrue, ifFalse);
+}
+// Custom lowering for ISD::SELECT_CC (lhs, rhs, true-value, false-value, cc).
+//
+// The comparison becomes an AMDILISD::CMP that is passed through
+// getConversionNode(), followed by an AMDILISD::CMOVLOG choosing between
+// the two values.  When both values are i32 constants the CMOVLOG is
+// skipped:
+//   true = all-ones, false = 0  -> the converted compare is the result
+//   true = 0, false = all-ones  -> AMDILISD::NOT of the converted compare
+SDValue
+AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue trueVal = Op.getOperand(2);
+  SDValue falseVal = Op.getOperand(3);
+  SDValue CC = Op.getOperand(4);
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+
+  // Check for possible elimination of cmov
+  bool skipCMov = false;
+  bool genINot = false;
+  if (trueVal.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
+    const ConstantSDNode *trueConst
+      = dyn_cast<ConstantSDNode>( trueVal.getNode() );
+    const ConstantSDNode *falseConst
+      = dyn_cast<ConstantSDNode>( falseVal.getNode() );
+    if (trueConst && falseConst) {
+      // Both possible result values are constants.
+      const bool direct = trueConst->isAllOnesValue()
+        && falseConst->isNullValue();
+      const bool inverted = !direct
+        && trueConst->isNullValue()
+        && falseConst->isAllOnesValue();
+      skipCMov = direct || inverted;
+      genINot = inverted;
+    }
+  }
+
+  const ISD::CondCode setCC = cast<CondCodeSDNode>(CC)->get();
+  const unsigned int ilCC = CondCCodeToCC(
+      setCC,
+      LHS.getValueType().getSimpleVT().SimpleTy);
+  assert((ilCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+
+  SDValue result = DAG.getNode(AMDILISD::CMP, DL, LHS.getValueType(),
+      DAG.getConstant(ilCC, MVT::i32), LHS, RHS);
+  result = getConversionNode(DAG, result, Op, true);
+  if (genINot) {
+    result = DAG.getNode(AMDILISD::NOT, DL, OVT, result);
+  }
+  if (!skipCMov) {
+    result = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT,
+        result, trueVal, falseVal);
+  }
+  return result;
+}
+// Custom lowering for ISD::SETCC (lhs, rhs, cc).
+// Emits an AMDILISD::CMP, passes it through getConversionNode(), and masks
+// the converted value with 1.
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  DebugLoc DL = Op.getDebugLoc();
+
+  const ISD::CondCode setCC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  const unsigned int ilCC = CondCCodeToCC(
+      setCC,
+      LHS.getValueType().getSimpleVT().SimpleTy);
+  assert((ilCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+
+  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, LHS.getValueType(),
+      DAG.getConstant(ilCC, MVT::i32), LHS, RHS);
+  cmp = getConversionNode(DAG, cmp, Op, true);
+
+  // Keep only bit 0 of the converted compare result.
+  const EVT cmpVT = cmp.getValueType();
+  return DAG.getNode(ISD::AND, DL, cmpVT,
+      DAG.getConstant(1, cmpVT), cmp);
+}
+
+// Custom lowering for ISD::SIGN_EXTEND_INREG.
+//
+// Operand 0 is the data, operand 1 a VTSDNode naming the narrower type to
+// sign-extend from.  The extension is done as SHL followed by SRA by
+// (width - narrow width) bits.  Data narrower than 32 bits is first
+// zero-extended to a 32-bit integer type, shifted there, and converted back
+// to its original type afterwards.
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue value = Op.getOperand(0);
+  VTSDNode *narrowNode = cast<VTSDNode>(Op.getOperand(1));
+  DebugLoc DL = Op.getDebugLoc();
+  EVT workVT = value.getValueType();
+  EVT narrowVT = narrowNode->getVT();
+  const unsigned narrowBits = narrowVT.getScalarType().getSizeInBits();
+  const unsigned srcBits
+    = workVT.isSimple() ? workVT.getScalarType().getSizeInBits() : 1;
+  const bool widen = srcBits < 32;
+  const unsigned shiftBits = (widen ? 32 : srcBits) - narrowBits;
+
+  if (widen) {
+    // If the op is less than 32 bits, then it needs to extend to 32bits
+    // so it can properly keep the upper bits valid.
+    EVT wideVT = genIntType(32,
+        workVT.isVector() ? workVT.getVectorNumElements() : 1);
+    value = DAG.getNode(ISD::ZERO_EXTEND, DL, wideVT, value);
+    workVT = wideVT;
+  }
+
+  SDValue amount = DAG.getConstant(shiftBits, workVT);
+  // Move the narrow value's sign bit to the top, then shift it back down
+  // arithmetically.
+  value = DAG.getNode(ISD::SHL, DL, workVT, value, amount);
+  value = DAG.getNode(ISD::SRA, DL, workVT, value, amount);
+
+  if (widen) {
+    // Once the sign extension is done, the op needs to be converted to
+    // its original type.
+    value = DAG.getSExtOrTrunc(value, DL, Op.getOperand(0).getValueType());
+  }
+  return value;
+}
+// Returns an integer type able to hold numEle elements of 'size' bits each.
+//
+// The lane type is i64 when size == 64 and i32 otherwise.  The lane count is
+// the total bit count (size * numEle) divided by the lane width, with a
+// minimum of one; one lane yields the scalar type, more lanes the matching
+// vector type.
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+  const bool is64 = (size == 64);
+  const int totalBits = (size * numEle);
+  int lanes = totalBits >> (is64 ? 6 : 5);
+  if (lanes == 0) {
+    lanes = 1;
+  }
+  if (is64) {
+    return (lanes == 1) ? EVT(MVT::i64)
+                        : EVT(MVT::getVectorVT(MVT::i64, lanes));
+  }
+  return (lanes == 1) ? EVT(MVT::i32)
+                      : EVT(MVT::getVectorVT(MVT::i32, lanes));
+}
+
+// Custom lowering for bitcasts (ISD::BITCAST / ISD::BIT_CONVERT, depending
+// on LLVM_VERSION).
+//
+// A floating point source is first reinterpreted (AMDILISD::BITCONV) as the
+// integer type of the same layout.  Integer destinations whose element
+// width differs from the source's are then expanded by hand into
+// extract / shift / mask / insert sequences (the individual cases are
+// described inline).  Everything else falls through to a single
+// AMDILISD::BITCONV of the (possibly converted) source to the result type.
+//
+// Fix relative to the previous version: the <4 x i16> -> i64 path chose
+// LCREATE2 because the *source* is a vector, although it builds a scalar
+// i64 from two scalar i32 values.  It now always uses LCREATE, matching how
+// LCREATE / LCREATE2 are used for i64 / v2i64 elsewhere in this file.
+SDValue
+#if LLVM_VERSION >= 2500
+AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
+#else
+AMDILTargetLowering::LowerBIT_CONVERT(SDValue Op, SelectionDAG &DAG) const
+#endif
+{
+  SDValue Src = Op.getOperand(0);
+  SDValue Dst = Op;
+  SDValue Res;
+  DebugLoc DL = Op.getDebugLoc();
+  EVT SrcVT = Src.getValueType();
+  EVT DstVT = Dst.getValueType();
+  // Lets bitcast the floating point types to an
+  // equivalent integer type before converting to vectors.
+  if (SrcVT.getScalarType().isFloatingPoint()) {
+    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
+          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
+          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
+        Src);
+    SrcVT = Src.getValueType();
+  }
+  uint32_t ScalarSrcSize = SrcVT.getScalarType()
+    .getSimpleVT().getSizeInBits();
+  uint32_t ScalarDstSize = DstVT.getScalarType()
+    .getSimpleVT().getSizeInBits();
+  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
+  bool isVec = SrcVT.isVector();
+  if (DstVT.getScalarType().isInteger() &&
+      (SrcVT.getScalarType().isInteger()
+       || SrcVT.getScalarType().isFloatingPoint())) {
+    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
+        || (ScalarSrcSize == 64
+          && DstNumEle == 4
+          && ScalarDstSize == 16)) {
+      // This is the problematic case when bitcasting i64 <-> <4 x i16>
+      // This approach is a little different as we cannot generate a
+      // <4 x i64> vector
+      // as that is illegal in our backend and we are already past
+      // the DAG legalizer.
+      // So, in this case, we will do the following conversion.
+      // Case 1 (<4 x i16> source, i64 result):
+      // %tmp = <4 x i16> %src convert <4 x i32>
+      // %tmp = <4 x i32> %tmp and 0xFFFF
+      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
+      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
+      // %dst = <2 x i32> %tmp bitcast i64
+      // Case 2 (i64 source, <4 x i16> result):
+      // %tmp = i64 %src bitcast <2 x i32>
+      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
+      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
+      // %tmp = <4 x i32> %tmp and 0xFFFF
+      // %dst = <4 x i16> %tmp bitcast <4 x i32>
+      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
+          DAG.getConstant(0xFFFF, MVT::i32));
+      SDValue const16 = DAG.getConstant(16, MVT::i32);
+      if (ScalarDstSize == 64) {
+        // case 1
+        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
+        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
+        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+            Op, DAG.getConstant(0, MVT::i32));
+        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+            Op, DAG.getConstant(1, MVT::i32));
+        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
+        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+            Op, DAG.getConstant(2, MVT::i32));
+        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+            Op, DAG.getConstant(3, MVT::i32));
+        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
+        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
+        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
+        // The result is a scalar i64 built from two scalar i32 halves, so
+        // the scalar LCREATE is required even though the source is a vector.
+        Res = DAG.getNode(AMDILISD::LCREATE, DL, MVT::i64, x, y);
+        return Res;
+      } else {
+        // case 2
+        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+        SDValue lor16
+          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+        SDValue hir16
+          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
+            MVT::v4i32, lo);
+        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+            getPointerTy(), DAG.getConstant(1, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+            resVec, lor16, idxVal);
+        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+            getPointerTy(), DAG.getConstant(2, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+            resVec, hi, idxVal);
+        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
+            getPointerTy(), DAG.getConstant(3, MVT::i32));
+        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+            resVec, hir16, idxVal);
+        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
+        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
+        return Res;
+      }
+    } else {
+      // There are four cases we need to worry about for bitcasts
+      // where the size of all
+      // source, intermediates and result is <= 128 bits, unlike
+      // the above case
+      // 1) Sub32bit bitcast 32bitAlign
+      // %dst = <4 x i8> bitcast i32
+      // (also <[2|4] x i16> to <[2|4] x i32>)
+      // 2) 32bitAlign bitcast Sub32bit
+      // %dst = i32 bitcast <4 x i8>
+      // 3) Sub32bit bitcast LargerSub32bit
+      // %dst = <2 x i8> bitcast i16
+      // (also <4 x i8> to <2 x i16>)
+      // 4) Sub32bit bitcast SmallerSub32bit
+      // %dst = i16 bitcast <2 x i8>
+      // (also <2 x i16> to <4 x i8>)
+      // This also only handles types that are powers of two
+      if ((ScalarDstSize & (ScalarDstSize - 1))
+          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
+        // Not a power-of-two width: nothing to expand here, fall through to
+        // the plain BITCONV at the end of the function.
+      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
+        // case 1:
+        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
+        // TODO: LLVM does not like DAG.getSExtOrTrunc(Src, DL, IntTy) here
+        // for some reason (cannot SignExt vectors), so the source is widened
+        // element by element instead.
+        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+            DAG.getUNDEF(IntTy.getScalarType()));
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+              getPointerTy(), DAG.getConstant(x, MVT::i32));
+          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+              SrcVT.getScalarType(), Src,
+              DAG.getConstant(x, MVT::i32));
+          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
+          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
+              res, temp, idx);
+        }
+        // Clear the extension bits so only the original element bits remain.
+        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+        SDValue *newEle = new SDValue[SrcNumEle];
+        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+              IntTy.getScalarType(), res,
+              DAG.getConstant(x, MVT::i32));
+        }
+        // Ratio source elements are packed into each destination element;
+        // shift every element to its position inside that element.
+        uint32_t Ratio = SrcNumEle / DstNumEle;
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          if (x % Ratio) {
+            newEle[x] = DAG.getNode(ISD::SHL, DL,
+                IntTy.getScalarType(), newEle[x],
+                DAG.getConstant(ScalarSrcSize * (x % Ratio),
+                  MVT::i32));
+          }
+        }
+        // Merge neighbouring pairs (and, for 8-bit sources, pairs of pairs).
+        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
+          newEle[x] = DAG.getNode(ISD::OR, DL,
+              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+        }
+        if (ScalarSrcSize == 8) {
+          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
+            newEle[x] = DAG.getNode(ISD::OR, DL,
+                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+          }
+          if (DstNumEle == 1) {
+            Dst = newEle[0];
+          } else {
+            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+                newEle[0]);
+            for (uint32_t x = 1; x < DstNumEle; ++x) {
+              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                  getPointerTy(), DAG.getConstant(x, MVT::i32));
+              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                  DstVT, Dst, newEle[x * 4], idx);
+            }
+          }
+        } else {
+          if (DstNumEle == 1) {
+            Dst = newEle[0];
+          } else {
+            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
+                newEle[0]);
+            for (uint32_t x = 1; x < DstNumEle; ++x) {
+              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                  getPointerTy(), DAG.getConstant(x, MVT::i32));
+              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+                  DstVT, Dst, newEle[x * 2], idx);
+            }
+          }
+        }
+        delete [] newEle;
+        return Dst;
+      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
+        // case 2:
+        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
+        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
+            DAG.getUNDEF(IntTy.getScalarType()));
+        // Each source element is split into 'mult' destination elements.
+        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          for (uint32_t y = 0; y < mult; ++y) {
+            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+                getPointerTy(),
+                DAG.getConstant(x * mult + y, MVT::i32));
+            SDValue t;
+            if (SrcNumEle > 1) {
+              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                  DL, SrcVT.getScalarType(), Src,
+                  DAG.getConstant(x, MVT::i32));
+            } else {
+              t = Src;
+            }
+            if (y != 0) {
+              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
+                  t, DAG.getConstant(y * ScalarDstSize,
+                    MVT::i32));
+            }
+            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+                DL, IntTy, vec, t, idx);
+          }
+        }
+        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
+        return Dst;
+      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
+        // case 3:
+        SDValue *numEle = new SDValue[SrcNumEle];
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
+          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
+          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
+              DAG.getConstant(0xFF, MVT::i16));
+        }
+        // Combine each odd byte (shifted up by 8) into the even byte
+        // before it.
+        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
+          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
+              DAG.getConstant(8, MVT::i16));
+          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
+              numEle[x-1], numEle[x]);
+        }
+        if (DstNumEle > 1) {
+          // If we are not a scalar i16, the only other case is a
+          // v2i16 since we can't have v8i8 at this point, v4i16
+          // cannot be generated
+          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
+              numEle[0]);
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+              getPointerTy(), DAG.getConstant(1, MVT::i32));
+          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
+              Dst, numEle[2], idx);
+        } else {
+          Dst = numEle[0];
+        }
+        delete [] numEle;
+        return Dst;
+      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
+        // case 4:
+        SDValue *numEle = new SDValue[DstNumEle];
+        // Every i16 yields two entries: the value itself and the value
+        // shifted right by 8; the final truncation keeps the low bytes.
+        for (uint32_t x = 0; x < SrcNumEle; ++x) {
+          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
+              numEle[x * 2], DAG.getConstant(8, MVT::i16));
+        }
+        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
+        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
+        for (uint32_t x = 1; x < DstNumEle; ++x) {
+          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
+              getPointerTy(), DAG.getConstant(x, MVT::i32));
+          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
+              Dst, numEle[x], idx);
+        }
+        delete [] numEle;
+        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
+        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
+        return Res;
+      }
+    }
+  }
+  // No hand expansion applied: plain reinterpretation of the source.
+  Res = DAG.getNode(AMDILISD::BITCONV,
+      Dst.getDebugLoc(),
+      Dst.getValueType(), Src);
+  return Res;
+}
+
+// Custom lowering for ISD::DYNAMIC_STACKALLOC (chain, size, ...).
+// Reads the AMDIL::SP register, adds the requested size to it, writes the
+// sum back to SP, and returns the merged pair {new SP value, output chain}.
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+    SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue inChain = Op.getOperand(0);
+  SDValue allocSize = Op.getOperand(1);
+  const unsigned int spReg = AMDIL::SP;
+
+  SDValue oldSP = DAG.getCopyFromReg(inChain, DL, spReg, MVT::i32);
+  SDValue newSP = DAG.getNode(ISD::ADD, DL, MVT::i32, oldSP, allocSize);
+  // The register write is chained after the register read.
+  SDValue outChain = DAG.getCopyToReg(oldSP.getValue(1), DL, spReg, newSP);
+
+  SDValue results[2] = {newSP, outChain};
+  return DAG.getMergeValues(results, 2 ,DL);
+}
+// Custom lowering for ISD::BRCOND (chain, condition, target): re-emits the
+// node as AMDILISD::BRANCH_COND with operands (chain, target, condition).
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue chain = Op.getOperand(0);
+  SDValue condition = Op.getOperand(1);
+  SDValue target = Op.getOperand(2);
+  return DAG.getNode(AMDILISD::BRANCH_COND,
+      Op.getDebugLoc(),
+      Op.getValueType(),
+      chain, target, condition);
+}
+
+// Custom lowering for ISD::BR_CC (chain, cc, lhs, rhs, target).
+// The comparison becomes an AMDILISD::CMP of the operands' type, and the
+// branch an AMDILISD::BRANCH_COND (chain, target, compare result).
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue chain = Op.getOperand(0);
+  CondCodeSDNode *ccNode = cast<CondCodeSDNode>(Op.getOperand(1));
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue target = Op.getOperand(4);
+
+  const unsigned int ilCC = CondCCodeToCC(
+      ccNode->get(),
+      LHS.getValueType().getSimpleVT().SimpleTy);
+  SDValue cmp = DAG.getNode(AMDILISD::CMP,
+      Op.getDebugLoc(),
+      LHS.getValueType(),
+      DAG.getConstant(ilCC, MVT::i32),
+      LHS, RHS);
+  return DAG.getNode(AMDILISD::BRANCH_COND,
+      cmp.getDebugLoc(),
+      MVT::Other, chain,
+      target, cmp);
+}
+
+// Custom lowering for ISD::FP_ROUND: re-emits the node as
+// AMDILISD::DP_TO_FP with the same result type and both original operands.
+SDValue
+AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  return DAG.getNode(AMDILISD::DP_TO_FP, DL, VT,
+      Op.getOperand(0), Op.getOperand(1));
+}
+
+// Custom lowering for ISD::CONCAT_VECTORS: re-emits the node as
+// AMDILISD::VCONCAT of operands 0 and 1 with the same result type.
+// Only the first two operands are forwarded.
+SDValue
+AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  return DAG.getNode(AMDILISD::VCONCAT, DL, VT,
+      Op.getOperand(0), Op.getOperand(1));
+}
+// LowerReturn - Lower a function return.
+//
+// The return values are assigned to locations with RetCC_AMDIL32, every
+// register location is added to the function's live-out set, and each value
+// is copied into its register.  The copies are linked through their second
+// result so they stay together.  The result is an AMDILISD::RET_FLAG node
+// whose operands are: the final chain, the constant 0, and (when at least
+// one value was copied) the second result of the last copy.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+    CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+#if LLVM_VERSION >= 2500
+    const SmallVectorImpl<SDValue> &OutVals,
+#endif
+    DebugLoc dl, SelectionDAG &DAG)
+const
+{
+  // One CCValAssign per return value, describing where it is placed.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+      getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze the return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function
+  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+  const unsigned int numLocs = RVLocs.size();
+  for (unsigned int i = 0; i != numLocs; ++i) {
+    if (!RVLocs[i].isRegLoc()) {
+      continue;
+    }
+    if (!MRI.isLiveOut(RVLocs[i].getLocReg())) {
+      MRI.addLiveOut(RVLocs[i].getLocReg());
+    }
+  }
+  // FIXME: implement this when tail call is implemented
+  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+  // both x86 and ppc implement this in ISelLowering
+
+  // Regular return here
+  SDValue glue;
+  SmallVector<SDValue, 6> retOps;
+  retOps.push_back(Chain);
+  retOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &loc = RVLocs[i];
+#if LLVM_VERSION >= 2500
+    SDValue value = OutVals[i];
+#else
+    SDValue value = Outs[i].Val;
+#endif
+    assert(loc.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyToReg(Chain, dl, loc.getLocReg(), value, glue);
+    // guarantee that all emitted copies are stuck together
+    // avoiding something bad
+    glue = Chain.getValue(1);
+  }
+  /*if (MF.getFunction()->hasStructRetAttr()) {
+    assert(0 && "Struct returns are not yet implemented!");
+    // Both MIPS and X86 have this
+    }*/
+  retOps[0] = Chain;
+  if (glue.getNode()) {
+    retOps.push_back(glue);
+  }
+
+  return DAG.getNode(AMDILISD::RET_FLAG,
+      dl,
+      MVT::Other, &retOps[0], retOps.size());
+}
+// generateLongRelational - Emit machine instructions that evaluate a 64-bit
+// relational (opCode is one of the AMDIL::L*/UL* compare opcodes handled in
+// the switch below) using 32-bit compares on the high and low halves of the
+// operands.
+//
+// MI     - the instruction being expanded; operand 0 is the destination and
+//          operands 2 and 3 are the left and right sources.
+// opCode - the 64-bit relational to expand.
+//
+// The instructions are emitted through generateMachineInst(), i.e. at the
+// block/iterator previously stored by setPrivateData(). The combined 32-bit
+// result is passed twice to LCREATE to form the destination.
+void
+AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const
+{
+ MachineOperand DST = MI->getOperand(0);
+ MachineOperand LHS = MI->getOperand(2);
+ MachineOperand RHS = MI->getOperand(3);
+ unsigned int opi32Code = 0, si32Code = 0;
+ unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
+ uint32_t REGS[12];
+ // Allocate 12 temporary virtual registers up front; not every slot is
+ // used by every relational.
+ for (int x = 0; x < 12; ++x) {
+ REGS[x] = genVReg(simpleVT);
+ }
+ // Pull out the high and low components of each 64 bit register
+ generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
+ generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
+ generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
+ // Determine the correct opcode that we should use.
+ // opi32Code is applied to the high halves, si32Code to the low halves.
+ switch(opCode) {
+ default:
+ // NOTE(review): when asserts are compiled out, opi32Code/si32Code stay 0
+ // and code generation continues with opcode 0.
+ assert(!"comparison case not handled!");
+ break;
+ case AMDIL::LEQ:
+ si32Code = opi32Code = AMDIL::IEQ;
+ break;
+ case AMDIL::LNE:
+ si32Code = opi32Code = AMDIL::INE;
+ break;
+ case AMDIL::LLE:
+ case AMDIL::ULLE:
+ case AMDIL::LGE:
+ case AMDIL::ULGE:
+ // GE swaps the high halves, LE swaps the low halves, so that the same
+ // "high LT" / "low UGE" pair below serves both directions.
+ if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
+ std::swap(REGS[0], REGS[2]);
+ } else {
+ std::swap(REGS[1], REGS[3]);
+ }
+ // Signed variants compare the high halves signed, unsigned ones unsigned.
+ if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::UGE;
+ break;
+ case AMDIL::LGT:
+ case AMDIL::ULGT:
+ // GT is LT with both operands exchanged.
+ std::swap(REGS[0], REGS[2]);
+ std::swap(REGS[1], REGS[3]);
+ // fall through
+ case AMDIL::LLT:
+ case AMDIL::ULLT:
+ if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
+ opi32Code = AMDIL::ILT;
+ } else {
+ opi32Code = AMDIL::ULT;
+ }
+ si32Code = AMDIL::ULT;
+ break;
+ };
+ // Do the initial opcode on the high and low components.
+ // This leaves the following:
+ // REGS[4] = L_HI OP R_HI
+ // REGS[5] = L_LO OP R_LO
+ generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
+ generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
+ switch(opi32Code) {
+ case AMDIL::IEQ:
+ case AMDIL::INE:
+ {
+ // Combine the results with AND for eq (both halves must match) or
+ // OR for ne (either half may differ).
+ uint32_t combineOp = (opi32Code == AMDIL::IEQ)
+ ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
+ generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
+ }
+ break;
+ default:
+ // this finishes codegen for the following pattern
+ // REGS[4] || (REGS[5] && (L_HI == R_HI))
+ generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
+ generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
+ REGS[9]);
+ generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
+ REGS[10]);
+ break;
+ }
+ // Build the destination from the combined result, used for both inputs.
+ generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
+}
+
+// getFunctionAlignment - Always reports 0, regardless of the function passed.
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+  const unsigned int Alignment = 0;
+  return Alignment;
+}
+
+// setPrivateData - Remember the basic block, insertion iterator, debug
+// location and instruction info that genVReg() and generateMachineInst()
+// read when emitting instructions.
+void
+AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
+    MachineBasicBlock::iterator &BBI,
+    DebugLoc *DL, const TargetInstrInfo *TII) const
+{
+  // Plain member stores; the four assignments are independent of each other.
+  mTII = TII;
+  mDL = DL;
+  mBBI = BBI;
+  mBB = BB;
+}
+// genVReg - Create a new virtual register in the function that owns the
+// current basic block (mBB). regType is a register class ID that is mapped
+// to a register class through getRegClassFromID().
+uint32_t
+AMDILTargetLowering::genVReg(uint32_t regType) const
+{
+  MachineRegisterInfo &RegInfo = mBB->getParent()->getRegInfo();
+  return RegInfo.createVirtualRegister(getRegClassFromID(regType));
+}
+
+// generateMachineInst - Build an instruction with the given opcode and
+// destination register at the stored insertion point (mBB/mBBI), using the
+// stored debug location and instruction info. Returns the builder so that
+// callers can append further operands.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
+{
+  MachineInstrBuilder Builder =
+      BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
+  return Builder;
+}
+
+// generateMachineInst - Same as the (opcode, dst) form, with one source
+// register operand appended.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1) const
+{
+  MachineInstrBuilder Builder = generateMachineInst(opcode, dst);
+  Builder.addReg(src1);
+  return Builder;
+}
+
+// generateMachineInst - Same as the (opcode, dst, src1) form, with a second
+// source register operand appended.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2) const
+{
+  MachineInstrBuilder Builder = generateMachineInst(opcode, dst, src1);
+  Builder.addReg(src2);
+  return Builder;
+}
+
+// generateMachineInst - Same as the (opcode, dst, src1, src2) form, with a
+// third source register operand appended.
+MachineInstrBuilder
+AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
+    uint32_t src1, uint32_t src2, uint32_t src3) const
+{
+  MachineInstrBuilder Builder = generateMachineInst(opcode, dst, src1, src2);
+  Builder.addReg(src3);
+  return Builder;
+}
+
+
+// LowerSDIV24 - Expand a signed integer divide through a single-precision
+// floating point divide.
+//
+// Both operands are sign-extended (or truncated) to 32-bit integers,
+// converted to float and divided; the truncated quotient is then corrected
+// by +1/-1 (sign taken from LHS ^ RHS) when |remainder| >= |divisor|.
+// Presumably intended for operands narrow enough that the float conversion
+// is exact (the name suggests 24 bits) - TODO confirm against the caller.
+//
+// Op  - the divide node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the quotient, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // Only scalar, 2-element and 4-element types are mapped; for any other
+  // vector width INTTY/FLTTY keep their default-constructed value.
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1   (jq is now +1 or -1 depending on the operand signs)
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+  // jq = (int)jq
+  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);   (remainder left by the truncated quotient)
+  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  // The scalar and vector cases build exactly the same node, so no
+  // distinction on INTTY is needed here.
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+
+  // jq = (cv ? jq : 0);
+  // NOTE(review): cv and jq are INTTY while the node is typed OVT; these
+  // differ when OVT is narrower than 32 bits - confirm this is intended.
+  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+                   DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
+
+// LowerSDIV32 - Expand a signed divide into an unsigned divide plus sign
+// fix-ups. The generated nodes are equivalent to the following IL:
+//   ilt  m0, LHS, 0
+//   ilt  m1, RHS, 0
+//   iadd a,  LHS, m0
+//   iadd b,  RHS, m1
+//   ixor a,  a,   m0
+//   ixor b,  b,   m1
+//   udiv q,  a,   b
+//   ixor m,  m0,  m1
+//   iadd q,  q,   m
+//   ixor DST, q,  m
+// The add-then-xor pairs act as a conditional negate, assuming the compare
+// yields all ones for "true" - TODO confirm.
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue Dividend = Op.getOperand(0);
+  SDValue Divisor = Op.getOperand(1);
+
+  // ilt m0, LHS, 0
+  SDValue DividendNeg = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      Dividend, DAG.getConstant(0, OVT));
+
+  // ilt m1, RHS, 0
+  SDValue DivisorNeg = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      Divisor, DAG.getConstant(0, OVT));
+
+  // iadd a, LHS, m0
+  Dividend = DAG.getNode(ISD::ADD, DL, OVT, Dividend, DividendNeg);
+
+  // iadd b, RHS, m1
+  Divisor = DAG.getNode(ISD::ADD, DL, OVT, Divisor, DivisorNeg);
+
+  // ixor a, a, m0
+  Dividend = DAG.getNode(ISD::XOR, DL, OVT, Dividend, DividendNeg);
+
+  // ixor b, b, m1
+  Divisor = DAG.getNode(ISD::XOR, DL, OVT, Divisor, DivisorNeg);
+
+  // udiv q, a, b
+  SDValue Quotient = DAG.getNode(ISD::UDIV, DL, OVT, Dividend, Divisor);
+
+  // ixor m, m0, m1
+  SDValue ResultNeg = DAG.getNode(ISD::XOR, DL, OVT, DividendNeg, DivisorNeg);
+
+  // iadd q, q, m
+  Quotient = DAG.getNode(ISD::ADD, DL, OVT, Quotient, ResultNeg);
+
+  // ixor DST, q, m
+  return DAG.getNode(ISD::XOR, DL, OVT, Quotient, ResultNeg);
+}
+
+// LowerSDIV64 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+// LowerUDIV24 - Expand an unsigned integer divide through a single-precision
+// floating point divide. It implements the following CL:
+//   int ia = (int)LHS
+//   float fa = (float)ia
+//   int ib = (int)RHS
+//   float fb = (float)ib
+//   float fq = native_divide(fa, fb)
+//   fq = trunc(fq)
+//   float t = mad(fq, fb, fb)
+//   int iq = (int)fq - (t <= fa)
+//   return (type)iq
+// Presumably intended for operands narrow enough that the float conversion
+// is exact (the name suggests 24 bits) - TODO confirm against the caller.
+//
+// Op  - the divide node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the quotient, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // Only scalar, 2-element and 4-element types are mapped; for any other
+  // vector width INTTY/FLTTY keep their default-constructed value.
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+
+  // int ia = (int)LHS
+  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
+
+  // float fa = (float)ia
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // int ib = (int)RHS
+  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
+
+  // float fb = (float)ib
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb)
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq)
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float t = mad(fq, fb, fb)   i.e. (fq + 1) * fb
+  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+
+  // int iq = (int)fq - (t <= fa)
+  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+  // The scalar and vector cases build exactly the same node, so no
+  // distinction on INTTY is needed here.
+  SDValue iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
+  // NOTE(review): the algorithm above is documented as a subtraction
+  // ("sub and not add because GPU returns 0, -1"), but the node built here
+  // is ISD::ADD. Left unchanged; confirm which one is intended.
+  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
+
+  // return (type)iq
+  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
+  return iq;
+}
+
+// LowerUDIV32 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+// LowerUDIV64 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+// LowerSREM8 - Perform the signed remainder in 32 bits: both operands are
+// sign-extended (or truncated) to the 32-bit integer type with the same
+// element count, an ISD::SREM is built on that type, and the result is
+// converted back to Op's type.
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  // v2i8 -> v2i32, v4i8 -> v4i32, anything else -> i32.
+  MVT WideVT = (OVT == MVT::v2i8) ? MVT::v2i32
+             : (OVT == MVT::v4i8) ? MVT::v4i32
+             : MVT::i32;
+  SDValue Dividend = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideVT);
+  SDValue Divisor = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideVT);
+  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideVT, Dividend, Divisor);
+  return DAG.getSExtOrTrunc(Rem, DL, OVT);
+}
+
+// LowerSREM16 - Perform the signed remainder in 32 bits: both operands are
+// sign-extended (or truncated) to the 32-bit integer type with the same
+// element count, an ISD::SREM is built on that type, and the result is
+// converted back to Op's type.
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  // v2i16 -> v2i32, v4i16 -> v4i32, anything else -> i32.
+  MVT WideVT = (OVT == MVT::v2i16) ? MVT::v2i32
+             : (OVT == MVT::v4i16) ? MVT::v4i32
+             : MVT::i32;
+  SDValue Dividend = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideVT);
+  SDValue Divisor = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideVT);
+  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideVT, Dividend, Divisor);
+  return DAG.getSExtOrTrunc(Rem, DL, OVT);
+}
+
+// LowerSREM32 - Expand a signed remainder into unsigned operations plus sign
+// fix-ups. The generated nodes are equivalent to the following IL:
+//   mov r0, LHS
+//   mov r1, RHS
+//   ilt r10, r0, 0
+//   ilt r11, r1, 0
+//   iadd r0, r0, r10
+//   iadd r1, r1, r11
+//   ixor r0, r0, r10
+//   ixor r1, r1, r11
+//   udiv r20, r0, r1
+//   umul r20, r20, r1
+//   sub r0, r0, r20
+//   iadd r0, r0, r10
+//   ixor DST, r0, r10
+// i.e. the remainder is computed as a - (a / b) * b on the adjusted operands
+// and then given the sign of the dividend. The add-then-xor pairs act as a
+// conditional negate, assuming the compare yields all ones for "true" -
+// TODO confirm.
+//
+// Op  - the remainder node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the remainder, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r0, DAG.getConstant(0, OVT));
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r1, DAG.getConstant(0, OVT));
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  // This has to be a divide: the multiply and subtract below turn the
+  // quotient into the remainder. Starting from a remainder would yield
+  // a - (a % b) * b instead.
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+// LowerSREM64 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+// LowerUREM8 - Expand an 8-bit unsigned remainder using 32-bit operations.
+// The generated nodes are equivalent to the following IL:
+//   mov r0, as_u32(LHS)
+//   mov r1, as_u32(RHS)
+//   and r10, r0, 0xFF
+//   and r11, r1, 0xFF
+//   cmov_logical r3, r11, r11, 0x1
+//   udiv r3, r10, r3
+//   cmov_logical r3, r11, r3, 0
+//   umul r3, r3, r11
+//   sub r3, r10, r3
+//   and as_u8(DST), r3, 0xFF
+// i.e. the remainder is computed as a - (a / b) * b. A zero divisor is
+// replaced by 1 for the divide and the quotient is then forced to 0, so the
+// result for b == 0 is a.
+//
+// Op  - the remainder node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the remainder, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, as_u32(LHS)
+  // (sign-extended here; the masks below discard the extended bits)
+  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // mov r1, as_u32(RHS)
+  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // and r10, r0, 0xFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
+      DAG.getConstant(0xFF, INTTY));
+
+  // and r11, r1, 0xFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
+      DAG.getConstant(0xFF, INTTY));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
+      DAG.getConstant(0x01, INTTY));
+
+  // udiv r3, r10, r3
+  // This has to be a divide: the multiply and subtract below turn the
+  // quotient into the remainder. Starting from a remainder would yield
+  // a - (a % b) * b instead.
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
+      DAG.getConstant(0, INTTY));
+
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
+
+  // and as_u8(DST), r3, 0xFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
+      DAG.getConstant(0xFF, INTTY));
+  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
+  return DST;
+}
+
+// LowerUREM16 - Expand a 16-bit unsigned remainder; the divide itself is
+// done in 32 bits. The generated nodes are equivalent to the following IL:
+//   mov r0, LHS
+//   mov r1, RHS
+//   and r10, r0, 0xFFFF
+//   and r11, r1, 0xFFFF
+//   cmov_logical r3, r11, r11, 0x1
+//   udiv as_u16(r3), as_u32(r10), as_u32(r3)
+//   and r3, r3, 0xFFFF
+//   cmov_logical r3, r11, r3, 0
+//   umul r3, r3, r11
+//   sub r3, r10, r3
+//   and DST, r3, 0xFFFF
+// i.e. the remainder is computed as a - (a / b) * b. A zero divisor is
+// replaced by 1 for the divide and the quotient is then forced to 0, so the
+// result for b == 0 is a.
+//
+// Op  - the remainder node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the remainder, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // and r10, r0, 0xFFFF
+  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // and r11, r1, 0xFFFF
+  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r11, 0x1
+  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
+      DAG.getConstant(0x01, OVT));
+
+  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
+  // This has to be a divide: the multiply and subtract below turn the
+  // quotient into the remainder. Starting from a remainder would yield
+  // a - (a % b) * b instead.
+  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
+  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
+  r3 = DAG.getNode(ISD::UDIV, DL, INTTY, r10, r3);
+  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
+  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
+
+  // and r3, r3, 0xFFFF
+  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
+      DAG.getConstant(0xFFFF, OVT));
+
+  // cmov_logical r3, r11, r3, 0
+  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
+      DAG.getConstant(0, OVT));
+  // umul r3, r3, r11
+  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
+
+  // sub r3, r10, r3
+  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
+
+  // and DST, r3, 0xFFFF
+  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
+      DAG.getConstant(0xFFFF, OVT));
+  return DST;
+}
+
+// LowerUREM32 - Expand an unsigned remainder as LHS - (LHS / RHS) * RHS.
+// The generated nodes are equivalent to the following IL:
+//   udiv q, LHS, RHS
+//   umul p, q, RHS
+//   sub DST, LHS, p
+SDValue
+AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue Dividend = Op.getOperand(0);
+  SDValue Divisor = Op.getOperand(1);
+
+  // udiv q, LHS, RHS
+  SDValue Quotient = DAG.getNode(ISD::UDIV, DL, OVT, Dividend, Divisor);
+
+  // umul p, q, RHS
+  SDValue Product = DAG.getNode(AMDILISD::UMUL, DL, OVT, Quotient, Divisor);
+
+  // sub DST, LHS, p
+  return DAG.getNode(ISD::SUB, DL, OVT, Dividend, Product);
+}
+
+// LowerUREM64 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
+
+
+// LowerFDIV32 - Expand a single-precision floating point divide.
+//
+// Two expansions exist, selected by the device generation reported by the
+// subtarget:
+//  * HD4XXX: the operands are reinterpreted as integers, their exponent
+//    fields are handled separately with integer arithmetic, the mantissa
+//    parts are divided via a reciprocal (DIV_INF) and a multiply, and the
+//    result is re-assembled with explicit handling of the cases selected by
+//    the compares below.
+//  * all other generations: the divisor is pre-scaled when its magnitude
+//    exceeds 2^96, divided with DIV_INF, and the quotient is scaled by the
+//    same factor.
+//
+// Op  - the divide node; operand 0 is the dividend, operand 1 the divisor.
+// DAG - the DAG in which the replacement nodes are created.
+// Returns the quotient, in Op's value type.
+SDValue
+AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ // Integer type with the same element count as OVT, used for bit-level work.
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2f32) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4f32) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue DST;
+ const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
+ &this->getTargetMachine())->getSubtargetImpl();
+ if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ // TODO: This doesn't work for vector types yet
+ // The LowerFDIV32 function generates equivalent to the following
+ // IL:
+ // mov r20, as_int(LHS)
+ // mov r21, as_int(RHS)
+ // and r30, r20, 0x7f800000
+ // and r31, r21, 0x7f800000
+ // and r32, r20, 0x807FFFFF
+ // and r33, r21, 0x807FFFFF
+ // ieq r40, r30, 0x7F800000
+ // ieq r41, r31, 0x7F800000
+ // ieq r42, r30, 0
+ // ieq r43, r31, 0
+ // and r50, r20, 0x80000000
+ // and r51, r21, 0x80000000
+ // ior r32, r32, 0x3f800000
+ // ior r33, r33, 0x3f800000
+ // cmov_logical r32, r42, r50, r32
+ // cmov_logical r33, r43, r51, r33
+ // cmov_logical r32, r40, r20, r32
+ // cmov_logical r33, r41, r21, r33
+ // ior r50, r40, r41
+ // ior r51, r42, r43
+ // ior r50, r50, r51
+ // inegate r52, r31
+ // iadd r30, r30, r52
+ // cmov_logical r30, r50, 0, r30
+ // div_zeroop(infinity) r21, 1.0, r33
+ // mul_ieee r20, r32, r21
+ // and r22, r20, 0x7FFFFFFF
+ // and r23, r20, 0x80000000
+ // ishr r60, r22, 0x00000017
+ // ishr r61, r30, 0x00000017
+ // iadd r20, r20, r30
+ // iadd r21, r22, r30
+ // iadd r60, r60, r61
+ // ige r42, 0, R60
+ // ior r41, r23, 0x7F800000
+ // ige r40, r60, 0x000000FF
+ // cmov_logical r40, r50, 0, r40
+ // cmov_logical r20, r42, r23, r20
+ // cmov_logical DST, r40, r41, r20
+ // as_float(DST)
+
+ // mov r20, as_int(LHS)
+ SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
+
+ // mov r21, as_int(RHS)
+ SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
+
+ // and r30, r20, 0x7f800000   (exponent field of LHS)
+ SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // and r31, r21, 0x7f800000   (exponent field of RHS)
+ SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x7f800000, INTTY));
+
+ // and r32, r20, 0x807FFFFF   (sign and mantissa of LHS)
+ SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // and r33, r21, 0x807FFFFF   (sign and mantissa of RHS)
+ SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x807FFFFF, INTTY));
+
+ // ieq r40, r30, 0x7F800000   (LHS exponent all ones)
+ SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r41, r31, 0x7F800000   (RHS exponent all ones)
+ SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0x7F800000, INTTY));
+
+ // ieq r42, r30, 0   (LHS exponent zero)
+ SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R30, DAG.getConstant(0, INTTY));
+
+ // ieq r43, r31, 0   (RHS exponent zero)
+ SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
+ R31, DAG.getConstant(0, INTTY));
+
+ // and r50, r20, 0x80000000   (sign bit of LHS)
+ SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // and r51, r21, 0x80000000   (sign bit of RHS)
+ SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ior r32, r32, 0x3f800000   (0x3f800000 is the bit pattern of 1.0f)
+ R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // ior r33, r33, 0x3f800000
+ R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
+ DAG.getConstant(0x3F800000, INTTY));
+
+ // cmov_logical r32, r42, r50, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+
+ // cmov_logical r33, r43, r51, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+
+ // cmov_logical r32, r40, r20, r32
+ R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+
+ // cmov_logical r33, r41, r21, r33
+ R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+
+ // ior r50, r40, r41
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
+
+ // ior r51, r42, r43
+ R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
+
+ // ior r50, r50, r51   (set when either exponent field is zero or all ones)
+ R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
+
+ // inegate r52, r31
+ SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+
+ // iadd r30, r30, r52   (difference of the two exponent fields)
+ R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
+
+ // cmov_logical r30, r50, 0, r30
+ R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY), R30);
+
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // NOTE(review): the next two sequences repeat the reciprocal and multiply
+ // above on the same operands (R33/R32 are already of type OVT here);
+ // this looks redundant - confirm before removing.
+ // div_zeroop(infinity) r21, 1.0, as_float(r33)
+ R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
+ R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ DAG.getConstantFP(1.0f, OVT), R33);
+
+ // mul_ieee as_int(r20), as_float(r32), r21
+ R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
+ R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
+ R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
+
+ // and r22, r20, 0x7FFFFFFF
+ SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x7FFFFFFF, INTTY));
+
+ // and r23, r20, 0x80000000
+ SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
+ DAG.getConstant(0x80000000, INTTY));
+
+ // ishr r60, r22, 0x00000017   (shift by 23, the mantissa width)
+ SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // ishr r61, r30, 0x00000017
+ SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
+ DAG.getConstant(0x00000017, INTTY));
+
+ // iadd r20, r20, r30
+ R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
+
+ // iadd r21, r22, r30
+ // NOTE(review): R21 is not read again after this assignment.
+ R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
+
+ // iadd r60, r60, r61
+ R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
+
+ // ige r42, 0, R60
+ R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ DAG.getConstant(0, INTTY),
+ R60);
+
+ // ior r41, r23, 0x7F800000
+ R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
+ DAG.getConstant(0x7F800000, INTTY));
+
+ // ige r40, r60, 0x000000FF
+ R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
+ DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
+ R60,
+ DAG.getConstant(0x0000000FF, INTTY));
+
+ // cmov_logical r40, r50, 0, r40
+ R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
+ DAG.getConstant(0, INTTY),
+ R40);
+
+ // cmov_logical r20, r42, r23, r20
+ R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+
+ // cmov_logical DST, r40, r41, r20
+ DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+
+ // as_float(DST)
+ DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
+ } else {
+ // The following sequence of DAG nodes produce the following IL:
+ // fabs r1, RHS
+ // lt r2, 0x1.0p+96f, r1
+ // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
+ // mul_ieee r1, RHS, r3
+ // div_zeroop(infinity) r0, LHS, r1
+ // mul_ieee DST, r0, r3
+
+ // fabs r1, RHS
+ SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
+ // lt r2, 0x1.0p+96f, r1   (0x6f800000 is the bit pattern of 2^96)
+ SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
+ DAG.getConstant(0x6f800000, INTTY), r1);
+ // cmov_logical r3, r2, 0x1.0p-32f, 1.0f
+ // (0x2f800000 is the bit pattern of 2^-32, 0x3f800000 that of 1.0f)
+ SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
+ DAG.getConstant(0x2f800000, INTTY),
+ DAG.getConstant(0x3f800000, INTTY));
+ // mul_ieee r1, RHS, r3
+ r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
+ // div_zeroop(infinity) r0, LHS, r1
+ SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+ // mul_ieee DST, r0, r3
+ DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
+ }
+ return DST;
+}
+
+// LowerFDIV64 - No expansion is performed: returns result 0 of the node Op
+// refers to. DAG is unused.
+SDValue
+AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  SDNode *Node = Op.getNode();
+  return SDValue(Node, 0);
+}
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.h b/src/gallium/drivers/radeon/AMDILISelLowering.h
new file mode 100644
index 00000000000..9d045e5e2c9
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.h
@@ -0,0 +1,576 @@
+//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_ISELLOWERING_H_
+#define AMDIL_ISELLOWERING_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm
+{
+ namespace AMDILISD // AMDIL-specific SelectionDAG node opcodes
+ {
+ enum
+ {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END, // opcodes below are numbered from here
+ INTTOANY, // Dummy instruction that takes an int and goes to
+ // any type converts the SDNode to an int
+ DP_TO_FP, // Conversion from 64bit FP to 32bit FP
+ FP_TO_DP, // Conversion from 32bit FP to 64bit FP
+ BITCONV, // instruction that converts from any type to any type
+ CMOV, // 32bit FP Conditional move instruction
+ CMOVLOG, // 32bit FP Conditional move logical instruction
+ SELECT, // NOTE(review): comment was a copy of CMOVLOG's; presumably a select node - confirm
+ SETCC, // NOTE(review): comment was a copy of CMOVLOG's; presumably a set-on-condition node - confirm
+ ISGN, // 32bit Int Sign instruction
+ INEGATE, // 32bit Int Negation instruction
+ MAD, // 32bit Fused Multiply Add instruction
+ ADD, // 32/64 bit pseudo instruction
+ AND, // 128 bit and instruction
+ OR, // 128 bit or instruction
+ NOT, // 128 bit not instruction
+ XOR, // 128 bit xor instruction
+ MOVE, // generic mov instruction
+ PHIMOVE, // generic phi-node mov instruction
+ VBUILD, // scalar to vector mov instruction
+ VEXTRACT, // extract vector components
+ VINSERT, // insert vector components
+ VCONCAT, // concat a single vector to another vector
+ UMAD, // 32bit UInt Fused Multiply Add instruction
+ CALL, // Function call based on a single integer
+ RET, // Return from a function call
+ SELECT_CC, // Select the correct conditional instruction
+ BRCC, // Select the correct branch instruction
+ CMPCC, // Compare two GPR operands
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC
+ // flags.
+ SELECT_FCC, // Select between two values using the current FCC
+ // flags.
+ LCREATE, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO, // Get the lo 32 bits from a 64 bit integer
+ DCREATE, // Create a 64bit float from two 32 bit integers
+ DCOMPHI, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO, // Get the lo 32 bits from a 64 bit float
+ LCREATE2, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI2, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO2, // Get the lo 32 bits from a 64 bit integer
+ DCREATE2, // Create a 64bit float from two 32 bit integers
+ DCOMPHI2, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO2, // Get the lo 32 bits from a 64 bit float
+ UMUL, // 32bit unsigned multiplication
+ IFFB_HI, // 32bit find first hi bit instruction
+ IFFB_LO, // 32bit find first low bit instruction
+ DIV_INF, // Divide with infinity returned on zero divisor
+ SMAX, // Signed integer max
+ CMP, // Compare; first operand is a condition-code constant, then the two values
+ IL_CC_I_GT,
+ IL_CC_I_LT,
+ IL_CC_I_GE,
+ IL_CC_I_LE,
+ IL_CC_I_EQ,
+ IL_CC_I_NE,
+ RET_FLAG,
+ BRANCH_COND,
+ LOOP_NZERO,
+ LOOP_ZERO,
+ LOOP_CMP,
+ ADDADDR,
+ // ATOMIC Operations (the *_NORET entries mirror the entries without the suffix)
+ // Global Memory
+ ATOM_G_ADD = ISD::FIRST_TARGET_MEMORY_OPCODE, // numbering restarts here; every later enumerator continues from it
+ ATOM_G_AND,
+ ATOM_G_CMPXCHG,
+ ATOM_G_DEC,
+ ATOM_G_INC,
+ ATOM_G_MAX,
+ ATOM_G_UMAX,
+ ATOM_G_MIN,
+ ATOM_G_UMIN,
+ ATOM_G_OR,
+ ATOM_G_SUB,
+ ATOM_G_RSUB,
+ ATOM_G_XCHG,
+ ATOM_G_XOR,
+ ATOM_G_ADD_NORET,
+ ATOM_G_AND_NORET,
+ ATOM_G_CMPXCHG_NORET,
+ ATOM_G_DEC_NORET,
+ ATOM_G_INC_NORET,
+ ATOM_G_MAX_NORET,
+ ATOM_G_UMAX_NORET,
+ ATOM_G_MIN_NORET,
+ ATOM_G_UMIN_NORET,
+ ATOM_G_OR_NORET,
+ ATOM_G_SUB_NORET,
+ ATOM_G_RSUB_NORET,
+ ATOM_G_XCHG_NORET,
+ ATOM_G_XOR_NORET,
+ // Local Memory
+ ATOM_L_ADD,
+ ATOM_L_AND,
+ ATOM_L_CMPXCHG,
+ ATOM_L_DEC,
+ ATOM_L_INC,
+ ATOM_L_MAX,
+ ATOM_L_UMAX,
+ ATOM_L_MIN,
+ ATOM_L_UMIN,
+ ATOM_L_OR,
+ ATOM_L_MSKOR,
+ ATOM_L_SUB,
+ ATOM_L_RSUB,
+ ATOM_L_XCHG,
+ ATOM_L_XOR,
+ ATOM_L_ADD_NORET,
+ ATOM_L_AND_NORET,
+ ATOM_L_CMPXCHG_NORET,
+ ATOM_L_DEC_NORET,
+ ATOM_L_INC_NORET,
+ ATOM_L_MAX_NORET,
+ ATOM_L_UMAX_NORET,
+ ATOM_L_MIN_NORET,
+ ATOM_L_UMIN_NORET,
+ ATOM_L_OR_NORET,
+ ATOM_L_MSKOR_NORET,
+ ATOM_L_SUB_NORET,
+ ATOM_L_RSUB_NORET,
+ ATOM_L_XCHG_NORET,
+ ATOM_L_XOR_NORET,
+ // Region Memory
+ ATOM_R_ADD,
+ ATOM_R_AND,
+ ATOM_R_CMPXCHG,
+ ATOM_R_DEC,
+ ATOM_R_INC,
+ ATOM_R_MAX,
+ ATOM_R_UMAX,
+ ATOM_R_MIN,
+ ATOM_R_UMIN,
+ ATOM_R_OR,
+ ATOM_R_MSKOR,
+ ATOM_R_SUB,
+ ATOM_R_RSUB,
+ ATOM_R_XCHG,
+ ATOM_R_XOR,
+ ATOM_R_ADD_NORET,
+ ATOM_R_AND_NORET,
+ ATOM_R_CMPXCHG_NORET,
+ ATOM_R_DEC_NORET,
+ ATOM_R_INC_NORET,
+ ATOM_R_MAX_NORET,
+ ATOM_R_UMAX_NORET,
+ ATOM_R_MIN_NORET,
+ ATOM_R_UMIN_NORET,
+ ATOM_R_OR_NORET,
+ ATOM_R_MSKOR_NORET,
+ ATOM_R_SUB_NORET,
+ ATOM_R_RSUB_NORET,
+ ATOM_R_XCHG_NORET,
+ ATOM_R_XOR_NORET,
+ // Append buffer
+ APPEND_ALLOC,
+ APPEND_ALLOC_NORET,
+ APPEND_CONSUME,
+ APPEND_CONSUME_NORET,
+ // 2D Images
+ IMAGE2D_READ,
+ IMAGE2D_WRITE,
+ IMAGE2D_INFO0,
+ IMAGE2D_INFO1,
+ // 3D Images
+ IMAGE3D_READ,
+ IMAGE3D_WRITE,
+ IMAGE3D_INFO0,
+ IMAGE3D_INFO1,
+
+ LAST_ISD_NUMBER // final enumerator: one greater than IMAGE3D_INFO1
+ };
+ } // AMDILISD
+
+ class MachineBasicBlock;
+ class MachineInstr;
+ class DebugLoc;
+ class TargetInstrInfo;
+
+ class AMDILTargetLowering : public TargetLowering // AMDIL DAG lowering interface
+ {
+ private:
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ AMDILTargetLowering(TargetMachine &TM);
+
+ virtual SDValue
+ LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ int
+ getVarArgsFrameOffset() const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of
+ /// the bits specified in Mask are known to be either
+ /// zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ /// Depth is optional and defaults to 0.
+ virtual void
+ computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0
+ ) const;
+
+ virtual MachineBasicBlock*
+ EmitInstrWithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool
+ getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+ virtual const char*
+ getTargetNodeName(
+ unsigned Opcode
+ ) const;
+ // We want to mark f32/f64 floating point values as
+ // legal
+ bool
+ isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ // We don't want to shrink f64/f32 constants because
+ // they both take up the same amount of space and
+ // we don't want to use a f2d instruction.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this
+ /// function.
+ virtual unsigned int
+ getFunctionAlignment(const Function *F) const;
+
+ private:
+ CCAssignFn*
+ CCAssignFnForNode(unsigned int CC) const;
+
+ SDValue LowerCallResult(SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+#if LLVM_VERSION >= 2500
+ const SmallVectorImpl<SDValue> &OutVals, // parameter present only when LLVM_VERSION >= 2500
+#endif
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+#if LLVM_VERSION >= 2500
+ const SmallVectorImpl<SDValue> &OutVals, // parameter present only when LLVM_VERSION >= 2500
+#endif
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ //+++--- Functions dealing with conversions between floating point and
+ // integer types ---+++//
+ SDValue
+ genCLZu64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genCLZuN(SDValue Op, SelectionDAG &DAG, uint32_t bits) const;
+ SDValue
+ genCLZu32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ genf64toi32(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genf64toi64(SDValue Op, SelectionDAG &DAG,
+ bool includeSign) const;
+
+ SDValue
+ genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ genu64tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerINTRINSIC_VOID(SDValue Op, SelectionDAG& DAG) const;
+
+ SDValue
+ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerADD(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSUB(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerUDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerUDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerAND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerOR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ EVT
+ genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+
+ SDValue
+#if LLVM_VERSION >= 2500
+ LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; // name declared when LLVM_VERSION >= 2500
+#else
+ LowerBIT_CONVERT(SDValue Op, SelectionDAG &DAG) const; // name declared for older LLVM_VERSION values
+#endif
+
+ SDValue
+ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ void
+ generateCMPInstr(MachineInstr*, MachineBasicBlock*,
+ const TargetInstrInfo&) const;
+ MachineOperand
+ convertToReg(MachineOperand) const;
+
+ // Private members used by the set of instruction generation
+ // functions. These are marked mutable as they are cached so
+ // that they don't have to constantly be looked up when using the
+ // generateMachineInst/genVReg instructions, which are const.
+ // This is to simplify the code and to make it cleaner.
+ // The four cached members are the basic block, the debug location,
+ // the target instruction info and a basic-block iterator.
+ mutable MachineBasicBlock *mBB;
+ mutable DebugLoc *mDL;
+ mutable const TargetInstrInfo *mTII;
+ mutable MachineBasicBlock::iterator mBBI;
+ void
+ setPrivateData(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator &BBI,
+ DebugLoc *DL,
+ const TargetInstrInfo *TII) const;
+ uint32_t genVReg(uint32_t regType) const;
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst) const; // overload: destination only
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1) const; // overload: one source
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2) const; // overload: two sources
+ MachineInstrBuilder
+ generateMachineInst(uint32_t opcode,
+ uint32_t dst, uint32_t src1, uint32_t src2,
+ uint32_t src3) const; // overload: three sources
+ uint32_t
+ addExtensionInstructions(
+ uint32_t reg, bool signedShift,
+ unsigned int simpleVT) const;
+ void
+ generateLongRelational(MachineInstr *MI,
+ unsigned int opCode) const;
+
+ }; // AMDILTargetLowering
+} // end namespace llvm
+
+#endif // AMDIL_ISELLOWERING_H_
diff --git a/src/gallium/drivers/radeon/AMDILImageExpansion.cpp b/src/gallium/drivers/radeon/AMDILImageExpansion.cpp
new file mode 100644
index 00000000000..81a6a6e8615
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILImageExpansion.cpp
@@ -0,0 +1,210 @@
+//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDILImageExpansion.cpp
+// @details Implementation of the image expansion class for image-capable devices
+//
+#include "AMDILIOExpansion.h"
+#include "AMDILKernelManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+// Constructor: hands tm and OptLevel straight to the AMDIL789IOExpansion base
+// class; the body itself performs no work.
+AMDILImageExpansion::AMDILImageExpansion(TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : AMDIL789IOExpansion(tm, OptLevel)
+{
+}
+
+// Destructor: empty body, nothing is released here.
+AMDILImageExpansion::~AMDILImageExpansion() {}
+// Disabled reference implementation of an image load expansion that prints IL
+// text switching over every (image resource, sampler) pair.
+//
+// NOTE: the whole body is wrapped in "#if 0", so as compiled this function
+// does nothing. The disabled text has been corrected so that it is right if it
+// is ever re-enabled:
+//  - an immediate operand 4 is moved into tReg4 (it used to be moved into
+//    tReg2, overwriting the register chosen for operand 2);
+//  - the switch statements print tReg1/tReg2 instead of calling getReg() on
+//    the operand again, which is not valid when operand 2 is an immediate;
+//  - the per-sampler switch is opened under the same SamplerCount condition
+//    that emits its case labels and its endswitch.
+void AMDILImageExpansion::expandInefficientImageLoad(
+ MachineBasicBlock *mBB, MachineInstr *MI)
+{
+#if 0
+ const llvm::StringRef &name = MI->getOperand(0).getGlobal()->getName();
+ const char *tReg1, *tReg2, *tReg3, *tReg4;
+ tReg1 = mASM->getRegisterName(MI->getOperand(1).getReg());
+ // Operands 2-4 may be registers or immediates; immediates are first moved
+ // into a fixed register (R1, R2, R3 respectively).
+ if (MI->getOperand(2).isReg()) {
+ tReg2 = mASM->getRegisterName(MI->getOperand(2).getReg());
+ } else {
+ tReg2 = mASM->getRegisterName(AMDIL::R1);
+ O << "\tmov " << tReg2 << ", l" << MI->getOperand(2).getImm() << "\n";
+ }
+ if (MI->getOperand(3).isReg()) {
+ tReg3 = mASM->getRegisterName(MI->getOperand(3).getReg());
+ } else {
+ tReg3 = mASM->getRegisterName(AMDIL::R2);
+ O << "\tmov " << tReg3 << ", l" << MI->getOperand(3).getImm() << "\n";
+ }
+ if (MI->getOperand(4).isReg()) {
+ tReg4 = mASM->getRegisterName(MI->getOperand(4).getReg());
+ } else {
+ tReg4 = mASM->getRegisterName(AMDIL::R3);
+ O << "\tmov " << tReg4 << ", l" << MI->getOperand(4).getImm() << "\n";
+ }
+ bool internalSampler = false;
+ //bool linear = true;
+ unsigned ImageCount = 3; // OPENCL_MAX_READ_IMAGES
+ unsigned SamplerCount = 3; // OPENCL_MAX_SAMPLERS
+ // A switch is only emitted when there is more than one choice.
+ if (ImageCount - 1) {
+ O << "\tswitch " << tReg1 << "\n";
+ }
+ for (unsigned rID = 0; rID < ImageCount; ++rID) {
+ if (ImageCount - 1) {
+ if (!rID) {
+ O << "\tdefault\n";
+ } else {
+ O << "\tcase " << rID << "\n" ;
+ }
+ }
+ if (SamplerCount - 1) {
+ O << "\tswitch " << tReg2 << "\n";
+ }
+ for (unsigned sID = 0; sID < SamplerCount; ++sID) {
+ if (SamplerCount - 1) {
+ if (!sID) {
+ O << "\tdefault\n";
+ } else {
+ O << "\tcase " << sID << "\n" ;
+ }
+ }
+ if (internalSampler) {
+ // Check if sampler has normalized setting.
+ O << "\tand r0.x, " << tReg2 << ".x, l0.y\n"
+ << "\tif_logicalz r0.x\n"
+ << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+ << "\tsample_resource(" << rID << ")_sampler("
+ << sID << ")_coordtype(unnormalized) "
+ << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+ << "\telse\n"
+ << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+ << "\titof " << tReg2 << ", cb1[" << tReg1 << ".x].xyz\n"
+ << "\tmul " << tReg3 << ", " << tReg3 << ", " << tReg2 << "\n"
+ << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+ << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
+ << tReg1 << ".y].xyz\n"
+ << "\tsample_resource(" << rID << ")_sampler("
+ << sID << ")_coordtype(normalized) "
+ << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n"
+ << "\tendif\n";
+ } else {
+ O << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
+ // Check if sampler has normalized setting.
+ << "\tand r0, " << tReg2 << ".x, l0.y\n"
+ // Convert image dimensions to float.
+ << "\titof " << tReg4 << ", cb1[" << tReg1 << ".x].xyz\n"
+ // Move into R0 1 if unnormalized or dimensions if normalized.
+ << "\tcmov_logical r0, r0, " << tReg4 << ", r1.1111\n"
+ // Make coordinates unnormalized.
+ << "\tmul " << tReg3 << ", r0, " << tReg3 << "\n"
+ // Get linear filtering if set.
+ << "\tand " << tReg4 << ", " << tReg2 << ".x, l6.x\n"
+ // Save unnormalized coordinates in R0.
+ << "\tmov r0, " << tReg3 << "\n"
+ // Floor the coordinates due to HW incompatibility with precision
+ // requirements.
+ << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+ // Get the original coordinates (without floor) if linear filtering
+ << "\tcmov_logical " << tReg3 << ", " << tReg4
+ << ".xxxx, r0, " << tReg3 << "\n"
+ // Normalize the coordinates by multiplying by 1/dimensions
+ << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
+ << tReg1 << ".y].xyz\n"
+ << "\tsample_resource(" << rID << ")_sampler("
+ << sID << ")_coordtype(normalized) "
+ << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n";
+ }
+ if (SamplerCount - 1) {
+ O << "\tbreak\n";
+ }
+ }
+ if (SamplerCount - 1) {
+ O << "\tendswitch\n";
+ }
+ if (ImageCount - 1) {
+ O << "\tbreak\n";
+ }
+ }
+ if (ImageCount - 1) {
+ O << "\tendswitch\n";
+ }
+#endif
+}
+ void
+AMDILImageExpansion::expandImageLoad(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+ // Operand 1 of MI is rewritten in place into an immediate holding the ID
+ // that getPointerID() reports for this instruction.
+ const uint32_t resourceID = getPointerID(MI);
+ MachineOperand &imageOperand = MI->getOperand(1);
+ imageOperand.ChangeToImmediate(resourceID);
+ // NOTE(review): presumably tells the caller to keep MI - confirm.
+ saveInst = true;
+}
+ void
+AMDILImageExpansion::expandImageStore(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+ // Order matters here: the ID is looked up first, then setOutputInst() is
+ // called on mKM, and only then is operand 0 turned into an immediate
+ // holding that ID.
+ const uint32_t resourceID = getPointerID(MI);
+ mKM->setOutputInst();
+ MachineOperand &imageOperand = MI->getOperand(0);
+ imageOperand.ChangeToImmediate(resourceID);
+ // NOTE(review): presumably tells the caller to keep MI - confirm.
+ saveInst = true;
+}
+ void
+AMDILImageExpansion::expandImageParam(MachineBasicBlock *mBB, MachineInstr *MI)
+{
+ // Build a CBLOAD at MI's position in mBB. Its destination is the register
+ // of MI's operand 0 and its two immediates are the pointer ID of MI
+ // followed by the constant 1.
+ const uint32_t paramID = getPointerID(MI);
+ DebugLoc loc = MI->getDebugLoc();
+ unsigned dstReg = MI->getOperand(0).getReg();
+ BuildMI(*mBB, *MI, loc, mTII->get(AMDIL::CBLOAD), dstReg)
+ .addImm(paramID)
+ .addImm(1);
+}
diff --git a/src/gallium/drivers/radeon/AMDILInliner.cpp b/src/gallium/drivers/radeon/AMDILInliner.cpp
new file mode 100644
index 00000000000..1d0f43866ce
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInliner.cpp
@@ -0,0 +1,318 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdilinline"
+#include "AMDIL.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+namespace
+{
+ // Function pass that inlines direct calls to defined (non-declaration)
+ // functions inside each function it visits; see runOnFunction().
+ class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass
+
+ {
+ public:
+ // Target machine the pass was created for; runOnFunction() queries its
+ // AMDILSubtarget.
+ TargetMachine &TM;
+ // Pass identification; the address is what is handed to FunctionPass.
+ static char ID;
+ AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL);
+ ~AMDILInlinePass();
+ virtual const char* getPassName() const;
+ virtual bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ private:
+ // Maps an array type to the allocas of that type that earlier inlining
+ // steps left available for reuse.
+ typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*,
+ DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy;
+ // Inlines one call site and merges array allocas where possible.
+ bool
+ AMDILInlineCallIfPossible(CallSite CS,
+ const TargetData *TD,
+ InlinedArrayAllocasTy &InlinedArrayAllocas);
+ // Optimization level given to the constructor; stored but not read by any
+ // code visible in this file.
+ CodeGenOpt::Level OptLevel;
+ };
+ char AMDILInlinePass::ID = 0;
+} // anonymous namespace
+
+
+namespace llvm
+{
+ // Factory for the AMDIL inline pass. Returns a heap-allocated pass object;
+ // presumably ownership passes to the pass manager it is added to (confirm
+ // against the caller).
+ FunctionPass*
+ createAMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+ {
+ return new AMDILInlinePass(tm, OL);
+ }
+} // llvm namespace
+
+// Binds the pass to target machine `tm` and records optimization level `OL`.
+// The base-class constructor argument depends on the LLVM version in use.
+ AMDILInlinePass::AMDILInlinePass(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+  : FunctionPass(ID),
+#else
+  : FunctionPass((intptr_t)&ID),
+#endif
+    TM(tm),
+    OptLevel(OL)
+{
+}
+// Nothing to release: the pass only holds a reference and plain values.
+AMDILInlinePass::~AMDILInlinePass()
+{
+}
+
+
+// Inlines the call at CS into its caller and, on success, tries to merge the
+// array-typed static allocas that the inlining introduced with allocas left
+// over from earlier inlining into the same caller.
+//
+// CS                  - call site to inline.
+// TD                  - target data; currently unused, kept for the interface.
+// InlinedArrayAllocas - per-array-type list of allocas available for reuse;
+//                       updated with any alloca that could not be merged.
+//
+// Returns false when InlineFunction() declines to inline, true otherwise.
+bool
+AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
+    const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas) {
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = CS.getCaller();
+
+  // Try to inline the function.  The static allocas that were inlined are
+  // reported through IFI.StaticAllocas.
+  InlineFunctionInfo IFI;
+  if (!InlineFunction(CS, IFI))
+    return false;
+  DEBUG(errs() << "<amdilinline> function " << Caller->getName()
+      << ": inlined call to "<< Callee->getName() << "\n");
+
+  // If the inlined function had a higher stack protection level than the
+  // calling function, then bump up the caller's stack protection level.
+  if (Callee->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtectReq);
+  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
+      !Caller->hasFnAttr(Attribute::StackProtectReq))
+    Caller->addFnAttr(Attribute::StackProtect);
+
+
+  // Look at all of the allocas that we inlined through this call site.  If we
+  // have already inlined other allocas through other calls into this function,
+  // then we know that they have disjoint lifetimes and that we can merge them.
+  //
+  // There are many heuristics possible for merging these allocas, and the
+  // different options have different tradeoffs.  One thing that we *really*
+  // don't want to hurt is SRoA: once inlining happens, often allocas are no
+  // longer address taken and so they can be promoted.
+  //
+  // Our "solution" for that is to only merge allocas whose outermost type is an
+  // array type.  These are usually not promoted because someone is using a
+  // variable index into them.  These are also often the most important ones to
+  // merge.
+  //
+  // A better solution would be to have real memory lifetime markers in the IR
+  // and not have the inliner do any merging of allocas at all.  This would
+  // allow the backend to do proper stack slot coloring of all allocas that
+  // *actually make it to the backend*, which is really what we want.
+  //
+  // Because we don't have this information, we do this simple and useful hack.
+  //
+  SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+  // Loop over all the allocas we have so far and see if they can be merged with
+  // a previously inlined alloca.  If not, remember that we had it.
+  for (unsigned AllocaNo = 0,
+      e = IFI.StaticAllocas.size();
+      AllocaNo != e; ++AllocaNo) {
+
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
+
+    // Don't bother trying to merge array allocations (they will usually be
+    // canonicalized to be an allocation *of* an array), or allocations whose
+    // type is not itself an array (because we're afraid of pessimizing SRoA).
+    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    if (ATy == 0 || AI->isArrayAllocation())
+      continue;
+
+    // Get the list of all available allocas for this array type.
+    SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
+      = InlinedArrayAllocas[ATy];
+
+    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
+    // that we have to be careful not to reuse the same "available" alloca for
+    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+    // set to keep track of which "available" allocas are being used by this
+    // function.  Also, AllocasForType can be empty of course!
+    bool MergedAwayAlloca = false;
+    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+      AllocaInst *AvailableAlloca = AllocasForType[i];
+
+      // The available alloca has to live in the same parent as the new one.
+      // Note that getParent() on an instruction yields its basic block, so
+      // this compares blocks, not functions.
+      if (AvailableAlloca->getParent() != AI->getParent())
+        continue;
+
+      // If the inlined function already uses this alloca then we can't reuse
+      // it.
+      if (!UsedAllocas.insert(AvailableAlloca))
+        continue;
+
+      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+      // success!
+      DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+
+      AI->replaceAllUsesWith(AvailableAlloca);
+      AI->eraseFromParent();
+      MergedAwayAlloca = true;
+      break;
+    }
+
+    // If we already nuked the alloca, we're done with it.
+    if (MergedAwayAlloca)
+      continue;
+
+    // If we were unable to merge away the alloca either because there are no
+    // allocas of the right type available or because we reused them all
+    // already, remember that this alloca came from an inlined function and mark
+    // it used so we don't reuse it for other allocas from this inline
+    // operation.
+    AllocasForType.push_back(AI);
+    UsedAllocas.insert(AI);
+  }
+
+  return true;
+}
+
+// Collects every inlinable call site in MF and then inlines them one by one.
+//
+// Skipped call sites: non-calls, intrinsic calls, direct calls to
+// declarations, and direct self-recursive calls (for which a
+// RECURSIVE_FUNCTION error message is recorded on the machine function info).
+// Nothing is done when the device reports AMDILDeviceInfo::NoInline.
+//
+// Returns true if at least one call was inlined.
+ bool
+AMDILInlinePass::runOnFunction(Function &MF)
+{
+  Function *F = &MF;
+  const AMDILSubtarget &STM = TM.getSubtarget<AMDILSubtarget>();
+  if (STM.device()->isSupported(AMDILDeviceInfo::NoInline)) {
+    return false;
+  }
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  SmallVector<CallSite, 16> CallSites;
+  for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) {
+    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+#if LLVM_VERSION >= 2500
+      CallSite CS = CallSite(cast<Value>(I));
+#else
+      CallSite CS = CallSiteBase(I);
+#endif
+      // If this isn't a call, or it is a call to an intrinsic, it can
+      // never be inlined.
+      if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+        continue;
+
+      // Null for indirect calls.
+      Function *Callee = CS.getCalledFunction();
+
+      // If this is a direct call to an external function, we can never inline
+      // it.  If it is an indirect call, inlining may resolve it to be a
+      // direct call, so we keep it.
+      if (Callee && Callee->isDeclaration())
+        continue;
+
+      // We don't want to inline if we are recursive.  Compare the Function
+      // objects themselves rather than their names: identity is exact and
+      // cannot be confused by functions that have no name.
+      if (Callee == F) {
+        AMDILMachineFunctionInfo *MFI =
+          getAnalysis<MachineFunctionAnalysis>().getMF()
+          .getInfo<AMDILMachineFunctionInfo>();
+        MFI->addErrorMsg(amd::CompilerErrorMessage[RECURSIVE_FUNCTION]);
+        continue;
+      }
+
+      CallSites.push_back(CS);
+    }
+  }
+
+  InlinedArrayAllocasTy InlinedArrayAllocas;
+  bool Changed = false;
+  for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
+    CallSite CS = CallSites[CSi];
+
+    Function *Callee = CS.getCalledFunction();
+
+    // We can only inline direct calls to non-declarations.
+    if (Callee == 0 || Callee->isDeclaration()) continue;
+
+    // Attempt to inline the function...
+    if (!AMDILInlineCallIfPossible(CS, TD, InlinedArrayAllocas))
+      continue;
+    Changed = true;
+  }
+  return Changed;
+}
+
+// Human-readable name of this pass.
+const char*
+AMDILInlinePass::getPassName() const
+{
+ return "AMDIL Inline Function Pass";
+}
+// No per-module setup is needed; returns false (module not modified).
+ bool
+AMDILInlinePass::doInitialization(Module &M)
+{
+ return false;
+}
+
+// No per-module teardown is needed; returns false (module not modified).
+ bool
+AMDILInlinePass::doFinalization(Module &M)
+{
+ return false;
+}
+
+// Declares the analyses this pass depends on. MachineFunctionAnalysis is
+// required because runOnFunction() records error messages on the machine
+// function info.
+// NOTE(review): the pass is marked as preserving all analyses even though
+// runOnFunction() rewrites the IR when it inlines; presumably this keeps
+// MachineFunctionAnalysis alive across the pass -- confirm before changing.
+void
+AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
+{
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
diff --git a/src/gallium/drivers/radeon/AMDILInstPrinter.cpp b/src/gallium/drivers/radeon/AMDILInstPrinter.cpp
new file mode 100644
index 00000000000..ee4e38138ba
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstPrinter.cpp
@@ -0,0 +1,67 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+// Forwards the assembly info to the MCInstPrinter base; no state of its own.
+AMDILInstPrinter::AMDILInstPrinter(const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI)
+{
+}
+// Not implemented: terminates the process via abort() if it is ever called.
+// All three parameters are ignored.
+void
+AMDILInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef annot)
+{
+ abort();
+}
+
+// Nothing to release.
+AMDILInstPrinter::~AMDILInstPrinter()
+{
+}
diff --git a/src/gallium/drivers/radeon/AMDILInstPrinter.h b/src/gallium/drivers/radeon/AMDILInstPrinter.h
new file mode 100644
index 00000000000..ac2753d0408
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstPrinter.h
@@ -0,0 +1,72 @@
+//=====-- AMDILInstPrinter.h - Convert AMDIL MCInst to assembly syntax --=====//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+
+#ifndef AMDILMINSTPRINTER_H_
+#define AMDILMINSTPRINTER_H_
+#include "AMDILLLVMVersion.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCInst;
+ class raw_ostream;
+ // Placeholder MCInst printer for AMDIL.
+ // FIXME: We will need to implement this class when we transition to use
+ // MCStreamer.
+ class AMDILInstPrinter : public MCInstPrinter {
+ public:
+ virtual ~AMDILInstPrinter();
+ AMDILInstPrinter(const MCAsmInfo &MAI);
+ // Declared to satisfy the MCInstPrinter interface; the implementation in
+ // AMDILInstPrinter.cpp aborts instead of printing.
+ virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef annot);
+ };
+
+} // namespace llvm
+
+#endif // AMDILMINSTPRINTER_H_
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
new file mode 100644
index 00000000000..98a97e9d2cb
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
@@ -0,0 +1,763 @@
+//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILInstrInfo.h"
+#include "AMDILUtilityFunctions.h"
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_MC_DESC
+#include "AMDILGenInstrInfo.inc"
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+// Passes AMDIL::ADJCALLSTACKDOWN and AMDIL::ADJCALLSTACKUP to the generated
+// AMDILGenInstrInfo base, builds the register info from the target machine
+// and this object, and keeps a reference to the target machine.
+AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
+ : AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP),
+ RI(tm, *this),
+ TM(tm) {
+}
+
+// Returns the register info object owned by this instruction info.
+const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// Reports whether MI is a register-to-register move.
+///
+/// On success SrcReg receives the register of operand 1, DstReg the register
+/// of operand 0, and both sub-register indices are set to 0.  On failure the
+/// out-parameters are left untouched.
+bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+    unsigned int &DstReg, unsigned int &SrcSubIdx,
+    unsigned int &DstSubIdx) const {
+  // FIXME: we should also look for:
+  // add with 0
+  const bool isRegToRegMove = isMove(MI.getOpcode())
+    && MI.getOperand(0).isReg()
+    && MI.getOperand(1).isReg();
+  if (!isRegToRegMove) {
+    return false;
+  }
+
+  const MachineOperand &dstOp = MI.getOperand(0);
+  const MachineOperand &srcOp = MI.getOperand(1);
+  SrcReg = srcOp.getReg();
+  DstReg = dstOp.getReg();
+  DstSubIdx = 0;
+  SrcSubIdx = 0;
+  return true;
+}
+
+// Not implemented: always returns false and leaves SrcReg, DstReg and SubIdx
+// untouched.
+bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+// Not implemented: always returns 0 and leaves FrameIndex untouched.
+unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+// Not implemented: always returns 0 and leaves FrameIndex untouched.
+unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+// Not implemented: always returns false and leaves MMO and FrameIndex
+// untouched.
+bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+// Not implemented: always returns 0 and leaves FrameIndex untouched.
+// NOTE(review): the corresponding LLVM TargetInstrInfo hook appears to be
+// spelled isStoreToStackSlot -- confirm whether this name overrides anything.
+unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+// Not implemented: always returns 0 and leaves FrameIndex untouched.
+// NOTE(review): the corresponding LLVM TargetInstrInfo hook appears to be
+// spelled isStoreToStackSlotPostFE -- confirm this name is intended.
+unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+// Not implemented: always returns false and leaves MMO and FrameIndex
+// untouched.
+// NOTE(review): the corresponding LLVM TargetInstrInfo hook appears to be
+// spelled hasStoreToStackSlot -- confirm this name is intended.
+bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+// The reMaterialize() and duplicate() stubs below are compiled out.
+// NOTE(review): duplicate() is declared with return type `MachineInst` yet
+// returns NULL; check the signatures before re-enabling this region.
+#if 0
+void
+AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+// TODO: Implement this function
+}
+
+MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+// TODO: Implement this function
+ return NULL;
+}
+#endif
+// Not implemented: always returns NULL and does not touch its arguments.
+MachineInstr *
+AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+// The commuteInstruction(), findCommutedOpIndices() and produceSameValue()
+// stubs below are compiled out.
+// NOTE(review): the two bool stubs have no return statement and
+// commuteInstruction() uses the type `MachineInst`; fix before re-enabling.
+#if 0
+MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool
+AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const
+{
+// TODO: Implement this function
+}
+bool
+AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const
+{
+// TODO: Implement this function
+}
+#endif
+// Advances `iter` through MBB until it points at a branch instruction.
+//
+// Returns true with `iter` left on the branch (AMDIL::BRANCH or one of the
+// opcodes produced by ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND)), or
+// false with `iter == MBB.end()` when no branch is found.
+bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+    MachineBasicBlock &MBB) const {
+  for (; iter != MBB.end(); ++iter) {
+    switch (iter->getOpcode()) {
+      ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+      case AMDIL::BRANCH:
+        return true;
+      default:
+        // Not a branch; keep scanning.
+        break;
+    }
+  }
+  return false;
+}
+
+// Branch analysis hook. As written it returns true immediately and never
+// touches TBB, FBB or Cond: the `return retVal;` right after the
+// initialization makes everything below it unreachable.
+// NOTE(review): in LLVM's TargetInstrInfo contract a true result means the
+// block's terminators could not be analyzed -- confirm that disabling the
+// analysis this way is intentional.
+bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ bool retVal = true;
+ return retVal;
+ // ---- Unreachable from here on; kept as the disabled implementation. ----
+ MachineBasicBlock::iterator iter = MBB.begin();
+ if (!getNextBranchInstr(iter, MBB)) {
+ // No branch at all in the block.
+ retVal = false;
+ } else {
+ MachineInstr *firstBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ // Exactly one branch in the block.
+ if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ firstBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ // Single conditional branch: the false target is taken from the
+ // successor list, picking whichever of the first two differs from TBB.
+ TBB = firstBranch->getOperand(0).getMBB();
+ FBB = *(++MBB.succ_begin());
+ if (FBB == TBB) {
+ FBB = *(MBB.succ_begin());
+ }
+ Cond.push_back(firstBranch->getOperand(1));
+ retVal = false;
+ }
+ } else {
+ MachineInstr *secondBranch = iter;
+ if (!getNextBranchInstr(++iter, MBB)) {
+ // Exactly two branches: conditional followed by unconditional.
+ if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+ TBB = firstBranch->getOperand(0).getMBB();
+ Cond.push_back(firstBranch->getOperand(1));
+ FBB = secondBranch->getOperand(0).getMBB();
+ secondBranch->eraseFromParent();
+ retVal = false;
+ } else {
+ assert(0 && "Should not have two consecutive conditional branches");
+ }
+ } else {
+ MBB.getParent()->viewCFG();
+ assert(0 && "Should not have three branch instructions in"
+ " a single basic block");
+ retVal = false;
+ }
+ }
+ }
+ return retVal;
+}
+
+// Picks the BRANCH_COND_* opcode for the instruction that owns `op`.
+//
+// The choice is driven by the RegClass of the first operand-info entry of
+// that instruction's descriptor; any register class not listed below maps to
+// the i8 variant.
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
+  switch (op.getParent()->getDesc().OpInfo->RegClass) {
+    case AMDIL::GPRI16RegClassID:
+      return AMDIL::BRANCH_COND_i16;
+    case AMDIL::GPRI32RegClassID:
+      return AMDIL::BRANCH_COND_i32;
+    case AMDIL::GPRI64RegClassID:
+      return AMDIL::BRANCH_COND_i64;
+    case AMDIL::GPRF32RegClassID:
+      return AMDIL::BRANCH_COND_f32;
+    case AMDIL::GPRF64RegClassID:
+      return AMDIL::BRANCH_COND_f64;
+    case AMDIL::GPRI8RegClassID:
+    default: // FIXME: fallthrough??
+      return AMDIL::BRANCH_COND_i8;
+  };
+}
+
+// Appends branch instructions to the end of MBB.
+//
+// TBB  - taken target; must be non-null.
+// FBB  - fall-through target, or null for a one-way branch.
+// Cond - empty for an unconditional branch; otherwise Cond[0] supplies the
+//        condition register and selects the BRANCH_COND_* opcode.
+//
+// Returns 1 for a one-way branch. For a two-way branch both instructions are
+// emitted, then the assert fires; with asserts compiled out it returns 0.
+unsigned int
+AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ // NOTE(review): this dumps the parent instruction of every condition
+ // operand on each call, not only in debug runs -- looks like leftover
+ // debugging output; confirm before removing.
+ for (unsigned int x = 0; x < Cond.size(); ++x) {
+ Cond[x].getParent()->dump();
+ }
+ DebugLoc DL;
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+ .addMBB(TBB).addReg(Cond[0].getReg());
+ }
+ return 1;
+ } else {
+ // NOTE(review): Cond[0] is read here without checking Cond.empty().
+ BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
+ .addMBB(TBB).addReg(Cond[0].getReg());
+ BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+ }
+ // NOTE(review): two instructions were just inserted, yet 0 is returned
+ // when the assert is compiled out -- confirm the intended contract.
+ assert(0 && "Inserting two branches not supported");
+ return 0;
+}
+
+// Erases up to two branch instructions from the end of MBB and returns how
+// many were erased (0, 1 or 2).
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) {
+ // Empty block.
+ return 0;
+ }
+ --I;
+ // Last instruction: erased if it is AMDIL::BRANCH or one of the opcodes
+ // listed by ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND).
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ case AMDIL::BRANCH:
+ I->eraseFromParent();
+ break;
+ }
+ // I was invalidated by the erase; restart from the new end of the block.
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ // New last instruction: only the macro-listed BRANCH_COND opcodes are
+ // erased here; a second AMDIL::BRANCH is left in place.
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+// Finds the insertion point that precedes the trailing run of ENDLOOP /
+// ENDIF / ELSE instructions in MBB.
+//
+// Returns an iterator to the first instruction of that trailing run, which
+// is MBB->end() when the block is empty or does not end in such an
+// instruction, and MBB->begin() when the whole block consists of them.
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator pos = MBB->end();
+  // Walk backwards from the end; the loop bound makes it explicit that the
+  // iterator never moves in front of begin().
+  while (pos != MBB->begin()) {
+    --pos;
+    const bool isFlowControl = pos->getOpcode() == AMDIL::ENDLOOP
+      || pos->getOpcode() == AMDIL::ENDIF
+      || pos->getOpcode() == AMDIL::ELSE;
+    if (!isFlowControl) {
+      // Step forward again so the result sits just after this instruction.
+      return ++pos;
+    }
+  }
+  return pos;
+}
+
+// Emits a single move instruction that copies SrcReg into DestReg.
+//
+// MBB, I   - block and position to insert at.
+// DestReg  - destination register.
+// SrcReg   - source register.
+// DestRC   - register class of the destination; selects the move opcode.
+// SrcRC    - register class of the source; not consulted, because no
+//            conversion is performed when the classes differ.
+// DL       - debug location attached to the new instruction.
+//
+// Always returns true.
+bool
+AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I,
+    unsigned DestReg, unsigned SrcReg,
+    const TargetRegisterClass *DestRC,
+    const TargetRegisterClass *SrcRC,
+    DebugLoc DL) const {
+  (void)SrcRC;
+
+  // The regular move opcode is used when either register number is below
+  // 1025 (presumably the physical-register range -- confirm); otherwise the
+  // PHI move opcode is used.
+  const bool useRegularMove = (DestReg < 1025) || (SrcReg < 1025);
+  const int movInst = useRegularMove ? getMoveInstFromID(DestRC->getID())
+                                     : getPHIMoveInstFromID(DestRC->getID());
+
+  // If we are adding to the end of a basic block we can safely assume that the
+  // move is caused by a PHI node, since all non-PHI move instructions have
+  // already been inserted into the basic blocks.  Therefore we skip backwards
+  // over the trailing flow control instructions and put the move in front of
+  // them.
+  MachineBasicBlock::iterator insertPt =
+    (I == MBB.end()) ? skipFlowControl(&MBB) : I;
+
+  // Widening, same-size and narrowing copies between different register
+  // classes all used the very same plain move (conversions are not handled),
+  // so there is nothing to distinguish here.
+  BuildMI(MBB, insertPt, DL, get(movInst), DestReg).addReg(SrcReg);
+  return true;
+}
+#if LLVM_VERSION >= 2500
+void
+AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{ // Emits a copy from SrcReg to DestReg before MI; only compiled when LLVM_VERSION >= 2500.
+ BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg) // Always uses MOVE_v4i32, whatever registers are involved.
+ .addReg(SrcReg, getKillRegState(KillSrc)); // KillSrc marks the source operand as killed by this copy.
+ return; // The diagnostic path below is compiled out.
+#if 0
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ abort();
+#endif
+}
+#endif
+void
+AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const { // TRI is not used by this implementation.
+ unsigned int Opc = 0;
+ // Spills SrcReg to stack slot FrameIndex with a PRIVATESTORE_* inserted before MI.
+ MachineFunction &MF = *(MBB.getParent());
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ DebugLoc DL;
+ switch (RC->getID()) { // Pick the store opcode that matches the register class being spilled.
+ default: // Unlisted register classes fall back to the v4i32 store.
+ Opc = AMDIL::PRIVATESTORE_v4i32;
+ break;
+ case AMDIL::GPRF32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_f32;
+ break;
+ case AMDIL::GPRF64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_f64;
+ break;
+ case AMDIL::GPRI16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i16;
+ break;
+ case AMDIL::GPRI32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i32;
+ break;
+ case AMDIL::GPRI8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i8;
+ break;
+ case AMDIL::GPRI64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_i64;
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2f32;
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2f64;
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i16;
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i32;
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i8;
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v2i64;
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4f32;
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i16;
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i32;
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ Opc = AMDIL::PRIVATESTORE_v4i8;
+ break;
+ }
+ // Describe the access to the fixed stack slot; a spill writes memory, so it is flagged MOStore.
+ MachineMemOperand *MMO =
+ new MachineMemOperand( // NOTE(review): LLVM normally allocates these via MachineFunction::getMachineMemOperand -- confirm ownership of this object.
+#if LLVM_VERSION < 2500
+ PseudoSourceValue::getFixedStack(FrameIndex),
+#else
+ MachinePointerInfo::getFixedStack(FrameIndex),
+#endif
+ MachineMemOperand::MOStore,
+#if LLVM_VERSION < 2500
+ MFI.getObjectOffset(FrameIndex),
+#endif
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ if (MI != MBB.end()) { // Inherit the debug location of the instruction we insert before, when there is one.
+ DL = MI->getDebugLoc();
+ }
+ MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addMemOperand(MMO)
+ .addImm(0);
+ AMDILAS::InstrResEnc curRes;
+ curRes.bits.ResourceID // Tag the new instruction with the device's scratch resource ID.
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ setAsmPrinterFlags(nMI, curRes);
+}
+
+void
+AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const { // Reloads DestReg from stack slot FrameIndex with a PRIVATELOAD_* inserted before MI; TRI is unused.
+ unsigned int Opc = 0;
+ MachineFunction &MF = *(MBB.getParent());
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ DebugLoc DL;
+ switch (RC->getID()) { // Pick the load opcode that matches the register class being reloaded.
+ default: // Unlisted register classes fall back to the v4i32 load.
+ Opc = AMDIL::PRIVATELOAD_v4i32;
+ break;
+ case AMDIL::GPRF32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_f32;
+ break;
+ case AMDIL::GPRF64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_f64;
+ break;
+ case AMDIL::GPRI16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i16;
+ break;
+ case AMDIL::GPRI32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i32;
+ break;
+ case AMDIL::GPRI8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i8;
+ break;
+ case AMDIL::GPRI64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_i64;
+ break;
+ case AMDIL::GPRV2F32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2f32;
+ break;
+ case AMDIL::GPRV2F64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2f64;
+ break;
+ case AMDIL::GPRV2I16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i16;
+ break;
+ case AMDIL::GPRV2I32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i32;
+ break;
+ case AMDIL::GPRV2I8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i8;
+ break;
+ case AMDIL::GPRV2I64RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v2i64;
+ break;
+ case AMDIL::GPRV4F32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4f32;
+ break;
+ case AMDIL::GPRV4I16RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i16;
+ break;
+ case AMDIL::GPRV4I32RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i32;
+ break;
+ case AMDIL::GPRV4I8RegClassID:
+ Opc = AMDIL::PRIVATELOAD_v4i8;
+ break;
+ }
+
+ MachineMemOperand *MMO = // Memory operand describing a load from the fixed stack slot.
+ new MachineMemOperand( // NOTE(review): LLVM normally allocates these via MachineFunction::getMachineMemOperand -- confirm ownership of this object.
+#if LLVM_VERSION < 2500
+ PseudoSourceValue::getFixedStack(FrameIndex),
+#else
+ MachinePointerInfo::getFixedStack(FrameIndex),
+#endif
+ MachineMemOperand::MOLoad,
+#if LLVM_VERSION < 2500
+ MFI.getObjectOffset(FrameIndex),
+#endif
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ if (MI != MBB.end()) { // Inherit the debug location of the instruction we insert before, when there is one.
+ DL = MI->getDebugLoc();
+ }
+ MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(DestReg, RegState::Define)
+ .addFrameIndex(FrameIndex)
+ .addMemOperand(MMO)
+ .addImm(0);
+ AMDILAS::InstrResEnc curRes;
+ curRes.bits.ResourceID // Tag the new instruction with the device's scratch resource ID.
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ setAsmPrinterFlags(nMI, curRes);
+
+}
+MachineInstr *
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function. Stub for the stack-slot (FrameIndex) overload.
+ return 0; // Null result: no folded instruction is ever produced.
+}
+MachineInstr*
+AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function. Stub for the load-instruction (LoadMI) overload.
+ return 0; // Null result: no folded instruction is ever produced.
+}
+bool
+AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const
+{
+ // TODO: Implement this function. Stub: reports that nothing can be folded.
+ return false;
+}
+bool
+AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function. Stub for the MachineInstr overload; NewMIs is left untouched.
+ return false;
+}
+
+bool
+AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function. Stub for the SelectionDAG overload; NewNodes is left untouched.
+ return false;
+}
+
+unsigned
+AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function. Stub: LoadRegIndex is never written.
+ return 0; // Always 0, for every input opcode.
+}
+
+bool
+AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ return false; // Unconditional return: the analysis below is currently unreachable and the offsets are never written.
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
+ return false;
+ }
+ const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
+ const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
+ if (!mload1 || !mload2) {
+ return false;
+ }
+ if (mload1->memoperands_empty() ||
+ mload2->memoperands_empty()) {
+ return false;
+ }
+ MachineMemOperand *memOp1 = (*mload1->memoperands_begin()); // Only the first memory operand of each node is inspected.
+ MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
+ const Value *mv1 = memOp1->getValue();
+ const Value *mv2 = memOp2->getValue();
+ if (!memOp1->isLoad() || !memOp2->isLoad()) {
+ return false;
+ }
+ if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) {
+ if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) {
+ const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
+ const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
+ if (!gep1 || !gep2) {
+ return false;
+ }
+ if (gep1->getNumOperands() != gep2->getNumOperands()) {
+ return false;
+ }
+ for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) { // Compare every operand except the last one.
+ const Value *op1 = gep1->getOperand(i);
+ const Value *op2 = gep2->getOperand(i);
+ if (op1 != op2) {
+ // If any value except the last one is different, return false.
+ return false;
+ }
+ }
+ unsigned size = gep1->getNumOperands()-1; // Index of the last operand, which must be a constant integer in both GEPs.
+ if (!isa<ConstantInt>(gep1->getOperand(size))
+ || !isa<ConstantInt>(gep2->getOperand(size))) {
+ return false;
+ }
+ Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue(); // The sign-extended last indices are reported as the offsets.
+ Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue();
+ return true;
+ } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) {
+ return false;
+ } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) {
+ return false;
+ }
+ }
+ return false;
+}
+
+bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const { // Load1 and Load2 themselves are not inspected; only the offsets and the count are.
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+ // If we have fewer than 16 loads in a row, and the offsets are less than 16
+ // apart, then schedule together.
+ // TODO: Make the loads schedule near if it fits in a cacheline
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function. Stub: Cond is left unmodified.
+ return true; // NOTE(review): in LLVM's TargetInstrInfo contract, true reports that the condition could not be reversed -- confirm.
+}
+void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function. Currently does nothing: no instruction is inserted.
+}
+
+bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function. Stub: every instruction is reported as not predicated.
+ return false;
+}
+#if 0 // Compiled out: both functions below are non-void but have no return statement yet.
+bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+}
+#endif
+bool
+AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function. Stub: the predicates are not compared.
+ return false;
+}
+
+bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function. Stub: Pred is left untouched.
+ return false;
+}
+
+bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const {
+ // Defers to the isPredicable() flag of MI's instruction descriptor.
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function. Stub: answers true for every register class.
+ return true;
+}
+
+unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ // TODO: Implement this function. Stub: every instruction is reported as 0 bytes.
+ return 0;
+}
+
+#if 0 // Compiled out: size queries that are not implemented yet.
+unsigned
+AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // TODO: Implement this function
+ return 0;
+}
+#endif
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.h b/src/gallium/drivers/radeon/AMDILInstrInfo.h
new file mode 100644
index 00000000000..879561384a7
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.h
@@ -0,0 +1,217 @@
+//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILINSTRUCTIONINFO_H_
+#define AMDILINSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include "AMDILRegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AMDILGenInstrInfo.inc"
+
+namespace llvm {
+ // AMDILInstrInfo - AMDIL instruction information, layered on top of the
+ // AMDILGenInstrInfo class pulled in from AMDILGenInstrInfo.inc.
+ //
+ //class AMDILTargetMachine;
+class AMDILInstrInfo : public AMDILGenInstrInfo {
+private:
+ const AMDILRegisterInfo RI; // Register info instance owned by this object.
+ AMDILTargetMachine &TM; // Target machine; used to reach the subtarget's device.
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+ unsigned int getBranchInstr(const MachineOperand &op) const;
+public:
+ explicit AMDILInstrInfo(AMDILTargetMachine &tm);
+
+ // getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ // such, whenever a client has an instance of instruction info, it should
+ // always be able to get register info as well (through this method).
+ const AMDILRegisterInfo &getRegisterInfo() const;
+
+ // Return true if the instruction is a register to register move and leave the
+ // source and dest operands in the passed parameters.
+ bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
+ unsigned int &DstReg, unsigned int &SrcSubIdx,
+ unsigned int &DstSubIdx) const;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; // NOTE(review): LLVM's TargetInstrInfo hook is spelled isStoreToStackSlot -- confirm whether these three are meant to override it.
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+
+#if 0
+ void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+ MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const;
+#endif
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+#if 0
+ MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI = false) const;
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+ bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const;
+
+#endif
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ unsigned
+ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool copyRegToReg(MachineBasicBlock &MBB, // Emits a move from SrcReg to DestReg; the definition always returns true.
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB, // NOTE(review): declared unconditionally, but the definition in AMDILInstrInfo.cpp is guarded by LLVM_VERSION >= 2500.
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB, // Spills SrcReg to stack slot FrameIndex before MI.
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB, // Reloads DestReg from stack slot FrameIndex before MI.
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, // Both overloads are currently stubs that return null.
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI, // Stub: always false.
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Stub: always false.
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Stub: always false.
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, // Stub: always 0.
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, // The definition currently returns false before doing any analysis.
+ int64_t &Offset1, int64_t &Offset2) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, // True when NumLoads < 16 and the offsets are less than 16 apart.
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; // Stub: always true, Cond untouched.
+ void insertNoop(MachineBasicBlock &MBB, // Stub: inserts nothing.
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const; // Stub: always false.
+#if 0
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+#endif
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, // Stub: always false.
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI, // Stub: always false.
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const; // Forwards to the instruction descriptor's isPredicable().
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; // Stub: always true.
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const; // Stub: always 0.
+#if 0
+ unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
+#endif
+ };
+
+} // namespace llvm
+
+#endif // AMDILINSTRUCTIONINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td
new file mode 100644
index 00000000000..bb3524bfda9
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td
@@ -0,0 +1,156 @@
+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDILDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports double, but not double
+// precision divide in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24bit signed
+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDILDeviceInfo::HD5XXX">;
+
+// Predicates for whether 64bit operations are done in hardware or in software
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDILDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDILDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDILDeviceInfo::TmrReg)">;
+// Predicate that is true if we are at least evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDILDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDILDeviceInfo::RegionMem)">;
+
+// Predicate that is true if region address space is not supported.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDILDeviceInfo::RegionMem)">;
+
+
+// Predicates for whether 64bit Mul is supported in the IL (HW) or not (SW)
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDILDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">; // NOTE(review): not the exact complement of HasHW64Mul (no generation check) -- confirm this is intended.
+// Predicates for whether 64bit Div/Mod is done in hardware or in software
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">;
+
+// Predicates that select on whether 64bit or 32bit pointers are used.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+include "AMDILOperands.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+include "AMDILProfiles.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILNodes.td"
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+include "AMDILPatterns.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+include "AMDILFormats.td"
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+include "AMDILMultiClass.td"
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+include "AMDILInstructions.td"
+
+//===--------------------------------------------------------------------===//
+// Instruction Pattern support - This Must be the last include in the file
+// as it requires items defined in other files
+//===--------------------------------------------------------------------===//
+include "AMDILInstrPatterns.td"
+
diff --git a/src/gallium/drivers/radeon/AMDILInstrPatterns.td b/src/gallium/drivers/radeon/AMDILInstrPatterns.td
new file mode 100644
index 00000000000..7c2a0ef0458
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstrPatterns.td
@@ -0,0 +1,107 @@
+//===- AMDILInstrPatterns.td - AMDIL Target ------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// This file holds all the custom patterns that are used by the amdil backend
+//
+//===--------------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Custom patterns for conversion operations
+//===--------------------------------------------------------------------===//
+// Select the generic i32 'or' node as the BINARY_OR_i32 instruction.
+def : Pat<(i32 (or GPRI32:$src0, GPRI32:$src1)),
+ (i32 (BINARY_OR_i32 GPRI32:$src0, GPRI32:$src1))>;
+// float <==> long patterns
+// unsigned: f32 -> i64 (FTOU result paired with a constant 0 via LCREATE)
+def FTOUL : Pat<(i64 (fp_to_uint GPRF32:$src)),
+ (LCREATE (FTOU GPRF32:$src), (LOADCONST_i32 0))>;
+// signed: f32 -> i64 (NOTE(review): FTOI paired with constant 0; sign lost?)
+def FTOL : Pat<(i64 (fp_to_sint GPRF32:$src)),
+ (LCREATE (FTOI GPRF32:$src), (LOADCONST_i32 0))>;
+// unsigned: i64 -> f32 (NOTE(review): only LLO of $src is converted; confirm)
+def ULTOF : Pat<(f32 (uint_to_fp GPRI64:$src)),
+ (UTOF (LLO GPRI64:$src))>;
+// signed: i64 -> f32 (NOTE(review): only LLO of $src is converted; confirm)
+def LTOF : Pat<(f32 (sint_to_fp GPRI64:$src)),
+ (ITOF (LLO GPRI64:$src))>;
+
+// integer subtraction for the scalar, 2- and 4-element i8/i16/i32 types
+// a - b ==> a + (-b), i.e. ADD_<ty> a, (NEGATE_<ty> b)
+def SUB_i8 : Pat<(sub GPRI8:$src0, GPRI8:$src1),
+ (ADD_i8 GPRI8:$src0, (NEGATE_i8 GPRI8:$src1))>;
+def SUB_v2i8 : Pat<(sub GPRV2I8:$src0, GPRV2I8:$src1),
+ (ADD_v2i8 GPRV2I8:$src0, (NEGATE_v2i8 GPRV2I8:$src1))>;
+def SUB_v4i8 : Pat<(sub GPRV4I8:$src0, GPRV4I8:$src1),
+ (ADD_v4i8 GPRV4I8:$src0, (NEGATE_v4i8 GPRV4I8:$src1))>;
+def SUB_i16 : Pat<(sub GPRI16:$src0, GPRI16:$src1),
+ (ADD_i16 GPRI16:$src0, (NEGATE_i16 GPRI16:$src1))>;
+def SUB_v2i16 : Pat<(sub GPRV2I16:$src0, GPRV2I16:$src1),
+ (ADD_v2i16 GPRV2I16:$src0, (NEGATE_v2i16 GPRV2I16:$src1))>;
+def SUB_v4i16 : Pat<(sub GPRV4I16:$src0, GPRV4I16:$src1),
+ (ADD_v4i16 GPRV4I16:$src0, (NEGATE_v4i16 GPRV4I16:$src1))>;
+def SUB_i32 : Pat<(sub GPRI32:$src0, GPRI32:$src1),
+ (ADD_i32 GPRI32:$src0, (NEGATE_i32 GPRI32:$src1))>;
+def SUB_v2i32 : Pat<(sub GPRV2I32:$src0, GPRV2I32:$src1),
+ (ADD_v2i32 GPRV2I32:$src0, (NEGATE_v2i32 GPRV2I32:$src1))>;
+def SUB_v4i32 : Pat<(sub GPRV4I32:$src0, GPRV4I32:$src1),
+ (ADD_v4i32 GPRV4I32:$src0, (NEGATE_v4i32 GPRV4I32:$src1))>;
+// LLVM isn't lowering this correctly, so write a pattern that matches it
+// instead: build_vector of one i32 immediate becomes VCREATE_v4i32(LOADCONST).
+def : Pat<(build_vector (i32 imm:$src)),
+ (VCREATE_v4i32 (LOADCONST_i32 imm:$src))>;
+
+// Calls: IL_call on tglobaladdr/texternalsym/tconstpool selects CALL
+def : Pat<(IL_call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(IL_call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+def : Pat<(IL_call tconstpool:$dst),
+ (CALL tconstpool:$dst)>;
+
+include "AMDILConversions.td"
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td
new file mode 100644
index 00000000000..6299696839f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -0,0 +1,2476 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+// Operations in this file are generic to all data types
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ defm LOADCONST : ILConstant<"mov $dst, $val">;
+ defm MOVE : UnaryOpMC<IL_OP_MOV, IL_mov>;
+ defm PHIMOVE : UnaryOpMC<IL_OP_MOV, IL_phimov>;
+}
+defm BINARY_NOT : UnaryOpMC<IL_OP_I_NOT, IL_not>;
+defm BINARY_OR : BinaryOpMC<IL_OP_I_OR, IL_or>;
+defm BINARY_AND : BinaryOpMC<IL_OP_AND, IL_and>;
+defm BINARY_XOR : BinaryOpMC<IL_OP_I_XOR, IL_xor>;
+defm AND : BinaryOpMCInt<IL_OP_AND, and>;
+defm CMOV : BinaryOpMC<IL_OP_CMOV, IL_cmov>;
+defm DIV_INF : BinaryOpMC<IL_OP_DIV_INF, IL_div_inf>;
+defm SMAX : BinaryOpMCInt<IL_OP_I_MAX, IL_smax>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder for 64bit
+// instructions
+defm CMOVLOG : TernaryOpMC<IL_OP_CMOV_LOGICAL, IL_cmov_logical>;
+// This opcode has a custom swizzle pattern in the Swizzle Encoder and
+// should never be selected in ISel. It should only be generated in the
+// I/O expansion code. These are different from the CMOVLOG instruction
+// in that the src0 argument uses a custom swizzle for the Y/Z/W
+// vector channel respectively instead of the default channel.
+def CMOVLOG_Y_i32 : ThreeInOneOut<IL_OP_CMOV_LOGICAL, (outs GPRI32:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_CMOV_LOGICAL.Text, " $dst, $src0, $src1, $src2"),
+ []>;
+def CMOVLOG_Z_i32 : ThreeInOneOut<IL_OP_CMOV_LOGICAL, (outs GPRI32:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_CMOV_LOGICAL.Text, " $dst, $src0, $src1, $src2"),
+ []>;
+def CMOVLOG_W_i32 : ThreeInOneOut<IL_OP_CMOV_LOGICAL, (outs GPRI32:$dst),
+ (ins GPRI32:$src0, GPRI32:$src1 ,GPRI32:$src2),
+ !strconcat(IL_OP_CMOV_LOGICAL.Text, " $dst, $src0, $src1, $src2"),
+ []>;
+defm SELECTBIN : TernaryOpMCScalar<IL_OP_CMOV_LOGICAL, select>;
+//===---------------------------------------------------------------------===//
+// Signed 8bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRI8:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 16bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+def INTTOANY_i16: OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text," $dst, $src0"),
+ [(set GPRI16:$dst, (IL_inttoany GPRI32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// Signed 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm NEGATE : UnaryOpMCi32<IL_OP_I_NEGATE, IL_inegate>;
+defm SMUL : BinaryOpMCi32<IL_OP_I_MUL, mul>;
+defm SMULHI : BinaryOpMCi32<IL_OP_I_MUL_HIGH, mulhs>;
+defm SHL : BinaryOpMCi32Const<IL_OP_I_SHL, shl>;
+defm SHR : BinaryOpMCi32Const<IL_OP_I_SHR, sra>;
+defm SHLVEC : BinaryOpMCi32<IL_OP_I_SHL, shl>;
+defm SHRVEC : BinaryOpMCi32<IL_OP_I_SHR, sra>;
+defm ADD : BinaryOpMCi32<IL_OP_I_ADD, add>;
+defm CUSTOM_XOR : BinaryOpMCInt<IL_OP_I_XOR, xor>;
+// get rid of the addri via the tablegen instead of custom lowered instruction
+defm CUSTOM_ADD : BinaryOpMCi32<IL_OP_I_ADD, IL_add>;
+defm EADD : BinaryOpMCi32<IL_OP_I_ADD, adde>;
+def INTTOANY_i32: OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRI32:$dst, (IL_inttoany GPRI32:$src0))]>;
+// Integer offsets for addressing
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def ADDir : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$offset),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $ptr, $offset"),
+ [(set GPRI32:$dst,
+ (IL_addaddrri ADDR:$ptr,
+ (i32 GPRI32:$offset)))]>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+def ADDri : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+ (ins GPRI32:$offset, MEMI32:$ptr),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $offset, $ptr"),
+ [(set GPRI32:$dst,
+ (IL_addaddrir
+ (i32 GPRI32:$offset), ADDR:$ptr))]>;
+
+defm IFFB_HI : UnaryOpMCi32<IL_OP_I_FFB_HI, IL_ffb_hi>;
+defm IFFB_LO : UnaryOpMCi32<IL_OP_I_FFB_LO, IL_ffb_lo>;
+let mayLoad = 0, mayStore = 0 in {
+defm ABS : UnaryIntrinsicInt<IL_OP_ABS, int_AMDIL_abs>;
+defm BITCOUNT : UnaryIntrinsicInt<IL_OP_IBIT_COUNT, int_AMDIL_bit_count_i32>;
+defm FFB_LO : UnaryIntrinsicInt<IL_OP_I_FFB_LO, int_AMDIL_bit_find_first_lo>;
+defm FFB_HI : UnaryIntrinsicInt<IL_OP_I_FFB_HI, int_AMDIL_bit_find_first_hi>;
+defm FFB_SGN : UnaryIntrinsicInt<IL_OP_I_FFB_SGN,
+ int_AMDIL_bit_find_first_sgn>;
+defm IMULHI : BinaryIntrinsicInt<IL_OP_I_MUL_HIGH, int_AMDIL_mulhi_i32>;
+let Predicates = [HasHWSign24Bit] in {
+defm IMUL24 : BinaryIntrinsicInt<IL_OP_I_MUL24, int_AMDIL_mul24_i32>;
+defm IMULHI24 : BinaryIntrinsicInt<IL_OP_I_MULHI24, int_AMDIL_mulhi24_i32>;
+defm IMAD24 : TernaryIntrinsicInt<IL_OP_I_MAD24, int_AMDIL_mad24_i32>;
+}
+defm CARRY : BinaryIntrinsicInt<IL_OP_I_CARRY, int_AMDIL_carry_i32>;
+defm BORROW : BinaryIntrinsicInt<IL_OP_I_BORROW, int_AMDIL_borrow_i32>;
+defm IMIN : BinaryIntrinsicInt<IL_OP_I_MIN, int_AMDIL_min_i32>;
+defm IMAX : BinaryIntrinsicInt<IL_OP_I_MAX, int_AMDIL_max_i32>;
+defm CMOV_LOG : TernaryIntrinsicInt<IL_OP_CMOV_LOGICAL,
+ int_AMDIL_cmov_logical>;
+defm IBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_IBIT_EXTRACT,
+ int_AMDIL_bit_extract_i32>;
+defm IMAD : TernaryIntrinsicInt<IL_OP_I_MAD, int_AMDIL_mad_i32>;
+defm SAD : TernaryIntrinsicInt<IL_OP_SAD, int_AMDIL_media_sad>;
+defm SADHI : TernaryIntrinsicInt<IL_OP_SAD_HI,
+ int_AMDIL_media_sad_hi>;
+}
+def SAD4_i32 : ThreeInOneOut<IL_OP_SAD4, (outs GPRI32:$dst),
+ (ins GPRV4I32:$src, GPRV4I32:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_SAD4.Text, " $dst, $src, $src1, $src2"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_media_sad4 GPRV4I32:$src, GPRV4I32:$src1,
+ GPRI32:$src2))]>;
+def FTOV4U8_i32 : OneInOneOut<IL_OP_F2U4, (outs GPRI32:$dst),
+ (ins GPRV4F32:$src),
+ !strconcat(IL_OP_F2U4.Text, " $dst, $src"),
+ [(set GPRI32:$dst,
+ (int_AMDIL_media_convert_f2v4u8 GPRV4F32:$src))]>;
+//===---------------------------------------------------------------------===//
+// Unsigned 32bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+defm UMUL : BinaryOpMCi32<IL_OP_U_MUL, IL_umul>;
+defm UMULHI : BinaryOpMCi32<IL_OP_U_MUL_HIGH, mulhu>;
+defm USHR : BinaryOpMCi32Const<IL_OP_U_SHR, srl>;
+defm USHRVEC : BinaryOpMCi32<IL_OP_U_SHR, srl>;
+defm UDIV : BinaryOpMCi32<IL_OP_U_DIV, udiv>;
+defm NATIVE_UDIV : BinaryIntrinsicInt<IL_OP_U_DIV, int_AMDIL_udiv>;
+let mayLoad=0, mayStore=0 in {
+defm UBIT_REVERSE : UnaryIntrinsicInt<IL_OP_UBIT_REVERSE,
+ int_AMDIL_bit_reverse_u32>;
+defm UMULHI_INT : BinaryIntrinsicInt<IL_OP_U_MUL_HIGH, int_AMDIL_mulhi_u32>;
+defm UMULHI24 : BinaryIntrinsicInt<IL_OP_U_MULHI24, int_AMDIL_mulhi24_u32>;
+defm UMUL24 : BinaryIntrinsicInt<IL_OP_U_MUL24, int_AMDIL_mul24_u32>;
+defm UMIN : BinaryIntrinsicInt<IL_OP_U_MIN, int_AMDIL_min_u32>;
+defm UMAX : BinaryIntrinsicInt<IL_OP_U_MAX, int_AMDIL_max_u32>;
+defm UBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_UBIT_EXTRACT,
+ int_AMDIL_bit_extract_u32>;
+defm UBIT_INSERT : QuaternaryIntrinsicInt<IL_OP_UBIT_INSERT,
+ int_AMDIL_bit_insert_u32>;
+defm BFI : TernaryIntrinsicInt<IL_OP_BFI, int_AMDIL_bfi>;
+defm BFM : BinaryIntrinsicInt<IL_OP_BFM, int_AMDIL_bfm>;
+defm UMAD : TernaryIntrinsicInt<IL_OP_U_MAD, int_AMDIL_mad_u32>;
+defm UMAD24 : TernaryIntrinsicInt<IL_OP_U_MAD24, int_AMDIL_mad24_u32>;
+defm U4LERP : TernaryIntrinsicInt<IL_OP_U4_LERP,
+ int_AMDIL_media_lerp_u4>;
+defm BITALIGN : TernaryIntrinsicInt<IL_OP_BIT_ALIGN, int_AMDIL_media_bitalign>;
+defm BYTEALIGN : TernaryIntrinsicInt<IL_OP_BYTE_ALIGN, int_AMDIL_media_bytealign>;
+}
+//===---------------------------------------------------------------------===//
+// Signed 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+// IL_OP_MOV with a custom swizzle in the Swizzle Encoder (presumably the negate)
+def LNEGATE : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins GPRI64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRI64:$dst, (IL_inegate GPRI64:$src))]>;
+// IL_OP_MOV with a custom swizzle in the Swizzle Encoder (presumably the negate)
+def LNEGATE_v2i64: OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst),
+ (ins GPRV2I64:$src),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+ [(set GPRV2I64:$dst, (IL_inegate GPRV2I64:$src))]>;
+let Predicates = [HasHW64Bit] in {
+def LADD : TwoInOneOut<IL_OP_I64_ADD, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI64:$src2),
+ !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (IL_add GPRI64:$src1, GPRI64:$src2))]>;
+defm IMIN64 : BinaryIntrinsicLong<IL_OP_I64_MIN, int_AMDIL_min_i32>;
+defm UMIN64 : BinaryIntrinsicLong<IL_OP_U64_MIN, int_AMDIL_min_u32>;
+defm IMAX64 : BinaryIntrinsicLong<IL_OP_I64_MAX, int_AMDIL_max_i32>;
+defm UMAX64 : BinaryIntrinsicLong<IL_OP_U64_MAX, int_AMDIL_max_u32>;
+}
+let Predicates = [HasHW64Bit] in {
+def LSHR : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI32:$src2))]>;
+def LSHL : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI32:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Unsigned 64bit integer math instructions start here
+//===---------------------------------------------------------------------===//
+let Predicates = [HasTmrRegister] in {
+ def Tmr : ILFormat<IL_OP_MOV, (outs GPRI64:$tmr),
+ (ins), !strconcat(IL_OP_MOV.Text, " $tmr, Tmr"),
+ [(set GPRI64:$tmr, (int_AMDIL_get_cycle_count))]>;
+}
+let Predicates = [HasDeviceIDInst] in {
+def CU_ID : ILFormat<IL_OP_CU_ID, (outs GPRI32:$id), (ins),
+ !strconcat(IL_OP_CU_ID.Text, " $id"),
+ [(set GPRI32:$id, (int_AMDIL_compute_unit_id))]>;
+def WAVE_ID : ILFormat<IL_OP_WAVE_ID, (outs GPRI32:$id), (ins),
+ !strconcat(IL_OP_WAVE_ID.Text, " $id"),
+ [(set GPRI32:$id, (int_AMDIL_wavefront_id))]>;
+}
+let Predicates = [HasHW64Bit] in {
+def LUSHR : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
+ (ins GPRI64:$src1, GPRI32:$src2),
+ !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
+ [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI32:$src2))]>;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Generic Float Instructions
+//===---------------------------------------------------------------------===//
+let hasIEEEFlag = 1 in {
+defm MUL_IEEE : BinaryOpMCFloat<IL_OP_MUL_IEEE, IL_OP_D_MUL, fmul>;
+}
+defm ADD : BinaryOpMCFloat<IL_OP_ADD, IL_OP_D_ADD, fadd>;
+//===---------------------------------------------------------------------===//
+// float math instructions start here
+//===---------------------------------------------------------------------===//
+let mayLoad=0, mayStore=0 in {
+defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
+defm FRAC : UnaryIntrinsicFloat<IL_OP_FRC, int_AMDIL_fraction>;
+defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
+defm ROUND_NEAREST : UnaryIntrinsicFloat<IL_OP_ROUND_NEAR,
+ int_AMDIL_round_nearest>;
+defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
+ int_AMDIL_round_neginf>;
+defm ROUND_POSINF : UnaryIntrinsicFloat<IL_OP_ROUND_POS_INF,
+ int_AMDIL_round_posinf>;
+defm ROUND_ZERO : UnaryIntrinsicFloat<IL_OP_ROUND_ZERO,
+ int_AMDIL_round_zero>;
+defm ACOS : UnaryIntrinsicFloatScalar<IL_OP_ACOS, int_AMDIL_acos>;
+defm ATAN : UnaryIntrinsicFloatScalar<IL_OP_ATAN, int_AMDIL_atan>;
+defm ASIN : UnaryIntrinsicFloatScalar<IL_OP_ASIN, int_AMDIL_asin>;
+defm TAN : UnaryIntrinsicFloatScalar<IL_OP_TAN, int_AMDIL_tan>;
+defm SIN : UnaryIntrinsicFloatScalar<IL_OP_SIN, int_AMDIL_sin>;
+defm COS : UnaryIntrinsicFloatScalar<IL_OP_COS, int_AMDIL_cos>;
+defm SQRT : UnaryIntrinsicFloatScalar<IL_OP_SQRT, int_AMDIL_sqrt>;
+defm EXP : UnaryIntrinsicFloatScalar<IL_OP_EXP, int_AMDIL_exp>;
+defm EXPVEC : UnaryIntrinsicFloat<IL_OP_EXP_VEC, int_AMDIL_exp_vec>;
+defm SQRTVEC : UnaryIntrinsicFloat<IL_OP_SQRT_VEC, int_AMDIL_sqrt_vec>;
+defm COSVEC : UnaryIntrinsicFloat<IL_OP_COS_VEC, int_AMDIL_cos_vec>;
+defm SINVEC : UnaryIntrinsicFloat<IL_OP_SIN_VEC, int_AMDIL_sin_vec>;
+defm LOGVEC : UnaryIntrinsicFloat<IL_OP_LOG_VEC, int_AMDIL_log_vec>;
+defm RSQVEC : UnaryIntrinsicFloat<IL_OP_RSQ_VEC, int_AMDIL_rsq_vec>;
+defm EXN : UnaryIntrinsicFloatScalar<IL_OP_EXN, int_AMDIL_exn>;
+defm SIGN : UnaryIntrinsicFloat<IL_OP_SGN, int_AMDIL_sign>;
+defm LENGTH : UnaryIntrinsicFloat<IL_OP_LEN, int_AMDIL_length>;
+defm POW : BinaryIntrinsicFloat<IL_OP_POW, int_AMDIL_pow>;
+}
+
+let hasIEEEFlag = 1 in {
+ let mayLoad = 0, mayStore=0 in {
+defm MIN : BinaryIntrinsicFloat<IL_OP_MIN, int_AMDIL_min>;
+defm MAX : BinaryIntrinsicFloat<IL_OP_MAX, int_AMDIL_max>;
+defm MAD : TernaryIntrinsicFloat<IL_OP_MAD, int_AMDIL_mad>;
+ }
+defm MOD : BinaryOpMCf32<IL_OP_MOD, frem>;
+}
+let hasZeroOpFlag = 1 in {
+ let mayLoad = 0, mayStore=0 in {
+defm LN : UnaryIntrinsicFloatScalar<IL_OP_LN, int_AMDIL_ln>;
+defm LOG : UnaryIntrinsicFloatScalar<IL_OP_LOG, int_AMDIL_log>;
+defm RSQ : UnaryIntrinsicFloatScalar<IL_OP_RSQ, int_AMDIL_rsq>;
+defm DIV : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
+ }
+}
+ let mayLoad = 0, mayStore=0 in {
+defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
+defm FMA : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
+defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
+ }
+defm SUB : BinaryOpMCf32<IL_OP_SUB, fsub>;
+defm FABS : UnaryOpMCf32<IL_OP_ABS, fabs>;
+defm FMAD : TernaryOpMCf32<IL_OP_MAD, IL_mad>;
+defm NEAR : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>;
+defm RND_Z : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
+
+// fneg as IL_OP_MOV; custom Swizzle Encoder swizzle presumably does the negate
+def NEG_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
+ (ins GPRF32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRF32:$dst, (fneg GPRF32:$src0))]>;
+def INTTOANY_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
+ (ins GPRI32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRF32:$dst, (IL_inttoany GPRI32:$src0))]>;
+let hasIEEEFlag = 1 in {
+def DP2ADD_f32 : ThreeInOneOut<IL_OP_DP2_ADD, (outs GPRF32:$dst),
+ (ins GPRV2F32:$src0, GPRV2F32:$src1, GPRF32:$src2),
+ !strconcat(IL_OP_DP2_ADD.Text, " $dst, $src0, $src1, $src2"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp2_add GPRV2F32:$src0,
+ GPRV2F32:$src1, GPRF32:$src2))]>;
+def DP2_f32 : TwoInOneOut<IL_OP_DP2, (outs GPRF32:$dst),
+ (ins GPRV2F32:$src0, GPRV2F32:$src1),
+ !strconcat(IL_OP_DP2.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp2 GPRV2F32:$src0, GPRV2F32:$src1))]>;
+def DP3_f32 : TwoInOneOut<IL_OP_DP3, (outs GPRF32:$dst),
+ (ins GPRV4F32:$src0, GPRV4F32:$src1),
+ !strconcat(IL_OP_DP3.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp3 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+def DP4_f32 : TwoInOneOut<IL_OP_DP4, (outs GPRF32:$dst),
+ (ins GPRV4F32:$src0, GPRV4F32:$src1),
+ !strconcat(IL_OP_DP4.Text, " $dst, $src0, $src1"),
+ [(set GPRF32:$dst,
+ (int_AMDIL_dp4 GPRV4F32:$src0, GPRV4F32:$src1))]>;
+}
+defm UNPACK_B0 : IntrConvertI32TOF32<IL_OP_UNPACK_0, int_AMDIL_media_unpack_byte_0>;
+defm UNPACK_B1 : IntrConvertI32TOF32<IL_OP_UNPACK_1, int_AMDIL_media_unpack_byte_1>;
+defm UNPACK_B2 : IntrConvertI32TOF32<IL_OP_UNPACK_2, int_AMDIL_media_unpack_byte_2>;
+defm UNPACK_B3 : IntrConvertI32TOF32<IL_OP_UNPACK_3, int_AMDIL_media_unpack_byte_3>;
+defm FTOI_FLR : IntrConvertF32TOI32<IL_OP_FTOI_FLR, int_AMDIL_convert_f32_i32_flr>;
+defm FTOI_RPI : IntrConvertF32TOI32<IL_OP_FTOI_RPI, int_AMDIL_convert_f32_i32_rpi>;
+defm HTOF : IntrConvertF16TOF32<IL_OP_F16_TO_F32, int_AMDIL_convert_f16_f32>;
+defm FTOH : IntrConvertF32TOF16<IL_OP_F32_TO_F16, int_AMDIL_convert_f32_f16>;
+defm FTOH_NEAR : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEAR, int_AMDIL_convert_f32_f16_near>;
+defm FTOH_NEG_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEG_INF, int_AMDIL_convert_f32_f16_neg_inf>;
+defm FTOH_PLUS_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_PLUS_INF, int_AMDIL_convert_f32_f16_plus_inf>;
+//===---------------------------------------------------------------------===//
+// float math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float2 math instructions start here
+//===---------------------------------------------------------------------===//
+// fneg as IL_OP_MOV; custom Swizzle Encoder swizzle presumably does the negate
+def NEG_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst),
+ (ins GPRV2F32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRV2F32:$dst, (fneg GPRV2F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float2 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// float4 math instructions start here
+//===---------------------------------------------------------------------===//
+// fneg as IL_OP_MOV; custom Swizzle Encoder swizzle presumably does the negate
+def NEG_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRV4F32:$dst, (fneg GPRV4F32:$src0))]>;
+//===---------------------------------------------------------------------===//
+// float4 math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// double math instructions start here
+//===---------------------------------------------------------------------===//
+// fsub as IL_OP_D_ADD; custom Swizzle Encoder swizzle presumably negates $src1
+def SUB_f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRF64:$dst),
+ (ins GPRF64:$src0, GPRF64:$src1),
+ !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRF64:$dst, (fsub GPRF64:$src0, GPRF64:$src1))]>;
+// fsub as IL_OP_D_ADD; custom Swizzle Encoder swizzle presumably negates $src1
+def SUB_v2f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src0, GPRV2F64:$src1),
+ !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, $src1"),
+ [(set GPRV2F64:$dst, (fsub GPRV2F64:$src0, GPRV2F64:$src1))]>;
+// fneg as IL_OP_MOV; custom Swizzle Encoder swizzle presumably does the negate
+def NEG_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst),
+ (ins GPRF64:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRF64:$dst, (fneg GPRF64:$src0))]>;
+// fneg as IL_OP_MOV; custom Swizzle Encoder swizzle presumably does the negate
+def NEG_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src0),
+ !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
+ [(set GPRV2F64:$dst, (fneg GPRV2F64:$src0))]>;
+ let mayLoad = 0, mayStore=0 in {
+defm MIN : BinaryIntrinsicDouble<IL_OP_D_MIN, int_AMDIL_min>;
+defm MAX : BinaryIntrinsicDouble<IL_OP_D_MAX, int_AMDIL_max>;
+defm DIV : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+defm MAD : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_mad>;
+defm DFMA : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_fma>;
+defm FRAC : UnaryIntrinsicDouble<IL_OP_D_FRC, int_AMDIL_fraction>;
+defm SQRT : UnaryIntrinsicDouble<IL_OP_D_SQRT, int_AMDIL_sqrt>;
+defm RSQ : UnaryIntrinsicDoubleScalar<IL_OP_D_RSQ, int_AMDIL_rsq>;
+defm RCP : UnaryIntrinsicDoubleScalar<IL_OP_D_RCP, int_AMDIL_drcp>;
+defm DMAD : TernaryOpMCf64<IL_OP_D_MAD, IL_mad>;
+ }
+def FREXP_f64 : OneInOneOut<IL_OP_D_FREXP, (outs GPRV2I64:$dst),
+ (ins GPRF64:$src),
+ !strconcat(IL_OP_D_FREXP.Text," $dst, $src"),
+ [(set GPRV2I64:$dst,
+ (int_AMDIL_frexp_f64 GPRF64:$src))]>;
+def LDEXP_f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRF64:$dst),
+ (ins GPRF64:$src, GPRI32:$src1),
+ !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRF64:$dst,
+ (int_AMDIL_ldexp GPRF64:$src, GPRI32:$src1))]>;
+def LDEXP_v2f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src, GPRV2I32:$src1),
+ !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
+ [(set GPRV2F64:$dst,
+ (int_AMDIL_ldexp GPRV2F64:$src, GPRV2I32:$src1))]>;
+//===---------------------------------------------------------------------===//
+// double math instructions end here
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Various Macros
+//===---------------------------------------------------------------------===//
+def MACRO__sdiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, sdiv>;
+def MACRO__sdiv_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, sdiv>;
+def MACRO__sdiv_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, sdiv>;
+def MACRO__udiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, udiv>;
+def MACRO__udiv_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, udiv>;
+def MACRO__udiv_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, udiv>;
+def MACRO__smod_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, srem>;
+def MACRO__smod_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, srem>;
+def MACRO__smod_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, srem>;
+def MACRO__umod_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, urem>;
+def MACRO__umod_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, urem>;
+def MACRO__umod_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, urem>;
+let Predicates = [HasSWDDiv] in {
+ def MACRO__ddiv_f64: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+let Predicates = [HasHWDDiv] in {
+ def MACRO__ddiv_f64_fma: BinaryMacro<GPRF64, GPRF64, GPRF64, fdiv>;
+}
+def MACRO__ftol_i64 : UnaryMacro<GPRI64, GPRF32, fp_to_sint>;
+def MACRO__ftoul_i64 : UnaryMacro<GPRI64, GPRF32, fp_to_uint>;
+def MACRO__ultof_f32 : UnaryMacro<GPRF32, GPRI64, uint_to_fp>;
+def MACRO__ltof_f32 : UnaryMacro<GPRF32, GPRI64, sint_to_fp>;
+let Predicates = [HasSW64Mul] in {
+def MACRO__mul_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, mul>;
+def MACRO__mul_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, mul>;
+}
+let Predicates = [HasSW64DivMod] in {
+def MACRO__sdiv_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, sdiv>;
+def MACRO__udiv_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, udiv>;
+def MACRO__smod_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, srem>;
+def MACRO__umod_i64 : BinaryMacro<GPRI64, GPRI64, GPRI64, urem>;
+}
+// 64-bit divide, remainder and multiply records built from the BinaryOpMCi64
+// multiclass.  Each defm supplies an IL opcode and a DAG node.  The
+// divide/remainder group is guarded by HasHW64DivMod, the multiply group by
+// HasHW64Mul.  UMUL is keyed on the IL_umul node, whereas SMUL is keyed on the
+// generic mul node.
+let Predicates = [HasHW64DivMod] in {
+  defm SDIV : BinaryOpMCi64<IL_OP_I64_DIV, sdiv>;
+  defm UDIV : BinaryOpMCi64<IL_OP_U64_DIV, udiv>;
+  defm SMOD : BinaryOpMCi64<IL_OP_I64_MOD, srem>;
+  defm UMOD : BinaryOpMCi64<IL_OP_U64_MOD, urem>;
+}
+let Predicates = [HasHW64Mul] in {
+  defm SMUL : BinaryOpMCi64<IL_OP_I64_MUL, mul>;
+  defm UMUL : BinaryOpMCi64<IL_OP_U64_MUL, IL_umul>;
+}
+// 64-bit shift macros (srl, shl, sra).  The first two register classes are
+// 64-bit while the third is the 32-bit counterpart (GPRV2I32 / GPRI32),
+// presumably the shift amount -- confirm against the BinaryMacro class.
+// The v2i64 records carry no predicate; the scalar i64 records are guarded by
+// HasSW64Bit.
+def MACRO__shr_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, srl>;
+def MACRO__shl_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, shl>;
+def MACRO__sra_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, sra>;
+
+let Predicates = [HasSW64Bit] in {
+  def MACRO__shr_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, srl>;
+  def MACRO__shl_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, shl>;
+  def MACRO__sra_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, sra>;
+}
+//===---------------------------------------------------------------------===//
+// Comparison Instructions
+//===---------------------------------------------------------------------===//
+// Pseudo comparison records generated from the Compare multiclass.
+// usesCustomInserter = 1 is applied to every record this defm produces.
+let usesCustomInserter = 1 in
+  defm CMP : Compare<"Pseudo comparison instr">;
+//===---------------------------------------------------------------------===//
+// 32-bit floating point operations
+//===---------------------------------------------------------------------===//
+// Single-precision comparisons.  Every record takes $lhs and $rhs, writes
+// $dst in the same register class, and uses the asm string
+// "<opcode text> $dst, $lhs, $rhs".  The final list argument is empty for
+// every record.  Only IL_OP_LT has v2f32 and v4f32 variants here.
+def FEQ : TwoInOneOut<IL_OP_EQ,
+    (outs GPRF32:$dst), (ins GPRF32:$lhs, GPRF32:$rhs),
+    !strconcat(IL_OP_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def FGE : TwoInOneOut<IL_OP_GE,
+    (outs GPRF32:$dst), (ins GPRF32:$lhs, GPRF32:$rhs),
+    !strconcat(IL_OP_GE.Text, " $dst, $lhs, $rhs"), []>;
+def FLT : TwoInOneOut<IL_OP_LT,
+    (outs GPRF32:$dst), (ins GPRF32:$lhs, GPRF32:$rhs),
+    !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs"), []>;
+def FLT_v2f32 : TwoInOneOut<IL_OP_LT,
+    (outs GPRV2F32:$dst), (ins GPRV2F32:$lhs, GPRV2F32:$rhs),
+    !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs"), []>;
+def FLT_v4f32 : TwoInOneOut<IL_OP_LT,
+    (outs GPRV4F32:$dst), (ins GPRV4F32:$lhs, GPRV4F32:$rhs),
+    !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs"), []>;
+def FNE : TwoInOneOut<IL_OP_NE,
+    (outs GPRF32:$dst), (ins GPRF32:$lhs, GPRF32:$rhs),
+    !strconcat(IL_OP_NE.Text, " $dst, $lhs, $rhs"), []>;
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// Double-precision comparisons built on the IL_OP_D_* opcodes.  Operands and
+// result share one register class (GPRF64, or GPRV2F64 for DEQ_v2f64), the
+// asm string is "<opcode text> $dst, $lhs, $rhs", and the final list argument
+// is empty for every record.
+def DEQ : TwoInOneOut<IL_OP_D_EQ,
+    (outs GPRF64:$dst), (ins GPRF64:$lhs, GPRF64:$rhs),
+    !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def DEQ_v2f64 : TwoInOneOut<IL_OP_D_EQ,
+    (outs GPRV2F64:$dst), (ins GPRV2F64:$lhs, GPRV2F64:$rhs),
+    !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def DGE : TwoInOneOut<IL_OP_D_GE,
+    (outs GPRF64:$dst), (ins GPRF64:$lhs, GPRF64:$rhs),
+    !strconcat(IL_OP_D_GE.Text, " $dst, $lhs, $rhs"), []>;
+def DLT : TwoInOneOut<IL_OP_D_LT,
+    (outs GPRF64:$dst), (ins GPRF64:$lhs, GPRF64:$rhs),
+    !strconcat(IL_OP_D_LT.Text, " $dst, $lhs, $rhs"), []>;
+def DNE : TwoInOneOut<IL_OP_D_NE,
+    (outs GPRF64:$dst), (ins GPRF64:$lhs, GPRF64:$rhs),
+    !strconcat(IL_OP_D_NE.Text, " $dst, $lhs, $rhs"), []>;
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// 32-bit integer comparisons built on IL_OP_I_EQ / I_GE / I_LT / I_NE, each in
+// scalar (GPRI32), v2i32 and v4i32 form.  Operands and result share one
+// register class, the asm string is "<opcode text> $dst, $lhs, $rhs", and the
+// final list argument is empty for every record.
+def IEQ : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def IEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def IEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def IGE : TwoInOneOut<IL_OP_I_GE,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs"), []>;
+def IGE_v2i32 : TwoInOneOut<IL_OP_I_GE,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs"), []>;
+def IGE_v4i32 : TwoInOneOut<IL_OP_I_GE,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs"), []>;
+def ILT : TwoInOneOut<IL_OP_I_LT,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs"), []>;
+def ILT_v2i32 : TwoInOneOut<IL_OP_I_LT,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs"), []>;
+def ILT_v4i32 : TwoInOneOut<IL_OP_I_LT,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs"), []>;
+def INE : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+def INE_v2i32 : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+def INE_v4i32 : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+// 64-bit integer comparisons on GPRI64, guarded by HasHW64Bit.
+// LEQ, LGE, LLT and LNE print their operands as "$dst, $lhs, $rhs".
+// LLE and LGT reuse IL_OP_I64_GE and IL_OP_I64_LT respectively and print the
+// operands swapped ("$dst, $rhs, $lhs").
+// The final list argument is empty for every record.
+let Predicates = [HasHW64Bit] in {
+  def LEQ : TwoInOneOut<IL_OP_I64_EQ,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_EQ.Text, " $dst, $lhs, $rhs"), []>;
+  def LGE : TwoInOneOut<IL_OP_I64_GE,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_GE.Text, " $dst, $lhs, $rhs"), []>;
+  def LLE : TwoInOneOut<IL_OP_I64_GE,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_GE.Text, " $dst, $rhs, $lhs"), []>;
+  def LGT : TwoInOneOut<IL_OP_I64_LT,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_LT.Text, " $dst, $rhs, $lhs"), []>;
+  def LLT : TwoInOneOut<IL_OP_I64_LT,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_LT.Text, " $dst, $lhs, $rhs"), []>;
+  def LNE : TwoInOneOut<IL_OP_I64_NE,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_I64_NE.Text, " $dst, $lhs, $rhs"), []>;
+}
+
+//===---------------------------------------------------------------------===//
+// Unsigned Integer Operations
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+//TODO: need to correctly define comparison instructions
+//===---------------------------------------------------------------------===//
+// Unsigned 32-bit equality in scalar, v2i32 and v4i32 form.  These records
+// reuse IL_OP_I_EQ, the same opcode the IEQ records use.  The asm string is
+// "<opcode text> $dst, $lhs, $rhs" and the final list argument is empty.
+def UEQ : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def UEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+def UEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs"), []>;
+// Unsigned 32-bit "less than or equal" (ULE) and "greater than" (UGT) in
+// scalar, v2i32 and v4i32 form.
+//
+// These records reuse IL_OP_U_GE and IL_OP_U_LT, so the asm string must print
+// the operands swapped:
+//   lhs <= rhs  ==>  IL_OP_U_GE  $dst, $rhs, $lhs
+//   lhs >  rhs  ==>  IL_OP_U_LT  $dst, $rhs, $lhs
+// This is the same scheme the 64-bit LLE/LGT and ULLE/ULGT records use.
+// Printing "$lhs, $rhs" here would make ULE textually identical to UGE and
+// UGT identical to ULT.
+// NOTE(review): if any emitter already hands these opcodes pre-swapped
+// operands, it has to be adjusted together with this change -- confirm the
+// users of ULE*/UGT*.
+// The final list argument is empty for every record.
+def ULE : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $rhs, $lhs"), []>;
+def ULE_v2i32 : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $rhs, $lhs"), []>;
+def ULE_v4i32 : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $rhs, $lhs"), []>;
+def UGT : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $rhs, $lhs"), []>;
+def UGT_v2i32 : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $rhs, $lhs"), []>;
+def UGT_v4i32 : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $rhs, $lhs"), []>;
+// Unsigned 32-bit ">=" (IL_OP_U_GE), "<" (IL_OP_U_LT) and "!=" comparisons in
+// scalar, v2i32 and v4i32 form.  The UNE records reuse IL_OP_I_NE, the same
+// opcode the INE records use.  The asm string is
+// "<opcode text> $dst, $lhs, $rhs" and the final list argument is empty.
+def UGE : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs"), []>;
+def UGE_v2i32 : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs"), []>;
+def UGE_v4i32 : TwoInOneOut<IL_OP_U_GE,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs"), []>;
+def ULT : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs"), []>;
+def ULT_v2i32 : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs"), []>;
+def ULT_v4i32 : TwoInOneOut<IL_OP_U_LT,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs"), []>;
+def UNE : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRI32:$dst), (ins GPRI32:$lhs, GPRI32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+def UNE_v2i32 : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRV2I32:$dst), (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+def UNE_v4i32 : TwoInOneOut<IL_OP_I_NE,
+    (outs GPRV4I32:$dst), (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+    !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs"), []>;
+// Unsigned 64-bit comparisons on GPRI64, guarded by HasHW64Bit.
+// ULGE and ULLT print "$dst, $lhs, $rhs".  ULLE and ULGT reuse IL_OP_U64_GE
+// and IL_OP_U64_LT respectively and print the operands swapped
+// ("$dst, $rhs, $lhs").  The final list argument is empty for every record.
+let Predicates = [HasHW64Bit] in {
+  def ULLE : TwoInOneOut<IL_OP_U64_GE,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_U64_GE.Text, " $dst, $rhs, $lhs"), []>;
+  def ULGT : TwoInOneOut<IL_OP_U64_LT,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_U64_LT.Text, " $dst, $rhs, $lhs"), []>;
+  def ULGE : TwoInOneOut<IL_OP_U64_GE,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_U64_GE.Text, " $dst, $lhs, $rhs"), []>;
+  def ULLT : TwoInOneOut<IL_OP_U64_LT,
+      (outs GPRI64:$dst), (ins GPRI64:$lhs, GPRI64:$rhs),
+      !strconcat(IL_OP_U64_LT.Text, " $dst, $lhs, $rhs"), []>;
+}
+//===---------------------------------------------------------------------===//
+// Scalar ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+// Each record passes UnaryOp an IL opcode, the DAG node it is keyed on, and
+// two register classes; in every record below the first class is the
+// destination type named in the comment and the second is the source type.
+
+// f32 ==> f64
+def FTOD       : UnaryOp<IL_OP_F_2_D, fextend,    GPRF64,   GPRF32>;
+// f64 ==> f32 (keyed on the IL_d2f node)
+def DTOF       : UnaryOp<IL_OP_D_2_F, IL_d2f,     GPRF32,   GPRF64>;
+// f32 ==> i32, signed
+def FTOI       : UnaryOp<IL_OP_FTOI,  fp_to_sint, GPRI32,   GPRF32>;
+def FTOI_v2i32 : UnaryOp<IL_OP_FTOI,  fp_to_sint, GPRV2I32, GPRV2F32>;
+def FTOI_v4i32 : UnaryOp<IL_OP_FTOI,  fp_to_sint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32, signed
+def ITOF       : UnaryOp<IL_OP_ITOF,  sint_to_fp, GPRF32,   GPRI32>;
+def ITOF_v2f32 : UnaryOp<IL_OP_ITOF,  sint_to_fp, GPRV2F32, GPRV2I32>;
+def ITOF_v4f32 : UnaryOp<IL_OP_ITOF,  sint_to_fp, GPRV4F32, GPRV4I32>;
+// f32 ==> i32, unsigned
+def FTOU       : UnaryOp<IL_OP_FTOU,  fp_to_uint, GPRI32,   GPRF32>;
+def FTOU_v2i32 : UnaryOp<IL_OP_FTOU,  fp_to_uint, GPRV2I32, GPRV2F32>;
+def FTOU_v4i32 : UnaryOp<IL_OP_FTOU,  fp_to_uint, GPRV4I32, GPRV4F32>;
+// i32 ==> f32, unsigned
+def UTOF       : UnaryOp<IL_OP_UTOF,  uint_to_fp, GPRF32,   GPRI32>;
+def UTOF_v2f32 : UnaryOp<IL_OP_UTOF,  uint_to_fp, GPRV2F32, GPRV2I32>;
+def UTOF_v4f32 : UnaryOp<IL_OP_UTOF,  uint_to_fp, GPRV4F32, GPRV4I32>;
+// Splitting and building f64 values from 32-bit halves.
+// Every opcode in this group has a custom swizzle pattern encoded in the
+// Swizzle Encoder.
+//   DHI / DHI_v2f64         - upper 32 bits of f64, IL_OP_MOV matched on
+//                             IL_dcomphi / IL_dcomphi2.
+//   DLO / DLO_v2f64         - lower 32 bits of f64, IL_OP_MOV matched on
+//                             IL_dcomplo / IL_dcomplo2.
+//   DCREATE / DCREATE_v2f64 - two 32 bit integers into a f64, IL_OP_I_ADD
+//                             matched on IL_dcreate / IL_dcreate2.
+
+// Get upper 32 bits of f64
+def DHI : OneInOneOut<IL_OP_MOV,
+    (outs GPRI32:$dst), (ins GPRF64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRI32:$dst, (IL_dcomphi GPRF64:$src))]>;
+def DHI_v2f64 : OneInOneOut<IL_OP_MOV,
+    (outs GPRV2I32:$dst), (ins GPRV2F64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRV2I32:$dst, (IL_dcomphi2 GPRV2F64:$src))]>;
+// Get lower 32 bits of f64
+def DLO : OneInOneOut<IL_OP_MOV,
+    (outs GPRI32:$dst), (ins GPRF64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRI32:$dst, (IL_dcomplo GPRF64:$src))]>;
+def DLO_v2f64 : OneInOneOut<IL_OP_MOV,
+    (outs GPRV2I32:$dst), (ins GPRV2F64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRV2I32:$dst, (IL_dcomplo2 GPRV2F64:$src))]>;
+// Convert two 32 bit integers into a f64
+def DCREATE : TwoInOneOut<IL_OP_I_ADD,
+    (outs GPRF64:$dst), (ins GPRI32:$src0, GPRI32:$src1),
+    !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+    [(set GPRF64:$dst, (IL_dcreate GPRI32:$src0, GPRI32:$src1))]>;
+def DCREATE_v2f64 : TwoInOneOut<IL_OP_I_ADD,
+    (outs GPRV2F64:$dst), (ins GPRV2I32:$src0, GPRV2I32:$src1),
+    !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+    [(set GPRV2F64:$dst, (IL_dcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+// Extracting the 32-bit halves of i64 values with IL_OP_MOV.
+// Every opcode in this group has a custom swizzle pattern encoded in the
+// Swizzle Encoder.
+
+// Get upper 32 bits of i64 (matched on IL_lcomphi / IL_lcomphi2)
+def LHI : OneInOneOut<IL_OP_MOV,
+    (outs GPRI32:$dst), (ins GPRI64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRI32:$dst, (IL_lcomphi GPRI64:$src))]>;
+def LHI_v2i64 : OneInOneOut<IL_OP_MOV,
+    (outs GPRV2I32:$dst), (ins GPRV2I64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRV2I32:$dst, (IL_lcomphi2 GPRV2I64:$src))]>;
+// Get lower 32 bits of i64 (matched on IL_lcomplo / IL_lcomplo2)
+def LLO : OneInOneOut<IL_OP_MOV,
+    (outs GPRI32:$dst), (ins GPRI64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRI32:$dst, (IL_lcomplo GPRI64:$src))]>;
+def LLO_v2i64 : OneInOneOut<IL_OP_MOV,
+    (outs GPRV2I32:$dst), (ins GPRV2I64:$src),
+    !strconcat(IL_OP_MOV.Text, " $dst, $src"),
+    [(set GPRV2I32:$dst, (IL_lcomplo2 GPRV2I64:$src))]>;
+// Bitwise-OR records built on IL_OP_I_OR with an empty final list argument.
+// Every opcode in this group has a custom swizzle pattern encoded in the
+// Swizzle Encoder.  The v4i16 and v2i32 records are declared identically
+// (GPRI32 operands and result); the v2i64 record uses GPRI64.
+def HILO_BITOR_v4i16 : TwoInOneOut<IL_OP_I_OR,
+    (outs GPRI32:$dst), (ins GPRI32:$src, GPRI32:$src2),
+    !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+def HILO_BITOR_v2i32 : TwoInOneOut<IL_OP_I_OR,
+    (outs GPRI32:$dst), (ins GPRI32:$src, GPRI32:$src2),
+    !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+def HILO_BITOR_v2i64 : TwoInOneOut<IL_OP_I_OR,
+    (outs GPRI64:$dst), (ins GPRI64:$src, GPRI64:$src2),
+    !strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
+// Convert two 32 bit integers into a i64 (IL_OP_I_ADD, matched on
+// IL_lcreate for the scalar form and IL_lcreate2 for the v2i64 form).
+// Both opcodes have a custom swizzle pattern encoded in the Swizzle Encoder.
+def LCREATE : TwoInOneOut<IL_OP_I_ADD,
+    (outs GPRI64:$dst), (ins GPRI32:$src0, GPRI32:$src1),
+    !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+    [(set GPRI64:$dst, (IL_lcreate GPRI32:$src0, GPRI32:$src1))]>;
+def LCREATE_v2i64 : TwoInOneOut<IL_OP_I_ADD,
+    (outs GPRV2I64:$dst), (ins GPRV2I32:$src0, GPRV2I32:$src1),
+    !strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
+    [(set GPRV2I64:$dst, (IL_lcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+//===---------------------------------------------------------------------===//
+// Scalar ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// Scalar-to-vector records: expands the UnaryOpMCVec multiclass with
+// IL_OP_MOV and the IL_vbuild node.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCREATE : UnaryOpMCVec<IL_OP_MOV, IL_vbuild>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Scalar conversion functions
+//===---------------------------------------------------------------------===//
+
+// Vector-to-scalar records: expands the VectorExtract multiclass with the
+// IL_vextract node.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VEXTRACT : VectorExtract<IL_vextract>;
+
+//===---------------------------------------------------------------------===//
+// Vector ==> Vector conversion functions
+//===---------------------------------------------------------------------===//
+// Vector-to-vector records: VINSERT expands the VectorInsert multiclass with
+// the IL_vinsert node, VCONCAT expands VectorConcat with IL_vconcat.
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VINSERT : VectorInsert<IL_vinsert>;
+// This opcode has custom swizzle pattern encoded in Swizzle Encoder
+defm VCONCAT : VectorConcat<IL_vconcat>;
+
+//===---------------------------------------------------------------------===//
+// Bit conversion functions
+//===---------------------------------------------------------------------===//
+// Bit conversion records.  Each defm expands the BitConversion multiclass
+// with IL_OP_MOV, one register class and the IL_bitconv node; the register
+// class is presumably the destination type the name refers to (confirm
+// against the BitConversion multiclass).  Scalar, 2-element and 4-element
+// classes are covered; there are no 4-element double or long records here.
+defm IL_ASCHAR     : BitConversion<IL_OP_MOV, GPRI8,    IL_bitconv>;
+defm IL_ASSHORT    : BitConversion<IL_OP_MOV, GPRI16,   IL_bitconv>;
+defm IL_ASINT      : BitConversion<IL_OP_MOV, GPRI32,   IL_bitconv>;
+defm IL_ASFLOAT    : BitConversion<IL_OP_MOV, GPRF32,   IL_bitconv>;
+defm IL_ASDOUBLE   : BitConversion<IL_OP_MOV, GPRF64,   IL_bitconv>;
+defm IL_ASLONG     : BitConversion<IL_OP_MOV, GPRI64,   IL_bitconv>;
+defm IL_ASV2CHAR   : BitConversion<IL_OP_MOV, GPRV2I8,  IL_bitconv>;
+defm IL_ASV2SHORT  : BitConversion<IL_OP_MOV, GPRV2I16, IL_bitconv>;
+defm IL_ASV2INT    : BitConversion<IL_OP_MOV, GPRV2I32, IL_bitconv>;
+defm IL_ASV2FLOAT  : BitConversion<IL_OP_MOV, GPRV2F32, IL_bitconv>;
+defm IL_ASV2DOUBLE : BitConversion<IL_OP_MOV, GPRV2F64, IL_bitconv>;
+defm IL_ASV2LONG   : BitConversion<IL_OP_MOV, GPRV2I64, IL_bitconv>;
+defm IL_ASV4CHAR   : BitConversion<IL_OP_MOV, GPRV4I8,  IL_bitconv>;
+defm IL_ASV4SHORT  : BitConversion<IL_OP_MOV, GPRV4I16, IL_bitconv>;
+defm IL_ASV4INT    : BitConversion<IL_OP_MOV, GPRV4I32, IL_bitconv>;
+defm IL_ASV4FLOAT  : BitConversion<IL_OP_MOV, GPRV4F32, IL_bitconv>;
+
+let Predicates = [Has32BitPtr] in {
+ let isCodeGenOnly = 1 in {
+   // Memory pseudo records, all marked isCodeGenOnly.  Each defm passes its
+   // multiclass an asm string beginning with '!'; the STORE and LOAD
+   // multiclasses additionally receive the pattern fragment for one address
+   // space and extension kind.
+   //===-------------------------------------------------------------------===//
+   // Store Memory Operations
+   //===-------------------------------------------------------------------===//
+   defm GLOBALTRUNCSTORE  : GTRUNCSTORE<"!global trunc store">;
+   defm GLOBALSTORE       : STORE<"!global store", global_store>;
+   defm LOCALTRUNCSTORE   : LTRUNCSTORE<"!local trunc store">;
+   defm LOCALSTORE        : STORE<"!local store", local_store>;
+   defm PRIVATETRUNCSTORE : PTRUNCSTORE<"!private trunc store">;
+   defm PRIVATESTORE      : STORE<"!private store", private_store>;
+   defm REGIONTRUNCSTORE  : RTRUNCSTORE<"!region trunc store">;
+   // NOTE(review): the only store string that contains "hw" -- confirm this
+   // is intentional.
+   defm REGIONSTORE       : STORE<"!region hw store", region_store>;
+
+
+   //===-------------------------------------------------------------------===//
+   // Load Memory Operations
+   //===-------------------------------------------------------------------===//
+   defm GLOBALLOAD       : LOAD<"!global load", global_load>;
+   defm GLOBALZEXTLOAD   : LOAD<"!global zext load", global_zext_load>;
+   defm GLOBALSEXTLOAD   : LOAD<"!global sext load", global_sext_load>;
+   defm GLOBALAEXTLOAD   : LOAD<"!global aext load", global_aext_load>;
+   defm PRIVATELOAD      : LOAD<"!private load", private_load>;
+   defm PRIVATEZEXTLOAD  : LOAD<"!private zext load", private_zext_load>;
+   defm PRIVATESEXTLOAD  : LOAD<"!private sext load", private_sext_load>;
+   defm PRIVATEAEXTLOAD  : LOAD<"!private aext load", private_aext_load>;
+   defm CPOOLLOAD        : LOAD<"!constant pool load", cp_load>;
+   defm CPOOLZEXTLOAD    : LOAD<"!constant pool zext load", cp_zext_load>;
+   defm CPOOLSEXTLOAD    : LOAD<"!constant pool sext load", cp_sext_load>;
+   // NOTE(review): word order differs from the zext/sext siblings
+   // ("aext pool" vs "pool zext") -- confirm before normalising.
+   defm CPOOLAEXTLOAD    : LOAD<"!constant aext pool load", cp_aext_load>;
+   defm CONSTANTLOAD     : LOAD<"!constant load", constant_load>;
+   defm CONSTANTZEXTLOAD : LOAD<"!constant zext load", constant_zext_load>;
+   defm CONSTANTSEXTLOAD : LOAD<"!constant sext load", constant_sext_load>;
+   defm CONSTANTAEXTLOAD : LOAD<"!constant aext load", constant_aext_load>;
+   defm LOCALLOAD        : LOAD<"!local load", local_load>;
+   defm LOCALZEXTLOAD    : LOAD<"!local zext load", local_zext_load>;
+   defm LOCALSEXTLOAD    : LOAD<"!local sext load", local_sext_load>;
+   defm LOCALAEXTLOAD    : LOAD<"!local aext load", local_aext_load>;
+   defm REGIONLOAD       : LOAD<"!region load", region_load>;
+   defm REGIONZEXTLOAD   : LOAD<"!region zext load", region_zext_load>;
+   defm REGIONSEXTLOAD   : LOAD<"!region sext load", region_sext_load>;
+   defm REGIONAEXTLOAD   : LOAD<"!region aext load", region_aext_load>;
+ }
+
+
+ //===---------------------------------------------------------------------===//
+ // IO Expansion Load/Store Instructions
+ //===---------------------------------------------------------------------===//
+ let mayLoad = 1 in {
+   // IO expansion load records, all marked mayLoad.
+   // Every opcode in this block has custom swizzle patterns for some of the
+   // arguments.  Every record takes an address register ($addy) and an
+   // immediate resource id ($id), writes $dst, and has an empty final list
+   // argument.
+
+   // Scratch ("x$id[...]") and constant-buffer ("cb$id[...]") loads.
+   def SCRATCHLOAD : TwoInOneOut<IL_OP_MOV,
+       (outs GPRV4I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_MOV.Text, " $dst, x$id[$addy]"), []>;
+   def CBLOAD : TwoInOneOut<IL_OP_MOV,
+       (outs GPRV4I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_MOV.Text, " $dst, cb$id[$addy]"), []>;
+
+   // GDS loads.  The _Y/_Z/_W records are declared identically to GDSLOAD;
+   // presumably they differ only in the custom swizzle applied -- confirm.
+   def GDSLOAD : TwoInOneOut<IL_OP_GDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def GDSLOAD_Y : TwoInOneOut<IL_OP_GDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def GDSLOAD_Z : TwoInOneOut<IL_OP_GDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def GDSLOAD_W : TwoInOneOut<IL_OP_GDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+
+   // LDS vector loads.  The asm string names $addy twice.
+   def LDSLOADVEC : TwoInOneOut<IL_OP_LDS_LOAD_VEC,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+   def LDSLOADVEC_v2i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC,
+       (outs GPRV2I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+   def LDSLOADVEC_v4i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC,
+       (outs GPRV4I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+
+   // LDS scalar loads: dword plus the BYTE/UBYTE/SHORT/USHORT opcodes.  The
+   // _Y/_Z/_W records are declared identically to LDSLOAD.
+   def LDSLOAD : TwoInOneOut<IL_OP_LDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_i8 : TwoInOneOut<IL_OP_LDS_LOAD_BYTE,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_u8 : TwoInOneOut<IL_OP_LDS_LOAD_UBYTE,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_i16 : TwoInOneOut<IL_OP_LDS_LOAD_SHORT,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_u16 : TwoInOneOut<IL_OP_LDS_LOAD_USHORT,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_Y : TwoInOneOut<IL_OP_LDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_Z : TwoInOneOut<IL_OP_LDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def LDSLOAD_W : TwoInOneOut<IL_OP_LDS_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+
+   // Arena UAV loads.  The access width is spelled in the asm string as
+   // _size(byte), _size(short) or _size(dword).
+   def UAVARENALOAD_i8 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(byte) $dst, $addy"), []>;
+   def UAVARENALOAD_i16 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(short) $dst, $addy"), []>;
+   def UAVARENALOAD_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+   def UAVARENALOAD_Y_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+   def UAVARENALOAD_Z_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+   def UAVARENALOAD_W_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+
+   // Raw UAV loads; the CACHED records append "_cached" after the id.
+   def UAVRAWLOAD_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def UAVRAWLOAD_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRV2I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def UAVRAWLOAD_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRV4I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+   def UAVRAWLOADCACHED_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRI32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+   def UAVRAWLOADCACHED_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRV2I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+   def UAVRAWLOADCACHED_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD,
+       (outs GPRV4I32:$dst), (ins GPRI32:$addy, i32imm:$id),
+       !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+ }
+ let mayStore = 1 in {
+ // Scratch stores through IL_OP_MOV into "x$id[$addy]".
+ // Every opcode in this group has custom swizzle patterns for some of the
+ // arguments.  The address register $addy is declared in the outs list and
+ // the stored value ($data) plus the immediate $id in the ins list.  The
+ // suffixed records append a write mask to the destination (.x___, ._y__,
+ // .__z_, .___w, .xy__, .__zw); the final list argument is always empty.
+ def SCRATCHSTORE : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRV4I32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy], $data"), []>;
+ def SCRATCHSTORE_X : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRI32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy].x___, $data"), []>;
+ def SCRATCHSTORE_Y : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRI32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy]._y__, $data"), []>;
+ def SCRATCHSTORE_Z : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRI32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy].__z_, $data"), []>;
+ def SCRATCHSTORE_W : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRI32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy].___w, $data"), []>;
+ def SCRATCHSTORE_XY : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRV2I32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy].xy__, $data"), []>;
+ def SCRATCHSTORE_ZW : TwoInOneOut<IL_OP_MOV,
+     (outs GPRI32:$addy), (ins GPRV2I32:$data, i32imm:$id),
+     !strconcat(IL_OP_MOV.Text, " x$id[$addy].__zw, $data"), []>;
+ // GDS stores through IL_OP_GDS_STORE.
+ // Every opcode in this group has custom swizzle patterns for some of the
+ // arguments.  The address register $addy is declared in the outs list, the
+ // stored value $src and the immediate $id in the ins list.  The _Y/_Z/_W
+ // records are declared identically to GDSSTORE; presumably they differ only
+ // in the custom swizzle applied -- confirm.
+ def GDSSTORE : TwoInOneOut<IL_OP_GDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def GDSSTORE_Y : TwoInOneOut<IL_OP_GDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def GDSSTORE_Z : TwoInOneOut<IL_OP_GDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def GDSSTORE_W : TwoInOneOut<IL_OP_GDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // LDS vector stores through IL_OP_LDS_STORE_VEC (ThreeInOneOut records).
+ // Every opcode in this group has custom swizzle patterns for some of the
+ // arguments.  $mem is declared in the outs list with the same register class
+ // as $src; the address $addy, the value $src and the immediate $id are
+ // inputs.  The asm string names $src twice.
+ def LDSSTOREVEC : ThreeInOneOut<IL_OP_LDS_STORE_VEC,
+     (outs GPRI32:$mem), (ins GPRI32:$addy, GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ def LDSSTOREVEC_v2i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC,
+     (outs GPRV2I32:$mem), (ins GPRI32:$addy, GPRV2I32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ def LDSSTOREVEC_v4i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC,
+     (outs GPRV4I32:$mem), (ins GPRI32:$addy, GPRV4I32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ // LDS scalar stores: IL_OP_LDS_STORE plus the BYTE and SHORT opcodes.
+ // Every opcode in this group has custom swizzle patterns for some of the
+ // arguments.  The address register $addy is declared in the outs list, the
+ // stored value $src and the immediate $id in the ins list.  The _Y/_Z/_W
+ // records are declared identically to LDSSTORE.
+ def LDSSTORE : TwoInOneOut<IL_OP_LDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def LDSSTORE_i8 : TwoInOneOut<IL_OP_LDS_STORE_BYTE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE_BYTE.Text, "_id($id) $addy, $src"), []>;
+ def LDSSTORE_i16 : TwoInOneOut<IL_OP_LDS_STORE_SHORT,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE_SHORT.Text, "_id($id) $addy, $src"), []>;
+ def LDSSTORE_Y : TwoInOneOut<IL_OP_LDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def LDSSTORE_Z : TwoInOneOut<IL_OP_LDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ def LDSSTORE_W : TwoInOneOut<IL_OP_LDS_STORE,
+     (outs GPRI32:$addy), (ins GPRI32:$src, i32imm:$id),
+     !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_i8 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI8:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(byte) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_i16 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI16:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(short) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_Y_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_Z_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE_W_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i32imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE_i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRI32:$mem),
+ (ins GPRI32:$addy, GPRI32:$src, i32imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV2I32:$mem),
+ (ins GPRI32:$addy, GPRV2I32:$src, i32imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV4I32:$mem),
+ (ins GPRI32:$addy, GPRV4I32:$src, i32imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ }
+}
+let Predicates = [Has64BitPtr] in {
+ // 64-bit-pointer variants of the per-address-space store and load records.
+ // Every defm below instantiates a multiclass (defined outside this section)
+ // with a "!..." description string; the non-truncating forms additionally
+ // pass a fragment name (e.g. global_store, local_load) as second argument.
+ // All records created here carry isCodeGenOnly = 1.
+ // NOTE(review): presumably these are placeholders that the IO expansion
+ // step rewrites into the concrete instructions declared further down --
+ // confirm against the expansion pass.
+ let isCodeGenOnly=1 in {
+ //===----------------------------------------------------------------------===//
+ // Store Memory Operations
+ //===----------------------------------------------------------------------===//
+ defm GLOBALTRUNCSTORE64 : GTRUNCSTORE64<"!global trunc store">;
+ defm GLOBALSTORE64 : STORE64<"!global store" , global_store>;
+ defm LOCALTRUNCSTORE64 : LTRUNCSTORE64<"!local trunc store">;
+ defm LOCALSTORE64 : STORE64<"!local store" , local_store>;
+ defm PRIVATETRUNCSTORE64 : PTRUNCSTORE64<"!private trunc store">;
+ defm PRIVATESTORE64 : STORE64<"!private store" , private_store>;
+ defm REGIONTRUNCSTORE64 : RTRUNCSTORE64<"!region trunc store">;
+ defm REGIONSTORE64 : STORE64<"!region hw store" , region_store>;
+
+
+ //===---------------------------------------------------------------------===//
+ // Load Memory Operations
+ //===---------------------------------------------------------------------===//
+ // One plain, zext, sext and aext load per address space (global, private,
+ // constant pool, constant, local, region).
+ defm GLOBALLOAD64 : LOAD64<"!global load" , global_load>;
+ defm GLOBALZEXTLOAD64 : LOAD64<"!global zext load" , global_zext_load>;
+ defm GLOBALSEXTLOAD64 : LOAD64<"!global sext load" , global_sext_load>;
+ defm GLOBALAEXTLOAD64 : LOAD64<"!global aext load" , global_aext_load>;
+ defm PRIVATELOAD64 : LOAD64<"!private load" , private_load>;
+ defm PRIVATEZEXTLOAD64 : LOAD64<"!private zext load" , private_zext_load>;
+ defm PRIVATESEXTLOAD64 : LOAD64<"!private sext load" , private_sext_load>;
+ defm PRIVATEAEXTLOAD64 : LOAD64<"!private aext load" , private_aext_load>;
+ defm CPOOLLOAD64 : LOAD64<"!constant pool load" , cp_load>;
+ defm CPOOLZEXTLOAD64 : LOAD64<"!constant pool zext load", cp_zext_load>;
+ defm CPOOLSEXTLOAD64 : LOAD64<"!constant pool sext load", cp_sext_load>;
+ // NOTE(review): the word order here ("aext pool") differs from the zext and
+ // sext siblings ("pool zext" / "pool sext"); confirm whether that is intended.
+ defm CPOOLAEXTLOAD64 : LOAD64<"!constant aext pool load", cp_aext_load>;
+ defm CONSTANTLOAD64 : LOAD64<"!constant load" , constant_load>;
+ defm CONSTANTZEXTLOAD64 : LOAD64<"!constant zext load" , constant_zext_load>;
+ defm CONSTANTSEXTLOAD64 : LOAD64<"!constant sext load" , constant_sext_load>;
+ defm CONSTANTAEXTLOAD64 : LOAD64<"!constant aext load" , constant_aext_load>;
+ defm LOCALLOAD64 : LOAD64<"!local load" , local_load>;
+ defm LOCALZEXTLOAD64 : LOAD64<"!local zext load" , local_zext_load>;
+ defm LOCALSEXTLOAD64 : LOAD64<"!local sext load" , local_sext_load>;
+ defm LOCALAEXTLOAD64 : LOAD64<"!local aext load" , local_aext_load>;
+ defm REGIONLOAD64 : LOAD64<"!region load" , region_load>;
+ defm REGIONZEXTLOAD64 : LOAD64<"!region zext load" , region_zext_load>;
+ defm REGIONSEXTLOAD64 : LOAD64<"!region sext load" , region_sext_load>;
+ defm REGIONAEXTLOAD64 : LOAD64<"!region aext load" , region_aext_load>;
+ }
+
+
+ //===---------------------------------------------------------------------===//
+ // IO Expansion Load/Store Instructions
+ //===---------------------------------------------------------------------===//
+ // Concrete load instructions for the 64-bit-pointer configuration. All
+ // records in this scope are flagged mayLoad = 1, take a GPRI32 address
+ // ($addy) plus an i64imm resource id ($id), and have an empty selection
+ // pattern list ([]), so nothing here is chosen by pattern matching.
+ // The _Y/_Z/_W records repeat the operand list and asm string of their base
+ // record; per the per-record comments the component selection is handled by
+ // custom swizzle code outside this file section.
+ let mayLoad = 1 in {
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHLOAD64 : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " $dst, x$id[$addy]"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def CBLOAD64 : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " $dst, cb$id[$addy]"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSLOAD64 : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSLOAD64_Y : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSLOAD64_Z : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSLOAD64_W : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // The LDS vector loads print $addy twice in their asm string.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOADVEC64 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOADVEC64_v2i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV2I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOADVEC64_v4i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV4I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64 : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // The _i8/_u8/_i16/_u16 records use the BYTE/UBYTE/SHORT/USHORT opcodes.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_i8 : TwoInOneOut<IL_OP_LDS_LOAD_BYTE, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_u8 : TwoInOneOut<IL_OP_LDS_LOAD_UBYTE, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_i16 : TwoInOneOut<IL_OP_LDS_LOAD_SHORT, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_u16 : TwoInOneOut<IL_OP_LDS_LOAD_USHORT, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_Y : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_Z : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSLOAD64_W : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // Arena UAV loads encode the access width in the asm string through the
+ // _size(byte|short|dword) suffix.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_i8 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(byte) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_i16 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(short) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_Y_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_Z_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENALOAD64_W_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOAD64_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOAD64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOAD64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+ // The CACHED records differ from the plain raw UAV loads only by the
+ // "_cached" suffix in the asm string.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOADCACHED64_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOADCACHED64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWLOADCACHED64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+ (ins GPRI32:$addy, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+ }
+ // Concrete store instructions for the 64-bit-pointer configuration. All
+ // records in this scope are flagged mayStore = 1, take an i64imm resource
+ // id ($id) and have an empty selection pattern list ([]).
+ // Note that the address operand ($addy, or $mem for the vector/raw forms) is
+ // declared in the (outs ...) list even though these are stores.
+ let mayStore = 1 in {
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64 : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRV4I32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy], $data"), []>;
+ // The _X/_Y/_Z/_W/_XY/_ZW scratch stores hard-code the destination write
+ // mask (.x___, ._y__, ...) in the asm string.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_X : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRI32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy].x___, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_Y : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRI32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy]._y__, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_Z : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRI32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy].__z_, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_W : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRI32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy].___w, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_XY : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRV2I32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy].xy__, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def SCRATCHSTORE64_ZW : TwoInOneOut<IL_OP_MOV, (outs GPRI32:$addy),
+ (ins GPRV2I32:$data, i64imm:$id),
+ !strconcat(IL_OP_MOV.Text, " x$id[$addy].__zw, $data"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSSTORE64 : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSSTORE64_Y : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSSTORE64_Z : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def GDSSTORE64_W : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // The LDS vector stores print $src twice in their asm string.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTOREVEC64 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRI32:$mem),
+ (ins GPRI32:$addy, GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTOREVEC64_v2i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV2I32:$mem),
+ (ins GPRI32:$addy, GPRV2I32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTOREVEC64_v4i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV4I32:$mem),
+ (ins GPRI32:$addy, GPRV4I32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64 : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64_i8 : TwoInOneOut<IL_OP_LDS_STORE_BYTE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE_BYTE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64_i16 : TwoInOneOut<IL_OP_LDS_STORE_SHORT, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE_SHORT.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64_Y : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64_Z : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def LDSSTORE64_W : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+ // Arena UAV stores encode the access width through the
+ // _size(byte|short|dword) suffix; the i8/i16 forms take GPRI8/GPRI16 sources.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_i8 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI8:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(byte) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_i16 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI16:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(short) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_Y_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_Z_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVARENASTORE64_W_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRI32:$addy),
+ (ins GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+ "_id($id)_size(dword) $addy, $src"), []>;
+ // NOTE(review): the raw UAV stores list three inputs ($addy, $src, $id) but
+ // derive from TwoInOneOut, whereas the LDS vector stores with the same
+ // operand shape derive from ThreeInOneOut -- confirm this is intentional.
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE64_i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRI32:$mem),
+ (ins GPRI32:$addy, GPRI32:$src, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV2I32:$mem),
+ (ins GPRI32:$addy, GPRV2I32:$src, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ // This opcode has custom swizzle patterns for some of the arguments.
+ def UAVRAWSTORE64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV4I32:$mem),
+ (ins GPRI32:$addy, GPRV4I32:$src, i64imm:$id),
+ !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+ }
+}
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns, this eventually will be a
+// separate pass
+//===---------------------------------------------------------------------===//
+// Both branch records are flagged isTerminator = 1.
+// BRANCH selects the unconditional `br` node; its asm string is only the text
+// "; Pseudo unconditional branch instruction". BRANCH_COND is expanded from
+// the BranchConditional multiclass (defined elsewhere) using IL_brcond.
+let isTerminator = 1 in {
+ def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+//===---------------------------------------------------------------------===//
+// return instructions
+//===---------------------------------------------------------------------===//
+// RETURN selects the IL_retflag node and prints IL_OP_RET's text. It has no
+// fixed operands (variable_ops only) and is flagged as terminator, return,
+// barrier and hasCtrlDep.
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RETURN : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
+ IL_OP_RET.Text, [(IL_retflag)]>;
+}
+//===---------------------------------------------------------------------===//
+// Lower and raise the stack x amount
+//===---------------------------------------------------------------------===//
+// Pseudo instruction selected for IL_callseq_start; carries one immediate
+// ($amt) and prints only a "; begin of call sequence" text line.
+def ADJCALLSTACKDOWN : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt),
+ "; begin of call sequence $amt",
+ [(IL_callseq_start timm:$amt)]>;
+// Pseudo instruction selected for IL_callseq_end; carries two immediates
+// ($amt1, $amt2) and prints only a "; end of call sequence" text line.
+def ADJCALLSTACKUP : ILFormat<IL_PSEUDO_INST, (outs), (ins i32imm:$amt1,
+ i32imm:$amt2),
+ "; end of call sequence $amt1 $amt2",
+ [(IL_callseq_end timm:$amt1, timm:$amt2)]>;
+
+//===---------------------------------------------------------------------===//
+// Handle a function call
+//===---------------------------------------------------------------------===//
+// CALL takes a calltarget plus variable_ops and has no selection pattern.
+// The enclosing let marks it isCall = 1 and gives it implicit register
+// lists: Defs covers R110 through R255 and Uses covers R1 through R109.
+// NOTE(review): presumably R1-R109 form the argument/preserved range and
+// R110-R255 the call-clobbered range -- confirm against the calling
+// convention definition.
+let isCall = 1,
+ Defs = [
+ R110, R111,
+ R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124,
+ R125, R126, R127,
+ R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140,
+ R141, R142, R143,
+ R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156,
+ R157, R158, R159,
+ R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172,
+ R173, R174, R175,
+ R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188,
+ R189, R190, R191,
+ R192, R193, R194, R195, R196, R197, R198, R199, R200, R201, R202, R203, R204,
+ R205, R206, R207,
+ R208, R209, R210, R211, R212, R213, R214, R215, R216, R217, R218, R219, R220,
+ R221, R222, R223,
+ R224, R225, R226, R227, R228, R229, R230, R231, R232, R233, R234, R235, R236,
+ R237, R238, R239,
+ R240, R241, R242, R243, R244, R245, R246, R247, R248, R249, R250, R251, R252,
+ R253, R254, R255
+ ]
+ ,
+ Uses = [
+ R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
+ R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63,
+ R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79,
+ R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95,
+ R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109
+ ]
+ in {
+ def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
+ (ins calltarget:$dst, variable_ops),
+ !strconcat(IL_OP_CALL.Text, " $dst"), []>;
+ }
+
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+// Structured control-flow instructions. Every record in this scope is
+// flagged isTerminator = 1 and the explicit defs all have an empty selection
+// pattern list, so none of them is chosen by pattern matching here.
+// SWITCH and CASE take one GPRI32 operand; the remaining explicit defs take
+// no operands and print only their opcode text. The conditional forms come
+// from the BranchInstr / BranchInstr2 multiclasses defined elsewhere.
+let isTerminator=1 in {
+ def SWITCH : ILFormat<IL_OP_SWITCH, (outs), (ins GPRI32:$src),
+ !strconcat(IL_OP_SWITCH.Text, " $src"), []>;
+ def CASE : ILFormat<IL_OP_CASE, (outs), (ins GPRI32:$src),
+ !strconcat(IL_OP_CASE.Text, " $src"), []>;
+ def BREAK : ILFormat<IL_OP_BREAK, (outs), (ins),
+ IL_OP_BREAK.Text, []>;
+ def CONTINUE : ILFormat<IL_OP_CONTINUE, (outs), (ins),
+ IL_OP_CONTINUE.Text, []>;
+ def DEFAULT : ILFormat<IL_OP_DEFAULT, (outs), (ins),
+ IL_OP_DEFAULT.Text, []>;
+ def ELSE : ILFormat<IL_OP_ELSE, (outs), (ins),
+ IL_OP_ELSE.Text, []>;
+ def ENDSWITCH : ILFormat<IL_OP_ENDSWITCH, (outs), (ins),
+ IL_OP_ENDSWITCH.Text, []>;
+ def ENDMAIN : ILFormat<IL_OP_ENDMAIN, (outs), (ins),
+ IL_OP_ENDMAIN.Text, []>;
+ def END : ILFormat<IL_OP_END, (outs), (ins),
+ IL_OP_END.Text, []>;
+ def ENDFUNC : ILFormat<IL_OP_ENDFUNC, (outs), (ins),
+ IL_OP_ENDFUNC.Text, []>;
+ def ENDIF : ILFormat<IL_OP_ENDIF, (outs), (ins),
+ IL_OP_ENDIF.Text, []>;
+ def WHILELOOP : ILFormat<IL_OP_WHILE, (outs), (ins),
+ IL_OP_WHILE.Text, []>;
+ def ENDLOOP : ILFormat<IL_OP_ENDLOOP, (outs), (ins),
+ IL_OP_ENDLOOP.Text, []>;
+ def FUNC : ILFormat<IL_OP_FUNC, (outs), (ins),
+ IL_OP_FUNC.Text, []>;
+ def RETDYN : ILFormat<IL_OP_RET_DYN, (outs), (ins),
+ IL_OP_RET_DYN.Text, []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<IL_OP_IF_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<IL_OP_IF_LOGICALZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<IL_OP_BREAK_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<IL_OP_BREAK_LOGICALZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<IL_OP_CONTINUE_LOGICALNZ>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<IL_OP_CONTINUE_LOGICALZ>;
+ defm IFC : BranchInstr2<IL_OP_IFC>;
+ defm BREAKC : BranchInstr2<IL_OP_BREAKC>;
+ defm CONTINUEC : BranchInstr2<IL_OP_CONTINUEC>;
+}
+// TRAP selects the generic `trap` node but is built on IL_OP_NOP and prints
+// IL_OP_NOP's text; it is flagged terminator, barrier and hasCtrlDep.
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
+ IL_OP_NOP.Text, [(trap)]>;
+}
+
+//===---------------------------------------------------------------------===//
+//----------------- Work Item Functions - OpenCL 6.11.1 ---------------------//
+//===---------------------------------------------------------------------===//
+// Each record below selects one int_AMDIL_get_* intrinsic and prints a mov
+// (IL_OP_MOV) from a fixed source operand: constant buffer 0 (cb0[n]) or one
+// of the registers r1021/r1022/r1023. None of them takes an input operand.
+// All records are flagged isCall = 1 and isAsCheapAsAMove = 1.
+let isCall = 1, isAsCheapAsAMove = 1 in {
+ def GET_WORK_DIM : ILFormat<IL_OP_MOV, (outs GPRI32:$dst), (ins),
+ !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].w"),
+ [(set GPRI32:$dst, (int_AMDIL_get_work_dim))]>;
+
+ def GET_GLOBAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1021.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_id))]>;
+
+ def GET_LOCAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1022.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_local_id))]>;
+
+ def GET_GROUP_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1023.xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_group_id))]>;
+
+ def GET_GLOBAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[0].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_size))]>;
+
+ def GET_LOCAL_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[1].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_local_size))]>;
+
+ def GET_NUM_GROUPS : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[2].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_num_groups))]>;
+
+ def GET_GLOBAL_OFFSET : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].xyz0"),
+ [(set GPRV4I32:$dst, (int_AMDIL_get_global_offset))]>;
+
+ // The printf offset/size intrinsics get one record per pointer width; the
+ // Has64BitPtr / Has32BitPtr predicates choose which cb0 location is read.
+ // NOTE(review): GET_PRINTF_SIZE_i64 reads cb0[9].x0, the same cb0 row that
+ // GET_GLOBAL_OFFSET reads as cb0[9].xyz0 -- confirm the constant buffer
+ // layout for the 64-bit case.
+ let Predicates = [Has64BitPtr] in {
+ def GET_PRINTF_OFFSET_i64: ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].zw"),
+ [(set GPRI32:$dst, (int_AMDIL_get_printf_offset))]>;
+ def GET_PRINTF_SIZE_i64 : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].x0"),
+ [(set GPRI32:$dst, (int_AMDIL_get_printf_size))]>;
+ }
+ let Predicates = [Has32BitPtr] in {
+ def GET_PRINTF_OFFSET_i32 : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].y0"),
+ [(set GPRI32:$dst, (int_AMDIL_get_printf_offset))]>;
+ def GET_PRINTF_SIZE_i32 : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].z0"),
+ [(set GPRI32:$dst, (int_AMDIL_get_printf_size))]>;
+ }
+}
+//===---------------------------------------------------------------------===//
+//------------- Synchronization Functions - OpenCL 6.11.9 -------------------//
+//===---------------------------------------------------------------------===//
+// Fence records, all flagged isCall = 1. Each one selects a distinct
+// int_AMDIL_fence* intrinsic that takes a GPRI32 $flag operand and prints a
+// fixed fence mnemonic; the asm strings do not reference $flag.
+// The records come in three groups by opcode: IL_OP_FENCE,
+// IL_OP_FENCE_READ_ONLY and IL_OP_FENCE_WRITE_ONLY, each with an
+// unsuffixed / _LOCAL / _GLOBAL / _REGION form whose mnemonic names the
+// lds / memory / gds parts accordingly.
+let isCall=1 in {
+
+ def FENCE : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_lds_memory_gds",
+ [(int_AMDIL_fence GPRI32:$flag)]>;
+
+ def FENCE_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_lds",
+ [(int_AMDIL_fence_local GPRI32:$flag)]>;
+
+ def FENCE_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_memory",
+ [(int_AMDIL_fence_global GPRI32:$flag)]>;
+
+ def FENCE_REGION : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
+ "fence_gds",
+ [(int_AMDIL_fence_region GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_lds_gds_memory_mem_read_only",
+ [(int_AMDIL_fence_read_only GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_lds_mem_read_only",
+ [(int_AMDIL_fence_read_only_local GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_mem_read_only",
+ [(int_AMDIL_fence_read_only_global GPRI32:$flag)]>;
+
+ def FENCE_READ_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_gds_mem_read_only",
+ [(int_AMDIL_fence_read_only_region GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_lds_gds_memory_mem_write_only",
+ [(int_AMDIL_fence_write_only GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_lds_mem_write_only",
+ [(int_AMDIL_fence_write_only_local GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_mem_write_only",
+ [(int_AMDIL_fence_write_only_global GPRI32:$flag)]>;
+
+ def FENCE_WRITE_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs),
+ (ins GPRI32:$flag),
+ "fence_gds_mem_write_only",
+ [(int_AMDIL_fence_write_only_region GPRI32:$flag)]>;
+}
+// EARLY_EXIT selects int_AMDIL_early_exit and prints IL_OP_RET_LOGICALNZ's
+// text followed by the GPRI32 $flag operand; it is flagged isReturn = 1.
+let isReturn = 1 in {
+ def EARLY_EXIT : UnaryOpNoRet<IL_OP_RET_LOGICALNZ, (outs),
+ (ins GPRI32:$flag),
+ !strconcat(IL_OP_RET_LOGICALNZ.Text, " $flag"),
+ [(int_AMDIL_early_exit GPRI32:$flag)]>;
+}
+// Selects int_AMDIL_media_unpack_byte_0: v4i32 source register in, v4f32
+// result out, printed with the IL_OP_UNPACK_0 opcode text.
+def MEDIA_UNPACK_0 : OneInOneOut<IL_OP_UNPACK_0, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_0.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_0 GPRV4I32:$src)))]>;
+// Selects int_AMDIL_media_unpack_byte_1: v4i32 source register in, v4f32
+// result out, printed with the IL_OP_UNPACK_1 opcode text.
+def MEDIA_UNPACK_1 : OneInOneOut<IL_OP_UNPACK_1, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_1.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_1 GPRV4I32:$src)))]>;
+// Selects int_AMDIL_media_unpack_byte_2: v4i32 source register in, v4f32
+// result out, printed with the IL_OP_UNPACK_2 opcode text.
+def MEDIA_UNPACK_2 : OneInOneOut<IL_OP_UNPACK_2, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_2.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_2 GPRV4I32:$src)))]>;
+def MEDIA_UNPACK_3 : OneInOneOut<IL_OP_UNPACK_3, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(IL_OP_UNPACK_3.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst,
+ (v4f32 (int_AMDIL_media_unpack_byte_3 GPRV4I32:$src)))]>;
+let Predicates = [Has32BitPtr] in {
+// Image functions, 32-bit pointer forms (MEMI32 operand, ADDR pattern): sampled reads, then info queries, then writes.
+def IMAGE1D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // operands: $ptr (printed as the resource), $sampler, v4f32 $addy; result is v4i32
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text, // asm: opcode text followed by the modifier string below
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1DA_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text as IMAGE1D_READ; only the matched intrinsic differs
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_array_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 2D intrinsic
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2DA_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 2D array intrinsic
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_array_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE3D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 3D intrinsic
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image3d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // unnormalized variants: coordtype text and intrinsic change, operands do not
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1DA_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_array_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2DA_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_array_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE3D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image3d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // image info queries: MEMI32 $ptr in, v4i32 $dst out
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"), // asm: opcode text, then destination and pointer
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_info0 ADDR:$ptr))]>;
+def IMAGE1D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // NOTE(review): INFO0 and INFO1 print identical asm and differ only in the intrinsic matched - confirm how they are told apart later
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_info1 ADDR:$ptr))]>;
+def IMAGE1DA_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 1D array pair
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info0 ADDR:$ptr))]>;
+def IMAGE1DA_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info1 ADDR:$ptr))]>;
+def IMAGE2D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 2D pair
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_info0 ADDR:$ptr))]>;
+def IMAGE2D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_info1 ADDR:$ptr))]>;
+def IMAGE2DA_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 2D array pair
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info0 ADDR:$ptr))]>;
+def IMAGE2DA_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info1 ADDR:$ptr))]>;
+def IMAGE3D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 3D pair
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image3d_info0 ADDR:$ptr))]>;
+def IMAGE3D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image3d_info1 ADDR:$ptr))]>;
+def IMAGE1D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // image writes: no result; operands are $ptr, $addy and v4i32 $data
+ (ins MEMI32:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text, // asm: opcode text followed by the modifier string below
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image1d_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE1DA_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // same asm text as IMAGE1D_WRITE; only the matched intrinsic differs
+ (ins MEMI32:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image1d_array_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE2D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+ (ins MEMI32:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image2d_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE2DA_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+ (ins MEMI32:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image2d_array_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE3D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // 3D takes a v4i32 address; the other writes take v2i32
+ (ins MEMI32:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image3d_write ADDR:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+let hasSideEffects = 1, isNotDuplicable = 1 in { // both flags apply to every def up to the closing brace
+ // Atomics built on the *NoRet classes (declared outside this block). ATOM_G_*: IL_OP_UAV_* opcodes, _id($id) modifier, atom_g_*_noret fragments.
+ def ATOM_G_ADD_NORET : BinAtomNoRet<IL_OP_UAV_ADD,
+ "_id($id)", atom_g_add_noret>;
+ def ATOM_G_AND_NORET : BinAtomNoRet<IL_OP_UAV_AND,
+ "_id($id)", atom_g_and_noret>;
+ def ATOM_G_MAX_NORET : BinAtomNoRet<IL_OP_UAV_MAX,
+ "_id($id)", atom_g_max_noret>;
+ def ATOM_G_MIN_NORET : BinAtomNoRet<IL_OP_UAV_MIN,
+ "_id($id)", atom_g_min_noret>;
+ def ATOM_G_UMAX_NORET : BinAtomNoRet<IL_OP_UAV_UMAX,
+ "_id($id)", atom_g_umax_noret>;
+ def ATOM_G_UMIN_NORET : BinAtomNoRet<IL_OP_UAV_UMIN,
+ "_id($id)", atom_g_umin_noret>;
+ def ATOM_G_OR_NORET : BinAtomNoRet<IL_OP_UAV_OR,
+ "_id($id)", atom_g_or_noret>;
+ def ATOM_G_RSUB_NORET : BinAtomNoRet<IL_OP_UAV_RSUB,
+ "_id($id)", atom_g_rsub_noret>;
+ def ATOM_G_SUB_NORET : BinAtomNoRet<IL_OP_UAV_SUB,
+ "_id($id)", atom_g_sub_noret>;
+ def ATOM_G_XOR_NORET : BinAtomNoRet<IL_OP_UAV_XOR,
+ "_id($id)", atom_g_xor_noret>;
+ def ATOM_G_INC_NORET : BinAtomNoRet<IL_OP_UAV_INC,
+ "_id($id)", atom_g_inc_noret>;
+ def ATOM_G_DEC_NORET : BinAtomNoRet<IL_OP_UAV_DEC,
+ "_id($id)", atom_g_dec_noret>;
+ def ATOM_G_CMPXCHG_NORET : CmpXChgNoRet<IL_OP_UAV_CMP, // compare-exchange uses its own class
+ "_id($id)", atom_g_cmpxchg_noret>;
+ def ATOM_A_ADD_NORET : BinAtomNoRet<IL_OP_UAV_ADD, // ATOM_A_*: same opcodes and atom_g_* fragments as ATOM_G_*; the modifier adds _arena
+ "_id($id)_arena", atom_g_add_noret>; // NOTE(review): how selection chooses between the G and A defs is not visible here - confirm
+ def ATOM_A_AND_NORET : BinAtomNoRet<IL_OP_UAV_AND,
+ "_id($id)_arena", atom_g_and_noret>;
+ def ATOM_A_MAX_NORET : BinAtomNoRet<IL_OP_UAV_MAX,
+ "_id($id)_arena", atom_g_max_noret>;
+ def ATOM_A_MIN_NORET : BinAtomNoRet<IL_OP_UAV_MIN,
+ "_id($id)_arena", atom_g_min_noret>;
+ def ATOM_A_UMAX_NORET : BinAtomNoRet<IL_OP_UAV_UMAX,
+ "_id($id)_arena", atom_g_umax_noret>;
+ def ATOM_A_UMIN_NORET : BinAtomNoRet<IL_OP_UAV_UMIN,
+ "_id($id)_arena", atom_g_umin_noret>;
+ def ATOM_A_OR_NORET : BinAtomNoRet<IL_OP_UAV_OR,
+ "_id($id)_arena", atom_g_or_noret>;
+ def ATOM_A_RSUB_NORET : BinAtomNoRet<IL_OP_UAV_RSUB,
+ "_id($id)_arena", atom_g_rsub_noret>;
+ def ATOM_A_SUB_NORET : BinAtomNoRet<IL_OP_UAV_SUB,
+ "_id($id)_arena", atom_g_sub_noret>;
+ def ATOM_A_XOR_NORET : BinAtomNoRet<IL_OP_UAV_XOR,
+ "_id($id)_arena", atom_g_xor_noret>;
+ def ATOM_A_INC_NORET : BinAtomNoRet<IL_OP_UAV_INC,
+ "_id($id)_arena", atom_g_inc_noret>;
+ def ATOM_A_DEC_NORET : BinAtomNoRet<IL_OP_UAV_DEC,
+ "_id($id)_arena", atom_g_dec_noret>;
+ def ATOM_A_CMPXCHG_NORET : CmpXChgNoRet<IL_OP_UAV_CMP,
+ "_id($id)_arena", atom_g_cmpxchg_noret>;
+ def ATOM_L_ADD_NORET : BinAtomNoRet<IL_OP_LDS_ADD, // ATOM_L_*: IL_OP_LDS_* opcodes, _resource($id) modifier, atom_l_* fragments
+ "_resource($id)", atom_l_add_noret>;
+ def ATOM_L_AND_NORET : BinAtomNoRet<IL_OP_LDS_AND,
+ "_resource($id)", atom_l_and_noret>;
+ def ATOM_L_MAX_NORET : BinAtomNoRet<IL_OP_LDS_MAX,
+ "_resource($id)", atom_l_max_noret>;
+ def ATOM_L_MIN_NORET : BinAtomNoRet<IL_OP_LDS_MIN,
+ "_resource($id)", atom_l_min_noret>;
+ def ATOM_L_UMAX_NORET : BinAtomNoRet<IL_OP_LDS_UMAX,
+ "_resource($id)", atom_l_umax_noret>;
+ def ATOM_L_UMIN_NORET : BinAtomNoRet<IL_OP_LDS_UMIN,
+ "_resource($id)", atom_l_umin_noret>;
+ def ATOM_L_MSKOR_NORET : TriAtomNoRet<IL_OP_LDS_MSKOR, // MSKOR uses TriAtomNoRet rather than BinAtomNoRet
+ "_resource($id)", atom_l_mskor_noret>;
+ def ATOM_L_OR_NORET : BinAtomNoRet<IL_OP_LDS_OR,
+ "_resource($id)", atom_l_or_noret>;
+ def ATOM_L_RSUB_NORET : BinAtomNoRet<IL_OP_LDS_RSUB,
+ "_resource($id)", atom_l_rsub_noret>;
+ def ATOM_L_SUB_NORET : BinAtomNoRet<IL_OP_LDS_SUB,
+ "_resource($id)", atom_l_sub_noret>;
+ def ATOM_L_XOR_NORET : BinAtomNoRet<IL_OP_LDS_XOR,
+ "_resource($id)", atom_l_xor_noret>;
+ def ATOM_L_INC_NORET : BinAtomNoRet<IL_OP_LDS_INC,
+ "_resource($id)", atom_l_inc_noret>;
+ def ATOM_L_DEC_NORET : BinAtomNoRet<IL_OP_LDS_DEC,
+ "_resource($id)", atom_l_dec_noret>;
+ def ATOM_L_CMPXCHG_NORET : TriAtomNoRet<IL_OP_LDS_CMP, // NOTE(review): TriAtomNoRet here, CmpXChgNoRet for the G, A and R defs - confirm intended
+ "_resource($id)", atom_l_cmpxchg_noret>;
+ def ATOM_R_ADD_NORET : BinAtomNoRet<IL_OP_GDS_ADD, // ATOM_R_*: IL_OP_GDS_* opcodes, _resource($id) modifier, atom_r_* fragments
+ "_resource($id)", atom_r_add_noret>;
+ def ATOM_R_AND_NORET : BinAtomNoRet<IL_OP_GDS_AND,
+ "_resource($id)", atom_r_and_noret>;
+ def ATOM_R_MAX_NORET : BinAtomNoRet<IL_OP_GDS_MAX,
+ "_resource($id)", atom_r_max_noret>;
+ def ATOM_R_MIN_NORET : BinAtomNoRet<IL_OP_GDS_MIN,
+ "_resource($id)", atom_r_min_noret>;
+ def ATOM_R_UMAX_NORET : BinAtomNoRet<IL_OP_GDS_UMAX,
+ "_resource($id)", atom_r_umax_noret>;
+ def ATOM_R_UMIN_NORET : BinAtomNoRet<IL_OP_GDS_UMIN,
+ "_resource($id)", atom_r_umin_noret>;
+ def ATOM_R_MSKOR_NORET : TriAtomNoRet<IL_OP_GDS_MSKOR,
+ "_resource($id)", atom_r_mskor_noret>;
+ def ATOM_R_OR_NORET : BinAtomNoRet<IL_OP_GDS_OR,
+ "_resource($id)", atom_r_or_noret>;
+ def ATOM_R_RSUB_NORET : BinAtomNoRet<IL_OP_GDS_RSUB,
+ "_resource($id)", atom_r_rsub_noret>;
+ def ATOM_R_SUB_NORET : BinAtomNoRet<IL_OP_GDS_SUB,
+ "_resource($id)", atom_r_sub_noret>;
+ def ATOM_R_XOR_NORET : BinAtomNoRet<IL_OP_GDS_XOR,
+ "_resource($id)", atom_r_xor_noret>;
+ def ATOM_R_INC_NORET : BinAtomNoRet<IL_OP_GDS_INC,
+ "_resource($id)", atom_r_inc_noret>;
+ def ATOM_R_DEC_NORET : BinAtomNoRet<IL_OP_GDS_DEC,
+ "_resource($id)", atom_r_dec_noret>;
+ def ATOM_R_CMPXCHG_NORET : CmpXChgNoRet<IL_OP_GDS_CMP,
+ "_resource($id)", atom_r_cmpxchg_noret>;
+ def APPEND_ALLOC_NORET : AppendNoRet<IL_OP_APPEND_BUF_ALLOC, // append-buffer alloc and consume, _id($id) modifier
+ "_id($id)", append_alloc_noret>;
+ def APPEND_CONSUME_NORET : AppendNoRet<IL_OP_APPEND_BUF_CONSUME,
+ "_id($id)", append_consume_noret>;
+ // Atomics built on the returning classes (BinAtom, TriAtom, CmpXChg, Append). ATOM_* opcodes are the *_READ_* forms; XCHG exists only here, with no NORET def in this block.
+ def ATOM_G_ADD : BinAtom<IL_OP_UAV_READ_ADD,
+ "_id($id)", atom_g_add>;
+ def ATOM_G_AND : BinAtom<IL_OP_UAV_READ_AND,
+ "_id($id)", atom_g_and>;
+ def ATOM_G_MAX : BinAtom<IL_OP_UAV_READ_MAX,
+ "_id($id)", atom_g_max>;
+ def ATOM_G_MIN : BinAtom<IL_OP_UAV_READ_MIN,
+ "_id($id)", atom_g_min>;
+ def ATOM_G_UMAX : BinAtom<IL_OP_UAV_READ_UMAX,
+ "_id($id)", atom_g_umax>;
+ def ATOM_G_UMIN : BinAtom<IL_OP_UAV_READ_UMIN,
+ "_id($id)", atom_g_umin>;
+ def ATOM_G_OR : BinAtom<IL_OP_UAV_READ_OR,
+ "_id($id)", atom_g_or>;
+ def ATOM_G_RSUB : BinAtom<IL_OP_UAV_READ_RSUB,
+ "_id($id)", atom_g_rsub>;
+ def ATOM_G_SUB : BinAtom<IL_OP_UAV_READ_SUB,
+ "_id($id)", atom_g_sub>;
+ def ATOM_G_XOR : BinAtom<IL_OP_UAV_READ_XOR,
+ "_id($id)", atom_g_xor>;
+ def ATOM_G_INC : BinAtom<IL_OP_UAV_READ_INC,
+ "_id($id)", atom_g_inc>;
+ def ATOM_G_DEC : BinAtom<IL_OP_UAV_READ_DEC,
+ "_id($id)", atom_g_dec>;
+ def ATOM_G_XCHG : BinAtom<IL_OP_UAV_READ_XCHG,
+ "_id($id)", atom_g_xchg>;
+ def ATOM_G_CMPXCHG : CmpXChg<IL_OP_UAV_READ_CMPXCHG,
+ "_id($id)", atom_g_cmpxchg>;
+ // Arena atomic accesses: same opcodes and atom_g_* fragments as the ATOM_G_* defs; the modifier is _id($id)_arena.
+ def ATOM_A_ADD : BinAtom<IL_OP_UAV_READ_ADD,
+ "_id($id)_arena", atom_g_add>;
+ def ATOM_A_AND : BinAtom<IL_OP_UAV_READ_AND,
+ "_id($id)_arena", atom_g_and>;
+ def ATOM_A_MAX : BinAtom<IL_OP_UAV_READ_MAX,
+ "_id($id)_arena", atom_g_max>;
+ def ATOM_A_MIN : BinAtom<IL_OP_UAV_READ_MIN,
+ "_id($id)_arena", atom_g_min>;
+ def ATOM_A_UMAX : BinAtom<IL_OP_UAV_READ_UMAX,
+ "_id($id)_arena", atom_g_umax>;
+ def ATOM_A_UMIN : BinAtom<IL_OP_UAV_READ_UMIN,
+ "_id($id)_arena", atom_g_umin>;
+ def ATOM_A_OR : BinAtom<IL_OP_UAV_READ_OR,
+ "_id($id)_arena", atom_g_or>;
+ def ATOM_A_RSUB : BinAtom<IL_OP_UAV_READ_RSUB,
+ "_id($id)_arena", atom_g_rsub>;
+ def ATOM_A_SUB : BinAtom<IL_OP_UAV_READ_SUB,
+ "_id($id)_arena", atom_g_sub>;
+ def ATOM_A_XOR : BinAtom<IL_OP_UAV_READ_XOR,
+ "_id($id)_arena", atom_g_xor>;
+ def ATOM_A_INC : BinAtom<IL_OP_UAV_READ_INC,
+ "_id($id)_arena", atom_g_inc>;
+ def ATOM_A_DEC : BinAtom<IL_OP_UAV_READ_DEC,
+ "_id($id)_arena", atom_g_dec>;
+ def ATOM_A_XCHG : BinAtom<IL_OP_UAV_READ_XCHG,
+ "_id($id)_arena", atom_g_xchg>;
+ def ATOM_A_CMPXCHG : CmpXChg<IL_OP_UAV_READ_CMPXCHG,
+ "_id($id)_arena", atom_g_cmpxchg>;
+ def ATOM_L_ADD : BinAtom<IL_OP_LDS_READ_ADD, // ATOM_L_*: IL_OP_LDS_READ_* opcodes, _resource($id) modifier
+ "_resource($id)", atom_l_add>;
+ def ATOM_L_AND : BinAtom<IL_OP_LDS_READ_AND,
+ "_resource($id)", atom_l_and>;
+ def ATOM_L_MAX : BinAtom<IL_OP_LDS_READ_MAX,
+ "_resource($id)", atom_l_max>;
+ def ATOM_L_MIN : BinAtom<IL_OP_LDS_READ_MIN,
+ "_resource($id)", atom_l_min>;
+ def ATOM_L_UMAX : BinAtom<IL_OP_LDS_READ_UMAX,
+ "_resource($id)", atom_l_umax>;
+ def ATOM_L_UMIN : BinAtom<IL_OP_LDS_READ_UMIN,
+ "_resource($id)", atom_l_umin>;
+ def ATOM_L_OR : BinAtom<IL_OP_LDS_READ_OR,
+ "_resource($id)", atom_l_or>;
+ def ATOM_L_MSKOR : TriAtom<IL_OP_LDS_READ_MSKOR, // MSKOR uses TriAtom rather than BinAtom
+ "_resource($id)", atom_l_mskor>;
+ def ATOM_L_RSUB : BinAtom<IL_OP_LDS_READ_RSUB,
+ "_resource($id)", atom_l_rsub>;
+ def ATOM_L_SUB : BinAtom<IL_OP_LDS_READ_SUB,
+ "_resource($id)", atom_l_sub>;
+ def ATOM_L_XOR : BinAtom<IL_OP_LDS_READ_XOR,
+ "_resource($id)", atom_l_xor>;
+ def ATOM_L_INC : BinAtom<IL_OP_LDS_READ_INC,
+ "_resource($id)", atom_l_inc>;
+ def ATOM_L_DEC : BinAtom<IL_OP_LDS_READ_DEC,
+ "_resource($id)", atom_l_dec>;
+ def ATOM_L_XCHG : BinAtom<IL_OP_LDS_READ_XCHG,
+ "_resource($id)", atom_l_xchg>;
+ def ATOM_L_CMPXCHG : TriAtom<IL_OP_LDS_READ_CMPXCHG, // NOTE(review): TriAtom here, CmpXChg for the G, A and R defs - confirm intended
+ "_resource($id)", atom_l_cmpxchg>;
+ def ATOM_R_ADD : BinAtom<IL_OP_GDS_READ_ADD, // ATOM_R_*: IL_OP_GDS_READ_* opcodes, _resource($id) modifier
+ "_resource($id)", atom_r_add>;
+ def ATOM_R_AND : BinAtom<IL_OP_GDS_READ_AND,
+ "_resource($id)", atom_r_and>;
+ def ATOM_R_MAX : BinAtom<IL_OP_GDS_READ_MAX,
+ "_resource($id)", atom_r_max>;
+ def ATOM_R_MIN : BinAtom<IL_OP_GDS_READ_MIN,
+ "_resource($id)", atom_r_min>;
+ def ATOM_R_UMAX : BinAtom<IL_OP_GDS_READ_UMAX,
+ "_resource($id)", atom_r_umax>;
+ def ATOM_R_UMIN : BinAtom<IL_OP_GDS_READ_UMIN,
+ "_resource($id)", atom_r_umin>;
+ def ATOM_R_OR : BinAtom<IL_OP_GDS_READ_OR,
+ "_resource($id)", atom_r_or>;
+ def ATOM_R_MSKOR : TriAtom<IL_OP_GDS_READ_MSKOR,
+ "_resource($id)", atom_r_mskor>;
+ def ATOM_R_RSUB : BinAtom<IL_OP_GDS_READ_RSUB,
+ "_resource($id)", atom_r_rsub>;
+ def ATOM_R_SUB : BinAtom<IL_OP_GDS_READ_SUB,
+ "_resource($id)", atom_r_sub>;
+ def ATOM_R_XOR : BinAtom<IL_OP_GDS_READ_XOR,
+ "_resource($id)", atom_r_xor>;
+ def ATOM_R_INC : BinAtom<IL_OP_GDS_READ_INC,
+ "_resource($id)", atom_r_inc>;
+ def ATOM_R_DEC : BinAtom<IL_OP_GDS_READ_DEC,
+ "_resource($id)", atom_r_dec>;
+ def ATOM_R_XCHG : BinAtom<IL_OP_GDS_READ_XCHG,
+ "_resource($id)", atom_r_xchg>;
+ def ATOM_R_CMPXCHG : CmpXChg<IL_OP_GDS_READ_CMPXCHG,
+ "_resource($id)", atom_r_cmpxchg>;
+ def APPEND_ALLOC : Append<IL_OP_APPEND_BUF_ALLOC, // returning append forms reuse the opcodes of the NORET append defs
+ "_id($id)", append_alloc>;
+ def APPEND_CONSUME : Append<IL_OP_APPEND_BUF_CONSUME,
+ "_id($id)", append_consume>;
+}
+}
+let Predicates = [Has64BitPtr] in {
+// Image functions, 64-bit pointer forms (MEMI64 operand, ADDR64 pattern): sampled reads, then info queries, then writes.
+def IMAGE1D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // operands: $ptr (printed as the resource), $sampler, v4f32 $addy; result is v4i32
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text, // asm: opcode text followed by the modifier string below
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1DA64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text as IMAGE1D64_READ; only the matched intrinsic differs
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_array_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 2D intrinsic
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2DA64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 2D array intrinsic
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_array_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE3D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // same asm text again; matches the 3D intrinsic
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image3d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst), // unnormalized variants: coordtype text and intrinsic change, operands do not
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1DA64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image1d_array_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE2DA64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image2d_array_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE3D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+ !strconcat(IL_OP_SAMPLE.Text,
+ "_resource($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+ [(set GPRV4I32:$dst,
+ (int_AMDIL_image3d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+def IMAGE1D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // image info queries: MEMI64 $ptr in, v4i32 $dst out
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"), // asm: opcode text, then destination and pointer
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_info0 ADDR64:$ptr))]>;
+def IMAGE1D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // NOTE(review): INFO0 and INFO1 print identical asm and differ only in the intrinsic matched - confirm how they are told apart later
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_info1 ADDR64:$ptr))]>;
+def IMAGE1DA64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 1D array pair
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info0 ADDR64:$ptr))]>;
+def IMAGE1DA64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info1 ADDR64:$ptr))]>;
+def IMAGE2DA64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 2D array pair (listed before the plain 2D pair in this block)
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info0 ADDR64:$ptr))]>;
+def IMAGE2DA64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info1 ADDR64:$ptr))]>;
+def IMAGE2D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 2D pair
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_info0 ADDR64:$ptr))]>;
+def IMAGE2D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image2d_info1 ADDR64:$ptr))]>;
+def IMAGE3D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst), // 3D pair
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image3d_info0 ADDR64:$ptr))]>;
+def IMAGE3D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ (ins MEMI64:$ptr),
+ !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+ [(set GPRV4I32:$dst, (int_AMDIL_image3d_info1 ADDR64:$ptr))]>;
+def IMAGE1D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // image writes: no result; operands are $ptr, $addy and v4i32 $data
+ (ins MEMI64:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text, // asm: opcode text followed by the modifier string below
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image1d_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE1DA64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // same asm text as IMAGE1D64_WRITE; only the matched intrinsic differs
+ (ins MEMI64:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image1d_array_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE2D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+ (ins MEMI64:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image2d_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE2DA64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+ (ins MEMI64:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image2d_array_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+def IMAGE3D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs), // 3D takes a v4i32 address; the other writes take v2i32
+ (ins MEMI64:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+ !strconcat(IL_OP_UAV_STORE.Text,
+ "_id($ptr) $addy, $data"),
+ [(int_AMDIL_image3d_write ADDR64:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+let hasSideEffects = 1, isNotDuplicable = 1 in { // same flags as the Has32BitPtr atomic block; both apply to every def up to the closing brace
+ // Atomics built on the *NoRet64 classes (declared outside this block). ATOM_G64_*: IL_OP_UAV_* opcodes, _id($id) modifier, atom_g_*_noret fragments.
+ def ATOM_G64_ADD_NORET : BinAtomNoRet64<IL_OP_UAV_ADD,
+ "_id($id)", atom_g_add_noret>;
+ def ATOM_G64_AND_NORET : BinAtomNoRet64<IL_OP_UAV_AND,
+ "_id($id)", atom_g_and_noret>;
+ def ATOM_G64_MAX_NORET : BinAtomNoRet64<IL_OP_UAV_MAX,
+ "_id($id)", atom_g_max_noret>;
+ def ATOM_G64_MIN_NORET : BinAtomNoRet64<IL_OP_UAV_MIN,
+ "_id($id)", atom_g_min_noret>;
+ def ATOM_G64_UMAX_NORET : BinAtomNoRet64<IL_OP_UAV_UMAX,
+ "_id($id)", atom_g_umax_noret>;
+ def ATOM_G64_UMIN_NORET : BinAtomNoRet64<IL_OP_UAV_UMIN,
+ "_id($id)", atom_g_umin_noret>;
+ def ATOM_G64_OR_NORET : BinAtomNoRet64<IL_OP_UAV_OR,
+ "_id($id)", atom_g_or_noret>;
+ def ATOM_G64_RSUB_NORET : BinAtomNoRet64<IL_OP_UAV_RSUB,
+ "_id($id)", atom_g_rsub_noret>;
+ def ATOM_G64_SUB_NORET : BinAtomNoRet64<IL_OP_UAV_SUB,
+ "_id($id)", atom_g_sub_noret>;
+ def ATOM_G64_XOR_NORET : BinAtomNoRet64<IL_OP_UAV_XOR,
+ "_id($id)", atom_g_xor_noret>;
+ def ATOM_G64_INC_NORET : BinAtomNoRet64<IL_OP_UAV_INC,
+ "_id($id)", atom_g_inc_noret>;
+ def ATOM_G64_DEC_NORET : BinAtomNoRet64<IL_OP_UAV_DEC,
+ "_id($id)", atom_g_dec_noret>;
+ def ATOM_G64_CMPXCHG_NORET : CmpXChgNoRet64<IL_OP_UAV_CMP, // compare-exchange uses its own class
+ "_id($id)", atom_g_cmpxchg_noret>;
+ def ATOM_A64_ADD_NORET : BinAtomNoRet64<IL_OP_UAV_ADD, // ATOM_A64_*: same opcodes and atom_g_* fragments as ATOM_G64_*; the modifier adds _arena
+ "_id($id)_arena", atom_g_add_noret>; // NOTE(review): how selection chooses between the G64 and A64 defs is not visible here - confirm
+ def ATOM_A64_AND_NORET : BinAtomNoRet64<IL_OP_UAV_AND,
+ "_id($id)_arena", atom_g_and_noret>;
+ def ATOM_A64_MAX_NORET : BinAtomNoRet64<IL_OP_UAV_MAX,
+ "_id($id)_arena", atom_g_max_noret>;
+ def ATOM_A64_MIN_NORET : BinAtomNoRet64<IL_OP_UAV_MIN,
+ "_id($id)_arena", atom_g_min_noret>;
+ def ATOM_A64_UMAX_NORET : BinAtomNoRet64<IL_OP_UAV_UMAX,
+ "_id($id)_arena", atom_g_umax_noret>;
+ def ATOM_A64_UMIN_NORET : BinAtomNoRet64<IL_OP_UAV_UMIN,
+ "_id($id)_arena", atom_g_umin_noret>;
+ def ATOM_A64_OR_NORET : BinAtomNoRet64<IL_OP_UAV_OR,
+ "_id($id)_arena", atom_g_or_noret>;
+ def ATOM_A64_RSUB_NORET : BinAtomNoRet64<IL_OP_UAV_RSUB,
+ "_id($id)_arena", atom_g_rsub_noret>;
+ def ATOM_A64_SUB_NORET : BinAtomNoRet64<IL_OP_UAV_SUB,
+ "_id($id)_arena", atom_g_sub_noret>;
+ def ATOM_A64_XOR_NORET : BinAtomNoRet64<IL_OP_UAV_XOR,
+ "_id($id)_arena", atom_g_xor_noret>;
+ def ATOM_A64_INC_NORET : BinAtomNoRet64<IL_OP_UAV_INC,
+ "_id($id)_arena", atom_g_inc_noret>;
+ def ATOM_A64_DEC_NORET : BinAtomNoRet64<IL_OP_UAV_DEC,
+ "_id($id)_arena", atom_g_dec_noret>;
+ def ATOM_A64_CMPXCHG_NORET : CmpXChgNoRet64<IL_OP_UAV_CMP,
+ "_id($id)_arena", atom_g_cmpxchg_noret>;
+ def ATOM_L64_ADD_NORET : BinAtomNoRet64<IL_OP_LDS_ADD, // ATOM_L64_*: IL_OP_LDS_* opcodes, _resource($id) modifier, atom_l_* fragments
+ "_resource($id)", atom_l_add_noret>;
+ def ATOM_L64_AND_NORET : BinAtomNoRet64<IL_OP_LDS_AND,
+ "_resource($id)", atom_l_and_noret>;
+ def ATOM_L64_MAX_NORET : BinAtomNoRet64<IL_OP_LDS_MAX,
+ "_resource($id)", atom_l_max_noret>;
+ def ATOM_L64_MIN_NORET : BinAtomNoRet64<IL_OP_LDS_MIN,
+ "_resource($id)", atom_l_min_noret>;
+ def ATOM_L64_UMAX_NORET : BinAtomNoRet64<IL_OP_LDS_UMAX,
+ "_resource($id)", atom_l_umax_noret>;
+ def ATOM_L64_UMIN_NORET : BinAtomNoRet64<IL_OP_LDS_UMIN,
+ "_resource($id)", atom_l_umin_noret>;
+ def ATOM_L64_MSKOR_NORET : TriAtomNoRet64<IL_OP_LDS_MSKOR, // MSKOR uses TriAtomNoRet64 rather than BinAtomNoRet64
+ "_resource($id)", atom_l_mskor_noret>;
+ def ATOM_L64_OR_NORET : BinAtomNoRet64<IL_OP_LDS_OR,
+ "_resource($id)", atom_l_or_noret>;
+ def ATOM_L64_RSUB_NORET : BinAtomNoRet64<IL_OP_LDS_RSUB,
+ "_resource($id)", atom_l_rsub_noret>;
+ def ATOM_L64_SUB_NORET : BinAtomNoRet64<IL_OP_LDS_SUB,
+ "_resource($id)", atom_l_sub_noret>;
+ def ATOM_L64_XOR_NORET : BinAtomNoRet64<IL_OP_LDS_XOR,
+ "_resource($id)", atom_l_xor_noret>;
+ def ATOM_L64_INC_NORET : BinAtomNoRet64<IL_OP_LDS_INC,
+ "_resource($id)", atom_l_inc_noret>;
+ def ATOM_L64_DEC_NORET : BinAtomNoRet64<IL_OP_LDS_DEC,
+ "_resource($id)", atom_l_dec_noret>;
+ def ATOM_L64_CMPXCHG_NORET : TriAtomNoRet64<IL_OP_LDS_CMP, // NOTE(review): TriAtomNoRet64 here, CmpXChgNoRet64 for the G64, A64 and R64 defs - confirm intended
+ "_resource($id)", atom_l_cmpxchg_noret>;
+ def ATOM_R64_ADD_NORET : BinAtomNoRet64<IL_OP_GDS_ADD, // ATOM_R64_*: IL_OP_GDS_* opcodes, _resource($id) modifier, atom_r_* fragments
+ "_resource($id)", atom_r_add_noret>;
+ def ATOM_R64_AND_NORET : BinAtomNoRet64<IL_OP_GDS_AND,
+ "_resource($id)", atom_r_and_noret>;
+ def ATOM_R64_MAX_NORET : BinAtomNoRet64<IL_OP_GDS_MAX,
+ "_resource($id)", atom_r_max_noret>;
+ def ATOM_R64_MIN_NORET : BinAtomNoRet64<IL_OP_GDS_MIN,
+ "_resource($id)", atom_r_min_noret>;
+ def ATOM_R64_UMAX_NORET : BinAtomNoRet64<IL_OP_GDS_UMAX,
+ "_resource($id)", atom_r_umax_noret>;
+ def ATOM_R64_UMIN_NORET : BinAtomNoRet64<IL_OP_GDS_UMIN,
+ "_resource($id)", atom_r_umin_noret>;
+ def ATOM_R64_MSKOR_NORET : TriAtomNoRet64<IL_OP_GDS_MSKOR,
+ "_resource($id)", atom_r_mskor_noret>;
+ def ATOM_R64_OR_NORET : BinAtomNoRet64<IL_OP_GDS_OR,
+ "_resource($id)", atom_r_or_noret>;
+ def ATOM_R64_RSUB_NORET : BinAtomNoRet64<IL_OP_GDS_RSUB,
+ "_resource($id)", atom_r_rsub_noret>;
+ def ATOM_R64_SUB_NORET : BinAtomNoRet64<IL_OP_GDS_SUB,
+ "_resource($id)", atom_r_sub_noret>;
+ def ATOM_R64_XOR_NORET : BinAtomNoRet64<IL_OP_GDS_XOR,
+ "_resource($id)", atom_r_xor_noret>;
+ def ATOM_R64_INC_NORET : BinAtomNoRet64<IL_OP_GDS_INC,
+ "_resource($id)", atom_r_inc_noret>;
+ def ATOM_R64_DEC_NORET : BinAtomNoRet64<IL_OP_GDS_DEC,
+ "_resource($id)", atom_r_dec_noret>;
+ def ATOM_R64_CMPXCHG_NORET : CmpXChgNoRet64<IL_OP_GDS_CMP,
+ "_resource($id)", atom_r_cmpxchg_noret>;
+ def APPEND_ALLOC64_NORET : AppendNoRet64<IL_OP_APPEND_BUF_ALLOC, // append-buffer alloc and consume, _id($id) modifier
+ "_id($id)", append_alloc_noret>;
+ def APPEND_CONSUME64_NORET : AppendNoRet64<IL_OP_APPEND_BUF_CONSUME,
+ "_id($id)", append_consume_noret>;
+ // Atomics built on the returning classes (BinAtom64, TriAtom64, CmpXChg64, Append64). ATOM_* opcodes are the *_READ_* forms; XCHG exists only here, with no NORET def in this block.
+ def ATOM_G64_ADD : BinAtom64<IL_OP_UAV_READ_ADD,
+ "_id($id)", atom_g_add>;
+ def ATOM_G64_AND : BinAtom64<IL_OP_UAV_READ_AND,
+ "_id($id)", atom_g_and>;
+ def ATOM_G64_MAX : BinAtom64<IL_OP_UAV_READ_MAX,
+ "_id($id)", atom_g_max>;
+ def ATOM_G64_MIN : BinAtom64<IL_OP_UAV_READ_MIN,
+ "_id($id)", atom_g_min>;
+ def ATOM_G64_UMAX : BinAtom64<IL_OP_UAV_READ_UMAX,
+ "_id($id)", atom_g_umax>;
+ def ATOM_G64_UMIN : BinAtom64<IL_OP_UAV_READ_UMIN,
+ "_id($id)", atom_g_umin>;
+ def ATOM_G64_OR : BinAtom64<IL_OP_UAV_READ_OR,
+ "_id($id)", atom_g_or>;
+ def ATOM_G64_RSUB : BinAtom64<IL_OP_UAV_READ_RSUB,
+ "_id($id)", atom_g_rsub>;
+ def ATOM_G64_SUB : BinAtom64<IL_OP_UAV_READ_SUB,
+ "_id($id)", atom_g_sub>;
+ def ATOM_G64_XOR : BinAtom64<IL_OP_UAV_READ_XOR,
+ "_id($id)", atom_g_xor>;
+ def ATOM_G64_INC : BinAtom64<IL_OP_UAV_READ_INC,
+ "_id($id)", atom_g_inc>;
+ def ATOM_G64_DEC : BinAtom64<IL_OP_UAV_READ_DEC,
+ "_id($id)", atom_g_dec>;
+ def ATOM_G64_XCHG : BinAtom64<IL_OP_UAV_READ_XCHG,
+ "_id($id)", atom_g_xchg>;
+ def ATOM_G64_CMPXCHG : CmpXChg64<IL_OP_UAV_READ_CMPXCHG,
+ "_id($id)", atom_g_cmpxchg>;
+ // Arena atomic accesses: same opcodes and atom_g_* fragments as the ATOM_G64_* defs; the modifier is _id($id)_arena.
+ def ATOM_A64_ADD : BinAtom64<IL_OP_UAV_READ_ADD,
+ "_id($id)_arena", atom_g_add>;
+ def ATOM_A64_AND : BinAtom64<IL_OP_UAV_READ_AND,
+ "_id($id)_arena", atom_g_and>;
+ def ATOM_A64_MAX : BinAtom64<IL_OP_UAV_READ_MAX,
+ "_id($id)_arena", atom_g_max>;
+ def ATOM_A64_MIN : BinAtom64<IL_OP_UAV_READ_MIN,
+ "_id($id)_arena", atom_g_min>;
+ def ATOM_A64_UMAX : BinAtom64<IL_OP_UAV_READ_UMAX,
+ "_id($id)_arena", atom_g_umax>;
+ def ATOM_A64_UMIN : BinAtom64<IL_OP_UAV_READ_UMIN,
+ "_id($id)_arena", atom_g_umin>;
+ def ATOM_A64_OR : BinAtom64<IL_OP_UAV_READ_OR,
+ "_id($id)_arena", atom_g_or>;
+ def ATOM_A64_RSUB : BinAtom64<IL_OP_UAV_READ_RSUB,
+ "_id($id)_arena", atom_g_rsub>;
+ def ATOM_A64_SUB : BinAtom64<IL_OP_UAV_READ_SUB,
+ "_id($id)_arena", atom_g_sub>;
+ def ATOM_A64_XOR : BinAtom64<IL_OP_UAV_READ_XOR,
+ "_id($id)_arena", atom_g_xor>;
+ def ATOM_A64_INC : BinAtom64<IL_OP_UAV_READ_INC,
+ "_id($id)_arena", atom_g_inc>;
+ def ATOM_A64_DEC : BinAtom64<IL_OP_UAV_READ_DEC,
+ "_id($id)_arena", atom_g_dec>;
+ def ATOM_A64_XCHG : BinAtom64<IL_OP_UAV_READ_XCHG,
+ "_id($id)_arena", atom_g_xchg>;
+ def ATOM_A64_CMPXCHG : CmpXChg64<IL_OP_UAV_READ_CMPXCHG,
+ "_id($id)_arena", atom_g_cmpxchg>;
+ def ATOM_L64_ADD : BinAtom64<IL_OP_LDS_READ_ADD, // ATOM_L64_*: IL_OP_LDS_READ_* opcodes, _resource($id) modifier
+ "_resource($id)", atom_l_add>;
+ def ATOM_L64_AND : BinAtom64<IL_OP_LDS_READ_AND,
+ "_resource($id)", atom_l_and>;
+ def ATOM_L64_MAX : BinAtom64<IL_OP_LDS_READ_MAX,
+ "_resource($id)", atom_l_max>;
+ def ATOM_L64_MIN : BinAtom64<IL_OP_LDS_READ_MIN,
+ "_resource($id)", atom_l_min>;
+ def ATOM_L64_UMAX : BinAtom64<IL_OP_LDS_READ_UMAX,
+ "_resource($id)", atom_l_umax>;
+ def ATOM_L64_UMIN : BinAtom64<IL_OP_LDS_READ_UMIN,
+ "_resource($id)", atom_l_umin>;
+ def ATOM_L64_OR : BinAtom64<IL_OP_LDS_READ_OR,
+ "_resource($id)", atom_l_or>;
+ def ATOM_L64_MSKOR : TriAtom64<IL_OP_LDS_READ_MSKOR, // MSKOR uses TriAtom64 rather than BinAtom64
+ "_resource($id)", atom_l_mskor>;
+ def ATOM_L64_RSUB : BinAtom64<IL_OP_LDS_READ_RSUB,
+ "_resource($id)", atom_l_rsub>;
+ def ATOM_L64_SUB : BinAtom64<IL_OP_LDS_READ_SUB,
+ "_resource($id)", atom_l_sub>;
+ def ATOM_L64_XOR : BinAtom64<IL_OP_LDS_READ_XOR,
+ "_resource($id)", atom_l_xor>;
+ def ATOM_L64_INC : BinAtom64<IL_OP_LDS_READ_INC,
+ "_resource($id)", atom_l_inc>;
+ def ATOM_L64_DEC : BinAtom64<IL_OP_LDS_READ_DEC,
+ "_resource($id)", atom_l_dec>;
+ def ATOM_L64_XCHG : BinAtom64<IL_OP_LDS_READ_XCHG,
+ "_resource($id)", atom_l_xchg>;
+ def ATOM_L64_CMPXCHG : TriAtom64<IL_OP_LDS_READ_CMPXCHG, // NOTE(review): TriAtom64 here, CmpXChg64 for the G64, A64 and R64 defs - confirm intended
+ "_resource($id)", atom_l_cmpxchg>;
+ def ATOM_R64_ADD : BinAtom64<IL_OP_GDS_READ_ADD, // ATOM_R64_*: IL_OP_GDS_READ_* opcodes, _resource($id) modifier
+ "_resource($id)", atom_r_add>;
+ def ATOM_R64_AND : BinAtom64<IL_OP_GDS_READ_AND,
+ "_resource($id)", atom_r_and>;
+ def ATOM_R64_MAX : BinAtom64<IL_OP_GDS_READ_MAX,
+ "_resource($id)", atom_r_max>;
+ def ATOM_R64_MIN : BinAtom64<IL_OP_GDS_READ_MIN,
+ "_resource($id)", atom_r_min>;
+ def ATOM_R64_UMAX : BinAtom64<IL_OP_GDS_READ_UMAX,
+ "_resource($id)", atom_r_umax>;
+ def ATOM_R64_UMIN : BinAtom64<IL_OP_GDS_READ_UMIN,
+ "_resource($id)", atom_r_umin>;
+ def ATOM_R64_OR : BinAtom64<IL_OP_GDS_READ_OR,
+ "_resource($id)", atom_r_or>;
+ def ATOM_R64_MSKOR : TriAtom64<IL_OP_GDS_READ_MSKOR,
+ "_resource($id)", atom_r_mskor>;
+ def ATOM_R64_RSUB : BinAtom64<IL_OP_GDS_READ_RSUB,
+ "_resource($id)", atom_r_rsub>;
+ def ATOM_R64_SUB : BinAtom64<IL_OP_GDS_READ_SUB,
+ "_resource($id)", atom_r_sub>;
+ def ATOM_R64_XOR : BinAtom64<IL_OP_GDS_READ_XOR,
+ "_resource($id)", atom_r_xor>;
+ def ATOM_R64_INC : BinAtom64<IL_OP_GDS_READ_INC,
+ "_resource($id)", atom_r_inc>;
+ def ATOM_R64_DEC : BinAtom64<IL_OP_GDS_READ_DEC,
+ "_resource($id)", atom_r_dec>;
+ def ATOM_R64_XCHG : BinAtom64<IL_OP_GDS_READ_XCHG,
+ "_resource($id)", atom_r_xchg>;
+ def ATOM_R64_CMPXCHG : CmpXChg64<IL_OP_GDS_READ_CMPXCHG,
+ "_resource($id)", atom_r_cmpxchg>;
+ def APPEND_ALLOC64 : Append64<IL_OP_APPEND_BUF_ALLOC, // returning append forms reuse the opcodes of the NORET append defs
+ "_id($id)", append_alloc>;
+ def APPEND_CONSUME64 : Append64<IL_OP_APPEND_BUF_CONSUME,
+ "_id($id)", append_consume>;
+}
+}
+/*
+def SEMAPHORE_INIT : BinaryOpNoRet<IL_OP_SEMAPHORE_INIT, (outs),
+ (ins MEMI32:$ptr, i32imm:$val),
+ !strconcat(IL_OP_SEMAPHORE_INIT.Text, "_id($ptr)_value($val)"),
+ [(int_AMDIL_semaphore_init ADDR:$ptr, timm:$val)]>;
+
+def SEMAPHORE_WAIT : UnaryOpNoRet<IL_OP_SEMAPHORE_WAIT, (outs),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_SEMAPHORE_WAIT.Text, "_id($ptr)"),
+ [(int_AMDIL_semaphore_wait ADDR:$ptr)]>;
+
+def SEMAPHORE_SIGNAL : UnaryOpNoRet<IL_OP_SEMAPHORE_SIGNAL, (outs),
+ (ins MEMI32:$ptr),
+ !strconcat(IL_OP_SEMAPHORE_SIGNAL.Text, "_id($ptr)"),
+ [(int_AMDIL_semaphore_signal ADDR:$ptr)]>;
+*/
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp
new file mode 100644
index 00000000000..fb94368b1a0
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,237 @@
+//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include <cstring>
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDILGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// Build the AMDIL intrinsic info object. The target machine pointer is
+/// stored in mTM unchanged.
+AMDILIntrinsicInfo::AMDILIntrinsicInfo(AMDILTargetMachine *tm)
+  : TargetIntrinsicInfo(),
+    mTM(tm) {}
+
+/// Return the name of the AMDIL intrinsic identified by IntrID.
+///
+/// \param IntrID  Intrinsic ID. The name table is indexed relative to
+///                Intrinsic::num_intrinsics.
+/// \param Tys     Not used by this implementation.
+/// \param numTys  Not used by this implementation.
+/// \returns the generated name-table entry for IntrID, or an empty string
+///          when IntrID is below Intrinsic::num_intrinsics.
+std::string
+AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+                            unsigned int numTys) const
+{
+  static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+  };
+
+  //assert(!isOverloaded(IntrID)
+  //&& "AMDIL Intrinsics are not overloaded");
+  if (IntrID < Intrinsic::num_intrinsics) {
+    // A literal 0 here would be converted to a null const char* and used to
+    // construct the std::string, which is undefined behaviour. Hand back an
+    // empty string instead.
+    return std::string();
+  }
+  assert(IntrID < AMDILIntrinsic::num_AMDIL_intrinsics
+         && "Invalid intrinsic ID");
+
+  return std::string(names[IntrID - Intrinsic::num_intrinsics]);
+}
+
+/// Decide whether lookupName has to go through the name-translation path,
+/// trimming a trailing type suffix from the name length when one is found.
+///
+/// \param Name  Function name being looked up.
+/// \param Len   In: number of characters of Name to consider.
+///              Out: unchanged, unless a type tag was found (see below), in
+///              which case it becomes the offset at which the backwards scan
+///              stopped (the last '_' at or before Name[Len - 1], or 0 when
+///              there is none).
+/// \returns true when Name starts with "__atom" (Len is left alone) or when
+///          the text from the scan stop onwards contains one of i32, u32,
+///          i64, u64, f32, f64, i16, u16, i8, u8; false otherwise, including
+///          for a non-atomic name with Len == 0.
+static bool
+checkTruncation(const char *Name, unsigned int& Len)
+{
+  // We don't want to truncate on atomic instructions
+  // but we do want to enter the check Truncation
+  // section so that we can translate the atomic
+  // instructions if we need to.
+  if (!strncmp(Name, "__atom", 6)) {
+    return true;
+  }
+  // With Len == 0 the expression Name + (Len - 1) wraps around and points
+  // far outside the string; there is no suffix to look at anyway.
+  if (Len == 0) {
+    return false;
+  }
+  const char *ptr = Name + (Len - 1);
+  while (ptr != Name && *ptr != '_') {
+    --ptr;
+  }
+  if (strstr(ptr, "i32")
+      || strstr(ptr, "u32")
+      || strstr(ptr, "i64")
+      || strstr(ptr, "u64")
+      || strstr(ptr, "f32")
+      || strstr(ptr, "f64")
+      || strstr(ptr, "i16")
+      || strstr(ptr, "u16")
+      || strstr(ptr, "i8")
+      || strstr(ptr, "u8")) {
+    Len = (unsigned int)(ptr - Name);
+    return true;
+  }
+  return false;
+}
+
+// OpenCL 1.0 atomics are spelled "__atom_*" and OpenCL 1.1 atomics
+// "__atomic_*". Rather than supporting both spellings, names using the
+// 1.0 prefix are rewritten to the 1.1 prefix here.
+//
+// Returns a new[]-allocated, NUL-terminated string holding the first Len
+// characters of Name, with a leading "__atom_" replaced by "__atomic_".
+// The caller frees it with delete [].
+static char*
+atomTranslateIfNeeded(const char *Name, unsigned int Len)
+{
+  static const char OldPrefix[] = "__atom_";
+  static const char NewPrefix[] = "__atomic_";
+  const unsigned int OldLen = sizeof(OldPrefix) - 1;
+  const unsigned int NewLen = sizeof(NewPrefix) - 1;
+
+  if (strncmp(Name, OldPrefix, OldLen) != 0) {
+    // Not a 1.0 atomic: plain copy of the (possibly truncated) name.
+    char *copy = new char[Len + 1];
+    memcpy(copy, Name, Len);
+    copy[Len] = '\0';
+    return copy;
+  }
+
+  // The new prefix is two characters longer than the old one.
+  const unsigned int outLen = Len + (NewLen - OldLen);
+  char *renamed = new char[outLen + 1];
+  memcpy(renamed, NewPrefix, NewLen);
+  memcpy(renamed + NewLen, Name + OldLen, Len - OldLen);
+  renamed[outLen] = '\0';
+  return renamed;
+}
+
+/// Map a function name onto an AMDIL intrinsic ID.
+///
+/// The generated function recognizer is included first. After it, the name
+/// is looked up as a GCC builtin of the "AMDIL" target, going through
+/// checkTruncation / atomTranslateIfNeeded when checkTruncation asks for it.
+///
+/// \returns the intrinsic ID when the lookup yields something other than
+///          Intrinsic::not_intrinsic and isValidIntrinsic accepts it;
+///          0 otherwise.
+unsigned int
+AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
+{
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDILGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+  const AMDILIntrinsic::ID NotFound
+    = (AMDILIntrinsic::ID)Intrinsic::not_intrinsic;
+  AMDILIntrinsic::ID Found = NotFound;
+
+  if (!checkTruncation(Name, Len)) {
+    Found = getIntrinsicForGCCBuiltin("AMDIL", Name);
+  } else {
+    // Look up a private, possibly shortened and re-prefixed copy.
+    char *Translated = atomTranslateIfNeeded(Name, Len);
+    Found = getIntrinsicForGCCBuiltin("AMDIL", Translated);
+    delete [] Translated;
+  }
+
+  if (isValidIntrinsic(Found) && Found != NotFound) {
+    return Found;
+  }
+  return 0;
+}
+
+/// Report whether AMDIL intrinsic IntrID is marked as overloaded in the
+/// generated overload table.
+///
+/// \param IntrID  Intrinsic ID. The table is indexed relative to
+///                Intrinsic::num_intrinsics.
+/// \returns the table entry for IntrID, or false for any ID below
+///          Intrinsic::num_intrinsics (0 included), which has no entry.
+bool
+AMDILIntrinsicInfo::isOverloaded(unsigned IntrID) const
+{
+  // Overload Table
+  static const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+  };
+  // Checking only for 0 is not enough: any other ID below
+  // Intrinsic::num_intrinsics would wrap the unsigned subtraction below and
+  // index far outside the table.
+  if (IntrID < Intrinsic::num_intrinsics) {
+    return false;
+  }
+  assert(IntrID < AMDILIntrinsic::num_AMDIL_intrinsics
+         && "Invalid intrinsic ID");
+  return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+/// Get or insert the declaration of AMDIL intrinsic IntrID in module M.
+///
+/// The declaration is named getName(IntrID), typed from ResultTy, ArgTys and
+/// IsVarArg, and carries the attribute list from getAttributes(). Tys and
+/// numTys are not read by this function body itself. The function asserts
+/// that IntrID is not overloaded.
+Function*
+AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const
+{
+ assert(!isOverloaded(IntrID) && "AMDIL intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((AMDILIntrinsic::ID) IntrID);
+ LLVMContext& Context = M->getContext();
+ // NOTE(review): the locals below are presumably read and filled in by the
+ // generated GET_INTRINSIC_GENERATOR code included next, so their names and
+ // order must not change - confirm against AMDILGenIntrinsics.inc.
+ unsigned int id = IntrID;
+ Type *ResultTy = NULL;
+ std::vector<Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+ // We need to add the resource ID argument for atomics.
+ // The extra argument is a 32-bit integer appended after the generated ones.
+ // NOTE(review): the range test assumes every atomic ID lies between
+ // AMDIL_atomic_add_gi32 and AMDIL_atomic_xor_ru32_noret in the generated
+ // enum - verify the enum ordering.
+ if (id >= AMDILIntrinsic::AMDIL_atomic_add_gi32
+ && id <= AMDILIntrinsic::AMDIL_atomic_xor_ru32_noret) {
+ ArgTys.push_back(IntegerType::get(Context, 32));
+ }
+
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ FunctionType::get(ResultTy, ArgTys, IsVarArg),
+ AList));
+}
+
+/// The code generator has to work with several SC versions, so an intrinsic
+/// that is known by name may still be unusable on the current one. This
+/// check reports whether IntrID may actually be used. When it reports
+/// false, the call is not turned into an intrinsic and the software
+/// emulated fall-back path is expected to handle it.
+///
+/// \returns true for every ID except the float-to-int (rpi/flr) and
+///          float-to-half (near/neg_inf/plus_inf) conversions listed below,
+///          which are valid only when calVersion() >= CAL_VERSION_SC_139.
+bool
+AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const
+{
+  const AMDILSubtarget *stm = mTM->getSubtargetImpl();
+  bool NeedsSC139 = false;
+  switch (IntrID) {
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_rpi:
+  case AMDILIntrinsic::AMDIL_convert_f32_i32_flr:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_near:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_neg_inf:
+  case AMDILIntrinsic::AMDIL_convert_f32_f16_plus_inf:
+    NeedsSC139 = true;
+    break;
+  default:
+    break;
+  }
+  if (!NeedsSC139) {
+    return true;
+  }
+  return stm->calVersion() >= CAL_VERSION_SC_139;
+}
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h
new file mode 100644
index 00000000000..1b64f5a4f4d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIntrinsicInfo.h
@@ -0,0 +1,90 @@
+//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information --------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the AMDIL Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef _AMDIL_INTRINSICS_H_
+#define _AMDIL_INTRINSICS_H_
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Intrinsics.h"
+
+namespace llvm {
+ class AMDILTargetMachine;
+ // IDs of the AMDIL-specific intrinsics.
+ namespace AMDILIntrinsic {
+ enum ID {
+ // Anchor placed one below Intrinsic::num_intrinsics. Assuming the
+ // generated enumerators carry no explicit values (TODO confirm against
+ // AMDILGenIntrinsics.inc), the first generated ID is therefore
+ // Intrinsic::num_intrinsics.
+ last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDILGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ // Enumerator following the last generated ID; used as an upper bound.
+ , num_AMDIL_intrinsics
+ };
+
+ }
+
+
+ // TargetIntrinsicInfo implementation for the AMDIL target.
+ class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
+ // Target machine handed to the constructor; isValidIntrinsic() queries
+ // its subtarget.
+ AMDILTargetMachine *mTM;
+ public:
+ AMDILIntrinsicInfo(AMDILTargetMachine *tm);
+ // Name of intrinsic IntrId, taken from the generated name table.
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ // Intrinsic ID for the function name Name (Len characters); 0 when no
+ // usable intrinsic matches.
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ // Whether intrinsic IID is flagged in the generated overload table.
+ bool isOverloaded(unsigned int IID) const;
+ // Get or insert the declaration of intrinsic ID in module M.
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ // Whether the intrinsic may be used with the subtarget's CAL version.
+ bool isValidIntrinsic(unsigned int) const;
+ }; // AMDILIntrinsicInfo
+}
+
+#endif // _AMDIL_INTRINSICS_H_
+
diff --git a/src/gallium/drivers/radeon/AMDILIntrinsics.td b/src/gallium/drivers/radeon/AMDILIntrinsics.td
new file mode 100644
index 00000000000..2c3313cc52a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILIntrinsics.td
@@ -0,0 +1,746 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the amdil-specific intrinsics
+//
+//===---------------------------------------------------------------===//
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+//------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
+ // Memory fence builtins: plain, read-only and write-only flavours, each
+ // with an unscoped, _global, _local and _region variant. Every def binds
+ // the builtin name given to GCCBuiltin<> to an intrinsic whose signature
+ // comes from UnaryIntNoRetInt (defined outside this section).
+ def int_AMDIL_fence : GCCBuiltin<"mem_fence">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_global : GCCBuiltin<"mem_fence_global">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_local : GCCBuiltin<"mem_fence_local">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_region : GCCBuiltin<"mem_fence_region">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_read_only : GCCBuiltin<"read_mem_fence">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_read_only_global : GCCBuiltin<"read_mem_fence_global">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_read_only_local : GCCBuiltin<"read_mem_fence_local">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_read_only_region : GCCBuiltin<"read_mem_fence_region">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_write_only : GCCBuiltin<"write_mem_fence">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_write_only_global : GCCBuiltin<"write_mem_fence_global">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_write_only_local : GCCBuiltin<"write_mem_fence_local">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_fence_write_only_region : GCCBuiltin<"write_mem_fence_region">,
+ UnaryIntNoRetInt;
+
+ // Miscellaneous builtins. The signature classes used here and below
+ // (UnaryIntNoRetInt, TernaryIntInt, UnaryIntFloat, UnaryIntInt, ...) are
+ // defined outside this section.
+ def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
+ UnaryIntNoRetInt;
+
+ def int_AMDIL_cmov_logical : GCCBuiltin<"__amdil_cmov_logical">,
+ TernaryIntInt;
+ def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ // Bit-field builtins (extract, reverse, count, find-first, bit/byte
+ // align, insert, bfi, bfm). All of them use integer signature classes;
+ // only ubit_insert takes four operands (QuaternaryIntInt).
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ // Multiply, multiply-add, carry/borrow, min/max and media (lerp, sad)
+ // builtins. The _i32/_u32 and media defs use integer signature classes;
+ // the unsuffixed int_AMDIL_mad, int_AMDIL_min and int_AMDIL_max use the
+ // floating-point classes (TernaryIntFloat / BinaryIntFloat).
+ def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
+ TernaryIntInt;
+ def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
+ TernaryIntInt;
+ def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
+ TernaryIntFloat;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
+ TernaryIntInt;
+ def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
+ TernaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ // Floating-point math builtins. All defs in this run use the
+ // Unary/Binary/TernaryIntFloat classes, except int_AMDIL_udiv, which uses
+ // BinaryIntInt. Several operations also have a separate *_vec def with
+ // the same signature class.
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ // Builtins with explicitly spelled signatures (Intrinsic<[results],
+ // [operands], [properties]>) mixed with conversion builtins that use the
+ // ConvertIntITOF / ConvertIntFTOI classes (defined outside this section).
+ // int_AMDIL_ldexp is the only def here that uses the llvm_any*_ty types.
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ // The next five conversions are the ones AMDILIntrinsicInfo::
+ // isValidIntrinsic only accepts for calVersion() >= CAL_VERSION_SC_139.
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ // Dot products: dp2/dp2_add take v2f32 operands, dp3 and dp4 both take
+ // v4f32 operands; all return a scalar float.
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+//===---------------------- Image functions begin ------------------------===//
+ // One group of five defs per image kind (1d, 1d_array, 2d, 2d_array, 3d):
+ //   *_write        no result; operands (ptr, v2i32, v4i32), except
+ //                  image3d_write, whose second operand is v4i32
+ //   *_read_norm    v4i32 result; operands (ptr, i32, v4f32)
+ //   *_read_unnorm  v4i32 result; operands (ptr, i32, v4f32)
+ //   *_info0/1      v4i32 result; operand (ptr)
+ // The write and read defs carry IntrReadWriteArgMem; the info defs carry
+ // no properties.
+ def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_read_norm : GCCBuiltin<"__amdil_image1d_read_norm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_read_unnorm : GCCBuiltin<"__amdil_image1d_read_unnorm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_array_read_norm : GCCBuiltin<"__amdil_image1d_array_read_norm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_array_read_unnorm : GCCBuiltin<"__amdil_image1d_array_read_unnorm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_read_norm : GCCBuiltin<"__amdil_image2d_read_norm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_read_unnorm : GCCBuiltin<"__amdil_image2d_read_unnorm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_array_read_norm : GCCBuiltin<"__amdil_image2d_array_read_norm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_array_read_unnorm : GCCBuiltin<"__amdil_image2d_array_read_unnorm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image3d_read_norm : GCCBuiltin<"__amdil_image3d_read_norm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image3d_read_unnorm : GCCBuiltin<"__amdil_image3d_read_unnorm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+ def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+//===---------------------- Image functions end --------------------------===//
+
+ // Append-buffer alloc/consume builtins: one pointer operand, i32 result,
+ // IntrReadWriteArgMem.
+ // NOTE(review): the *_noret variants declare the same i32 result as the
+ // returning ones - confirm this is intended.
+ def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+ def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+ def int_AMDIL_append_alloc_i32_noret : GCCBuiltin<"__amdil_append_alloc_noret">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+ def int_AMDIL_append_consume_i32_noret : GCCBuiltin<"__amdil_append_consume_noret">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+
+ // Operand-less query builtins. The id/size/offset/group queries return
+ // v4i32; get_work_dim and the two printf queries return a scalar i32.
+ // get_work_dim is bound to the unprefixed builtin name "get_work_dim".
+ // The printf defs omit the (empty) property list the other defs spell out.
+ def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
+ Intrinsic<[llvm_v4i32_ty], [], []>;
+ def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
+ Intrinsic<[llvm_i32_ty], [], []>;
+ def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
+ Intrinsic<[llvm_i32_ty], []>;
+ def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
+ Intrinsic<[llvm_i32_ty], []>;
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 32 bit integer atomics for global address space
+/// The builtin names use the "__atomic_" spelling that
+/// atomTranslateIfNeeded (AMDILIntrinsicInfo.cpp) rewrites "__atom_" names to.
+/// cmpxchg is the only def using TernaryAtomicIntNoRet; every other
+/// operation, inc and dec included, uses BinaryAtomicIntNoRet.
+def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
+ BinaryAtomicIntNoRet;
+
+
+
+/// Unsigned 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">,
+ BinaryAtomicIntNoRet;
+
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">,
+ BinaryAtomicInt;
+
+/// 32 bit float atomic exchange for global address space (required by OpenCL)
+def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">,
+ BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">,
+ BinaryAtomicInt;
+
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">,
+ BinaryAtomicIntNoRet;
+
+/// Signed 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_ri32_noret : GCCBuiltin<"__atomic_min_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">,
+ BinaryAtomicIntNoRet;
+
+
+
+/// Unsigned 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">,
+ BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">,
+ TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">,
+ BinaryAtomicIntNoRet;
+
+def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
+ VoidIntLong;
+
+def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
+ VoidIntInt;
+
+def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
+ VoidIntInt;
+
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_li32 : GCCBuiltin<"__atomic_rsub_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">,
+ BinaryAtomicInt;
+
+/// Signed 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">,
+ BinaryAtomicInt;
+
+/// 32 bit float atomic exchange for local and region address spaces (required by OpenCL)
+def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">,
+ BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">,
+ BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">,
+ BinaryAtomicInt;
+
+/// Unsigned 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_ru32 : GCCBuiltin<"__atomic_rsub_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">,
+ BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">,
+ TernaryAtomicInt;
+def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
+ BinaryAtomicInt;
+
+/// Semaphore signal/wait/init
+def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
+def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
+ Intrinsic<[], [llvm_ptr_ty]>;
+def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
+ Intrinsic<[], [llvm_ptr_ty]>;
+def int_AMDIL_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">,
+ Intrinsic<[llvm_i32_ty], []>;
+}
diff --git a/src/gallium/drivers/radeon/AMDILKernel.h b/src/gallium/drivers/radeon/AMDILKernel.h
new file mode 100644
index 00000000000..d671f68bc51
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILKernel.h
@@ -0,0 +1,124 @@
+//===------------- AMDILKernel.h - AMDIL Kernel Class ----------*- C++ -*--===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// Definition of the AMDILKernel object and the helper structures that
+// it uses.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_KERNEL_H_
+#define _AMDIL_KERNEL_H_
+#include "AMDIL.h"
+#include "llvm/Value.h"
+#include "llvm/Constant.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+namespace llvm {
+ class AMDILSubtarget;
+ class AMDILTargetMachine;
+ /// Structure that holds information for a single local/region address-space array.
+ typedef struct _AMDILArrayMemRec {
+ uint32_t vecSize; // size of each vector
+ uint32_t offset; // offset into the memory section
+ bool isHW; // true when HW is used, false when SW is used
+ bool isRegion; // true when GDS is used (region array), false otherwise
+ } AMDILArrayMem;
+
+ /// Structure that holds information about a constant address
+ /// space pointer that is a kernel argument.
+ typedef struct _AMDILConstPtrRec {
+ const llvm::Value *base; // presumably the IR value this pointer refers to -- TODO confirm
+ uint32_t size; // NOTE(review): units are not visible in this header -- confirm
+ uint32_t offset; // NOTE(review): what the offset is relative to is not visible here -- confirm
+ uint32_t cbNum; // value of 0 means that it does not use hw CB
+ bool isArray;
+ bool isArgument; // presumably set when the pointer is a kernel argument -- TODO confirm
+ bool usesHardware; // presumably set when a hardware constant buffer backs the pointer -- TODO confirm
+ std::string name;
+ } AMDILConstPtr;
+
+ /// Structure that holds information for all local/region address
+ /// arrays in the kernel.
+ typedef struct _AMDILLocalArgRec {
+ llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS> local; // stores pointers; who owns the pointees is not visible here
+ std::string name; // Kernel Name
+ } AMDILLocalArg;
+
+ /// Structure that holds per-kernel attributes plus the argInfo list (the tag says "Arg"; the typedef is AMDILKernelAttr).
+ typedef struct _AMDILkernelArgRec {
+ uint32_t reqGroupSize[3]; // presumably the required group size, one entry per dimension -- TODO confirm
+ uint32_t reqRegionSize[3]; // presumably the required region size, one entry per dimension -- TODO confirm
+ llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo;
+ bool mHasRWG; // presumably "has reqGroupSize" -- TODO confirm
+ bool mHasRWR; // presumably "has reqRegionSize" -- TODO confirm
+ } AMDILKernelAttr;
+
+ /// Structure that holds information for each kernel. The constructor zeroes every
+ /// scalar, pointer and array member so that no field is ever read while indeterminate.
+ class AMDILKernel {
+ public:
+   AMDILKernel() : curSize(0), curRSize(0), curHWSize(0), curHWRSize(0), constSize(0), mKernel(false), sgv(0), lvgv(0), constSizes() {}
+   uint32_t curSize;    // NOTE(review): the four cur*Size counters look like running memory sizes
+   uint32_t curRSize;   // (plain / region / HW / HW region) -- confirm against the code that fills them
+   uint32_t curHWSize;
+   uint32_t curHWRSize;
+   uint32_t constSize;
+   bool mKernel;        // presumably true when this entry describes a kernel entry point -- TODO confirm
+   std::string mName;
+   AMDILKernelAttr *sgv;   // pointer only; ownership is not visible in this header
+   AMDILLocalArg *lvgv;    // pointer only; ownership is not visible in this header
+   llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS> constPtr;
+   uint32_t constSizes[HW_MAX_NUM_CB]; // one slot per hardware constant buffer, value-initialized to zero
+   llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly;   // presumably indices of read-only images -- TODO confirm
+   llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly; // presumably indices of write-only images -- TODO confirm
+   llvm::SmallVector<std::pair<uint32_t, const llvm::Constant *>, DEFAULT_VEC_SLOTS> CPOffsets;
+   typedef llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS>::iterator constptr_iterator;
+   typedef llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator arraymem_iterator;
+ }; // AMDILKernel
+} // end llvm namespace
+#endif // _AMDIL_KERNEL_H_
diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.cpp b/src/gallium/drivers/radeon/AMDILKernelManager.cpp
new file mode 100644
index 00000000000..1c2468c7d25
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILKernelManager.cpp
@@ -0,0 +1,1387 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILKernelManager.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILAsmPrinter.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdio>
+#include <ostream>
+#include <algorithm>
+#include <string>
+#include <queue>
+#include <list>
+#include <utility>
+using namespace llvm;
+#define NUM_EXTRA_SLOTS_PER_IMAGE 1
+
+static bool errorPrint(const char *ptr, OSTREAM_TYPE &O) {
+  // Writes the message to O on a line of its own. A message whose first
+  // character is 'E' is tagged ";error:"; any other one is tagged ";warning:".
+  const bool isError = (ptr[0] == 'E');
+  const char *tag = isError ? ";error:" : ";warning:";
+  O << tag << ptr << "\n";
+  return false; // the result is the same for every input
+}
+
+#if 0 // compiled out; nothing up to the matching #endif is built
+static bool
+samplerPrint(StringMap<SamplerInfo>::iterator &data, OSTREAM_TYPE &O) { // writes one ";sampler:<name>:<idx>:<flag>:<val>" line
+ O << ";sampler:" << (*data).second.name << ":" << (*data).second.idx
+ << ":" << ((*data).second.val == (uint32_t)-1 ? 0 : 1) // flag: 0 when val is (uint32_t)-1, otherwise 1
+ << ":" << ((*data).second.val != (uint32_t)-1 ? (*data).second.val : 0) // val itself, or 0 when it is (uint32_t)-1
+ << "\n";
+ return false;
+}
+#endif // 0
+
+static bool arenaPrint(uint32_t val, OSTREAM_TYPE &O) {
+  // Ids below ARENA_SEGMENT_RESERVED_UAVS produce no output; the result is always false.
+  if (val < ARENA_SEGMENT_RESERVED_UAVS) return false;
+  O << "dcl_arena_uav_id(" << val << ")\n";
+  return false;
+}
+
+static bool uavPrint(uint32_t val, OSTREAM_TYPE &O) {
+  // A raw UAV declaration is written only for ids 0 through 7 and for id 11; the result is always false.
+  const bool getsRawDecl = (val == 11) || (val < 8);
+  if (getsRawDecl) O << "dcl_raw_uav_id(" << val << ")\n";
+  return false;
+}
+
+static bool uavPrintSI(uint32_t val, OSTREAM_TYPE &O) { // writes a typeless UAV declaration for id val; stride, length and access are fixed text
+ O << "dcl_typeless_uav_id(" << val << ")_stride(4)_length(4)_access(read_write)\n";
+ return false; // the result is the same for every input
+}
+
+// Writes one ";printf_fmt:<id>:<numOperands>[:<operandID >> 3]...:<size>:<text>;" line to O for a printf map entry.
+// CR and LF inside the text are written as the two-character escapes \r and \n. Always returns false.
+static bool
+printfPrint(std::pair<const std::string, PrintfInfo *> &data, OSTREAM_TYPE &O) {
+  O << ";printf_fmt:" << data.second->getPrintfID();
+  O << ":" << data.second->getNumOperands(); // Number of operands
+  for (size_t i = 0, e = data.second->getNumOperands(); i < e; ++i) { // Size of each operand
+    O << ":" << (data.second->getOperandID(i) >> 3);
+  }
+  const char *ptr = data.first.c_str(); // c_str() is cheap way to trim
+  // The key's last character is not printed. An empty key must yield 0 here: size() - 1 would wrap around.
+  uint32_t size = data.first.empty() ? 0 : data.first.size() - 1;
+  O << ":" << size << ":"; // The format string size
+  for (size_t i = 0; i < size; ++i) {
+    if (ptr[i] == '\r') {
+      O << "\\r";
+    } else if (ptr[i] == '\n') {
+      O << "\\n";
+    } else {
+      O << ptr[i];
+    }
+  }
+  O << ";\n";
+  return false;
+}
+
+
+// Builds the ";pointer:<name>:<type>:1:1:<counter * 16>:<memtype>:<id>:<align>" metadata record for the
+// pointer argument Ip and hands it to mMFI->addMetadata(). The memory type string and the id depend on
+// the pointer's address space and on which memories the device reports as handled in hardware.
+void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
+    int numWriteImages, int raw_uav_buffer, int counter, // the first two are not used in this function
+    bool isKernel, const Function *F) {
+  assert(F && "Cannot pass a NULL Pointer to F!");
+  assert(Ip->getType()->isPointerTy() &&
+      "Argument must be a pointer to be passed into this function!\n");
+  std::string ptrArg(";pointer:");
+  const char *symTab = "NoSymTab";
+  uint32_t ptrID = getUAVID(Ip);
+  const PointerType *PT = cast<PointerType>(Ip->getType());
+  uint32_t Align = 4;
+  const char *MemType = "uav";
+  // NOTE(review): llvm::NextPowerOf2 is documented as strictly greater than its argument -- confirm intent.
+  if (PT->getElementType()->isSized()) {
+    Align = NextPowerOf2((uint32_t)mTM->getTargetData()->
+        getTypeAllocSize(PT->getElementType()));
+  }
+  ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab) + ":1:1:" +
+    itostr(counter * 16) + ":";
+  switch (PT->getAddressSpace()) {
+    case AMDILAS::ADDRESS_NONE:
+      // No address space qualifier: record an internal error and trap in assert-enabled builds.
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      assert(0 && "Pointer argument has no address space qualifier!");
+      break;
+    case AMDILAS::GLOBAL_ADDRESS:
+      if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) && ptrID >= ARENA_SEGMENT_RESERVED_UAVS) {
+        ptrID = 8;
+      }
+      mMFI->uav_insert(ptrID);
+      break;
+    case AMDILAS::CONSTANT_ADDRESS: {
+      if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)){
+        const kernel t = mGM->getKernel(F->getName());
+        if (mGM->usesHWConstant(t, Ip->getName())) {
+          MemType = "hc\0";
+          ptrID = mGM->getConstPtrCB(t, Ip->getName());
+        } else {
+          MemType = "c\0";
+          mMFI->uav_insert(ptrID);
+        }
+      } else {
+        MemType = "c\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    }
+    default: // any address space not listed is treated like private memory
+    case AMDILAS::PRIVATE_ADDRESS:
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+        MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) ? "up\0" : "hp\0";
+      } else {
+        MemType = "p\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    case AMDILAS::REGION_ADDRESS:
+      mMFI->setUsesRegion();
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+        MemType = "hr\0";
+        ptrID = 0;
+      } else {
+        MemType = "r\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    case AMDILAS::LOCAL_ADDRESS:
+      mMFI->setUsesLocal();
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+        MemType = "hl\0";
+        ptrID = 1;
+      } else {
+        MemType = "l\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+  }
+  ptrArg += std::string(MemType) + ":";
+  ptrArg += itostr(ptrID) + ":";
+  ptrArg += itostr(Align);
+  mMFI->addMetadata(ptrArg, true);
+}
+
+AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM,
+                                       AMDILGlobalManager *GM) {
+  // Store both collaborators, fetch the subtarget through the stored target machine, then reset the flags.
+  mGM = GM;
+  mTM = TM;
+  mSTM = mTM->getSubtargetImpl();
+  clear();
+}
+
+AMDILKernelManager::~AMDILKernelManager() { // only calls clear(); no pointer member is released here
+ clear();
+}
+
+void AMDILKernelManager::setMF(MachineFunction *MF) {
+  // Point the manager at MF and cache MF's AMDILMachineFunctionInfo in mMFI,
+  // the object the other member functions report metadata and errors to.
+  mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
+  mMF = MF;
+}
+
+void AMDILKernelManager::clear() {
+  // Puts the bookkeeping members back to their starting values: the unique
+  // id counter becomes 0 and all four status flags become false.
+  mUniqueID = 0;
+  mIsKernel = mWasKernel = false;
+  mHasImageWrite = mHasOutputInst = false;
+}
+
+bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) {
+  if (MI->getOpcode() != AMDIL::RETURN || !wasKernel()) return false; // only a RETURN while wasKernel() holds can qualify
+  return !(mHasImageWrite || mHasOutputInst); // ...and only when neither an image write nor an output instruction was seen
+}
+
+// Walks every IR argument of the current function, appends one metadata
+// string per argument to mMFI (";value:", ";image:", ";counter:", or whatever
+// updatePtrArg() records) and tracks the running constant-buffer slot index
+// so that each entry carries its byte offset (slot * 16).
+//
+// O        - stream that receives the image declarations (dcl_resource_id /
+//            dcl_uav_id); only written for image arguments when isKernel.
+// buf      - not used by this function.
+// isKernel - when true, read-only / write-only images are declared on O and
+//            tagged RO / WO / RW in the metadata string.
+//
+// Argument kinds that cannot be handled are reported through
+// mMFI->addErrorMsg() (and assert in debug builds).
+void AMDILKernelManager::processArgMetadata(OSTREAM_TYPE &O,
+                                            uint32_t buf,
+                                            bool isKernel)
+{
+  const Function *F = mMF->getFunction();
+  const char *symTab = "NoSymTab";
+  Function::const_arg_iterator Ip = F->arg_begin();
+  Function::const_arg_iterator Ep = F->arg_end();
+
+  if (F->hasStructRetAttr()) {
+    // A struct-return function is reported as an internal error; its first
+    // argument is stepped over and does not get a metadata entry.
+    assert(Ip != Ep && "Invalid struct return fucntion!");
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    ++Ip;
+  }
+  // Index of the next free constant-buffer slot; offsets are slot * 16.
+  uint32_t mCBSize = 0;
+  int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+  bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV);
+  bool ArenaSegment =
+    mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment);
+  int numWriteImages =
+    mSTM->getGlobalManager()->getNumWriteImages(F->getName());
+  // With the maximum number of write images, MultiUAV or ArenaSegment on
+  // HD6XXX-or-older devices, pointer arguments use the arena UAV instead of
+  // the raw UAV.
+  if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) {
+    if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+      raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+    }
+  }
+  uint32_t CounterNum = 0;
+  uint32_t ROArg = 0;
+  uint32_t WOArg = 0;
+  for (; Ip != Ep; ++Ip) {
+    Type *cType = Ip->getType();
+    if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
+      // Scalar or vector passed by value.
+      std::string argMeta(";value:");
+      argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab) + ":";
+      int bitsize = cType->getPrimitiveSizeInBits();
+      int numEle = 1;
+      if (cType->getTypeID() == Type::VectorTyID) {
+        numEle = cast<VectorType>(cType)->getNumElements();
+      }
+      argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4);
+      mMFI->addMetadata(argMeta, true);
+
+      // Convert the size into a slot count (at least one slot).
+      // FIXME: simplify
+      if ((bitsize / numEle) < 32) {
+        bitsize = numEle >> 2;
+      } else {
+        bitsize >>= 7;
+      }
+      if (!bitsize) {
+        bitsize = 1;
+      }
+
+      mCBSize += bitsize;
+    } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) {
+      Type *CT = PT->getElementType();
+      const StructType *ST = dyn_cast<StructType>(CT);
+      if (ST && ST->isOpaque()) {
+        // Opaque struct pointers are identified by their type name.
+        StringRef name = ST->getName();
+        bool i1d  = name.equals( "struct._image1d_t" );
+        bool i1da = name.equals( "struct._image1d_array_t" );
+        bool i1db = name.equals( "struct._image1d_buffer_t" );
+        bool i2d  = name.equals( "struct._image2d_t" );
+        bool i2da = name.equals( "struct._image2d_array_t" );
+        bool i3d  = name.equals( "struct._image3d_t" );
+        bool c32  = name.equals( "struct._counter32_t" );
+        bool c64  = name.equals( "struct._counter64_t" );
+        if (i1d || i1da || i1db || i2d || i2da || i3d) {
+          if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+            // Dimensionality tag for the metadata string and for the IL
+            // declaration; at least one flag is set inside this branch, so
+            // the final else corresponds to i3d.
+            const char *metaDim;
+            const char *ilDim;
+            if (i1d) {
+              metaDim = "1D:";
+              ilDim = "1d";
+            } else if (i1da) {
+              metaDim = "1DA:";
+              ilDim = "1darray";
+            } else if (i1db) {
+              metaDim = "1DB:";
+              ilDim = "buffer";
+            } else if (i2d) {
+              metaDim = "2D:";
+              ilDim = "2d";
+            } else if (i2da) {
+              metaDim = "2DA:";
+              ilDim = "2darray";
+            } else {
+              metaDim = "3D:";
+              ilDim = "3d";
+            }
+
+            std::string imageArg(";image:");
+            imageArg += Ip->getName().str() + ":";
+            imageArg += metaDim;
+
+            if (isKernel) {
+              // Images are looked up by their position among the image
+              // arguments classified so far (ROArg + WOArg).
+              if (mGM->isReadOnlyImage (mMF->getFunction()->getName(),
+                                        (ROArg + WOArg))) {
+                imageArg += "RO:" + itostr(ROArg);
+                O << "dcl_resource_id(" << ROArg << ")_type(" << ilDim
+                  << ")_fmtx(unknown)_fmty(unknown)"
+                  << "_fmtz(unknown)_fmtw(unknown)\n";
+                ++ROArg;
+              } else if (mGM->isWriteOnlyImage(mMF->getFunction()->getName(),
+                                               (ROArg + WOArg))) {
+                uint32_t offset = WOArg;
+                imageArg += "WO:" + itostr(offset & 0x7);
+                O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(" << ilDim
+                  << ")_fmtx(uint)\n";
+                ++WOArg;
+              } else {
+                imageArg += "RW:" + itostr(ROArg + WOArg);
+              }
+            }
+            imageArg += ":1:" + itostr(mCBSize * 16);
+            mMFI->addMetadata(imageArg, true);
+            mMFI->addi32Literal(mCBSize);
+            mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1;
+          } else {
+            mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+          }
+        } else if (c32 || c64) {
+          std::string counterArg(";counter:");
+          counterArg += Ip->getName().str() + ":"
+            + itostr(c32 ? 32 : 64) + ":"
+            + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16);
+          mMFI->addMetadata(counterArg, true);
+          ++mCBSize;
+        } else {
+          // Any other opaque type is handled like an ordinary pointer.
+          updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel,
+                       F);
+          ++mCBSize;
+        }
+      }
+      else if (CT->getTypeID() == Type::StructTyID
+               && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        // Struct in the private address space: reserve enough slots for the
+        // struct size rounded up to a multiple of 16 bytes.
+        const TargetData *td = mTM->getTargetData();
+        // CT is known to be a struct here, so use the checked cast<> rather
+        // than handing an unchecked dyn_cast<> result to getStructLayout().
+        const StructLayout *sl = td->getStructLayout(cast<StructType>(CT));
+        int bytesize = sl->getSizeInBytes();
+        int reservedsize = (bytesize + 15) & ~15;
+        int numSlots = reservedsize >> 4;
+        if (!numSlots) {
+          numSlots = 1;
+        }
+        std::string structArg(";value:");
+        structArg += Ip->getName().str() + ":struct:"
+          + itostr(bytesize) + ":1:" + itostr(mCBSize * 16);
+        mMFI->addMetadata(structArg, true);
+        mCBSize += numSlots;
+      } else if (CT->isIntOrIntVectorTy()
+                 || CT->isFPOrFPVectorTy()
+                 || CT->getTypeID() == Type::ArrayTyID
+                 || CT->getTypeID() == Type::PointerTyID
+                 || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+        updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F);
+        ++mCBSize;
+      } else {
+        assert(0 && "Cannot process current pointer argument");
+        mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      }
+    } else {
+      assert(0 && "Cannot process current kernel argument");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+  }
+}
+
+// Remembers `name` as the current function name and writes the IL function
+// prologue to O: the "func <id> ; <name>" line, a mov that loads the SDP
+// register from cb0[8] (.xy when the subtarget is 64-bit, .x otherwise) and a
+// mov that loads the SP register from l1.0.
+void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
+                                     OSTREAM_TYPE &O,
+                                     const std::string &name)
+{
+  mName = name;
+  std::string kernelName(name);
+  const int kernelId = mGM->getOrCreateFunctionID(kernelName);
+  O << "func " << kernelId << " ; " << kernelName << "\n";
+
+  if (!mSTM->is64bit()) {
+    O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].x\n";
+  } else {
+    O << "mov " << AsmPrinter->getRegisterName(AMDIL::SDP) << ", cb0[8].xy\n";
+  }
+
+  O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0\n";
+}
+
+// Writes the work-group size declaration for the current function to O,
+// followed by the LDS and GDS size declarations when the device backs local
+// or region memory with hardware. Sizes that exceed the device limits are
+// reported through mMFI->addErrorMsg().
+void AMDILKernelManager::printGroupSize(OSTREAM_TYPE& O)
+{
+  // HD4XXX has no 3D launch, so the launch size always has to be given with
+  // dcl_num_thread_per_group. Newer devices launch in 3D natively and only
+  // need the exact size when reqd_workgroup_size was specified.
+  const bool isHD4XXX =
+    mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX;
+  if (mGM->hasRWG(mName) || (isHD4XXX && !mMFI->usesLocal())) {
+    // Either the user fixed the work-group size, or this is an HD4XXX kernel
+    // without local memory: emit the three configured dimensions.
+    O << "dcl_num_thread_per_group ";
+    O << mGM->getLocal(mName, 0) << ", ";
+    O << mGM->getLocal(mName, 1) << ", ";
+    O << mGM->getLocal(mName, 2) << " \n";
+  } else if (isHD4XXX) {
+    // HD4XXX kernel that uses local memory: compiled in single wavefront
+    // mode, so the group is exactly one wavefront wide.
+    O << "dcl_num_thread_per_group "
+      << mSTM->device()->getWavefrontSize()
+      << ", 1, 1 \n";
+  } else {
+    // No required size on a 3D-capable device: declare the largest
+    // work-group size that can be launched.
+    O << "dcl_max_thread_per_group " << mGM->getLocal(mName, 3) << " \n";
+  }
+
+  // Local memory (LDS). The known size is rounded up to a multiple of four;
+  // when it cannot be known at compile time the whole LDS is reserved.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+    size_t kernelLocalSize = (mGM->getHWLocalSize(mName) + 3) & ~3;
+    if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]);
+    }
+    // A local pointer passed as a kernel argument (or a zero known size)
+    // means the real size is not available here.
+    const bool localSizeUnknown =
+      mMFI->usesLocal() && (mMFI->hasLocalArg() || !kernelLocalSize);
+    if (localSizeUnknown) {
+      O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+        << mSTM->device()->getMaxLDSSize() << "\n";
+      mMFI->setUsesMem(AMDILDevice::LDS_ID);
+    } else if (kernelLocalSize) {
+      O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+        << kernelLocalSize << "\n";
+      mMFI->setUsesMem(AMDILDevice::LDS_ID);
+    }
+  }
+
+  // Region memory (GDS) follows exactly the same scheme as LDS above.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+    size_t kernelGDSSize = (mGM->getHWRegionSize(mName) + 3) & ~3;
+    if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
+    }
+    const bool regionSizeUnknown =
+      mMFI->usesRegion() && (mMFI->hasRegionArg() || !kernelGDSSize);
+    if (regionSizeUnknown) {
+      O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+        ") " << mSTM->device()->getMaxGDSSize() << "\n";
+      mMFI->setUsesMem(AMDILDevice::GDS_ID);
+    } else if (kernelGDSSize) {
+      O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+        ") " << kernelGDSSize << "\n";
+      mMFI->setUsesMem(AMDILDevice::GDS_ID);
+    }
+  }
+}
+
+void
+AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O) { /*
+ * Writes the UAV / arena resource declarations for the current function to
+ * O, marks each declared resource as used on mMFI, and always finishes by
+ * calling getIntrinsicSetup(AsmPrinter, O). Which declarations are printed
+ * depends on the device generation and on the MultiUAV / ArenaSegment
+ * capabilities queried from mSTM->device().
+ */
+ // If we are a HD4XXX generation device, then we only support a single uav
+ // surface, so we declare it and leave
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ O << "dcl_raw_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+ << ")\n";
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ getIntrinsicSetup(AsmPrinter, O);
+ return;
+ }
+ // If we are supporting multiple uav's via the MultiUAV capability, then we
+ // need to print out the declarations here. MultiUAV conflicts with write
+ // images, so they only use 8 - NumWriteImages uav's. Therefore only pointers
+ // with ID's < 8 will get printed.
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O);
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID); // suppresses the default raw UAV declaration below
+ }
+ // If arena segments are supported, then we should emit them now. Arena
+ // segments are similar to MultiUAV, except ArenaSegments are virtual and up
+ // to 1024 of them can coexist. These are more compiler hints for CAL and thus
+ // cannot overlap in any form. Each ID maps to a separate piece of memory and
+ // CAL determines whether the load/stores should go to the fast path/slow path
+ // based on the usage and instruction.
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
+ }
+ // Now that we have printed out all of the arena and multi uav declarations,
+ // we must print out the default raw uav id. This always exists on HD5XXX
+ // and HD6XXX hardware. The reason is that the hardware supports 12 UAV's and
+ // 11 are taken up by MultiUAV/Write Images and Arena. However, if we do not
+ // have UAV 11 as the raw UAV and there are 8 write images, we must revert
+ // everything to the arena and not print out the default raw uav id.
+ if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX
+ || mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
+ if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 &&
+ mSTM->getGlobalManager()->getNumWriteImages(mName)
+ != OPENCL_MAX_WRITE_IMAGES
+ && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV))
+ || mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
+ if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID) // not declared yet ...
+ && mMFI->uav_count(mSTM->device()-> // ... and the raw UAV id is in mMFI's UAV set
+ getResourceID(AMDILDevice::RAW_UAV_ID))) {
+ O << "dcl_raw_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+ O << ")\n";
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ }
+ }
+ // If we have not printed out the arena ID yet, then do so here.
+ if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+ && mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) {
+ O << "dcl_arena_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n";
+ mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID);
+ }
+ } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { // generations after HD6XXX: every UAV goes through uavPrintSI
+ binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O);
+ mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+ }
+ getIntrinsicSetup(AsmPrinter, O);
+}
+
+void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
+ OSTREAM_TYPE &O)
+{ /*
+ * Writes the fixed IL prologue that fills the work-item id registers:
+ * r1022 (thread id in group), r1023 (group id, flat group id in .w) and
+ * r1021 (global id), plus the T1 / T2 base registers when private / local
+ * memory is emulated in software and r1024 / r1025 when region memory is
+ * supported. AsmPrinter is only used to look up register names.
+ * NOTE(review): the meaning of the individual cb0[N] slots is not visible
+ * in this function - confirm against the runtime ABI.
+ */
+ O << "mov r0.z, vThreadGrpIdFlat.x\n"
+ << "mov r1022.xyz0, vTidInGrp.xyz\n";
+ if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
+ O << "mov r1023.xyz0, vThreadGrpId.xyz\n";
+ } else {
+ O << "imul r0.w, cb0[2].x, cb0[2].y\n"
+ // HD4XXX and older: derive the three group id components in r1023 from
+ // the flat group id in r0.z using cb0[2].x, cb0[2].y and their product.
+ << "umod r1023.x, r0.z, cb0[2].x\n"
+ << "udiv r1023.y, r0.z, cb0[2].x\n"
+ << "umod r1023.y, r1023.y, cb0[2].y\n"
+ << "udiv r1023.z, r0.z, r0.w\n";
+ }
+ // Calculates the global id.
+ if (mGM->hasRWG(mName) && 0) { // "&& 0": this branch is permanently disabled
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << ", ";
+ O << "0\n";
+ O << "imad r1021.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
+ mMFI->addReservedLiterals(1);
+ } else {
+ O << "imad r1021.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n"; // r1021 = r1023 * cb0[1] + r1022
+ }
+
+ // Add the global/group offset for multi-launch support.
+ O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n"
+ << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n"
+ // moves the flat group id.
+ << "mov r1023.w, r0.z\n";
+ if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) { // software local memory: T2 = flat group id * stride + base from cb0[4]
+ if (mSTM->is64bit()) {
+ O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".x0, r1023.w, cb0[4].z\n"
+ << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".xy, cb0[4].xy\n";
+
+ } else {
+ O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2)
+ << ".x, r1023.w, cb0[4].y, cb0[4].x\n";
+ }
+ }
+ // Shift the flat group id to be in bytes instead of dwords.
+ O << "ishl r1023.w, r1023.w, l0.z\n";
+ if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) { // software private memory: T1 = vAbsTidFlat * stride + base from cb0[3]
+ if (mSTM->is64bit()) {
+ O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".x0, vAbsTidFlat.x, cb0[3].z\n"
+ << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".xy, " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".xy, cb0[3].xy\n";
+
+ } else {
+ O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1)
+ << ".x, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n";
+ }
+ } else {
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x, l0.0\n"; // otherwise T1.x is loaded from l0.0
+ }
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+ O << "udiv r1024.xyz, r1021.xyz, cb0[10].xyz\n";
+ if (mGM->hasRWR(mName) && 0) { // "&& 0": this branch is permanently disabled
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() + 1 << ", ";
+ O << mGM->getLocal(mName, 0) << ", ";
+ O << mGM->getLocal(mName, 1) << ", ";
+ O << mGM->getLocal(mName, 2) << ", ";
+ O << "0\n";
+ O << "imad r1025.xyz0, r1023.xyz, l" << mMFI->getNumLiterals() + 1 << ".xyz, r1022.xyz\n";
+ mMFI->addReservedLiterals(1);
+ } else {
+ O << "imad r1025.xyz0, r1023.xyz, cb0[1].xyz, r1022.xyz\n";
+ }
+ }
+}
+
+// Writes the IL function epilogue to O: a "ret" followed by an "endfunc"
+// line annotated with the current function name.
+void AMDILKernelManager::printFooter(OSTREAM_TYPE &O)
+{
+  O << "ret\n";
+  O << "endfunc ; "
+    << mName
+    << "\n";
+}
+
+// Prints the metadata section for the current function via printKernelArgs().
+// For a kernel, the function is temporarily put into "kernel" state around
+// the call: `id` is added to the called-function list, the unique ID is set
+// to the function ID registered for mName and mIsKernel is raised. Afterwards
+// `id` is removed again, mIsKernel is dropped and the unique ID becomes `id`.
+void AMDILKernelManager::printMetaData(OSTREAM_TYPE &O, uint32_t id,
+                                       bool kernel)
+{
+  if (!kernel) {
+    printKernelArgs(O);
+    return;
+  }
+
+  const int kernelId = mGM->getOrCreateFunctionID(mName);
+  mMFI->addCalledFunc(id);
+  mUniqueID = kernelId;
+  mIsKernel = true;
+
+  printKernelArgs(O);
+
+  mIsKernel = false;
+  mMFI->eraseCalledFunc(id);
+  mUniqueID = id;
+}
+
+// Sets the "is kernel" flag. Setting it to true also latches the
+// "was kernel" flag, which is not lowered again by this function.
+void AMDILKernelManager::setKernel(bool kernel)
+{
+  mIsKernel = kernel;
+  if (!kernel) {
+    return;
+  }
+  mWasKernel = mIsKernel;
+}
+
+// Stores `id` as the unique ID reported in the ";uniqueid:" metadata line.
+void AMDILKernelManager::setID(uint32_t id) {
+  mUniqueID = id;
+}
+
+// Stores `name` as the name of the function currently being processed.
+void AMDILKernelManager::setName(const std::string &name)
+{
+  mName = name;
+}
+
+// Returns the current value of the "is kernel" flag.
+bool AMDILKernelManager::isKernel()
+{
+  return mIsKernel;
+}
+
+// Returns the latched "was kernel" flag (raised by setKernel(true) and only
+// lowered by clear()).
+bool AMDILKernelManager::wasKernel()
+{
+  return mWasKernel;
+}
+
+// Records that an image write was seen; consulted by useCompilerWrite().
+void AMDILKernelManager::setImageWrite()
+{
+  mHasImageWrite = true;
+}
+
+// Records that an output instruction was seen; consulted by
+// useCompilerWrite().
+void AMDILKernelManager::setOutputInst()
+{
+  mHasOutputInst = true;
+}
+
+void AMDILKernelManager::printConstantToRegMapping(
+ AMDILAsmPrinter *RegNames,
+ uint32_t &LII,
+ OSTREAM_TYPE &O,
+ uint32_t &Counter,
+ uint32_t Buffer,
+ uint32_t n,
+ const char *lit,
+ uint32_t fcall,
+ bool isImage,
+ bool isHWCB)
+{ /*
+ * Emits the IL that loads argument data into registers.
+ *
+ * RegNames - used only to translate register numbers into names.
+ * LII      - in/out: number of the next register to fill; advanced by one
+ *            per register written.
+ * O        - output stream.
+ * Counter  - in/out: index of the next constant-buffer slot (or, for
+ *            images, of the next integer literal); advanced as slots are
+ *            consumed.
+ * Buffer   - constant buffer number used in the "cb<Buffer>[...]" operand.
+ * n        - number of registers to fill.
+ * lit      - optional literal operand; when non-null an ishl/ishr pair by
+ *            this literal is appended after each load.
+ * fcall    - selects an extra unpacking sequence; printArgCopies passes
+ *            1093 / 1092 for 16-bit vectors and 1091 / 1090 for 8-bit
+ *            vectors, any other value emits nothing extra.
+ * isImage  - load from an integer literal instead of the constant buffer
+ *            and skip NUM_EXTRA_SLOTS_PER_IMAGE further counter slots.
+ * isHWCB   - with hardware constant memory, emit a single "mov <reg>, l5.x"
+ *            and return.
+ */
+ // TODO: This needs to be enabled or SC will never statically index into the
+ // CB when a pointer is used.
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) {
+ const char *name = RegNames->getRegisterName(LII);
+ O << "mov " << name << ", l5.x\n";
+ ++LII;
+ Counter++;
+ return; // n, lit and fcall are ignored on this path
+ }
+ for (uint32_t x = 0; x < n; ++x) {
+ const char *name = RegNames->getRegisterName(LII);
+ if (isImage) {
+ O << "mov " << name << ", l" << mMFI->getIntLits(Counter++) << "\n";
+ } else {
+ O << "mov " << name << ", cb" <<Buffer<< "[" <<Counter++<< "]\n";
+ }
+ switch(fcall) {
+ case 1093: // 16-bit vector, element count other than 2 (see printArgCopies)
+ O << "ishr " << name << ", " << name << ".xxyy, l3.0y0y\n"
+ "ishl " << name << ", " << name << ", l3.y\n"
+ "ishr " << name << ", " << name << ", l3.y\n";
+ break;
+ case 1092: // 16-bit vector with 2 elements
+ O << "ishr " << name << ", " << name << ".xx, l3.0y\n"
+ "ishl " << name << ", " << name << ", l3.y\n"
+ "ishr " << name << ", " << name << ", l3.y\n";
+ break;
+ case 1091: // 8-bit vector, element count other than 2
+ O << "ishr " << name << ", " << name << ".xxxx, l3.0zyx\n"
+ "ishl " << name << ", " << name << ", l3.x\n"
+ "ishr " << name << ", " << name << ", l3.x\n";
+ break;
+ case 1090: // 8-bit vector with 2 elements
+ O << "ishr " << name << ", " << name << ".xx, l3.0z\n"
+ "ishl " << name << ".xy__, " << name << ".xy, l3.x\n"
+ "ishr " << name << ".xy__, " << name << ".xy, l3.x\n";
+ break;
+ default:
+ break;
+ };
+ if (lit) { // shift left then right by the same literal amount
+ O << "ishl " << name << ", " << name
+ << ", " << lit << "\n";
+ O << "ishr " << name << ", " << name
+ << ", " << lit << "\n";
+ }
+ if (isImage) {
+ Counter += NUM_EXTRA_SLOTS_PER_IMAGE;
+ }
+ ++LII;
+ }
+}
+
+// Emits the IL that copies a by-value struct argument out of constant buffer
+// `Buffer` into private storage, one 16-byte slot at a time. For every slot
+// the data is loaded into r2 from cb<Buffer>[Counter] (Counter is advanced),
+// stored at the address held in r0.x, and r0.x is then bumped by the .z
+// component of literal l<mLitIdx>. `stackSize` is the struct size in bytes;
+// `ST` is not used.
+void
+AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
+                                           OSTREAM_TYPE &O,
+                                           size_t stackSize,
+                                           uint32_t Buffer,
+                                           uint32_t mLitIdx,
+                                           uint32_t &Counter)
+{
+  const size_t slotCount = ((stackSize + 15) & ~15) >> 4;
+  for (size_t left = slotCount; left > 0; --left) {
+    O << "mov r2, cb" << Buffer << "[" << Counter++ << "]\n";
+    O << "mov r1.x, r0.x\n";
+    if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      // Generations after HD6XXX store through the scratch UAV.
+      O << "uav_raw_store_id(" <<
+        mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+        << ") mem0, r1.x, r2\n";
+    } else if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      // Hardware private memory: write into the indexed temp array after
+      // shifting the address right by l0.x.
+      O << "ishr r1.x, r1.x, l0.x\n";
+      O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+        <<"[r1.x], r2\n";
+    } else {
+      // Emulated private memory: store through the global UAV.
+      O << "uav_raw_store_id(" <<
+        mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
+        << ") mem0, r1.x, r2\n";
+    }
+    O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
+  }
+}
+
+// Prints the metadata block for the current function to O, bracketed by
+// ";ARGSTART:<name>" and ";ARGEND:<name>". The block contains the unique ID,
+// the memory usage lines, the recorded function / intrinsic / printf or
+// sampler entries and any recorded compiler errors. Version, device,
+// private-memory size, required work-group / region sizes, the kernel
+// metadata strings and the trailing ";uavid:" line are only printed while
+// mIsKernel is set.
+void AMDILKernelManager::printKernelArgs(OSTREAM_TYPE &O) {
+  std::string version(";version:");
+  version += itostr(AMDIL_MAJOR_VERSION) + ":"
+    + itostr(AMDIL_MINOR_VERSION) + ":" + itostr(AMDIL_REVISION_NUMBER);
+  O << ";ARGSTART:" <<mName<< "\n";
+  if (mIsKernel) {
+    O << version << "\n";
+    O << ";device:" <<mSTM->getDeviceName() << "\n";
+  }
+  O << ";uniqueid:" <<mUniqueID<< "\n";
+
+  // Hardware sizes are rounded up to a multiple of four.
+  size_t local = mGM->getLocalSize(mName);
+  size_t hwlocal = ((mGM->getHWLocalSize(mName) + 3) & (~0x3));
+  size_t region = mGM->getRegionSize(mName);
+  size_t hwregion = ((mGM->getHWRegionSize(mName) + 3) & (~0x3));
+  bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+  bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+  bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+  bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
+  if (mIsKernel) {
+    // Stack size rounded up to a multiple of 16.
+    O << ";memory:" << ((usehwprivate) ?
+                        (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
+      <<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n";
+  }
+  if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+    O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:"
+      << ((usehwregion) ? hwregion : hwregion + region) << "\n";
+  }
+  O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
+    << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n";
+
+  if (mIsKernel) {
+    if (mGM->hasRWG(mName)) {
+      O << ";cws:" << mGM->getLocal(mName, 0) << ":";
+      O << mGM->getLocal(mName, 1) << ":";
+      O << mGM->getLocal(mName, 2) << "\n";
+    }
+    if (mGM->hasRWR(mName)) {
+      O << ";crs:" << mGM->getRegion(mName, 0) << ":";
+      O << mGM->getRegion(mName, 1) << ":";
+      O << mGM->getRegion(mName, 2) << "\n";
+    }
+    for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(),
+         ie = mMFI->kernel_md_end(); ib != ie; ++ib) {
+      O << (*ib) << "\n";
+    }
+  }
+  for (std::set<std::string>::iterator ib = mMFI->func_md_begin(),
+       ie = mMFI->func_md_end(); ib != ie; ++ib) {
+    O << (*ib) << "\n";
+  }
+  if (!mMFI->func_empty()) {
+    O << ";function:" << mMFI->func_size();
+    binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O);
+    O << "\n";
+  }
+
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB)
+      && !mMFI->intr_empty()) {
+    O << ";intrinsic:" << mMFI->intr_size();
+    binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O);
+    O << "\n";
+  }
+
+  if (!mIsKernel) {
+    binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O);
+    mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset(
+        mMFI->printf_size());
+  } else {
+    // A sampler value of (uint32_t)-1 is printed as "0:0", any other value
+    // as "1:<value>".
+    for (StringMap<SamplerInfo>::iterator
+         smb = mMFI->sampler_begin(),
+         sme = mMFI->sampler_end(); smb != sme; ++ smb) {
+      O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
+        << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
+        << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
+        << "\n";
+    }
+  }
+  if (mSTM->is64bit()) {
+    O << ";memory:64bitABI\n";
+  }
+
+  // Print the recorded compiler errors. The guard used to test
+  // errors_empty() without the negation, so the loop only ever ran over an
+  // empty range and no error was ever emitted.
+  if (!mMFI->errors_empty()) {
+    binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O);
+  }
+  // This has to come last
+  if (mIsKernel
+      && mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+    if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
+        mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+      if (mMFI->uav_size() == 1) {
+        if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+            && *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) {
+          O << ";uavid:"
+            << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+          O << "\n";
+        } else {
+          O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
+        }
+      } else if (mMFI->uav_count(mSTM->device()->
+                                 getResourceID(AMDILDevice::RAW_UAV_ID))) {
+        O << ";uavid:"
+          << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+        O << "\n";
+      } else {
+        O << ";uavid:"
+          << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        O << "\n";
+      }
+    } else if (mSTM->getGlobalManager()->getNumWriteImages(mName) !=
+               OPENCL_MAX_WRITE_IMAGES
+               && !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+               && mMFI->uav_count(mSTM->device()->
+                                  getResourceID(AMDILDevice::RAW_UAV_ID))) {
+      O << ";uavid:"
+        << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) << "\n";
+    } else if (mMFI->uav_size() == 1) {
+      O << ";uavid:" << *(mMFI->uav_begin()) << "\n";
+    } else {
+      O << ";uavid:"
+        << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+      O << "\n";
+    }
+  }
+  O << ";ARGEND:" << mName << "\n";
+}
+
+void AMDILKernelManager::printArgCopies(OSTREAM_TYPE &O,
+ AMDILAsmPrinter *RegNames)
+{ /*
+ * Emits the IL that copies the kernel arguments out of constant buffer 1
+ * into registers (starting at AMDIL::R1) or, for by-value structs in the
+ * private address space, into private storage. Before the copies it
+ * declares cb1 and, when a stack is needed with hardware private memory,
+ * the scratch storage; afterwards it declares the hardware constant
+ * buffers used by the kernel. Unsupported argument types are reported via
+ * mMFI->addErrorMsg(). RegNames is only used to look up register names.
+ */
+ MachineFunction::const_iterator MFI = mMF->begin(); // NOTE(review): MFI and MBI are not referenced again below
+ MachineBasicBlock::const_iterator MBI = MFI->begin();
+ Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+ Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+ uint32_t Counter = 0; // next cb1 slot to read
+
+ if (mMFI->getArgSize()) {
+ O << "dcl_cb cb1";
+ O << "[" << (mMFI->getArgSize() >> 4) << "]\n"; // argument size divided by 16
+ mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+ }
+ const Function *F = mMF->getFunction();
+ // Get the stack size
+ uint32_t stackSize = mMFI->getStackSize();
+ uint32_t privateSize = mMFI->getScratchSize();
+ uint32_t stackOffset = (privateSize + 15) & (~0xF); // scratch size rounded up to a multiple of 16
+ if (stackSize
+ && mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ // TODO: If the size is too large, we need to fall back to software emulated
+ // instead of using the hardware capability.
+ int size = (((stackSize + 15) & (~0xF)) >> 4); // stack size in 16-byte units
+ if (size > 4096) {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
+ }
+ if (size) {
+ // For any stack variables, we need to declare the literals for them so that
+ // we can use them when we copy our data to the stack.
+ mMFI->addReservedLiterals(1);
+ // Anytime we declare a literal, we need to reserve it, if it is not emitted
+ // in emitLiterals.
+ O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", "
+ << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
+ << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
+ << mMFI->getNumLiterals() << ".w\n";
+ if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ O << "dcl_indexed_temp_array x"
+ << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "["
+ << size << "]\n";
+ } else {
+ O << "dcl_typeless_uav_id("
+ << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+ << ")_stride(4)_length(" << (size << 4 )<< ")_access(private)\n";
+
+ }
+ O << "mov " << RegNames->getRegisterName(AMDIL::FP)
+ << ".x, l" << mMFI->getNumLiterals() << ".0\n";
+
+ mMFI->setUsesMem(AMDILDevice::SCRATCH_ID);
+ }
+ }
+ I = mMF->getFunction()->arg_begin();
+ int32_t count = 0; // number of by-value struct arguments copied so far
+ // uint32_t Image = 0;
+ bool displaced1 = false; // r1 was saved to r1011 and must be restored
+ bool displaced2 = false; // r2 was saved to r1010 and must be restored
+ uint32_t curReg = AMDIL::R1;
+ // TODO: We don't handle arguments that were pushed onto the stack!
+ for (; I != Ie; ++I) {
+ Type *curType = I->getType();
+ unsigned int Buffer = 1; // all arguments are read from cb1
+ O << "; Kernel arg setup: " << I->getNameStr() << "\n";
+ if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
+ switch (curType->getPrimitiveSizeInBits()) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ break;
+ case 16:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
+ "l3.y" );
+ break;
+ case 8:
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" );
+ break;
+ }
+ } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
+ Type *ET = VT->getElementType();
+ int numEle = VT->getNumElements();
+ switch (ET->getPrimitiveSizeInBits()) {
+ default:
+ if (numEle == 3) { // 3-element vectors are split across three registers, one component each
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].x\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].y\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".x, cb" << Buffer << "[" << Counter << "].z\n";
+ curReg++;
+ Counter++;
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ (numEle+2) >> 2);
+ }
+ break;
+ case 64:
+ if (numEle == 3) { // three 64-bit elements span two cb slots
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter++ << "].zw\n";
+ curReg++;
+ O << "mov " << RegNames->getRegisterName(curReg);
+ O << ".xy, cb" << Buffer << "[" << Counter << "].xy\n";
+ curReg++;
+ Counter++;
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ (numEle) >> 1);
+ }
+ break;
+ case 16:
+ {
+ switch (numEle) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, (numEle+2) >> 2, "l3.y", 1093);
+ if (numEle == 3) { // NOTE(review): curReg was already advanced by the call above, so these movs read and write the same register - confirm intent
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".y\n";
+ ++curReg;
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".z\n";
+ ++curReg;
+ }
+ break;
+ case 2:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, 1, "l3.y", 1092);
+ break;
+ }
+ break;
+ }
+ case 8:
+ {
+ switch (numEle) {
+ default:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, (numEle+2) >> 2, "l3.x", 1091);
+ if (numEle == 3) { // NOTE(review): same pattern as the 16-bit case above - confirm intent
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".y\n";
+ ++curReg;
+ O << "mov " << RegNames->getRegisterName(curReg) << ".x, ";
+ O << RegNames->getRegisterName(curReg) << ".z\n";
+ ++curReg;
+ }
+ break;
+ case 2:
+ printConstantToRegMapping(RegNames, curReg, O, Counter,
+ Buffer, 1, "l3.x", 1090);
+ break;
+ }
+ break;
+ }
+ }
+ } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+ Type *CT = PT->getElementType();
+ const StructType *ST = dyn_cast<StructType>(CT);
+ if (ST && ST->isOpaque()) { // opaque struct pointers: images are recognised by type name
+ bool i1d = ST->getName() == "struct._image1d_t";
+ bool i1da = ST->getName() == "struct._image1d_array_t";
+ bool i1db = ST->getName() == "struct._image1d_buffer_t";
+ bool i2d = ST->getName() == "struct._image2d_t";
+ bool i2da = ST->getName() == "struct._image2d_array_t";
+ bool i3d = ST->getName() == "struct._image3d_t";
+ bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+ if (is_image) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, is_image);
+ } else {
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+ ++curReg;
+ }
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ }
+ } else if (CT->isStructTy()
+ && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ StructType *ST = dyn_cast<StructType>(CT); // shadows the outer ST; non-null because CT->isStructTy()
+ bool i1d = ST->getName() == "struct._image1d_t";
+ bool i1da = ST->getName() == "struct._image1d_array_t";
+ bool i1db = ST->getName() == "struct._image1d_buffer_t";
+ bool i2d = ST->getName() == "struct._image2d_t";
+ bool i2da = ST->getName() == "struct._image2d_array_t";
+ bool i3d = ST->getName() == "struct._image3d_t";
+ bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+ if (is_image) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, is_image);
+ } else {
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+ ++curReg;
+ }
+ } else {
+ if (count) { // every struct after the first gets a fresh literal carrying the current stackOffset
+ // Anytime we declare a literal, we need to reserve it, if it
+ // is not emitted in emitLiterals.
+ mMFI->addReservedLiterals(1);
+ O << "dcl_literal l" << mMFI->getNumLiterals() << ", "
+ << -stackSize << ", " << stackSize << ", 16, " // stackSize is unsigned, so -stackSize prints its wrapped unsigned value
+ << stackOffset << "\n";
+ }
+ ++count;
+ size_t structSize;
+ structSize = (getTypeSize(ST) + 15) & ~15; // struct size rounded up to a multiple of 16
+ stackOffset += structSize;
+ O << "mov " << RegNames->getRegisterName((curReg)) << ", l"
+ << mMFI->getNumLiterals()<< ".w\n";
+ if (!displaced1) { // printCopyStructPrivate overwrites r1 and r2, so save them once
+ O << "mov r1011, r1\n";
+ displaced1 = true;
+ }
+ if (!displaced2 && strcmp(RegNames->getRegisterName(curReg), "r1")) {
+ O << "mov r1010, r2\n";
+ displaced2 = true;
+ }
+ printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(),
+ Counter);
+ ++curReg;
+ }
+ } else if (CT->isIntOrIntVectorTy()
+ || CT->isFPOrFPVectorTy()
+ || CT->isArrayTy()
+ || CT->isPointerTy()
+ || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+ const kernel& krnl = mGM->getKernel(F->getName());
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+ 1, NULL, 0, false,
+ mGM->usesHWConstant(krnl, I->getName()));
+ } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+ // TODO: If we are region address space, the first region pointer, no
+ // array pointers exist, and hardware RegionMem is enabled then we can
+ // zero out register as the initial offset is zero.
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+ // TODO: If we are local address space, the first local pointer, no
+ // array pointers exist, and hardware LocalMem is enabled then we can
+ // zero out register as the initial offset is zero.
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ } else {
+ printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+ }
+ } else {
+ assert(0 && "Current type is not supported!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++curReg;
+ }
+ } else {
+ assert(0 && "Current type is not supported!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+ ++curReg;
+ }
+ }
+ if (displaced1) { // restore the registers saved before the struct copies
+ O << "mov r1, r1011\n";
+ }
+ if (displaced2) {
+ O << "mov r2, r1010\n";
+ }
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ const kernel& krnl = mGM->getKernel(F->getName());
+ uint32_t constNum = 0;
+ for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) {
+ if (krnl.constSizes[x]) {
+ O << "dcl_cb cb" << x + CB_BASE_OFFSET;
+ O << "[" << (((krnl.constSizes[x] + 15) & ~15) >> 4) << "]\n"; // size rounded up to 16, expressed in 16-byte units
+ ++constNum;
+ mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+ }
+ }
+ // TODO: If we run out of constant resources, we need to push some of the
+ // constant pointers to the software emulated section.
+ if (constNum > mSTM->device()->getMaxNumCBs()) { // NOTE(review): constNum is bounded by the loop above, so this looks unreachable unless getMaxNumCBs() changes between calls
+ assert(0 && "Max constant buffer limit passed!");
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
+ }
+ }
+}
+
+// Map an LLVM type to the short type-name string used by the kernel manager.
+// Arrays, vectors and pointers are named after their element type, function
+// types after their return type. A type with no known name is dumped, recorded
+// on mMFI as UNKNOWN_TYPE_NAME and reported as "unknown".
+// The symTab argument is accepted but never consulted.
+const char *
+AMDILKernelManager::getTypeName(const Type *ptr, const char *symTab)
+{
+  (void)symTab;
+
+  // Peel wrapper types until a directly nameable type is reached.
+  const Type *cur = ptr;
+  for (;;) {
+    switch (cur->getTypeID()) {
+    case Type::ArrayTyID:
+      cur = cast<ArrayType>(cur)->getElementType();
+      continue;
+    case Type::VectorTyID:
+      cur = cast<VectorType>(cur)->getElementType();
+      continue;
+    case Type::PointerTyID:
+      cur = cast<PointerType>(cur)->getElementType();
+      continue;
+    case Type::FunctionTyID:
+      cur = cast<FunctionType>(cur)->getReturnType();
+      continue;
+    default:
+      break;
+    }
+    break;
+  }
+
+  switch (cur->getTypeID()) {
+  case Type::FloatTyID:
+    return "float";
+
+  case Type::DoubleTyID: {
+    // Doubles are still named, but an error is recorded when the device has
+    // no hardware double support.
+    const AMDILSubtarget *subtarget = mTM->getSubtargetImpl();
+    if (!subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps)) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[DOUBLE_NOT_SUPPORTED]);
+    }
+    return "double";
+  }
+
+  case Type::IntegerTyID:
+    // Only the 8/16/32/64-bit widths have names; any other width falls
+    // through to the "unknown" path below.
+    if (cur->isIntegerTy(8)) {
+      return "i8";
+    }
+    if (cur->isIntegerTy(16)) {
+      return "i16";
+    }
+    if (cur->isIntegerTy(32)) {
+      return "i32";
+    }
+    if (cur->isIntegerTy(64)) {
+      return "i64";
+    }
+    break;
+
+  case Type::StructTyID: {
+    const StructType *ST = cast<StructType>(cur);
+    if (!ST->isOpaque()) {
+      return "struct";
+    }
+    // An opaque struct is recognised by name (pre-LLVM 3.0 "opaque" types).
+    static const struct {
+      const char *llvmName;
+      const char *typeName;
+    } opaqueNames[] = {
+      { "struct._event_t",         "event" },
+      { "struct._image1d_t",       "image1d" },
+      { "struct._image1d_array_t", "image1d_array" },
+      { "struct._image2d_t",       "image2d" },
+      { "struct._image2d_array_t", "image2d_array" },
+      { "struct._image3d_t",       "image3d" },
+      { "struct._counter32_t",     "counter32" },
+      { "struct._counter64_t",     "counter64" }
+    };
+    const StringRef name = ST->getName();
+    const size_t numNames = sizeof(opaqueNames) / sizeof(opaqueNames[0]);
+    for (size_t i = 0; i < numNames; ++i) {
+      if (name.equals(opaqueNames[i].llvmName)) {
+        return opaqueNames[i].typeName;
+      }
+    }
+    return "opaque";
+  }
+
+  default:
+    break;
+  }
+
+  cur->dump();
+  mMFI->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]);
+  return "unknown";
+}
+
+// Write one "dcl_literal" line to O for every 32-, 64- and 128-bit literal
+// recorded on mMFI. Each line carries four 32-bit hex components followed by
+// a ';' trailer that repeats the literal value in decimal.
+void AMDILKernelManager::emitLiterals(OSTREAM_TYPE &O) {
+  char text[256];
+
+  // 32-bit literals: the same value fills all four components.
+  typedef std::map<uint32_t, uint32_t>::iterator Lit32Iter;
+  for (Lit32Iter it = mMFI->begin_32(), end = mMFI->end_32();
+       it != end; ++it) {
+    const uint32_t bits = it->first;
+    sprintf(text, "0x%08x, 0x%08x, 0x%08x, 0x%08x", bits, bits, bits, bits);
+    O << "dcl_literal l" << it->second << ", "
+      << text << "; f32:i32 " << it->first << "\n";
+  }
+
+  // 64-bit literals: the two 32-bit halves (in memory order) appear twice.
+  typedef std::map<uint64_t, uint32_t>::iterator Lit64Iter;
+  for (Lit64Iter it = mMFI->begin_64(), end = mMFI->end_64();
+       it != end; ++it) {
+    const uint64_t bits = it->first;
+    uint32_t half[2];
+    memcpy(half, &bits, sizeof(bits));
+    sprintf(text, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f64:i64 ",
+            half[0], half[1], half[0], half[1]);
+    O << "dcl_literal l" << it->second << ", "
+      << text << it->first << "\n";
+  }
+
+  // 128-bit literals: stored as a pair of 64-bit values, emitted as four
+  // 32-bit words in memory order.
+  typedef std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator Lit128Iter;
+  for (Lit128Iter it = mMFI->begin_128(), end = mMFI->end_128();
+       it != end; ++it) {
+    const uint64_t first = it->first.first;
+    const uint64_t second = it->first.second;
+    uint32_t word[4];
+    memcpy(&word[0], &first, sizeof(first));
+    memcpy(&word[2], &second, sizeof(second));
+    sprintf(text, "0x%08x, 0x%08x, 0x%08x, 0x%08x; f128:i128 ",
+            word[0], word[1], word[2], word[3]);
+    // NOTE(review): the two 64-bit values are printed back to back with no
+    // separator between them; confirm this trailer format is intended.
+    O << "dcl_literal l" << it->second << ", "
+      << text << it->first.first << it->first.second << "\n";
+  }
+}
+
+// Record ID in mValueIDMap as the UAV id of the given pointer value.
+// A NULL value is ignored and leaves the map untouched.
+void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) {
+  if (!value) {
+    return;
+  }
+  mValueIDMap[value] = ID;
+}
+
+// Return the UAV id recorded for value in mValueIDMap. When no id has been
+// recorded, fall back to a device resource id: the arena UAV id for device
+// generations up to and including HD6XXX, the raw UAV id for later ones.
+uint32_t AMDILKernelManager::getUAVID(const Value *value) {
+  std::map<const Value *, uint32_t>::iterator known = mValueIDMap.find(value);
+  if (known != mValueIDMap.end()) {
+    return known->second;
+  }
+
+  if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+  }
+  return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILKernelManager.h b/src/gallium/drivers/radeon/AMDILKernelManager.h
new file mode 100644
index 00000000000..7df1515fa59
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILKernelManager.h
@@ -0,0 +1,216 @@
+//==-----------------------------------------------------------------------===//
+//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// @file AMDILKernelManager.h
+// @details Class that handles the metadata/abi management for the
+// ASM printer. Handles the parsing and generation of the metadata
+// for each kernel and keeps track of its arguments.
+//
+#ifndef _AMDILKERNELMANAGER_H_
+#define _AMDILKERNELMANAGER_H_
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <string>
+#include <set>
+#include <map>
+#define IMAGETYPE_2D 0
+#define IMAGETYPE_3D 1
+#define RESERVED_LIT_COUNT 6
+
+namespace llvm {
+class AMDILGlobalManager;
+class AMDILSubtarget;
+class AMDILMachineFunctionInfo;
+class AMDILTargetMachine;
+class AMDILAsmPrinter;
+class StructType;
+class Value;
+class TypeSymbolTable;
+class MachineFunction;
+class MachineInstr;
+class ConstantFP;
+class PrintfInfo;
+
+
+/// Handles the metadata/ABI management for the ASM printer: keeps the
+/// per-kernel state (name, id, flags, UAV ids) and prints the declarations,
+/// argument copies, literals and metadata of the current kernel/function.
+class AMDILKernelManager {
+public:
+ typedef enum {
+ RELEASE_ONLY,
+ DEBUG_ONLY,
+ ALWAYS
+ } ErrorMsgEnum;
+ AMDILKernelManager(AMDILTargetMachine *TM, AMDILGlobalManager *GM);
+ virtual ~AMDILKernelManager();
+
+ /// Clear the state of the KernelManager putting it in its most initial state.
+ void clear();
+ /// Set the machine function that the manager operates on.
+ void setMF(MachineFunction *MF);
+
+ /// Process the specific kernel parsing out the parameter information for the
+ /// kernel.
+ void processArgMetadata(OSTREAM_TYPE &O,
+ uint32_t buf, bool kernel);
+
+
+ /// Prints the header for the kernel which includes the groupsize declaration
+ /// and calculation of the local/group/global id's.
+ void printHeader(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O,
+ const std::string &name);
+
+ virtual void printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O);
+ virtual void printGroupSize(OSTREAM_TYPE &O);
+
+ /// Copies the data from the runtime setup constant buffers into registers so
+ /// that the program can correctly access memory or data that was set by the
+ /// host program.
+ void printArgCopies(OSTREAM_TYPE &O, AMDILAsmPrinter* RegNames);
+
+ /// Prints out the end of the function.
+ void printFooter(OSTREAM_TYPE &O);
+
+ /// Prints out the metadata for the specific function depending if it is a
+ /// kernel or not.
+ void printMetaData(OSTREAM_TYPE &O, uint32_t id, bool isKernel = false);
+
+ /// Set bool value on whether to consider the function a kernel or a normal
+ /// function.
+ void setKernel(bool kernel);
+
+ /// Set the unique ID of the kernel/function.
+ void setID(uint32_t id);
+
+ /// Set the name of the kernel/function.
+ void setName(const std::string &name);
+
+ /// Flag to specify whether the function is a kernel or not.
+ bool isKernel();
+
+ /// Flag that specifies whether this function has a kernel wrapper.
+ bool wasKernel();
+
+ void getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O);
+
+ // Returns whether a compiler needs to insert a write to memory or not.
+ bool useCompilerWrite(const MachineInstr *MI);
+
+ // Set the flag that there exists an image write.
+ void setImageWrite();
+ void setOutputInst();
+
+ // Returns a short name string for the given type ("float", "i32",
+ // "struct", "image2d", ...), or "unknown" when the type has no name.
+ // The symTab argument is ignored by the implementation.
+ const char *getTypeName(const Type *name, const char * symTab);
+
+ // Prints a dcl_literal declaration for every 32-, 64- and 128-bit literal
+ // tracked by the machine function info.
+ void emitLiterals(OSTREAM_TYPE &O);
+
+ // Set the uav id for the specific pointer value. The implementation in
+ // AMDILKernelManager.cpp ignores a NULL value; it does not set a default ID.
+ void setUAVID(const Value *value, uint32_t ID);
+
+ // Get the UAV id for the specific pointer value. Values without a recorded
+ // id get a device resource id chosen by device generation.
+ uint32_t getUAVID(const Value *value);
+
+private:
+
+ /// Helper function that prints the actual metadata and should only be called
+ /// by printMetaData.
+ void printKernelArgs(OSTREAM_TYPE &O);
+ void printCopyStructPrivate(const StructType *ST,
+ OSTREAM_TYPE &O,
+ size_t stackSize,
+ uint32_t Buffer,
+ uint32_t mLitIdx,
+ uint32_t &counter);
+ virtual void
+ printConstantToRegMapping(AMDILAsmPrinter *RegNames,
+ uint32_t &LII,
+ OSTREAM_TYPE &O,
+ uint32_t &counter,
+ uint32_t Buffer,
+ uint32_t n,
+ const char *lit = NULL,
+ uint32_t fcall = 0,
+ bool isImage = false,
+ bool isHWCB = false);
+ void updatePtrArg(llvm::Function::const_arg_iterator Ip,
+ int numWriteImages,
+ int raw_uav_buffer,
+ int counter,
+ bool isKernel,
+ const Function *F);
+ /// Name of the current kernel.
+ std::string mName;
+ uint32_t mUniqueID;
+ bool mIsKernel;
+ bool mWasKernel;
+ bool mCompilerWrite;
+ /// Flag to specify if an image write has occurred or not in order to not add a
+ /// compiler specific write if no other writes to memory occurred.
+ bool mHasImageWrite;
+ bool mHasOutputInst;
+
+ /// Map from const Value * to UAV ID.
+ std::map<const Value *, uint32_t> mValueIDMap;
+
+ AMDILTargetMachine * mTM;
+ const AMDILSubtarget * mSTM;
+ AMDILGlobalManager * mGM;
+ /// NOTE(review): the comment previously placed here described a "global
+ /// offset of the printf string id's", but no such member follows it.
+ /// Current machine function (presumably the one given to setMF; confirm)
+ /// and the AMDIL function info used for literals and error messages.
+ MachineFunction *mMF;
+ AMDILMachineFunctionInfo *mMFI;
+}; // class AMDILKernelManager
+
+} // llvm namespace
+#endif // _AMDILKERNELMANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILLLVMPC.h b/src/gallium/drivers/radeon/AMDILLLVMPC.h
new file mode 100644
index 00000000000..64039a9a18a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILLLVMPC.h
@@ -0,0 +1,61 @@
+//===--- PC version of macros to work around multiple llvm versions ---===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_LLVM_PC_H_
+#define _AMDIL_LLVM_PC_H_
+#include <string>
+// Fall back to 3000 when the build does not define LLVM_VERSION. Other files
+// in this patch branch on the value (e.g. "#if LLVM_VERSION >= 2500").
+#if !defined(LLVM_VERSION)
+#define LLVM_VERSION 3000
+#endif
+// NOTE(review): no use of the four macros below is visible in this part of the
+// patch; confirm their intended meaning at their use sites.
+#define ASMPRINTER_KERNEL_NAME kernelName
+#define ASMPRINTER_RETURN_TYPE static AsmPrinter*
+#define LINEAR_MASK 0x20
+#define NORM_MASK 0x1
+
+#endif // _AMDIL_LLVM_PC_H_
diff --git a/src/gallium/drivers/radeon/AMDILLLVMVersion.h b/src/gallium/drivers/radeon/AMDILLLVMVersion.h
new file mode 100644
index 00000000000..066685bb11e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILLLVMVersion.h
@@ -0,0 +1,60 @@
+//===--- Macros that deal with the different versions of LLVM ---===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDIL_LLVM_VERSION_H_
+#define _AMDIL_LLVM_VERSION_H_
+
+// Macro that expands into the correct type for output streams. Declarations
+// such as AMDILKernelManager::emitLiterals(OSTREAM_TYPE &O) take the stream
+// by reference through this name.
+#define OSTREAM_TYPE llvm::raw_ostream
+
+// AMDILAsmPrinter.cpp macros: the first expands to a parameter list, the
+// second to the matching argument names (TM, Streamer) for forwarding.
+#define AMDIL_ASM_PRINTER_ARGUMENTS TargetMachine& TM, MCStreamer &Streamer
+#define ASM_PRINTER_ARGUMENTS TM, Streamer
+
+#endif // _AMDIL_LLVM_VERSION_H_
diff --git a/src/gallium/drivers/radeon/AMDILLiteralManager.cpp b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp
new file mode 100644
index 00000000000..96376c5c348
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILLiteralManager.cpp
@@ -0,0 +1,171 @@
+//===--- AMDILLiteralManager.cpp - AMDIL Literal Manager Pass --*- C++ -*--===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "literal_manager"
+#include "AMDIL.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+
+// AMDIL Literal Manager traverses through all of the LOADCONST instructions and
+// converts them from an immediate value to the literal index. The literal index
+// is valid IL, but the immediate values are not. The Immediate values must be
+// aggregated and declared for clarity and to reduce the number of literals that
+// are used. It is also illegal to declare the same literal twice, so this keeps
+// that from occurring.
+
+namespace {
+ // Machine function pass that visits the LOADCONST instructions of a function
+ // and replaces each immediate operand with the index of a literal registered
+ // on the function's AMDILMachineFunctionInfo.
+ class AMDILLiteralManager : public MachineFunctionPass {
+ public:
+ // Pass identification, handed to the MachineFunctionPass base constructor.
+ static char ID;
+ AMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL);
+ virtual const char *getPassName() const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ private:
+ // Per-instruction callback handed to safeNestedForEach.
+ bool trackLiterals(MachineBasicBlock::iterator *bbb);
+ TargetMachine &TM;
+ // The three pointers below are reassigned at the start of every
+ // runOnMachineFunction call.
+ const AMDILSubtarget *mSTM;
+ AMDILKernelManager *mKM;
+ AMDILMachineFunctionInfo *mMFI;
+ // Not referenced by any of the member functions defined in this file.
+ int32_t mLitIdx;
+ // Value returned by runOnMachineFunction; reset to false on entry.
+ bool mChanged;
+ };
+ char AMDILLiteralManager::ID = 0;
+}
+
+namespace llvm {
+  // Factory for the AMDIL literal manager pass: returns a newly allocated
+  // AMDILLiteralManager built from the given target machine and opt level.
+  FunctionPass *
+  createAMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL) {
+    FunctionPass *literalPass = new AMDILLiteralManager(tm, OL);
+    return literalPass;
+  }
+
+}
+
+// Construct the pass for target machine tm. The optimization level OL is
+// accepted for interface compatibility but not used. Every data member is
+// given a defined initial value; the pointers are filled in by
+// runOnMachineFunction.
+AMDILLiteralManager::AMDILLiteralManager(TargetMachine &tm,
+                                         CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+  : MachineFunctionPass(ID),
+#else
+  : MachineFunctionPass((intptr_t)&ID),
+#endif
+    TM(tm),
+    mSTM(NULL),
+    mKM(NULL),
+    mMFI(NULL),
+    mLitIdx(0),
+    mChanged(false) {
+  (void)OL;
+}
+
+// Visit every instruction of MF through trackLiterals and return mChanged.
+// The cached function info, subtarget and kernel manager pointers are
+// refreshed first.
+bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF) {
+  mChanged = false;
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  // The pass assumes TM is an AMDILTargetMachine; static_cast performs the
+  // proper base-to-derived pointer conversion.
+  const AMDILTargetMachine *amdtm =
+    static_cast<const AMDILTargetMachine *>(&TM);
+  // getSubtargetImpl() on an AMDILTargetMachine already yields a
+  // const AMDILSubtarget *, so no further cast is required.
+  mSTM = amdtm->getSubtargetImpl();
+  mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager());
+  // With no basic blocks there is nothing to visit, and MF.begin()->begin()
+  // below would dereference the end iterator.
+  if (MF.empty()) {
+    return mChanged;
+  }
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+      std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this));
+  return mChanged;
+}
+
+// Callback invoked for each instruction. For a LOADCONST whose operand 0 is a
+// register and whose operand 1 is an integer or FP immediate, the immediate is
+// registered as a literal on mMFI and operand 1 is replaced by the literal's
+// index. mChanged is set whenever an operand is rewritten, so that
+// runOnMachineFunction reports the modification.
+// Always returns false. NOTE(review): the meaning of the return value is
+// defined by safeNestedForEach, which is not visible here.
+bool AMDILLiteralManager::trackLiterals(MachineBasicBlock::iterator *bbb) {
+  MachineInstr *MI = *bbb;
+  const uint32_t Opcode = MI->getOpcode();
+  switch (Opcode) {
+    default:
+      return false;
+    case AMDIL::LOADCONST_i8:
+    case AMDIL::LOADCONST_i16:
+    case AMDIL::LOADCONST_i32:
+    case AMDIL::LOADCONST_i64:
+    case AMDIL::LOADCONST_f32:
+    case AMDIL::LOADCONST_f64:
+      break;
+  }
+  MachineOperand &dstOp = MI->getOperand(0);
+  MachineOperand &litOp = MI->getOperand(1);
+  if (!dstOp.isReg()) {
+    return false;
+  }
+  // Change the literal to the correct index for each literal that is found.
+  if (litOp.isImm()) {
+    const int64_t immVal = litOp.getImm();
+    // Only LOADCONST_i64 keeps the full 64 bits; the other integer forms are
+    // registered as 32-bit literals.
+    const uint32_t idx = Opcode == AMDIL::LOADCONST_i64
+      ? mMFI->addi64Literal(immVal)
+      : mMFI->addi32Literal(static_cast<int>(immVal), Opcode);
+    litOp.ChangeToImmediate(idx);
+    mChanged = true;
+  } else if (litOp.isFPImm()) {
+    const ConstantFP *fpVal = litOp.getFPImm();
+    const uint32_t idx = Opcode == AMDIL::LOADCONST_f64
+      ? mMFI->addf64Literal(fpVal)
+      : mMFI->addf32Literal(fpVal);
+    litOp.ChangeToImmediate(idx);
+    mChanged = true;
+  }
+
+  return false;
+}
+
+// Human-readable name of this pass. The previous string, "AMDIL Constant
+// Propagation", did not describe the literal manager.
+const char* AMDILLiteralManager::getPassName() const {
+  return "AMDIL Literal Manager";
+}
+
+
diff --git a/src/gallium/drivers/radeon/AMDILMCAsmInfo.cpp b/src/gallium/drivers/radeon/AMDILMCAsmInfo.cpp
new file mode 100644
index 00000000000..28a4ae2a6ea
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMCAsmInfo.cpp
@@ -0,0 +1,169 @@
+//====-- AMDILMCAsmInfo.cpp - AMD IL asm properties --====//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILMCAsmInfo.h"
+#include "AMDILLLVMPC.h"
+#ifndef NULL
+#define NULL 0
+#endif
+
+using namespace llvm;
+AMDILMCAsmInfo::AMDILMCAsmInfo(const Triple &Triple) : MCAsmInfo() // Sets the assembler-syntax fields used for AMDIL output; the Triple argument is not read.
+{
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+#if LLVM_VERSION >= 2500
+ HasMachoTBSSDirective = false;
+#endif
+ HasStaticCtorDtorReferenceInStaticMode = false;
+#if LLVM_VERSION >= 2500
+ LinkerRequiresNonEmptyDwarfLines = true;
+#endif
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n"; // statements are separated by newlines
+ CommentColumn = 40;
+ CommentString = ";"; // ';' introduces a comment
+#if LLVM_VERSION >= 2500
+ LabelSuffix = ":";
+#endif
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";."; // begins with the comment string ';'
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART"; // inline-asm markers are written as comments
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+#if LLVM_VERSION >= 2500
+ AllowPeriodsInName = false;
+#endif
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = NULL; // no GP-relative directive
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+#if LLVM_VERSION >= 2500
+ // TODO: This makes the symbol definition have the math instead
+ // of the symbol use. This could be disabled and handled as it
+ // would simplify the patching code in AMDILMDParser.cpp.
+ HasAggressiveSymbolFolding = true;
+ LCOMMDirectiveType = LCOMM::None;
+#endif
+ COMMDirectiveAlignmentIsInBytes = false;
+ // TODO: This generates .type @__OpenCL_<name>_kernel,@function
+ // and .size @__OpenCL_<name>_kernel, ;.<tmp>-@__OpenCL_<name>_kernel,
+ // which is not handled in AMDILMDParser.cpp.
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = true;
+ HasNoDeadStrip = true;
+#if LLVM_VERSION >= 2500
+ HasSymbolResolver = false;
+#endif
+ WeakRefDirective = ".weakref\t";
+ WeakDefDirective = ".weakdef\t";
+ LinkOnceDirective = NULL; // no link-once directive
+ HiddenVisibilityAttr = MCSA_Hidden;
+#if LLVM_VERSION >= 2500
+ HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+#endif
+ ProtectedVisibilityAttr = MCSA_Protected;
+
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+#if LLVM_VERSION < 2500
+ HasDotLocAndDotFile = false;
+#endif
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::None; // no exception-handling tables
+ DwarfUsesInlineInfoSection = false;
+ DwarfSectionOffsetDirective = ".offset";
+#if LLVM_VERSION >= 2500
+ DwarfUsesLabelOffsetForRanges = true;
+#endif
+
+ //===--- CBE Asm Translation Table -----------------------------------===//
+ AsmTransCBE = NULL; // no translation table
+}
+// Returns the data directive to use for values of Size bits in address
+// space AS. No address space has a dedicated directive, so the answer is
+// NULL for every input.
+const char*
+AMDILMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
+{
+  (void)Size;
+  (void)AS;
+  return NULL;
+}
+
+MCSection*
+AMDILMCAsmInfo::getNonexecutableStackSection(MCContext &CTX)
+{ // Always returns NULL; CTX is not used.
+ return NULL;
+}
diff --git a/src/gallium/drivers/radeon/AMDILMCAsmInfo.h b/src/gallium/drivers/radeon/AMDILMCAsmInfo.h
new file mode 100644
index 00000000000..9fec293ee61
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMCAsmInfo.h
@@ -0,0 +1,68 @@
+//=====-- AMDILMCAsmInfo.h - AMDIL Asm properties --=====//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+
+#ifndef AMDILMCASMINFO_H_
+#define AMDILMCASMINFO_H_
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "AMDILLLVMPC.h"
+namespace llvm {
+ class Triple;
+
+ class AMDILMCAsmInfo : public MCAsmInfo { // Assembly-syntax settings for the AMDIL target.
+ public:
+ AMDILMCAsmInfo(const Triple &Triple); // Fills in the AMDIL assembler settings.
+ const char*
+ getDataASDirective(unsigned int Size, unsigned int AS) const; // Data directive for address space AS; the definition returns NULL for every input.
+ MCSection* getNonexecutableStackSection(MCContext &CTX); // The definition always returns NULL.
+ };
+} // namespace llvm
+#endif // AMDILMCASMINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp
new file mode 100644
index 00000000000..480593e929a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp
@@ -0,0 +1,200 @@
+//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdil-emitter"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+using namespace llvm;
+#if 0
+namespace {
+ class AMDILMCCodeEmitter : public MCCodeEmitter { // MCCodeEmitter subclass for AMDIL; this whole file section is compiled out by the enclosing #if 0.
+ AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
+ void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII; // initialized from TM.getInstrInfo() in the constructor
+ MCContext &Ctx;
+ bool Is64BitMode;
+ public:
+ AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
+ ~AMDILMCCodeEmitter();
+ unsigned getNumFixupKinds() const; // the definition returns 0
+ const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const;
+ static unsigned GetAMDILRegNum(const MCOperand &MO); // declared only; no definition appears in this file section
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const; // writes one byte and increments CurByte
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const; // writes Size bytes of Val, least significant byte first
+ void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ }; // class AMDILMCCodeEmitter
+}; // anonymous namespace
+
+namespace llvm {
+ MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
+ TargetMachine &TM, MCContext &Ctx)
+ { // Factory: heap-allocates an emitter with is64Bit == false; the result is returned as a raw pointer.
+ return new AMDILMCCodeEmitter(TM, Ctx, false);
+ }
+}
+
+// Binds the emitter to its target machine and MC context, fetches the
+// instruction info from the target machine and records the 64-bit flag.
+AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx
+    , bool is64Bit)
+  : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx), Is64BitMode(is64Bit)
+{
+}
+
+AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
+{ // Nothing to release: the members are references and a bool.
+}
+
+unsigned
+AMDILMCCodeEmitter::getNumFixupKinds() const
+{ // Reports zero target-specific fixup kinds.
+ return 0;
+}
+
+const MCFixupKindInfo &
+AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
+{ // Returns the base-class description of Kind; there is no target-specific table.
+// const static MCFixupKindInfo Infos[] = {};
+ if (Kind < FirstTargetFixupKind) { // generic kinds go straight to the base class
+ return MCCodeEmitter::getFixupKindInfo(Kind);
+ }
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!"); // getNumFixupKinds() returns 0, so this fires for every target-specific kind
+ return MCCodeEmitter::getFixupKindInfo(Kind);
+ // return Infos[Kind - FirstTargetFixupKind];
+
+}
+
+void
+AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
+ raw_ostream &OS) const
+{ // Writes the single byte C to OS and advances the caller's byte counter.
+ OS << (char) C;
+ ++CurByte;
+}
+// Writes the low Size bytes of Val to OS, least significant byte first,
+// advancing CurByte once per byte written.
+void
+AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+    raw_ostream &OS) const
+{
+  unsigned remaining = Size;
+  while (remaining != 0) {
+    EmitByte(Val & 255, CurByte, OS);
+    Val >>= 8;
+    --remaining;
+  }
+}
+// Emits an immediate operand that occupies ImmSize bytes.
+//
+// A plain integer operand is written directly, with ImmOffset added to it.
+// Any other operand is treated as an expression: ImmOffset (when non-zero)
+// is folded into the expression, a fixup of kind FixupKind is recorded at
+// the current byte position, and ImmSize zero bytes are written as a
+// placeholder.
+void
+AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
+    MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+    SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
+{
+  // If this is a simple integer displacement that doesn't require a relocation
+  // emit it now.
+  if (DispOp.isImm()) {
+    EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS);
+    // Done. Falling through would call getExpr() on an immediate operand
+    // and emit the operand a second time as a fixup plus placeholder.
+    return;
+  }
+
+  // If we have an immoffset, add it to the expression
+  const MCExpr *Expr = DispOp.getExpr();
+
+  if (ImmOffset) {
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+        MCConstantExpr::Create(ImmOffset, Ctx), Ctx);
+  }
+  // Emit a symbolic constant as a fixup and ImmSize zero bytes.
+  Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+  // TODO: Why the zero bytes?
+  EmitConstant(0, ImmSize, CurByte, OS);
+}
+
+void
+AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+{ // The entire body is disabled by the inner #if 0, so nothing is emitted.
+#if 0
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ unsigned TSFlags = Desc.TSFlags;
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+
+ unsigned char BaseOpcode = 0;
+#ifndef NDEBUG
+ // FIXME: Verify.
+ if (// !Desc.isVariadic() &&
+ CurOp != NumOps) { // CurOp is still 0 here, so any instruction with operands takes this path
+ errs() << "Cannot encode all operands of: ";
+ MI.dump();
+ errs() << '\n';
+ abort();
+ }
+#endif
+#endif
+}
+#endif
diff --git a/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.cpp
new file mode 100644
index 00000000000..fd96a05a48b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.cpp
@@ -0,0 +1,643 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cstdio>
+#include <ostream>
+#include <algorithm>
+#include <string>
+#include <queue>
+#include <list>
+#include <utility>
+using namespace llvm;
+
+// Looks up the constant-pointer record named `arg` in kernel `krnl`.
+//
+// Returns a pointer to the matching element of krnl->constPtr, or NULL when
+// krnl is NULL or no element carries that name.
+static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg) {
+  // Callers pass mKernel, which the default constructor leaves NULL.
+  if (!krnl) {
+    return NULL;
+  }
+  llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
+  for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end();
+      begin != end; ++begin) {
+    if (!strcmp(begin->name.data(),arg.c_str())) {
+      return &(*begin);
+    }
+  }
+  return NULL;
+}
+
+// Records `size` as the entry for operand number `idx`.
+//
+// The operand list is grown when idx lies past its end. It is never shrunk,
+// so storing a lower index after a higher one keeps the later entries.
+void PrintfInfo::addOperand(size_t idx, uint32_t size) {
+  const unsigned slot = (unsigned)idx;
+  if (slot >= mOperands.size()) {
+    mOperands.resize(slot + 1);
+  }
+  mOperands[slot] = size;
+}
+
+uint32_t PrintfInfo::getPrintfID() { // Returns the ID last stored by setPrintfID().
+ return mPrintfID;
+}
+
+void PrintfInfo::setPrintfID(uint32_t id) { // Stores id as this printf's ID.
+ mPrintfID = id;
+}
+
+size_t PrintfInfo::getNumOperands() { // Returns the current length of the operand list.
+ return mOperands.size();
+}
+
+uint32_t PrintfInfo::getOperandID(uint32_t idx) { // Returns the value stored for operand idx; idx is not range-checked here.
+ return mOperands[idx];
+}
+
+// Default constructor: builds an info object that is not attached to any
+// machine function.
+//
+// All counters start at 0, all flags at false, nine literal slots are
+// reserved, and the cached scratch/argument/stack sizes are set to -1
+// ("not computed yet"). The function, kernel and subtarget pointers are
+// NULL, so the queries that dereference them must not be used on such an
+// object.
+AMDILMachineFunctionInfo::AMDILMachineFunctionInfo()
+  : CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
+  DecorationStyle(None), ReturnAddrIndex(0),
+  TailCallReturnAddrDelta(0),
+  SRetReturnReg(0), UsesLDS(false), LDSArg(false),
+  UsesGDS(false), GDSArg(false),
+  mReservedLits(9)
+{
+  for (uint32_t x = 0; x < AMDILDevice::MAX_IDS; ++x) {
+    mUsedMem[x] = false;
+  }
+  mMF = NULL;
+  mKernel = NULL;
+  // Previously left uninitialized; getLocal() reads this pointer.
+  mSTM = NULL;
+  mScratchSize = -1;
+  mArgSize = -1;
+  mStackSize = -1;
+}
+
+AMDILMachineFunctionInfo::AMDILMachineFunctionInfo(MachineFunction& MF) // Builds the info object for MF and looks up the kernel record for its function.
+ : CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
+ DecorationStyle(None), ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0), UsesLDS(false), LDSArg(false),
+ UsesGDS(false), GDSArg(false),
+ mReservedLits(9)
+{
+ for (uint32_t x = 0; x < AMDILDevice::MAX_IDS; ++x) {
+ mUsedMem[x] = false;
+ }
+ const Function *F = MF.getFunction();
+ mMF = &MF;
+ MachineModuleInfo &mmi = MF.getMMI();
+ const AMDILTargetMachine *TM =
+ reinterpret_cast<const AMDILTargetMachine*>(&MF.getTarget()); // NOTE(review): assumes the target machine really is an AMDILTargetMachine -- no check is made
+ AMDILModuleInfo *AMI = &(mmi.getObjFileInfo<AMDILModuleInfo>());
+ AMI->processModule(mmi.getModule(), TM); // called before the kernel lookup below
+ mSTM = TM->getSubtargetImpl();
+ mKernel = AMI->getKernel(F->getName()); // other code in this file checks mKernel against NULL
+
+ mScratchSize = -1; // -1 marks the cached sizes as not yet computed
+ mArgSize = -1;
+ mStackSize = -1;
+}
+
+// Frees every PrintfInfo object referenced from the printf table (they are
+// allocated with new in addPrintfString()).
+AMDILMachineFunctionInfo::~AMDILMachineFunctionInfo()
+{
+  std::map<std::string, PrintfInfo*>::iterator cur = printf_begin();
+  std::map<std::string, PrintfInfo*>::iterator last = printf_end();
+  while (cur != last) {
+    delete cur->second;
+    ++cur;
+  }
+}
+unsigned int
+AMDILMachineFunctionInfo::getCalleeSavedFrameSize() const
+{ // Returns the stored callee-saved frame size (0 after construction).
+ return CalleeSavedFrameSize;
+}
+void
+AMDILMachineFunctionInfo::setCalleeSavedFrameSize(unsigned int bytes)
+{ // Stores bytes as the callee-saved frame size.
+ CalleeSavedFrameSize = bytes;
+}
+unsigned int
+AMDILMachineFunctionInfo::getBytesToPopOnReturn() const
+{ // Returns the stored bytes-to-pop-on-return value (0 after construction).
+ return BytesToPopOnReturn;
+}
+void
+AMDILMachineFunctionInfo::setBytesToPopOnReturn(unsigned int bytes)
+{ // Stores bytes as the bytes-to-pop-on-return value.
+ BytesToPopOnReturn = bytes;
+}
+NameDecorationStyle
+AMDILMachineFunctionInfo::getDecorationStyle() const
+{ // Returns the stored name decoration style (None after construction).
+ return DecorationStyle;
+}
+void
+AMDILMachineFunctionInfo::setDecorationStyle(NameDecorationStyle style)
+{ // Stores style as the name decoration style.
+ DecorationStyle = style;
+}
+int
+AMDILMachineFunctionInfo::getRAIndex() const
+{ // Returns the stored return-address index (0 after construction).
+ return ReturnAddrIndex;
+}
+void
+AMDILMachineFunctionInfo::setRAIndex(int index)
+{ // Stores index as the return-address index.
+ ReturnAddrIndex = index;
+}
+int
+AMDILMachineFunctionInfo::getTCReturnAddrDelta() const
+{ // Returns the stored tail-call return-address delta (0 after construction).
+ return TailCallReturnAddrDelta;
+}
+void
+AMDILMachineFunctionInfo::setTCReturnAddrDelta(int delta)
+{ // Stores delta as the tail-call return-address delta.
+ TailCallReturnAddrDelta = delta;
+}
+unsigned int
+AMDILMachineFunctionInfo::getSRetReturnReg() const
+{ // Returns the stored sret return register number (0 after construction).
+ return SRetReturnReg;
+}
+void
+AMDILMachineFunctionInfo::setSRetReturnReg(unsigned int reg)
+{ // Stores reg as the sret return register number.
+ SRetReturnReg = reg;
+}
+
+void
+AMDILMachineFunctionInfo::setUsesLocal()
+{ // Sets the UsesLDS flag; there is no way to clear it again here.
+ UsesLDS = true;
+}
+
+bool
+AMDILMachineFunctionInfo::usesLocal() const
+{ // Returns the UsesLDS flag (true once setUsesLocal() has been called).
+ return UsesLDS;
+}
+
+void
+AMDILMachineFunctionInfo::setHasLocalArg()
+{ // Sets the LDSArg flag.
+ LDSArg = true;
+}
+
+bool
+AMDILMachineFunctionInfo::hasLocalArg() const
+{ // Returns the LDSArg flag (true once setHasLocalArg() has been called).
+ return LDSArg;
+}
+
+
+
+void
+AMDILMachineFunctionInfo::setUsesRegion()
+{ // Sets the UsesGDS flag.
+ UsesGDS = true;
+}
+
+bool
+AMDILMachineFunctionInfo::usesRegion() const
+{ // Returns the UsesGDS flag (true once setUsesRegion() has been called).
+ return UsesGDS;
+}
+
+void
+AMDILMachineFunctionInfo::setHasRegionArg()
+{ // Sets the GDSArg flag.
+ GDSArg = true;
+}
+
+bool
+AMDILMachineFunctionInfo::hasRegionArg() const
+{ // Returns the GDSArg flag (true once setHasRegionArg() has been called).
+ return GDSArg;
+}
+
+
+// Reports whether the constant pointer called `name` in the current kernel
+// is flagged as using hardware constant memory. A name without a
+// constant-pointer record yields false.
+bool
+AMDILMachineFunctionInfo::usesHWConstant(std::string name) const
+{
+  const AMDILConstPtr *entry = getConstPtr(mKernel, name);
+  if (!entry) {
+    return false;
+  }
+  return entry->usesHardware;
+}
+
+// Returns the work-group size for `dim`.
+//   dim 0..2 : size of that dimension
+//   dim 3    : product of the three dimension sizes
+//   other    : 1
+// The sizes come from the kernel's reqGroupSize values when both the kernel
+// and its sgv record exist, and from the subtarget's default sizes
+// otherwise.
+uint32_t
+AMDILMachineFunctionInfo::getLocal(uint32_t dim)
+{
+  if (dim > 3) {
+    return 1;
+  }
+  if (mKernel && mKernel->sgv) {
+    AMDILKernelAttr *attr = mKernel->sgv;
+    if (dim == 3) {
+      return attr->reqGroupSize[0] * attr->reqGroupSize[1] * attr->reqGroupSize[2];
+    }
+    return attr->reqGroupSize[dim];
+  }
+  if (dim == 3) {
+    return mSTM->getDefaultSize(0) *
+           mSTM->getDefaultSize(1) *
+           mSTM->getDefaultSize(2);
+  }
+  return mSTM->getDefaultSize(dim);
+}
+bool
+AMDILMachineFunctionInfo::isKernel() const
+{ // True only when a kernel record exists and its mKernel field is set.
+ return mKernel != NULL && mKernel->mKernel;
+}
+
+AMDILKernel*
+AMDILMachineFunctionInfo::getKernel()
+{ // Returns the kernel record pointer; it is NULL for a default-constructed object.
+ return mKernel;
+}
+
+// Returns the name of the function this info object is attached to, or an
+// empty string when it is not attached to any machine function.
+std::string
+AMDILMachineFunctionInfo::getName()
+{
+  if (!mMF) {
+    return "";
+  }
+  return mMF->getFunction()->getName();
+}
+
+uint32_t
+AMDILMachineFunctionInfo::getArgSize()
+{ // Returns the argument size in bytes (slot count * 16); computed on the first call and cached in mArgSize.
+ if (mArgSize == -1) { // -1 means "not computed yet"
+ Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+ Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+ uint32_t Counter = 0; // number of 16-byte slots
+ while (I != Ie) {
+ Type* curType = I->getType();
+ if (curType->isIntegerTy() || curType->isFloatingPointTy()) { // scalars take one slot
+ ++Counter;
+ } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
+ Type *ET = VT->getElementType();
+ int numEle = VT->getNumElements();
+ switch (ET->getPrimitiveSizeInBits()) {
+ default: // element sizes other than 64, 16 and 8 bits
+ if (numEle == 3) {
+ Counter++;
+ } else {
+ Counter += ((numEle + 2) >> 2);
+ }
+ break;
+ case 64:
+ if (numEle == 3) {
+ Counter += 2;
+ } else {
+ Counter += (numEle >> 1);
+ }
+ break;
+ case 16:
+ case 8:
+ switch (numEle) {
+ default:
+ Counter += ((numEle + 2) >> 2); // NOTE(review): no break, so this falls through into case 2 and adds one more slot -- confirm this is intended
+ case 2:
+ Counter++;
+ break;
+ }
+ break;
+ }
+ } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+ Type *CT = PT->getElementType();
+ const StructType *ST = dyn_cast<StructType>(CT);
+ if (ST && ST->isOpaque()) { // opaque structs: images are recognized by struct name
+ bool i1d = ST->getName() == "struct._image1d_t";
+ bool i1da = ST->getName() == "struct._image1d_array_t";
+ bool i1db = ST->getName() == "struct._image1d_buffer_t";
+ bool i2d = ST->getName() == "struct._image2d_t";
+ bool i2da = ST->getName() == "struct._image2d_array_t";
+ bool i3d = ST->getName() == "struct._image3d_t";
+ bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+ if (is_image) {
+ if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+ Counter += 2; // an image argument takes two slots
+ } else {
+ addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]); // no slot is counted for the unsupported image
+ }
+ } else {
+ Counter++;
+ }
+ } else if (CT->isStructTy()
+ && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ StructType *ST = dyn_cast<StructType>(CT); // shadows the outer ST
+ Counter += ((getTypeSize(ST) + 15) & ~15) >> 4; // struct size rounded up to whole 16-byte slots
+ } else if (CT->isIntOrIntVectorTy()
+ || CT->isFPOrFPVectorTy()
+ || CT->isArrayTy()
+ || CT->isPointerTy()
+ || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+ ++Counter; // every other accepted pointer takes one slot
+ } else {
+ assert(0 && "Current type is not supported!");
+ addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); // reached only when asserts are compiled out
+ }
+ } else {
+ assert(0 && "Current type is not supported!");
+ addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]); // reached only when asserts are compiled out
+ }
+ ++I;
+ }
+ // Convert from slots to bytes by multiplying by 16(shift by 4).
+ mArgSize = Counter << 4;
+ }
+ return (uint32_t)mArgSize;
+}
+ uint32_t
+AMDILMachineFunctionInfo::getScratchSize()
+{ // Returns the scratch size derived from the argument types; computed on the first call and cached in mScratchSize.
+ if (mScratchSize == -1) { // -1 means "not computed yet"
+ mScratchSize = 0;
+ Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+ Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+ while (I != Ie) {
+ Type *curType = I->getType();
+ mScratchSize += ((getTypeSize(curType) + 15) & ~15); // each argument's size is rounded up to a multiple of 16
+ ++I;
+ }
+ mScratchSize += ((mScratchSize + 15) & ~15); // NOTE(review): adds the rounded-up total to itself, doubling the sum accumulated above -- confirm '+=' rather than '=' is intended
+ }
+ return (uint32_t)mScratchSize;
+}
+
+// Returns the private stack size of the function; computed on the first
+// call and cached in mStackSize (-1 means "not computed yet").
+//
+// The base value is the frame info's offset adjustment plus its stack size.
+// The scratch size is added on top when the optimization level is None or
+// when any argument is a pointer to a struct in the private address space.
+ uint32_t
+AMDILMachineFunctionInfo::getStackSize()
+{
+  if (mStackSize != -1) {
+    return (uint32_t)mStackSize;
+  }
+  const MachineFrameInfo *MFI = mMF->getFrameInfo();
+  uint32_t privSize = MFI->getOffsetAdjustment() + MFI->getStackSize();
+  const AMDILTargetMachine *TM =
+    reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+  bool addStackSize = TM->getOptLevel() == CodeGenOpt::None;
+  const Function *func = mMF->getFunction();
+  for (Function::const_arg_iterator arg = func->arg_begin(),
+       argEnd = func->arg_end(); arg != argEnd; ++arg) {
+    const PointerType *ptrTy = dyn_cast<PointerType>(arg->getType());
+    if (!ptrTy) {
+      continue;
+    }
+    if (ptrTy->getElementType()->isStructTy()
+        && ptrTy->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+      addStackSize = true;
+    }
+  }
+  if (addStackSize) {
+    privSize += getScratchSize();
+  }
+  mStackSize = privSize;
+  return (uint32_t)mStackSize;
+}
+
+// Returns the literal index for the 32-bit value `val`, registering it in
+// mIntLits on first use.
+//
+// For LOADCONST_i16 / LOADCONST_i8 only the low 16 / 8 bits of val are
+// significant; they are sign extended to 32 bits before the lookup.
+uint32_t
+AMDILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode) {
+  // Since we have emulated 16/8/1 bit register types with a 32bit real
+  // register, we need to sign extend the constants to 32bits in order for
+  // comparisons against the constants to work correctly, this fixes some issues
+  // we had in conformance failing for saturation.
+  // The extension is done with unsigned masks: shifting a signed value left
+  // into or past the sign bit is not well defined.
+  if (Opcode == AMDIL::LOADCONST_i16) {
+    val &= 0xFFFFu;
+    if (val & 0x8000u) {
+      val |= 0xFFFF0000u;
+    }
+  } else if (Opcode == AMDIL::LOADCONST_i8) {
+    val &= 0xFFu;
+    if (val & 0x80u) {
+      val |= 0xFFFFFF00u;
+    }
+  }
+  if (mIntLits.find(val) == mIntLits.end()) {
+    mIntLits[val] = getNumLiterals();
+  }
+  return mIntLits[val];
+}
+
+uint32_t
+AMDILMachineFunctionInfo::addi64Literal(uint64_t val) { // Returns the literal index for the 64-bit value val, registering it in mLongLits on first use.
+ if (mLongLits.find(val) == mLongLits.end()) {
+ mLongLits[val] = getNumLiterals(); // NOTE(review): whether getNumLiterals() is read before or after operator[] inserts the key depends on evaluation order -- confirm the intended index
+ }
+ return mLongLits[val];
+}
+
+uint32_t
+AMDILMachineFunctionInfo::addi128Literal(uint64_t val_lo, uint64_t val_hi) { // Returns the literal index for the (val_lo, val_hi) pair, registering it in mVecLits on first use.
+ std::pair<uint64_t, uint64_t> a; // map key: low half first, high half second
+ a.first = val_lo;
+ a.second = val_hi;
+ if (mVecLits.find(a) == mVecLits.end()) {
+ mVecLits[a] = getNumLiterals(); // NOTE(review): whether getNumLiterals() is read before or after operator[] inserts the key depends on evaluation order -- confirm the intended index
+ }
+ return mVecLits[a];
+}
+
+uint32_t
+AMDILMachineFunctionInfo::addf32Literal(const ConstantFP *CFP) { // Returns the literal index for the bit pattern of CFP; shares the mIntLits table with addi32Literal().
+ uint32_t val = (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(); // raw bits of the value, truncated to 32 bits
+ if (mIntLits.find(val) == mIntLits.end()) {
+ mIntLits[val] = getNumLiterals(); // NOTE(review): whether getNumLiterals() is read before or after operator[] inserts the key depends on evaluation order -- confirm the intended index
+ }
+ return mIntLits[val];
+}
+
+uint32_t
+AMDILMachineFunctionInfo::addf64Literal(const ConstantFP *CFP) { // Returns the literal index for CFP as a double bit pattern; shares the mLongLits table with addi64Literal().
+ union dtol_union { // used to read the bits of a double as a 64-bit integer
+ double d;
+ uint64_t ul;
+ } dval;
+ const APFloat &APF = CFP->getValueAPF();
+ if (&APF.getSemantics() == (const llvm::fltSemantics *)&APFloat::IEEEsingle) { // single-precision constants are widened to double first
+ float fval = APF.convertToFloat();
+ dval.d = (double)fval;
+ } else {
+ dval.d = APF.convertToDouble();
+ }
+ if (mLongLits.find(dval.ul) == mLongLits.end()) {
+ mLongLits[dval.ul] = getNumLiterals(); // NOTE(review): whether getNumLiterals() is read before or after operator[] inserts the key depends on evaluation order -- confirm the intended index
+ }
+ return mLongLits[dval.ul];
+}
+
+ uint32_t
+AMDILMachineFunctionInfo::getIntLits(uint32_t offset)
+{ // Returns the index stored for the 32-bit literal value `offset`. NOTE(review): operator[] presumably inserts a default entry for an unknown value -- verify against the map type.
+ return mIntLits[offset];
+}
+
+ uint32_t
+AMDILMachineFunctionInfo::getLongLits(uint64_t offset)
+{ // Returns the index stored for the 64-bit literal value `offset`. NOTE(review): operator[] presumably inserts a default entry for an unknown value -- verify against the map type.
+ return mLongLits[offset];
+}
+
+ uint32_t
+AMDILMachineFunctionInfo::getVecLits(uint64_t low64, uint64_t high64)
+{ // Returns the index stored for the (low64, high64) literal pair. NOTE(review): operator[] presumably inserts a default entry for an unknown pair -- verify against the map type.
+ return mVecLits[std::pair<uint64_t, uint64_t>(low64, high64)];
+}
+
+size_t
+AMDILMachineFunctionInfo::getNumLiterals() const { // Total of the three literal tables plus the reserved count; the add*Literal() functions use it as the next index.
+ return mLongLits.size() + mIntLits.size() + mVecLits.size() + mReservedLits;
+}
+
+ void
+AMDILMachineFunctionInfo::addReservedLiterals(uint32_t size)
+{ // Increases the reserved literal count (9 after construction) by size.
+ mReservedLits += size;
+}
+
+// Returns the index of the sampler called `name`, registering it with value
+// `val` when it is not known yet. A new sampler gets the current number of
+// registered samplers as its index. Registering a known name again with a
+// different value trips an assertion.
+ uint32_t
+AMDILMachineFunctionInfo::addSampler(std::string name, uint32_t val)
+{
+  if (mSamplerMap.find(name) == mSamplerMap.end()) {
+    SamplerInfo fresh;
+    fresh.name = name;
+    fresh.val = val;
+    fresh.idx = mSamplerMap.size();
+    mSamplerMap[name] = fresh;
+    return fresh.idx;
+  }
+  SamplerInfo known = mSamplerMap[name];
+  assert(known.val == val
+      && "Found a sampler with same name but different values!");
+  return mSamplerMap[name].idx;
+}
+
+void
+AMDILMachineFunctionInfo::setUsesMem(unsigned id) { // Marks memory id as used; the range is checked only by the assert.
+ assert(id < AMDILDevice::MAX_IDS &&
+ "Must set the ID to be less than MAX_IDS!");
+ mUsedMem[id] = true;
+}
+
+bool
+AMDILMachineFunctionInfo::usesMem(unsigned id) { // Returns whether memory id was marked by setUsesMem(); the range is checked only by the assert.
+ assert(id < AMDILDevice::MAX_IDS &&
+ "Must set the ID to be less than MAX_IDS!");
+ return mUsedMem[id];
+}
+
+ void
+AMDILMachineFunctionInfo::addErrorMsg(const char *msg, ErrorMsgEnum val)
+{
+ if (val == DEBUG_ONLY) {
+#if defined(DEBUG) || defined(_DEBUG)
+ mErrors.insert(msg);
+#endif
+ } else if (val == RELEASE_ONLY) {
+#if !defined(DEBUG) && !defined(_DEBUG)
+ mErrors.insert(msg);
+#endif
+ } else if (val == ALWAYS) {
+ mErrors.insert(msg);
+ }
+}
+
+// Returns the printf ID registered for the string `name`. An unknown string
+// is registered first: it gets a new PrintfInfo whose ID is the current
+// number of registered strings plus `offset`. The table keeps the
+// PrintfInfo pointer; the destructor deletes it.
+ uint32_t
+AMDILMachineFunctionInfo::addPrintfString(std::string &name, unsigned offset)
+{
+  if (mPrintfMap.find(name) == mPrintfMap.end()) {
+    PrintfInfo *created = new PrintfInfo;
+    created->setPrintfID(mPrintfMap.size() + offset);
+    mPrintfMap[name] = created;
+    return created->getPrintfID();
+  }
+  return mPrintfMap[name]->getPrintfID();
+}
+
+// Records `size` for operand number `idx` of the printf string `name`.
+//
+// The string must already have been registered with addPrintfString(). An
+// unknown name trips an assertion and is otherwise ignored, rather than
+// inserting a NULL table entry and dereferencing it.
+ void
+AMDILMachineFunctionInfo::addPrintfOperand(std::string &name,
+    size_t idx,
+    uint32_t size)
+{
+  if (mPrintfMap.find(name) == mPrintfMap.end()) {
+    assert(0 && "Printf string must be added before its operands!");
+    return;
+  }
+  mPrintfMap[name]->addOperand(idx, size);
+}
+
+ void
+AMDILMachineFunctionInfo::addMetadata(const char *md, bool kernelOnly)
+{ // Convenience overload: converts md to std::string and forwards to the std::string overload; md is not checked for NULL.
+ addMetadata(std::string(md), kernelOnly);
+}
+
+ void
+AMDILMachineFunctionInfo::addMetadata(std::string md, bool kernelOnly)
+{ // Stores the metadata string md in one of two containers, selected by kernelOnly.
+ if (kernelOnly) {
+ mMetadataKernel.push_back(md); // appended to mMetadataKernel
+ } else {
+ mMetadataFunc.insert(md); // inserted into mMetadataFunc
+ }
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.h b/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.h
new file mode 100644
index 00000000000..a108805b6d4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMachineFunctionInfo.h
@@ -0,0 +1,461 @@
+//== AMDILMachineFunctionInfo.h - AMD il Machine Function Info -*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares AMDIL-specific per-machine-function information
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILMACHINEFUNCTIONINFO_H_
+#define _AMDILMACHINEFUNCTIONINFO_H_
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "AMDILKernel.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <string>
+#include <set>
+#include <map>
+namespace llvm
+{
+ class AMDILSubtarget;
+ // PrintfInfo - Bookkeeping for a single printf string: the ID assigned to
+ // the string plus a small vector of 32bit per-operand values.
+ // The member functions are defined outside this header.
+ class PrintfInfo {
+ // ID assigned to this printf string (see setPrintfID/getPrintfID).
+ uint32_t mPrintfID;
+ // Per-operand data; presumably filled by addOperand - confirm in the .cpp.
+ SmallVector<uint32_t, DEFAULT_VEC_SLOTS> mOperands;
+ public:
+ // Record an operand for this printf string.
+ void addOperand(size_t idx, uint32_t size);
+ // Return the ID of this printf string.
+ uint32_t getPrintfID();
+ // Set the ID of this printf string.
+ void setPrintfID(uint32_t idx);
+ // Return the number of operands (presumably the size of mOperands).
+ size_t getNumOperands();
+ // Return the value stored for the operand at position idx.
+ uint32_t getOperandID(uint32_t idx);
+ }; // class PrintfInfo
+
+ // NameDecorationStyle - Kind of additional name decoration a function
+ // requires; stored in AMDILMachineFunctionInfo::DecorationStyle.
+ enum NameDecorationStyle
+ {
+ None,
+ StdCall,
+ FastCall
+ };
+ // SamplerInfo - One entry of the per-function sampler map (mSamplerMap),
+ // which is keyed by the sampler name.
+ typedef struct SamplerInfoRec {
+ std::string name; // The name of the sampler
+ uint32_t val; // The value of the sampler
+ uint32_t idx; // The sampler resource id
+ } SamplerInfo;
+ // Some typedefs that will help with using the various iterators
+ // of the machine function info class.
+ // Iterators over the 32bit, 64bit and 128bit (pair of 64bit) literal maps.
+ typedef std::map<uint32_t, uint32_t>::iterator lit32_iterator;
+ typedef std::map<uint64_t, uint32_t>::iterator lit64_iterator;
+ typedef std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator
+ lit128_iterator;
+ // Iterator over the name -> SamplerInfo map.
+ typedef StringMap<SamplerInfo>::iterator sampler_iterator;
+ // The following seven names all alias the same DenseSet<uint32_t> iterator
+ // type; the distinct names only document which id set is being walked.
+ typedef DenseSet<uint32_t>::iterator func_iterator;
+ typedef DenseSet<uint32_t>::iterator intr_iterator;
+ typedef DenseSet<uint32_t>::iterator uav_iterator;
+ typedef DenseSet<uint32_t>::iterator read_image2d_iterator;
+ typedef DenseSet<uint32_t>::iterator read_image3d_iterator;
+ typedef DenseSet<uint32_t>::iterator write_image2d_iterator;
+ typedef DenseSet<uint32_t>::iterator write_image3d_iterator;
+ // Iterator over the set of error message pointers.
+ typedef DenseSet<const char*>::iterator error_iterator;
+ // Iterator over the printf string -> PrintfInfo* map.
+ typedef std::map<std::string, PrintfInfo*>::iterator printf_iterator;
+ // Iterators over the function metadata set and the kernel metadata vector.
+ typedef std::set<std::string>::iterator func_md_iterator;
+ typedef std::vector<std::string>::iterator kernel_md_iterator;
+ // AMDILMachineFunctionInfo - This class is
+ // derived from MachineFunctionInfo and holds the private
+ // AMDIL target-specific information for each MachineFunction.
+ class AMDILMachineFunctionInfo : public MachineFunctionInfo
+ {
+ // CalleeSavedFrameSize - Size of the callee-saved
+ // register portion of the
+ // stack frame in bytes.
+ unsigned int CalleeSavedFrameSize;
+ // BytesToPopOnReturn - Number of bytes function pops on return.
+ // Used on windows platform for stdcall & fastcall name decoration
+ unsigned int BytesToPopOnReturn;
+ // DecorationStyle - If the function requires additional
+ // name decoration,
+ // DecorationStyle holds the right way to do so.
+ NameDecorationStyle DecorationStyle;
+ // ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ // TailCallReturnAddrDelta - Delta by which the ReturnAddr stack slot
+ // is moved.
+ // Used for creating an area before the register spill area
+ // on the stack;
+ // the return address can be safely moved to this area.
+ int TailCallReturnAddrDelta;
+
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register.
+ // This field holds the virtual register into which the sret
+ // argument is passed.
+ unsigned int SRetReturnReg;
+
+ // UsesLDS - Specifies that this function uses LDS memory and
+ // that it needs to be allocated.
+ bool UsesLDS;
+
+ // LDSArg - Flag that specifies if this function has a Local
+ // argument or not
+ bool LDSArg;
+
+ // UsesGDS - Specifies that this function uses GDS memory and
+ // that it needs to be allocated.
+ bool UsesGDS;
+
+ // GDSArg - Flag that specifies if this function has a Region
+ // argument or not
+ bool GDSArg;
+
+ // The size in bytes required to host all of the kernel arguments.
+ // -1 means this value has not been determined yet.
+ int32_t mArgSize;
+
+ // The size in bytes required to host the stack and the kernel arguments
+ // in private memory.
+ // -1 means this value has not been determined yet.
+ int32_t mScratchSize;
+
+ // The size in bytes required to host the kernel arguments
+ // on the stack.
+ // -1 means this value has not been determined yet.
+ int32_t mStackSize;
+
+ /// A map of constant to literal mapping for all of the 32bit or
+ /// smaller literals in the current function.
+ std::map<uint32_t, uint32_t> mIntLits;
+
+ /// A map of constant to literal mapping for all of the 64bit
+ /// literals in the current function.
+ std::map<uint64_t, uint32_t> mLongLits;
+
+ /// A map of constant to literal mapping for all of the 128bit
+ /// literals in the current function.
+ std::map<std::pair<uint64_t, uint64_t>, uint32_t> mVecLits;
+
+ /// The number of literals that should be reserved.
+ /// TODO: Remove this when the wrapper emitter is added.
+ uint32_t mReservedLits;
+
+ /// A map of name to sampler information that is used to emit
+ /// metadata to the IL stream that the runtimes can use for
+ /// hardware setup.
+ StringMap<SamplerInfo> mSamplerMap;
+
+ /// Array of flags to specify if a specific memory type is used or not.
+ /// Indexed by memory ID; valid indices are below AMDILDevice::MAX_IDS.
+ bool mUsedMem[AMDILDevice::MAX_IDS];
+
+ /// Set of all functions that this function calls.
+ DenseSet<uint32_t> mFuncs;
+
+ /// Set of all intrinsics that this function calls.
+ DenseSet<uint32_t> mIntrs;
+
+ /// Set of all read only 2D images.
+ DenseSet<uint32_t> mRO2D;
+ /// Set of all read only 3D images.
+ DenseSet<uint32_t> mRO3D;
+ /// Set of all write only 2D images.
+ DenseSet<uint32_t> mWO2D;
+ /// Set of all write only 3D images.
+ DenseSet<uint32_t> mWO3D;
+ /// Set of all the raw uavs.
+ DenseSet<uint32_t> mRawUAV;
+ /// Set of all the arena uavs.
+ DenseSet<uint32_t> mArenaUAV;
+
+ /// A set of all errors that occurred in the backend for this function.
+ /// The set holds the message pointers themselves; the text is not copied.
+ DenseSet<const char *> mErrors;
+
+ /// A mapping of printf data and the printf string
+ std::map<std::string, PrintfInfo*> mPrintfMap;
+
+ /// A set of all of the metadata that is used for the current function.
+ std::set<std::string> mMetadataFunc;
+
+ /// A list of all of the metadata that is used for the function wrapper.
+ /// Stored in a vector, so order is kept and duplicates are possible.
+ std::vector<std::string> mMetadataKernel;
+
+ /// Information about the kernel, NULL if the function is not a kernel.
+ AMDILKernel *mKernel;
+
+ /// Pointer to the machine function that this information belongs to.
+ MachineFunction *mMF;
+
+ /// Pointer to the subtarget for this function.
+ const AMDILSubtarget *mSTM;
+ public:
+ AMDILMachineFunctionInfo();
+ AMDILMachineFunctionInfo(MachineFunction &MF);
+ virtual ~AMDILMachineFunctionInfo();
+ unsigned int
+ getCalleeSavedFrameSize() const;
+ void
+ setCalleeSavedFrameSize(unsigned int bytes);
+
+ unsigned int
+ getBytesToPopOnReturn() const;
+ void
+ setBytesToPopOnReturn (unsigned int bytes);
+
+ NameDecorationStyle
+ getDecorationStyle() const;
+ void
+ setDecorationStyle(NameDecorationStyle style);
+
+ int
+ getRAIndex() const;
+ void
+ setRAIndex(int Index);
+
+ int
+ getTCReturnAddrDelta() const;
+ void
+ setTCReturnAddrDelta(int delta);
+
+ unsigned int
+ getSRetReturnReg() const;
+ void
+ setSRetReturnReg(unsigned int Reg);
+
+ void
+ setUsesLocal();
+ bool
+ usesLocal() const;
+ void
+ setHasLocalArg();
+ bool
+ hasLocalArg() const;
+
+ void
+ setUsesRegion();
+ bool
+ usesRegion() const;
+ void
+ setHasRegionArg();
+ bool
+ hasRegionArg() const;
+
+ bool
+ usesHWConstant(std::string name) const;
+ uint32_t
+ getLocal(uint32_t);
+ bool
+ isKernel() const;
+ AMDILKernel*
+ getKernel();
+
+ std::string
+ getName();
+
+ /// Get the size in bytes that are required to host all of
+ /// arguments based on the argument alignment rules in the AMDIL
+ /// Metadata spec.
+ uint32_t getArgSize();
+
+ /// Get the size in bytes that are required to host all of
+ /// arguments and stack memory in scratch.
+ uint32_t getScratchSize();
+
+ /// Get the size in bytes that is required to host all of
+ /// the arguments on the stack.
+ uint32_t getStackSize();
+
+ ///
+ /// @param val value to add to the lookup table
+ /// @param Opcode opcode of the literal instruction
+ /// @brief adds the specified value of the type represented by the
+ /// Opcode
+ /// to the literal to integer and integer to literal mappings.
+ ///
+ /// Add a 32bit integer value to the literal table.
+ uint32_t addi32Literal(uint32_t val, int Opcode = AMDIL::LOADCONST_i32);
+
+ /// Add a 32bit floating point value to the literal table.
+ uint32_t addf32Literal(const ConstantFP *CFP);
+
+ /// Add a 64bit integer value to the literal table.
+ uint32_t addi64Literal(uint64_t val);
+
+ /// Add a 128 bit integer value to the literal table.
+ uint32_t addi128Literal(uint64_t val_lo, uint64_t val_hi);
+
+ /// Add a 64bit floating point literal as a 64bit integer value.
+ uint32_t addf64Literal(const ConstantFP *CFP);
+
+ /// Get the number of literals that have currently been allocated.
+ size_t getNumLiterals() const;
+
+ /// Get the literal ID of an Integer literal of the given offset.
+ uint32_t getIntLits(uint32_t lit);
+
+ /// Get the literal ID of a Long literal of the given offset.
+ uint32_t getLongLits(uint64_t lit);
+
+ /// Get the literal ID of a 128bit (vector) literal given as its low and
+ /// high 64bit halves.
+ uint32_t getVecLits(uint64_t low64, uint64_t high64);
+
+ /// Add some literals to the number of reserved literals.
+ void addReservedLiterals(uint32_t);
+
+ // Functions that return iterators to the beginning and end
+ // of the various literal maps.
+ // Functions that return the beginning and end of the 32bit literal map
+ lit32_iterator begin_32() { return mIntLits.begin(); }
+ lit32_iterator end_32() { return mIntLits.end(); }
+
+ // Functions that return the beginning and end of the 64bit literal map
+ lit64_iterator begin_64() { return mLongLits.begin(); }
+ lit64_iterator end_64() { return mLongLits.end(); }
+
+ // Functions that return the beginning and end of the 2x64bit literal map
+ lit128_iterator begin_128() { return mVecLits.begin(); }
+ lit128_iterator end_128() { return mVecLits.end(); }
+
+ // Add a sampler to the set of known samplers for the current kernel.
+ // Returns the resource index (SamplerInfo::idx) of the sampler.
+ uint32_t addSampler(std::string name, uint32_t value);
+
+ // Iterators that point to the beginning and end of the sampler map.
+ sampler_iterator sampler_begin() { return mSamplerMap.begin(); }
+ sampler_iterator sampler_end() { return mSamplerMap.end(); }
+
+
+ /// Set the flag for the memory ID to true for the current function.
+ void setUsesMem(unsigned);
+ /// Retrieve the flag for the memory ID.
+ bool usesMem(unsigned);
+
+ /// Add called functions to the set of all functions this function calls.
+ void addCalledFunc(uint32_t id) { mFuncs.insert(id); }
+ void eraseCalledFunc(uint32_t id) { mFuncs.erase(id); }
+ size_t func_size() { return mFuncs.size(); }
+ bool func_empty() { return mFuncs.empty(); }
+ func_iterator func_begin() { return mFuncs.begin(); }
+ func_iterator func_end() { return mFuncs.end(); }
+
+ /// Add called intrinsics to the set of all intrinsics this function calls.
+ void addCalledIntr(uint32_t id) { mIntrs.insert(id); }
+ size_t intr_size() { return mIntrs.size(); }
+ bool intr_empty() { return mIntrs.empty(); }
+ intr_iterator intr_begin() { return mIntrs.begin(); }
+ intr_iterator intr_end() { return mIntrs.end(); }
+
+ /// Add a 2D read_only image id.
+ void addROImage2D(uint32_t id) { mRO2D.insert(id); }
+ size_t read_image2d_size() { return mRO2D.size(); }
+ read_image2d_iterator read_image2d_begin() { return mRO2D.begin(); }
+ read_image2d_iterator read_image2d_end() { return mRO2D.end(); }
+
+ /// Add a 3D read_only image id.
+ void addROImage3D(uint32_t id) { mRO3D.insert(id); }
+ size_t read_image3d_size() { return mRO3D.size(); }
+ read_image3d_iterator read_image3d_begin() { return mRO3D.begin(); }
+ read_image3d_iterator read_image3d_end() { return mRO3D.end(); }
+
+ /// Add a 2D write_only image id.
+ void addWOImage2D(uint32_t id) { mWO2D.insert(id); }
+ size_t write_image2d_size() { return mWO2D.size(); }
+ write_image2d_iterator write_image2d_begin() { return mWO2D.begin(); }
+ write_image2d_iterator write_image2d_end() { return mWO2D.end(); }
+
+ /// Add a 3D write_only image id.
+ void addWOImage3D(uint32_t id) { mWO3D.insert(id); }
+ size_t write_image3d_size() { return mWO3D.size(); }
+ write_image3d_iterator write_image3d_begin() { return mWO3D.begin(); }
+ write_image3d_iterator write_image3d_end() { return mWO3D.end(); }
+
+ /// Add a raw uav id.
+ void uav_insert(uint32_t id) { mRawUAV.insert(id); }
+ bool uav_count(uint32_t id) { return mRawUAV.count(id); }
+ size_t uav_size() { return mRawUAV.size(); }
+ uav_iterator uav_begin() { return mRawUAV.begin(); }
+ uav_iterator uav_end() { return mRawUAV.end(); }
+
+ /// Add an arena uav id.
+ void arena_insert(uint32_t id) { mArenaUAV.insert(id); }
+ bool arena_count(uint32_t id) { return mArenaUAV.count(id); }
+ size_t arena_size() { return mArenaUAV.size(); }
+ uav_iterator arena_begin() { return mArenaUAV.begin(); }
+ uav_iterator arena_end() { return mArenaUAV.end(); }
+
+ // Add an error to the output for the current function.
+ typedef enum {
+ RELEASE_ONLY, /// Only emit error message in release mode.
+ DEBUG_ONLY, /// Only emit error message in debug mode.
+ ALWAYS /// Always emit the error message.
+ } ErrorMsgEnum;
+ /// Add an error message to the set of all error messages.
+ /// val selects whether the message is recorded only in debug builds
+ /// (DEBUG or _DEBUG defined), only in release builds, or always.
+ void addErrorMsg(const char* msg, ErrorMsgEnum val = ALWAYS);
+ bool errors_empty() { return mErrors.empty(); }
+ error_iterator errors_begin() { return mErrors.begin(); }
+ error_iterator errors_end() { return mErrors.end(); }
+
+ /// Add a string to the printf map
+ /// Returns the ID of the string; a string that is already in the map
+ /// keeps its existing ID.
+ uint32_t addPrintfString(std::string &name, unsigned offset);
+ /// Add an operand to the printf string
+ /// The string must already have been added with addPrintfString.
+ void addPrintfOperand(std::string &name, size_t idx, uint32_t size);
+ bool printf_empty() { return mPrintfMap.empty(); }
+ size_t printf_size() { return mPrintfMap.size(); }
+ printf_iterator printf_begin() { return mPrintfMap.begin(); }
+ printf_iterator printf_end() { return mPrintfMap.end(); }
+
+ /// Add a string to the metadata set for a function/kernel wrapper
+ /// kernelOnly == true appends to the kernel metadata vector, otherwise
+ /// the string is inserted into the function metadata set.
+ void addMetadata(const char *md, bool kernelOnly = false);
+ void addMetadata(std::string md, bool kernelOnly = false);
+ func_md_iterator func_md_begin() { return mMetadataFunc.begin(); }
+ func_md_iterator func_md_end() { return mMetadataFunc.end(); }
+ kernel_md_iterator kernel_md_begin() { return mMetadataKernel.begin(); }
+ kernel_md_iterator kernel_md_end() { return mMetadataKernel.end(); }
+ };
+} // llvm namespace
+#endif // _AMDILMACHINEFUNCTIONINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
new file mode 100644
index 00000000000..a00668a7abf
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
@@ -0,0 +1,217 @@
+//===-- AMDILMachinePeephole.cpp - AMDIL Machine Peephole Pass -*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "machine_peephole"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+
+#include "AMDIL.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+
+using namespace llvm;
+namespace
+{
+ // AMDILMachinePeephole - Machine function pass that walks every
+ // instruction of a function and patches it for the current device:
+ // atomics on software-emulated local/region memory are retargeted to the
+ // global atomic opcodes, volatile loads/stores are bracketed by FENCE
+ // instructions, and the source of 8/16 bit unsigned shifts is masked when
+ // the device emulates byte/short operations in software.
+ class AMDILMachinePeephole : public MachineFunctionPass
+ {
+ public:
+ // Pass identification; handed to the MachineFunctionPass constructor.
+ static char ID;
+ // OL is accepted for interface compatibility; the constructor does not
+ // read it.
+ AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL);
+ //virtual ~AMDILMachinePeephole();
+ // Returns the human readable name of the pass.
+ virtual const char*
+ getPassName() const;
+ // Runs the peephole rewrite over every basic block of MF.
+ virtual bool
+ runOnMachineFunction(MachineFunction &MF);
+ private:
+ // Inserts one FENCE before and one after the instruction at MIB; on
+ // return MIB refers to the trailing FENCE.
+ void insertFence(MachineBasicBlock::iterator &MIB);
+ // Target machine used to reach the subtarget and the instruction info.
+ TargetMachine &TM;
+ // Set from DEBUGME in the constructor; not read elsewhere in this file.
+ bool mDebug;
+ }; // AMDILMachinePeephole
+ char AMDILMachinePeephole::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+  // Factory for the AMDIL machine peephole pass. The caller receives a
+  // freshly heap-allocated pass object.
+  FunctionPass*
+  createAMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
+  {
+    FunctionPass *peephole = new AMDILMachinePeephole(tm, OL);
+    return peephole;
+  }
+} // llvm namespace
+
+// Construct the pass. The base class constructor takes the pass ID by
+// reference on LLVM_VERSION >= 2500 and as an address cast to intptr_t on
+// older versions. OL is not used.
+AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+  : MachineFunctionPass(ID),
+#else
+  : MachineFunctionPass((intptr_t)&ID),
+#endif
+    TM(tm),
+    mDebug(DEBUGME)
+{
+}
+
+// Walk every instruction of MF and apply the device specific fix-ups:
+//  - atomics whose opcode name contains "_L_" / "_R_" are retargeted to the
+//    global atomic opcodes when local / region memory is software emulated,
+//  - volatile loads and stores are bracketed by FENCE instructions,
+//  - 16 bit / 8 bit unsigned shifts get their source operand masked with
+//    0xFFFF / 0xFF when short / byte operations are software emulated.
+// Returns true when at least one instruction was rewritten or inserted,
+// false when the function was left untouched.
+bool
+AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
+{
+  bool Changed = false;
+  const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
+      MBB != MBE; ++MBB) {
+    MachineBasicBlock *mb = MBB;
+    for (MachineBasicBlock::iterator MIB = mb->begin(), MIE = mb->end();
+        MIB != MIE; ++MIB) {
+      MachineInstr *mi = MIB;
+      switch (mi->getOpcode()) {
+        default:
+          if (isAtomicInst(mi)) {
+            // If we don't support the hardware accelerated address spaces,
+            // then the atomic needs to be transformed to the global atomic.
+            if (strstr(mi->getDesc().getName(), "_L_")
+                && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
+              // Rebase the address: R1011 = operand 1 + T2, then use R1011
+              // as the new operand 1.
+              BuildMI(*mb, MIB, mi->getDebugLoc(),
+                  TM.getInstrInfo()->get(AMDIL::ADD_i32), AMDIL::R1011)
+                .addReg(mi->getOperand(1).getReg())
+                .addReg(AMDIL::T2);
+              mi->getOperand(1).setReg(AMDIL::R1011);
+              // Assumes the ATOM_L_* and ATOM_G_* opcodes are enumerated in
+              // the same order, so the offset from ATOM_L_ADD carries over.
+              mi->setDesc(
+                  TM.getInstrInfo()->get(
+                    (mi->getOpcode() - AMDIL::ATOM_L_ADD) + AMDIL::ATOM_G_ADD));
+              Changed = true;
+            } else if (strstr(mi->getDesc().getName(), "_R_")
+                && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) {
+              assert(!"Software region memory is not supported!");
+              // Same opcode remapping as above, relative to ATOM_R_ADD.
+              mi->setDesc(
+                  TM.getInstrInfo()->get(
+                    (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
+              Changed = true;
+            }
+          } else if ((isLoadInst(mi) || isStoreInst(mi)) && isVolatileInst(mi)) {
+            // insertFence leaves MIB on the trailing fence, so the ++MIB of
+            // the loop steps past it.
+            insertFence(MIB);
+            Changed = true;
+          }
+          continue;
+        case AMDIL::USHR_i16:
+        case AMDIL::USHR_v2i16:
+        case AMDIL::USHR_v4i16:
+        case AMDIL::USHRVEC_i16:
+        case AMDIL::USHRVEC_v2i16:
+        case AMDIL::USHRVEC_v4i16:
+          if (STM->device()->usesSoftware(AMDILDeviceInfo::ShortOps)) {
+            // Mask the shifted value with 0xFFFF before the shift.
+            unsigned lReg = MF.getRegInfo()
+              .createVirtualRegister(&AMDIL::GPRI32RegClass);
+            unsigned Reg = MF.getRegInfo()
+              .createVirtualRegister(&AMDIL::GPRV4I32RegClass);
+            BuildMI(*mb, MIB, mi->getDebugLoc(),
+                TM.getInstrInfo()->get(AMDIL::LOADCONST_i32),
+                lReg).addImm(0xFFFF);
+            BuildMI(*mb, MIB, mi->getDebugLoc(),
+                TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32),
+                Reg)
+              .addReg(mi->getOperand(1).getReg())
+              .addReg(lReg);
+            mi->getOperand(1).setReg(Reg);
+            Changed = true;
+          }
+          break;
+        case AMDIL::USHR_i8:
+        case AMDIL::USHR_v2i8:
+        case AMDIL::USHR_v4i8:
+        case AMDIL::USHRVEC_i8:
+        case AMDIL::USHRVEC_v2i8:
+        case AMDIL::USHRVEC_v4i8:
+          if (STM->device()->usesSoftware(AMDILDeviceInfo::ByteOps)) {
+            // Mask the shifted value with 0xFF before the shift.
+            unsigned lReg = MF.getRegInfo()
+              .createVirtualRegister(&AMDIL::GPRI32RegClass);
+            unsigned Reg = MF.getRegInfo()
+              .createVirtualRegister(&AMDIL::GPRV4I32RegClass);
+            BuildMI(*mb, MIB, mi->getDebugLoc(),
+                TM.getInstrInfo()->get(AMDIL::LOADCONST_i32),
+                lReg).addImm(0xFF);
+            BuildMI(*mb, MIB, mi->getDebugLoc(),
+                TM.getInstrInfo()->get(AMDIL::BINARY_AND_v4i32),
+                Reg)
+              .addReg(mi->getOperand(1).getReg())
+              .addReg(lReg);
+            mi->getOperand(1).setReg(Reg);
+            Changed = true;
+          }
+          break;
+      }
+    }
+  }
+  return Changed;
+}
+
+// Human readable name of this pass.
+const char*
+AMDILMachinePeephole::getPassName() const
+{
+  static const char passName[] =
+    "AMDIL Generic Machine Peephole Optimization Pass";
+  return passName;
+}
+
+// Bracket the instruction at MIB with two FENCE instructions: one is
+// inserted directly before it and one directly after it. On return MIB
+// refers to the trailing FENCE.
+void
+AMDILMachinePeephole::insertFence(MachineBasicBlock::iterator &MIB)
+{
+  MachineInstr *inst = MIB;
+  MachineBasicBlock *block = inst->getParent();
+  MachineFunction &func = *(block->getParent());
+
+  // Leading fence, placed in front of the instruction.
+  MachineInstr *leading = BuildMI(func,
+      inst->getDebugLoc(),
+      TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+  block->insert(MIB, leading);
+
+  // Trailing fence, placed right behind the instruction; the iterator is
+  // moved onto it.
+  MachineInstr *trailing = BuildMI(func,
+      inst->getDebugLoc(),
+      TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+  MIB = block->insertAfter(MIB, trailing);
+}
diff --git a/src/gallium/drivers/radeon/AMDILModuleInfo.cpp b/src/gallium/drivers/radeon/AMDILModuleInfo.cpp
new file mode 100644
index 00000000000..4f69b385467
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILModuleInfo.cpp
@@ -0,0 +1,1300 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILModuleInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILKernel.h"
+#include "AMDILSubtarget.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cstdio>
+
+#define CB_BASE_OFFSET 2
+using namespace llvm;
+
+// Stores a pointer to the supplied MachineModuleInfo and resets the symbol
+// table pointer and every offset/counter member to its empty value.
+AMDILModuleInfo::AMDILModuleInfo(const MachineModuleInfo &MMI)
+{
+  mMMI = &MMI;
+  symTab = NULL;
+
+  // All running offsets and the reserved-buffer count start at zero.
+  mOffset = 0;
+  mCurrentCPOffset = 0;
+  mPrintfOffset = 0;
+  mReservedBuffs = 0;
+}
+
+// Deletes every AMDILKernel whose pointer is stored in mKernels.
+AMDILModuleInfo::~AMDILModuleInfo() {
+  for (StringMap<AMDILKernel*>::iterator kb = mKernels.begin(),
+       ke = mKernels.end(); kb != ke; ++kb) {
+    // Read the pointer through the iterator. Copying the StringMapEntry by
+    // value (as the previous code did) is unnecessary just to fetch the
+    // stored pointer.
+    delete kb->second;
+  }
+}
+
+// Linear search of krnl->constPtr for the entry whose name compares equal to
+// arg as a C string. Returns a pointer to that entry, or NULL when no entry
+// matches.
+static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg) {
+  typedef llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator
+    ConstPtrIter;
+  const char *wanted = arg.c_str();
+  ConstPtrIter cur = krnl->constPtr.begin();
+  const ConstPtrIter last = krnl->constPtr.end();
+  while (cur != last) {
+    // C-string comparison on purpose: it stops at the first NUL byte.
+    if (strcmp(cur->name.data(), wanted) == 0) {
+      return &(*cur);
+    }
+    ++cur;
+  }
+  return NULL;
+}
+#if 0
+// Disabled code (compiled out by the surrounding #if 0).
+// Recursively walks the elements of ST and returns true as soon as one
+// element reports a non-zero GET_SCALAR_SIZE below 32, or a nested struct
+// element (size reported as 0) contains such an element; false otherwise.
+static bool structContainsSub32bitType(const StructType *ST) {
+ StructType::element_iterator eib, eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ Type *ptr = *eib;
+ uint32_t size = (uint32_t)GET_SCALAR_SIZE(ptr);
+ if (!size) {
+ // A size of 0 is treated as "not a scalar": only struct elements are
+ // descended into, anything else is ignored.
+ if (const StructType *ST = dyn_cast<StructType>(ptr)) {
+ if (structContainsSub32bitType(ST)) {
+ return true;
+ }
+ }
+ } else if (size < 32) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+// Scans every global of module M and dispatches on the global's name (or
+// address space) to the matching parse routine, then lays out the global
+// constant buffers and finally runs checkConstPtrsUseHW over every function
+// of the module.
+//
+// M   - module whose globals and functions are inspected.
+// mTM - target machine; its subtarget is cached in mSTM for later queries.
+void AMDILModuleInfo::processModule(const Module *M,
+    const AMDILTargetMachine *mTM)
+{
+  mSTM = mTM->getSubtargetImpl();
+  for (Module::const_global_iterator GI = M->global_begin(),
+       GE = M->global_end(); GI != GE; ++GI) {
+    const GlobalValue *GV = GI;
+    llvm::StringRef GVName = GV->getName();
+    // Prefix tests use StringRef::startswith so that no hand-counted prefix
+    // length has to be kept in sync with the literal.
+    if (GVName.startswith("sgv")) {
+      mKernelArgs[GVName] = parseSGV(GV);
+    } else if (GVName.startswith("fgv")) {
+      // we can ignore this since we don't care about the filename
+      // string
+    } else if (GVName.startswith("lvgv")) {
+      mLocalArgs[GVName] = parseLVGV(GV);
+    } else if (GVName.startswith("llvm.image.annotations")) {
+      parseImageAnnotate(GV);
+    } else if (GVName.startswith("llvm.global.annotations")) {
+      parseGlobalAnnotate(GV);
+    } else if (GVName.startswith("llvm.constpointer.annotations")) {
+      parseConstantPtrAnnotate(GV);
+    } else if (GVName.startswith("llvm.readonlypointer.annotations")) {
+      // These are skipped as we handle them later in AMDILPointerManager.cpp
+    } else if (GV->getType()->getAddressSpace() == 3) { // *** Match cl_kernel.h local AS #
+      parseAutoArray(GV, false);
+    } else if (GVName.find("clregion") != llvm::StringRef::npos) {
+      parseAutoArray(GV, true);
+    } else if (!GV->use_empty()
+        && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
+      parseConstantPtr(GV);
+    }
+  }
+  allocateGlobalCB();
+
+  safeForEach(M->begin(), M->end(),
+      std::bind1st(
+        std::mem_fun(&AMDILModuleInfo::checkConstPtrsUseHW),
+        this));
+}
+
+// Assigns a constant-buffer number and byte offset to every entry of
+// mConstMems. Entries that fit are packed into hardware buffers (numbered
+// from CB_BASE_OFFSET); everything else is placed at a running offset in
+// buffer 0 with usesHardware left false. Offsets advance in multiples of 16.
+// When mConstMems is not empty, mReservedBuffs is set to the index of the
+// last hardware buffer started plus one.
+void AMDILModuleInfo::allocateGlobalCB(void) {
+  const uint32_t cbLimit = mSTM->device()->getMaxCBSize();
+  uint32_t hwOffset = 0;
+  uint32_t hwBuffer = 0;
+  uint32_t swOffset = 0;
+  StringMap<AMDILConstPtr>::iterator it = mConstMems.begin();
+  const StringMap<AMDILConstPtr>::iterator last = mConstMems.end();
+  for (; it != last; ++it) {
+    AMDILConstPtr &entry = it->second;
+    const bool hwSupported =
+      mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+    entry.usesHardware = false;
+    // Only entries no larger than one buffer are candidates for hardware.
+    if (hwSupported && entry.size <= cbLimit) {
+      // Start a fresh buffer when this entry would overflow the current one.
+      if (hwOffset + entry.size > cbLimit) {
+        hwOffset = 0;
+        ++hwBuffer;
+      }
+      if (hwBuffer < mSTM->device()->getMaxNumCBs()) {
+        entry.cbNum = hwBuffer + CB_BASE_OFFSET;
+        entry.offset = hwOffset;
+        hwOffset += (entry.size + 15) & (~15);
+        entry.usesHardware = true;
+        continue;
+      }
+    }
+    // Fallback placement in buffer 0.
+    entry.cbNum = 0;
+    entry.offset = swOffset;
+    swOffset += (entry.size + 15) & (~15);
+  }
+  if (!mConstMems.empty()) {
+    mReservedBuffs = hwBuffer + 1;
+  }
+}
+
+// Per-function callback (invoked from processModule through safeForEach).
+// For a function whose name contains both "__OpenCL" and "_AMDILKernel" it:
+//   1. adds an AMDILConstPtr for every constant-address-space pointer
+//      argument that is not already recorded on the kernel,
+//   2. if there are more pointers than available buffers, clears
+//      usesHardware on the smallest ones,
+//   3. renumbers the remaining hardware pointers consecutively,
+//   4. appends the hardware-resident module-level constants, and
+//   5. updates the kernel's per-buffer sizes (constSizes).
+// Always returns false.
+bool AMDILModuleInfo::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
+{
+  Function::const_arg_iterator AI, AE;
+  const Function *func = *FCI;
+  std::string name = func->getName();
+  if (!strstr(name.c_str(), "__OpenCL")
+      || !strstr(name.c_str(), "_AMDILKernel")) {
+    return false;
+  }
+  // Look the kernel up with find() rather than operator[]: operator[] would
+  // insert a NULL entry for an unknown kernel, which the code below would
+  // then dereference (and which would make isKernel() report it as known).
+  StringMap<AMDILKernel*>::iterator kiter = mKernels.find(name);
+  if (kiter == mKernels.end() || !kiter->second) {
+    return false;
+  }
+  AMDILKernel *krnl = kiter->second;
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    for (AI = func->arg_begin(), AE = func->arg_end();
+        AI != AE; ++AI) {
+      const Argument *Arg = &(*AI);
+      const PointerType *P = dyn_cast<PointerType>(Arg->getType());
+      if (!P) {
+        continue;
+      }
+      if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
+        continue;
+      }
+      // Skip arguments that already have a recorded constant pointer.
+      const AMDILConstPtr *ptr = getConstPtr(krnl, Arg->getName());
+      if (ptr) {
+        continue;
+      }
+      AMDILConstPtr constAttr;
+      constAttr.name = Arg->getName();
+      constAttr.size = this->mSTM->device()->getMaxCBSize();
+      constAttr.base = Arg;
+      constAttr.isArgument = true;
+      constAttr.isArray = false;
+      constAttr.offset = 0;
+      constAttr.usesHardware =
+        mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+      if (constAttr.usesHardware) {
+        constAttr.cbNum = krnl->constPtr.size() + 2;
+      } else {
+        constAttr.cbNum = 0;
+      }
+      krnl->constPtr.push_back(constAttr);
+    }
+  }
+  // Now lets make sure that only the N largest buffers
+  // get allocated in hardware if we have too many buffers
+  uint32_t numPtrs = krnl->constPtr.size();
+  if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
+    // TODO: Change this routine so it sorts
+    // AMDILConstPtr instead of pulling the sizes out
+    // and then grab the N largest and disable the rest
+    llvm::SmallVector<uint32_t, 16> sizes;
+    for (uint32_t x = 0; x < numPtrs; ++x) {
+      sizes.push_back(krnl->constPtr[x].size);
+    }
+    std::sort(sizes.begin(), sizes.end());
+    uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
+        mReservedBuffs);
+    // Every pointer no larger than this threshold is a candidate to lose
+    // its hardware buffer, until numToDisable pointers have been disabled.
+    uint32_t safeSize = sizes[numToDisable-1];
+    for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
+      if (krnl->constPtr[x].size <= safeSize) {
+        krnl->constPtr[x].usesHardware = false;
+        --numToDisable;
+      }
+    }
+  }
+  // Renumber all of the valid CB's so that
+  // they are linear increase
+  uint32_t CBid = 2 + mReservedBuffs;
+  for (uint32_t x = 0; x < numPtrs; ++x) {
+    if (krnl->constPtr[x].usesHardware) {
+      krnl->constPtr[x].cbNum = CBid++;
+    }
+  }
+  // Module-level constants placed in hardware are visible to this kernel too.
+  for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
+      cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+    if (cpb->second.usesHardware) {
+      krnl->constPtr.push_back(cpb->second);
+    }
+  }
+  for (uint32_t x = 0; x < krnl->constPtr.size(); ++x) {
+    AMDILConstPtr &c = krnl->constPtr[x];
+    uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
+    // The second test rejects cbNum values that wrapped around in the
+    // unsigned subtraction above.
+    if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
+      if ((c.size + c.offset) > krnl->constSizes[cbNum]) {
+        krnl->constSizes[cbNum] =
+          ((c.size + c.offset) + 15) & ~15;
+      }
+    } else {
+      krnl->constPtr[x].usesHardware = false;
+    }
+  }
+  return false;
+}
+
+// Returns the offset recorded in mArrayMems for the array named a, or -1
+// when no such array is known.
+int32_t AMDILModuleInfo::getArrayOffset(const llvm::StringRef &a) const {
+  StringMap<AMDILArrayMem>::const_iterator entry = mArrayMems.find(a);
+  if (entry == mArrayMems.end()) {
+    return -1;
+  }
+  return entry->second.offset;
+}
+
+// Returns the offset recorded in mConstMems for the constant named a, or -1
+// when no such constant is known.
+int32_t AMDILModuleInfo::getConstOffset(const llvm::StringRef &a) const {
+  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(a);
+  if (entry == mConstMems.end()) {
+    return -1;
+  }
+  return entry->second.offset;
+}
+
+// Returns the usesHardware flag of the constant called name; false when the
+// constant is not present in mConstMems.
+bool AMDILModuleInfo::getConstHWBit(const llvm::StringRef &name) const {
+  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(name);
+  if (entry == mConstMems.end()) {
+    return false;
+  }
+  return entry->second.usesHardware;
+}
+
+// As of right now we only care about the required group size
+// so we can skip the variable encoding
+// Builds the kernel attributes described by the "sgv" global G.
+//
+// The result starts zeroed with both size triples set to the subtarget
+// defaults. If G is a GlobalVariable with a string initializer, the numbers
+// following "RWG" overwrite reqGroupSize and those following "RWR" overwrite
+// reqRegionSize (each as "a,b,c"); the matching mHasRWG / mHasRWR flag is
+// set whenever the tag is present. Values that fail to parse keep their
+// default.
+AMDILKernelAttr AMDILModuleInfo::parseSGV(const GlobalValue *G) {
+  AMDILKernelAttr nArg;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  memset(&nArg, 0, sizeof(nArg));
+  for (int x = 0; x < 3; ++x) {
+    nArg.reqGroupSize[x] = mSTM->getDefaultSize(x);
+    nArg.reqRegionSize[x] = mSTM->getDefaultSize(x);
+  }
+  if (!GV || !GV->hasInitializer()) {
+    return nArg;
+  }
+  const Constant *CV = GV->getInitializer();
+  const ConstantArray *CA = dyn_cast_or_null<ConstantArray>(CV);
+  if (!CA || !CA->isString()) {
+    return nArg;
+  }
+  std::string init = CA->getAsString();
+  // std::string::find reports failure with std::string::npos; compare
+  // against that constant rather than StringRef's.
+  size_t pos = init.find("RWG");
+  if (pos != std::string::npos) {
+    // Parse in place, directly after the 3-character tag; no substring copy
+    // is needed because sscanf stops at the terminating NUL.
+    sscanf(init.c_str() + pos + 3, "%d,%d,%d", &(nArg.reqGroupSize[0]),
+        &(nArg.reqGroupSize[1]),
+        &(nArg.reqGroupSize[2]));
+    nArg.mHasRWG = true;
+  }
+  pos = init.find("RWR");
+  if (pos != std::string::npos) {
+    sscanf(init.c_str() + pos + 3, "%d,%d,%d", &(nArg.reqRegionSize[0]),
+        &(nArg.reqRegionSize[1]),
+        &(nArg.reqRegionSize[2]));
+    nArg.mHasRWR = true;
+  }
+  return nArg;
+}
+
+// Builds the local-argument record described by the "lvgv" global G.
+// Each constant-expression element of G's array initializer names a global
+// through its first operand; when that name is present in mArrayMems, a
+// pointer to the stored AMDILArrayMem is appended to the result's local
+// list. The result's name ends up as the last name visited (empty if none).
+AMDILLocalArg AMDILModuleInfo::parseLVGV(const GlobalValue *G) {
+  AMDILLocalArg nArg;
+  nArg.name = "";
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    return nArg;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return nArg;
+  }
+  const size_t numLocals = CA->getNumOperands();
+  for (size_t idx = 0; idx < numLocals; ++idx) {
+    const Value *element = CA->getOperand(idx);
+    const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(element);
+    if (CE && CE->getNumOperands()) {
+      nArg.name = (*(CE->op_begin()))->getName();
+      // Only arrays already registered in mArrayMems are attached.
+      StringMap<AMDILArrayMem>::iterator mem = mArrayMems.find(nArg.name);
+      if (mem != mArrayMems.end()) {
+        nArg.local.push_back(&(mem->second));
+      }
+    }
+  }
+  return nArg;
+}
+
+// Parses an "llvm.constpointer.annotations..." global. Its initializer is an
+// array of {name, size} structs; each one becomes an AMDILConstPtr appended
+// to the kernel whose name is the part of G's name that follows the
+// 30-character prefix. The kernel record is created if it does not exist.
+// Malformed entries (wrong operand kinds, missing initializers) are skipped
+// instead of being dereferenced.
+void AMDILModuleInfo::parseConstantPtrAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  // substr() clamps the start position to the string length, so a name
+  // shorter than the prefix yields an empty kernel name rather than a read
+  // past the end of the buffer.
+  llvm::StringRef AMDILKernelName = G->getName().substr(30);
+  uint32_t numOps = CA->getNumOperands();
+  for (uint32_t x = 0; x < numOps; ++x) {
+    const Value *V = CA->getOperand(x);
+    const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+    if (!CS) {
+      continue;
+    }
+    assert(CS->getNumOperands() == 2 && "There can only be 2"
+        " fields, a name and size");
+    if (CS->getNumOperands() != 2) {
+      continue;
+    }
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
+    const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
+    assert(nameField && "There must be a constant name field");
+    assert(sizeField && "There must be a constant size field");
+    // The asserts vanish in release builds; keep going safely there.
+    if (!nameField || !sizeField) {
+      continue;
+    }
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    if (!nameGV || !nameGV->hasInitializer()) {
+      continue;
+    }
+    const ConstantArray *nameArray =
+      dyn_cast<ConstantArray>(nameGV->getInitializer());
+    if (!nameArray) {
+      continue;
+    }
+    // Lets add this string to the set of strings we should ignore processing
+    mIgnoreStr.insert(nameGV->getName());
+    StringMap<AMDILConstPtr>::iterator known =
+      mConstMems.find(nameGV->getName());
+    if (known != mConstMems.end()) {
+      // If we already processed this string as a constant, lets remove it from
+      // the list of known constants. This way we don't process unneeded data
+      // and don't generate code/metadata for strings that are never used.
+      mConstMems.erase(known);
+    } else {
+      mIgnoreStr.insert(CS->getOperand(0)->getName());
+    }
+    AMDILConstPtr constAttr;
+    constAttr.name = nameArray->getAsString();
+    constAttr.size = (sizeField->getZExtValue() + 15) & ~15;
+    constAttr.base = CS;
+    constAttr.isArgument = true;
+    constAttr.isArray = false;
+    constAttr.cbNum = 0;
+    constAttr.offset = 0;
+    constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
+    // Now that we have all our constant information,
+    // lets update the AMDILKernel
+    AMDILKernel *k;
+    StringMap<AMDILKernel*>::iterator kiter = mKernels.find(AMDILKernelName);
+    if (kiter != mKernels.end()) {
+      k = kiter->second;
+    } else {
+      k = new AMDILKernel;
+      k->curSize = 0;
+      k->curRSize = 0;
+      k->curHWSize = 0;
+      k->curHWRSize = 0;
+      k->constSize = 0;
+      k->lvgv = NULL;
+      k->sgv = NULL;
+      memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+    }
+    constAttr.cbNum = k->constPtr.size() + 2;
+    k->constPtr.push_back(constAttr);
+    mKernels[AMDILKernelName] = k;
+  }
+}
+
+// Parses an "llvm.image.annotations..." global. Its initializer is an array
+// of two-field structs; the second field selects the access kind of image
+// argument i (1 = read-only, 2 = write-only). The indices are recorded on
+// the kernel whose name is the part of G's name following the 23-character
+// prefix; the kernel record is created if it does not exist. The string
+// referenced by the first field is removed from mConstMems if it was already
+// collected there, otherwise it is added to mIgnoreStr.
+void AMDILModuleInfo::parseImageAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t e = CA->getNumOperands();
+  if (!e) {
+    return;
+  }
+  AMDILKernel *k;
+  // substr() clamps the start position, so a too-short name gives an empty
+  // kernel name instead of reading past the end of the buffer.
+  llvm::StringRef name = G->getName().substr(23);
+  if (mKernels.find(name) != mKernels.end()) {
+    k = mKernels[name];
+  } else {
+    k = new AMDILKernel;
+    k->curSize = 0;
+    k->curRSize = 0;
+    k->curHWSize = 0;
+    k->curHWRSize = 0;
+    k->constSize = 0;
+    k->lvgv = NULL;
+    k->sgv = NULL;
+    memset(k->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+  }
+  for (uint32_t i = 0; i != e; ++i) {
+    const ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(i));
+    if (!CS || CS->getNumOperands() != 2) {
+      continue;
+    }
+    // The first field wraps the annotated string; skip entries that do not
+    // have the wrapped operand at all.
+    const Constant *nameExpr = CS->getOperand(0);
+    if (!nameExpr->getNumOperands()) {
+      continue;
+    }
+    llvm::StringRef strName = nameExpr->getOperand(0)->getName();
+    StringMap<AMDILConstPtr>::iterator known = mConstMems.find(strName);
+    if (known != mConstMems.end()) {
+      // If we already processed this string as a constant, lets remove it
+      // from the list of known constants. This way we don't process unneeded
+      // data and don't generate code/metadata for strings that are never
+      // used.
+      mConstMems.erase(known);
+    } else {
+      mIgnoreStr.insert(strName);
+    }
+    const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
+    if (!CI) {
+      // Not an integer access kind; nothing to record for this image.
+      continue;
+    }
+    uint32_t val = (uint32_t)CI->getZExtValue();
+    if (val == 1) {
+      k->readOnly.insert(i);
+    } else if (val == 2) {
+      k->writeOnly.insert(i);
+    } else {
+      assert(!"Unknown image type value!");
+    }
+  }
+  mKernels[name] = k;
+}
+
+// Registers GV in mArrayMems under its own name as a hardware array at
+// offset 0. vecSize comes from getTypeSize() on the variable's type (a NULL
+// type is passed when GV is not a GlobalVariable); isRegion is stored as
+// given.
+void AMDILModuleInfo::parseAutoArray(const GlobalValue *GV, bool isRegion) {
+  Type *Ty = NULL;
+  if (const GlobalVariable *G = dyn_cast<GlobalVariable>(GV)) {
+    Ty = G->getType();
+  }
+  AMDILArrayMem entry;
+  entry.isHW = true;
+  entry.offset = 0;
+  entry.vecSize = getTypeSize(Ty, true);
+  entry.isRegion = isRegion;
+  mArrayMems[GV->getName()] = entry;
+}
+
+// Registers GV in mConstMems under its own name as a non-argument constant
+// array with no buffer assigned yet (cbNum 0, offset 0, usesHardware false).
+// The size comes from getTypeSize() on the variable's type; a NULL type is
+// passed when GV is not a GlobalVariable.
+void AMDILModuleInfo::parseConstantPtr(const GlobalValue *GV) {
+  const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+  Type *Ty = (G) ? G->getType() : NULL;
+  AMDILConstPtr constAttr;
+  // Take the name from GV: G may be NULL (see the check above), and when it
+  // is not NULL it is the same object as GV.
+  constAttr.name = GV->getName();
+  constAttr.size = getTypeSize(Ty, true);
+  constAttr.base = GV;
+  constAttr.isArgument = false;
+  constAttr.isArray = true;
+  constAttr.offset = 0;
+  constAttr.cbNum = 0;
+  constAttr.usesHardware = false;
+  mConstMems[GV->getName()] = constAttr;
+}
+
+// Parses an "llvm.global.annotations" global: when its initializer is a
+// constant array, every element is handed to parseKernelInformation().
+// Anything else (not a variable, no initializer, initializer that is a
+// global value or not an array) is ignored.
+void AMDILModuleInfo::parseGlobalAnnotate(const GlobalValue *G) {
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  // dyn_cast yields NULL for a non-variable; guard it the same way
+  // parseSGV() and parseLVGV() do.
+  if (!GV || !GV->hasInitializer()) {
+    return;
+  }
+  const Constant *CT = GV->getInitializer();
+  if (!CT || isa<GlobalValue>(CT)) {
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
+  if (!CA) {
+    return;
+  }
+
+  unsigned int nKernels = CA->getNumOperands();
+  for (unsigned int i = 0, e = nKernels; i != e; ++i) {
+    parseKernelInformation(CA->getOperand(i));
+  }
+}
+
+// Consumes one element of the global-annotations array. V is expected to be
+// a five-field constant struct; anything else is ignored. The kernel named
+// by field 0 is looked up in mKernels (or created), its sgv pointer is set
+// from mKernelArgs using the name in field 1, and its lvgv pointer is set
+// from mLocalArgs using the name in field 3. Every local array attached to
+// that lvgv entry is given an offset from the matching running size of the
+// kernel (region vs. non-region, hardware vs. non-hardware), and that
+// running size is advanced by the array size rounded up to 16.
+void AMDILModuleInfo::parseKernelInformation(const Value *V) {
+ if (isa<GlobalValue>(V)) {
+ return;
+ }
+ const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+ if (!CS) {
+ return;
+ }
+ uint32_t N = CS->getNumOperands();
+ if (N != 5) {
+ return;
+ }
+ AMDILKernel *tmp;
+
+ // The first operand is always a pointer to the AMDILKernel.
+ const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
+ llvm::StringRef AMDILKernelName = "";
+ if (CV->getNumOperands()) {
+ AMDILKernelName = (*(CV->op_begin()))->getName();
+ }
+
+ // If we have images, then we have already created the AMDILKernel and we just need
+ // to get the AMDILKernel information.
+ if (mKernels.find(AMDILKernelName) != mKernels.end()) {
+ tmp = mKernels[AMDILKernelName];
+ } else {
+ tmp = new AMDILKernel;
+ tmp->curSize = 0;
+ tmp->curRSize = 0;
+ tmp->curHWSize = 0;
+ tmp->curHWRSize = 0;
+ tmp->constSize = 0;
+ tmp->lvgv = NULL;
+ tmp->sgv = NULL;
+ memset(tmp->constSizes, 0, sizeof(uint32_t) * HW_MAX_NUM_CB);
+ }
+
+
+ // The second operand is SGV, there can only be one so we don't need to worry
+ // about parsing out multiple data points.
+ CV = dyn_cast<Constant>(CS->getOperand(1));
+
+ llvm::StringRef sgvName;
+ if (CV->getNumOperands()) {
+ sgvName = (*(CV->op_begin()))->getName();
+ }
+
+ if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
+ tmp->sgv = &mKernelArgs[sgvName];
+ }
+ // The third operand is FGV, which is skipped
+ // The fourth operand is LVGV
+ // There can be multiple local arrays, so we
+ // need to handle each one separately
+ CV = dyn_cast<Constant>(CS->getOperand(3));
+ llvm::StringRef lvgvName = "";
+ if (CV->getNumOperands()) {
+ lvgvName = (*(CV->op_begin()))->getName();
+ }
+ if (mLocalArgs.find(lvgvName) != mLocalArgs.end()) {
+ AMDILLocalArg *ptr = &mLocalArgs[lvgvName];
+ tmp->lvgv = ptr;
+ llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
+ for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
+ // Pick the running size that matches the array's isRegion/isHW flags,
+ // use it as the array's offset, then grow it by the 16-aligned size.
+ if ((*ib)->isRegion) {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp->curHWRSize;
+ tmp->curHWRSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp->curRSize;
+ tmp->curRSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ } else {
+ if ((*ib)->isHW) {
+ (*ib)->offset = tmp->curHWSize;
+ tmp->curHWSize += ((*ib)->vecSize + 15) & ~15;
+ } else {
+ (*ib)->offset = tmp->curSize;
+ tmp->curSize += ((*ib)->vecSize + 15) & ~15;
+ }
+ }
+ }
+ }
+
+ // The fifth operand is NULL
+ mKernels[AMDILKernelName] = tmp;
+}
+
+// Returns the kernel record stored under name, or NULL when mKernels has no
+// entry for it.
+AMDILKernel *
+AMDILModuleInfo::getKernel(const llvm::StringRef &name) {
+  StringMap<AMDILKernel*>::iterator entry = mKernels.find(name);
+  if (entry != mKernels.end()) {
+    return entry->second;
+  }
+  return NULL;
+}
+
+// True when mKernels contains an entry for name.
+bool AMDILModuleInfo::isKernel(const llvm::StringRef &name) const {
+  StringMap<AMDILKernel*>::const_iterator entry = mKernels.find(name);
+  return !(entry == mKernels.end());
+}
+
+// True when kernel name is known and image index iID is in its writeOnly
+// set; false for an unknown kernel.
+bool AMDILModuleInfo::isWriteOnlyImage(const llvm::StringRef &name,
+    uint32_t iID) const {
+  const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
+  if (kiter != mKernels.end()) {
+    return kiter->second->writeOnly.count(iID);
+  }
+  return false;
+}
+#if 0
+// Disabled code (compiled out by the surrounding #if 0).
+// Returns 8 when the GPU_DISABLE_RAW_UAV environment variable starts with
+// '1'; otherwise the size of the kernel's writeOnly set, or 0 for an unknown
+// kernel.
+uint32_t
+AMDILModuleInfo::getNumWriteImages(const llvm::StringRef &name) const {
+ char *env = NULL;
+ env = getenv("GPU_DISABLE_RAW_UAV");
+ if (env && env[0] == '1') {
+ return 8;
+ }
+ const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
+ if (kiter == mKernels.end()) {
+ return 0;
+ } else {
+ return kiter->second->writeOnly.size();
+ }
+}
+#endif
+// True when kernel name is known and image index iID is in its readOnly
+// set; false for an unknown kernel.
+bool AMDILModuleInfo::isReadOnlyImage(const llvm::StringRef &name,
+    uint32_t iID) const {
+  const StringMap<AMDILKernel*>::const_iterator kiter = mKernels.find(name);
+  if (kiter != mKernels.end()) {
+    return kiter->second->readOnly.count(iID);
+  }
+  return false;
+}
+#if 0
+// Disabled code (compiled out by the surrounding #if 0): a family of
+// per-kernel accessors. Each one looks the kernel up in mKernels by name and
+// returns one recorded attribute, falling back to a default (false, 0, or
+// the product of the subtarget default sizes) when the kernel - or its sgv
+// record - is missing.
+
+// True when the kernel's sgv record has mHasRWG set.
+bool AMDILModuleInfo::hasRWG(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ AMDILKernelAttr *ptr = iter->second->sgv;
+ if (ptr) {
+ return ptr->mHasRWG;
+ }
+ }
+ return false;
+}
+
+// True when the kernel's sgv record has mHasRWR set.
+bool AMDILModuleInfo::hasRWR(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ AMDILKernelAttr *ptr = iter->second->sgv;
+ if (ptr) {
+ return ptr->mHasRWR;
+ }
+ }
+ return false;
+}
+
+// Product of the three reqGroupSize values, or of the default sizes.
+uint32_t
+AMDILModuleInfo::getMaxGroupSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ AMDILKernelAttr *sgv = iter->second->sgv;
+ if (sgv) {
+ return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+ }
+ }
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+}
+
+// Product of the three reqRegionSize values, or of the default sizes.
+uint32_t
+AMDILModuleInfo::getMaxRegionSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ AMDILKernelAttr *sgv = iter->second->sgv;
+ if (sgv) {
+ return sgv->reqRegionSize[0] *
+ sgv->reqRegionSize[1] *
+ sgv->reqRegionSize[2];
+ }
+ }
+ return mSTM->getDefaultSize(0) *
+ mSTM->getDefaultSize(1) *
+ mSTM->getDefaultSize(2);
+}
+// The kernel's curRSize, or 0 for an unknown kernel.
+uint32_t AMDILModuleInfo::getRegionSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second->curRSize;
+ } else {
+ return 0;
+ }
+}
+
+// The kernel's curSize, or 0 for an unknown kernel.
+uint32_t AMDILModuleInfo::getLocalSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second->curSize;
+ } else {
+ return 0;
+ }
+}
+
+// The kernel's constSize, or 0 for an unknown kernel.
+uint32_t AMDILModuleInfo::getConstSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second->constSize;
+ } else {
+ return 0;
+ }
+}
+
+// The kernel's curHWRSize, or 0 for an unknown kernel.
+uint32_t
+AMDILModuleInfo::getHWRegionSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second->curHWRSize;
+ } else {
+ return 0;
+ }
+}
+
+// The kernel's curHWSize, or 0 for an unknown kernel.
+uint32_t AMDILModuleInfo::getHWLocalSize(const llvm::StringRef &name) const {
+ StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+ if (iter != mKernels.end()) {
+ return iter->second->curHWSize;
+ } else {
+ return 0;
+ }
+}
+#endif
+
+// Returns the ID stored in mArgIDMap for arg, or -1 when arg has no entry.
+int32_t AMDILModuleInfo::getArgID(const Argument *arg) {
+  DenseMap<const Argument *, int32_t>::iterator entry = mArgIDMap.find(arg);
+  if (entry == mArgIDMap.end()) {
+    return -1;
+  }
+  return entry->second;
+}
+
+
+// Region size of kernel name along dimension dim.
+//   dim 0..2 : the requested region size for that dimension,
+//   dim 3    : the product of all three dimensions,
+//   other    : 1.
+// The values come from the kernel's sgv record when the kernel is known and
+// has one; otherwise from the subtarget default sizes.
+uint32_t
+AMDILModuleInfo::getRegion(const llvm::StringRef &name, uint32_t dim) const {
+  StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+  const AMDILKernelAttr *attrs = NULL;
+  if (iter != mKernels.end()) {
+    attrs = iter->second->sgv;
+  }
+  if (dim <= 2) {
+    if (attrs) {
+      return attrs->reqRegionSize[dim];
+    }
+    return mSTM->getDefaultSize(dim);
+  }
+  if (dim == 3) {
+    if (attrs) {
+      return attrs->reqRegionSize[0] *
+        attrs->reqRegionSize[1] *
+        attrs->reqRegionSize[2];
+    }
+    return mSTM->getDefaultSize(0) *
+      mSTM->getDefaultSize(1) *
+      mSTM->getDefaultSize(2);
+  }
+  return 1;
+}
+
+// Iterator to the first entry of mConstMems.
+StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_begin() {
+  StringMap<AMDILConstPtr>::iterator first = mConstMems.begin();
+  return first;
+}
+
+
+// Past-the-end iterator of mConstMems.
+StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_end() {
+  StringMap<AMDILConstPtr>::iterator pastLast = mConstMems.end();
+  return pastLast;
+}
+
+// True when S is present in mByteStore.
+bool AMDILModuleInfo::byteStoreExists(StringRef S) const {
+  const bool missing = (mByteStore.find(S) == mByteStore.end());
+  return !missing;
+}
+
+// Size of the constant pointer named arg on kernel krnl, or 0 when the
+// kernel has no such constant pointer.
+uint32_t AMDILModuleInfo::getConstPtrSize(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return 0;
+  }
+  return found->size;
+}
+
+// Offset of the constant pointer named arg on kernel krnl, or 0 when the
+// kernel has no such constant pointer.
+uint32_t AMDILModuleInfo::getConstPtrOff(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return 0;
+  }
+  return found->offset;
+}
+
+// Constant-buffer number of the constant pointer named arg on kernel krnl,
+// or 0 when the kernel has no such constant pointer.
+uint32_t AMDILModuleInfo::getConstPtrCB(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return 0;
+  }
+  return found->cbNum;
+}
+
+// Records, for every entry of MF's constant pool, the pair (current running
+// offset, constant) in krnl->CPOffsets and then advances the running offset
+// mCurrentCPOffset by the constant's type size rounded up to 16. Does
+// nothing when MF has no constant pool.
+void AMDILModuleInfo::calculateCPOffsets(const MachineFunction *MF,
+    AMDILKernel *krnl)
+{
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  if (!MCP) {
+    return;
+  }
+  // Bind by reference: the entries are only read, so copying the whole
+  // vector is wasted work.
+  const std::vector<MachineConstantPoolEntry> &consts = MCP->getConstants();
+  size_t numConsts = consts.size();
+  for (size_t x = 0; x < numConsts; ++x) {
+    // Construct the pair directly. make_pair with explicit template
+    // arguments does not accept an lvalue argument under C++11 rules.
+    krnl->CPOffsets.push_back(
+        std::pair<uint32_t, const Constant*>(
+          mCurrentCPOffset, consts[x].Val.ConstVal));
+    size_t curSize = getTypeSize(consts[x].Val.ConstVal->getType(), true);
+    // Align the size to the vector boundary
+    curSize = (curSize + 15) & (~15);
+    mCurrentCPOffset += curSize;
+  }
+}
+
+// isArray flag of the constant pointer named arg on kernel krnl; false when
+// the kernel has no such constant pointer.
+bool AMDILModuleInfo::isConstPtrArray(const AMDILKernel *krnl,
+    const llvm::StringRef &arg) {
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return false;
+  }
+  return found->isArray;
+}
+
+// isArgument flag of the constant pointer named arg on kernel krnl; false
+// when the kernel has no such constant pointer.
+bool AMDILModuleInfo::isConstPtrArgument(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return false;
+  }
+  return found->isArgument;
+}
+
+// Base value of the constant pointer named arg on kernel krnl, or NULL when
+// the kernel has no such constant pointer.
+const Value *AMDILModuleInfo::getConstPtrValue(const AMDILKernel *krnl,
+    const llvm::StringRef &arg) {
+  const AMDILConstPtr *found = getConstPtr(krnl, arg);
+  if (!found) {
+    return NULL;
+  }
+  return found->base;
+}
+
+static void
+dumpZeroElements(StructType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(IntegerType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(ArrayType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(VectorType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(Type * const T, OSTREAM_TYPE &O, bool asBytes);
+
+// Writes the zero value of type T to O as a sequence of ":0" fields.
+// With asBytes set, one field is written per byte (8 for double and for
+// unlisted type IDs, 4 for float/pointer/function and opaque structs);
+// otherwise a single field is written for those types. Integer, struct,
+// array and vector types are forwarded to the matching overload. A NULL T
+// writes nothing.
+void dumpZeroElements(Type * const T, OSTREAM_TYPE &O, bool asBytes) {
+  if (!T) {
+    return;
+  }
+  switch(T->getTypeID()) {
+    case Type::X86_FP80TyID:
+    case Type::FP128TyID:
+    case Type::PPC_FP128TyID:
+    case Type::LabelTyID:
+      assert(0 && "These types are not supported by this backend");
+      // With asserts disabled these fall through and are written like a
+      // double, as before.
+    default:
+    case Type::DoubleTyID:
+      if (asBytes) {
+        O << ":0:0:0:0:0:0:0:0";
+      } else {
+        O << ":0";
+      }
+      break;
+    case Type::FloatTyID:
+    case Type::PointerTyID:
+    case Type::FunctionTyID:
+      if (asBytes) {
+        O << ":0:0:0:0";
+      } else {
+        O << ":0";
+      }
+      // This break was missing: control used to fall into the integer case,
+      // where dyn_cast<IntegerType> yields NULL for these types and the
+      // integer overload then dereferenced it.
+      break;
+    case Type::IntegerTyID:
+      dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
+      break;
+    case Type::StructTyID:
+      {
+        const StructType *ST = cast<StructType>(T);
+        if (!ST->isOpaque()) {
+          dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
+        } else { // A pre-LLVM 3.0 opaque type
+          if (asBytes) {
+            O << ":0:0:0:0";
+          } else {
+            O << ":0";
+          }
+        }
+      }
+      break;
+    case Type::ArrayTyID:
+      dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
+      break;
+    case Type::VectorTyID:
+      dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
+      break;
+  };
+}
+
+// Writes the zero value of every element type of ST, in declaration order,
+// by dispatching each element to the generic overload. A NULL ST writes
+// nothing.
+void
+dumpZeroElements(StructType * const ST, OSTREAM_TYPE &O, bool asBytes) {
+  if (!ST) {
+    return;
+  }
+  const StructType::element_iterator last = ST->element_end();
+  for (StructType::element_iterator elem = ST->element_begin();
+       elem != last; ++elem) {
+    Type *elemType = *elem;
+    dumpZeroElements(elemType, O, asBytes);
+  }
+}
+
+// Writes one ":0" field per whole byte of IT's bit width when asBytes is
+// set; writes nothing otherwise. A NULL IT writes nothing.
+void
+dumpZeroElements(IntegerType * const IT, OSTREAM_TYPE &O, bool asBytes) {
+  // Callers pass the result of a dyn_cast, which may be NULL; guard it the
+  // same way the StructType overload does.
+  if (!IT) {
+    return;
+  }
+  if (asBytes) {
+    unsigned byteWidth = (IT->getBitWidth() >> 3);
+    for (unsigned x = 0; x < byteWidth; ++x) {
+      O << ":0";
+    }
+  }
+}
+
+// Writes the zero value of AT's element type once per array element.
+void
+dumpZeroElements(ArrayType * const AT, OSTREAM_TYPE &O, bool asBytes) {
+  size_t remaining = AT->getNumElements();
+  while (remaining != 0) {
+    dumpZeroElements(AT->getElementType(), O, asBytes);
+    --remaining;
+  }
+}
+
+// Writes the zero value of VT's element type once per vector element.
+void
+dumpZeroElements(VectorType * const VT, OSTREAM_TYPE &O, bool asBytes) {
+  size_t remaining = VT->getNumElements();
+  while (remaining != 0) {
+    dumpZeroElements(VT->getElementType(), O, asBytes);
+    --remaining;
+  }
+}
+
+/// Write each of the \p numBytes bytes stored at \p data to \p O as ":<hex>",
+/// in the order the bytes sit in host memory.
+static void printHexBytes(OSTREAM_TYPE &O, const void *data, size_t numBytes) {
+  // Reading an object through unsigned char is always permitted, so no union
+  // type punning is needed to get at the individual bytes.
+  const unsigned char *bytes = static_cast<const unsigned char *>(data);
+  for (size_t i = 0; i < numBytes; ++i) {
+    O << ":";
+    O.write_hex(bytes[i]);
+  }
+}
+
+/// Print the constant \p CAval to \p O as a sequence of ":<hex>" fields.
+///
+/// \param CAval   constant to print; aggregates are printed operand by operand.
+/// \param O       stream that receives the text.
+/// \param asBytes when true every scalar is split into one field per byte (in
+///                host byte order); when false every scalar is a single field.
+///
+/// In byte mode integers of width 8, 16 and 32 produce 1, 2 and 4 fields; every
+/// other width produces 8.  Floating-point constants that are not IEEE doubles
+/// go through convertToFloat().  Null pointers, constant expressions and undef
+/// values are printed as a single ":0".  Any other kind of constant triggers an
+/// assertion.
+void AMDILModuleInfo::printConstantValue(const Constant *CAval,
+    OSTREAM_TYPE &O, bool asBytes) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
+    const APFloat &APF = CFP->getValueAPF();
+    if (&APF.getSemantics() == &APFloat::IEEEdouble) {
+      double val = APF.convertToDouble();
+      if (asBytes) {
+        printHexBytes(O, &val, sizeof(val));
+      } else {
+        // The raw bit pattern of the value, printed as one 64-bit word.
+        O << ":";
+        O.write_hex(APF.bitcastToAPInt().getZExtValue());
+      }
+    } else {
+      float val = APF.convertToFloat();
+      if (asBytes) {
+        printHexBytes(O, &val, sizeof(val));
+      } else {
+        // The raw bit pattern of the value, printed as one 32-bit word.
+        O << ":";
+        O.write_hex(APF.bitcastToAPInt().getZExtValue());
+      }
+    }
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
+    uint64_t zVal = CI->getValue().getZExtValue();
+    if (!asBytes) {
+      O << ":";
+      O.write_hex(zVal);
+    } else {
+      // Narrow to the exact storage width first so the bytes that get printed
+      // do not depend on where the host keeps the low-order bytes of zVal.
+      switch (CI->getBitWidth()) {
+      case 8: {
+        unsigned char narrowed = (unsigned char)(zVal & 0xFF);
+        printHexBytes(O, &narrowed, sizeof(narrowed));
+        break;
+      }
+      case 16: {
+        uint16_t narrowed = (uint16_t)zVal;
+        printHexBytes(O, &narrowed, sizeof(narrowed));
+        break;
+      }
+      case 32: {
+        uint32_t narrowed = (uint32_t)zVal;
+        printHexBytes(O, &narrowed, sizeof(narrowed));
+        break;
+      }
+      default:
+        // Every other width is dumped as a full 64-bit value.
+        printHexBytes(O, &zVal, sizeof(zVal));
+        break;
+      }
+    }
+  } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
+    for (unsigned x = 0, e = CV->getNumOperands(); x != e; ++x) {
+      printConstantValue(CV->getOperand(x), O, asBytes);
+    }
+  } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
+    for (unsigned x = 0, e = CS->getNumOperands(); x != e; ++x) {
+      printConstantValue(CS->getOperand(x), O, asBytes);
+    }
+  } else if (const ConstantAggregateZero *CAZ
+             = dyn_cast<ConstantAggregateZero>(CAval)) {
+    const unsigned numOps = CAZ->getNumOperands();
+    if (numOps > 0) {
+      for (unsigned x = 0; x != numOps; ++x) {
+        printConstantValue(cast<Constant>(CAZ->getOperand(x)), O, asBytes);
+      }
+    } else if (asBytes) {
+      dumpZeroElements(CAval->getType(), O, asBytes);
+    } else {
+      // One ":0" per element, and always at least one even when the element
+      // count is reported as zero.
+      const int numElts = getNumElements(CAval->getType());
+      O << ":0";
+      for (int x = 1; x < numElts; ++x) {
+        O << ":0";
+      }
+    }
+  } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
+    for (unsigned x = 0, e = CA->getNumOperands(); x != e; ++x) {
+      printConstantValue(CA->getOperand(x), O, asBytes);
+    }
+  } else if (isa<ConstantPointerNull>(CAval) || isa<ConstantExpr>(CAval) ||
+             isa<UndefValue>(CAval)) {
+    // None of these can be evaluated here; they are emitted as a single zero.
+    O << ":0";
+  } else {
+    assert(0 && "Hit condition which was not expected");
+  }
+}
+#if 0
+/// Return true if \p T is a struct type, or a pointer, array or vector type
+/// whose contained type (followed recursively) is a struct.  A null \p T is
+/// reported as "not a struct".
+static bool isStruct(Type * const T)
+{
+  if (!T) {
+    return false;
+  }
+  switch (T->getTypeID()) {
+  case Type::StructTyID:
+    return true;
+  case Type::PointerTyID:
+    return isStruct(T->getContainedType(0));
+  case Type::ArrayTyID:
+  case Type::VectorTyID:
+    // The type ID has already been checked, so use the asserting cast<> rather
+    // than dereferencing an unchecked dyn_cast<> result.
+    return isStruct(cast<SequentialType>(T)->getElementType());
+  default:
+    return false;
+  }
+}
+
+/// Dump the contents of constant buffer \p id to \p O, framed by a
+/// ";#DATASTART" and a ";#DATAEND" marker.
+///
+/// \param O  stream that receives the text.
+/// \param km kernel manager used to print the type name of every entry.
+/// \param id number of the constant buffer to dump.
+///
+/// For buffer 0 the per-kernel constant-pool entries are printed first (when
+/// mCurrentCPOffset is non-zero) and the offsets of the global constants that
+/// follow are shifted by mCurrentCPOffset.
+void AMDILModuleInfo::dumpDataToCB(OSTREAM_TYPE &O, AMDILKernelManager *km,
+    uint32_t id) {
+  // Sum of the sizes of all constants in this buffer, each rounded up to a
+  // multiple of 16.
+  uint32_t size = 0;
+  for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(),
+       cme = consts_end(); cmb != cme; ++cmb) {
+    if (id == cmb->second.cbNum) {
+      size += (cmb->second.size + 15) & (~15);
+    }
+  }
+  if (id == 0) {
+    O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
+    if (mCurrentCPOffset) {
+      for (StringMap<AMDILKernel*>::iterator kcpb = mKernels.begin(),
+           kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
+        const AMDILKernel *k = kcpb->second;
+        size_t numConsts = k->CPOffsets.size();
+        for (size_t x = 0; x < numConsts; ++x) {
+          size_t offset = k->CPOffsets[x].first;
+          const Constant *C = k->CPOffsets[x].second;
+          Type *Ty = C->getType();
+          // Struct-like types use getTypeSize() and are printed byte-wise;
+          // everything else is sized by its element count.
+          size_t entrySize = (isStruct(Ty) ? getTypeSize(Ty, true)
+                              : getNumElements(Ty));
+          O << ";#" << km->getTypeName(Ty, symTab) << ":";
+          O << offset << ":" << entrySize ;
+          printConstantValue(C, O, isStruct(Ty));
+          O << "\n";
+        }
+      }
+    }
+  } else {
+    O << ";#DATASTART:" << id << ":" << size << "\n";
+  }
+
+  for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
+       cmb != cme; ++cmb) {
+    if (cmb->second.cbNum != id) {
+      continue;
+    }
+    const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
+    Type *Ty = (G) ? G->getType() : NULL;
+    size_t offset = cmb->second.offset;
+    // G is null when the base is not a global variable, so only ask for the
+    // initializer when there is a global and it actually has one.  A missing
+    // initializer is reported by the assertion further down.
+    const Constant *C = (G && G->hasInitializer()) ? G->getInitializer() : NULL;
+    size_t entrySize = (isStruct(Ty)
+                        ? getTypeSize(Ty, true)
+                        : getNumElements(Ty));
+    O << ";#" << km->getTypeName(Ty, symTab) << ":";
+    if (!id) {
+      O << (offset + mCurrentCPOffset) << ":" << entrySize;
+    } else {
+      O << offset << ":" << entrySize;
+    }
+    if (C) {
+      printConstantValue(C, O, isStruct(Ty));
+    } else {
+      assert(0 && "Cannot have a constant pointer"
+             " without an initializer!");
+    }
+    O <<"\n";
+  }
+  if (id == 0) {
+    O << ";#DATAEND\n";
+  } else {
+    O << ";#DATAEND:" << id << "\n";
+  }
+}
+
+/// Dump every constant buffer that is in use to \p O, one dumpDataToCB() call
+/// per distinct buffer number.
+void
+AMDILModuleInfo::dumpDataSection(OSTREAM_TYPE &O, AMDILKernelManager *km) {
+  // Nothing to do when there are neither constant memories nor constant-pool
+  // data.
+  if (mConstMems.empty() && !mCurrentCPOffset) {
+    return;
+  }
+
+  // Collect the distinct buffer numbers referenced by the global constants.
+  llvm::DenseSet<uint32_t> usedBuffers;
+  StringMap<AMDILConstPtr>::iterator constIt = consts_begin();
+  StringMap<AMDILConstPtr>::iterator constEnd = consts_end();
+  for (; constIt != constEnd; ++constIt) {
+    usedBuffers.insert(constIt->second.cbNum);
+  }
+  // Constant-pool data is dumped as part of buffer 0.
+  if (mCurrentCPOffset) {
+    usedBuffers.insert(0);
+  }
+
+  llvm::DenseSet<uint32_t>::iterator bufIt = usedBuffers.begin();
+  llvm::DenseSet<uint32_t>::iterator bufEnd = usedBuffers.end();
+  for (; bufIt != bufEnd; ++bufIt) {
+    dumpDataToCB(O, km, *bufIt);
+  }
+}
+#endif
+/// Return the function ID of \p func, creating one if none is known yet.
+/// A value that has a name is looked up by that name; an unnamed value is
+/// looked up by its pointer.
+uint32_t AMDILModuleInfo::getOrCreateFunctionID(const GlobalValue* func) {
+  if (func->getName().size()) {
+    return getOrCreateFunctionID(func->getName());
+  }
+  llvm::DenseMap<const GlobalValue*, uint32_t>::iterator known =
+    mFuncPtrNames.find(func);
+  if (known != mFuncPtrNames.end()) {
+    return known->second;
+  }
+  // A new ID is offset by RESERVED_FUNCS and by the number of entries already
+  // stored in both the pointer-keyed and the name-keyed map.
+  const uint32_t id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
+  mFuncPtrNames[func] = id;
+  return id;
+}
+/// Return the function ID registered for the name \p func, creating one if the
+/// name has not been seen before.
+uint32_t AMDILModuleInfo::getOrCreateFunctionID(const std::string &func) {
+  llvm::StringMap<uint32_t>::iterator known = mFuncNames.find(func);
+  if (known != mFuncNames.end()) {
+    return known->second;
+  }
+  // A new ID is offset by RESERVED_FUNCS and by the number of entries already
+  // stored in both the name-keyed and the pointer-keyed map.
+  const uint32_t id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
+  mFuncNames[func] = id;
+  return id;
+}
diff --git a/src/gallium/drivers/radeon/AMDILModuleInfo.h b/src/gallium/drivers/radeon/AMDILModuleInfo.h
new file mode 100644
index 00000000000..7f667e6190f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILModuleInfo.h
@@ -0,0 +1,199 @@
+//===--------------- AMDILModuleInfo.h --------------------------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for AMDIL targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_MACHINE_MODULE_INFO_H_
+#define _AMDIL_MACHINE_MODULE_INFO_H_
+#include "AMDIL.h"
+#include "AMDILKernel.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <set>
+
+namespace llvm {
+ class AMDILKernel;
+ class Argument;
+ class TypeSymbolTable;
+ class GlobalValue;
+ class MachineFunction;
+ class GlobalValue;
+
+ /// MachineModuleInfoImpl subclass that holds the module-level information
+ /// for AMDIL targets: per-kernel data, constant pointers, function IDs and
+ /// related bookkeeping (see the members below).
+ class AMDILModuleInfo : public MachineModuleInfoImpl {
+ protected:
+ const MachineModuleInfo *mMMI;
+ public:
+ AMDILModuleInfo(const MachineModuleInfo &);
+ virtual ~AMDILModuleInfo();
+
+ /// Process the given module and parse out the global variable metadata passed
+ /// down from the frontend-compiler.
+ void processModule(const Module *MF, const AMDILTargetMachine* mTM);
+
+ /// Returns true if the image ID corresponds to a read only image.
+ bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Returns true if the image ID corresponds to a write only image.
+ bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+ /// Gets the group size of the kernel for the given dimension.
+ uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
+
+ /// Get the offset of the array for the kernel.
+ int32_t getArrayOffset(const llvm::StringRef &name) const;
+
+ /// Get the offset of the const memory for the kernel.
+ int32_t getConstOffset(const llvm::StringRef &name) const;
+
+ /// Get the boolean value if this particular constant uses HW or not.
+ bool getConstHWBit(const llvm::StringRef &name) const;
+
+ /// Get a reference to the kernel metadata information for the given function
+ /// name.
+ AMDILKernel *getKernel(const llvm::StringRef &name);
+ bool isKernel(const llvm::StringRef &name) const;
+
+ /// Dump the data section to the output stream for the given kernel.
+ /// (The declaration is currently commented out.)
+ //void dumpDataSection(OSTREAM_TYPE &O, AMDILKernelManager *km);
+
+ /// Iterate through the constants that are global to the compilation unit.
+ StringMap<AMDILConstPtr>::iterator consts_begin();
+ StringMap<AMDILConstPtr>::iterator consts_end();
+
+ /// Query if the kernel has a byte store.
+ bool byteStoreExists(llvm::StringRef S) const;
+
+ /// Query if the constant pointer is an argument.
+ bool isConstPtrArgument(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Query if the constant pointer is an array that is globally scoped.
+ bool isConstPtrArray(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Query the size of the constant pointer.
+ uint32_t getConstPtrSize(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Query the offset of the constant pointer.
+ uint32_t getConstPtrOff(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Query the constant buffer number for a constant pointer.
+ uint32_t getConstPtrCB(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Query the Value* that the constant pointer originates from.
+ const Value *getConstPtrValue(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+ /// Get the ID of the argument.
+ int32_t getArgID(const Argument *arg);
+
+ /// Get the unique function ID for the specific function name and create a new
+ /// unique ID if it is not found.
+ uint32_t getOrCreateFunctionID(const GlobalValue* func);
+ uint32_t getOrCreateFunctionID(const std::string& func);
+
+ /// Calculate the offsets of the constant pool for the given kernel and
+ /// machine function.
+ void calculateCPOffsets(const MachineFunction *MF, AMDILKernel *krnl);
+
+ /// Add \p offset to the running printf offset.
+ void add_printf_offset(uint32_t offset) { mPrintfOffset += offset; }
+ /// Return the printf offset accumulated so far.
+ uint32_t get_printf_offset() { return mPrintfOffset; }
+
+ private:
+ /// Various functions that parse global value information and store them in
+ /// the global manager. This approach is used instead of dynamic parsing as it
+ /// might require more space, but should allow caching of data that gets
+ /// requested multiple times.
+ AMDILKernelAttr parseSGV(const GlobalValue *GV);
+ AMDILLocalArg parseLVGV(const GlobalValue *GV);
+ void parseGlobalAnnotate(const GlobalValue *G);
+ void parseImageAnnotate(const GlobalValue *G);
+ void parseConstantPtrAnnotate(const GlobalValue *G);
+ /// Print \p CAval to \p O as ":<hex>" fields, one per scalar or, when
+ /// \p asByte is set, one per byte.
+ void printConstantValue(const Constant *CAval,
+ OSTREAM_TYPE& O,
+ bool asByte);
+ void parseKernelInformation(const Value *V);
+ void parseAutoArray(const GlobalValue *G, bool isRegion);
+ void parseConstantPtr(const GlobalValue *G);
+ void allocateGlobalCB();
+ bool checkConstPtrsUseHW(Module::const_iterator *F);
+
+ llvm::StringMap<AMDILKernel*> mKernels;
+ llvm::StringMap<AMDILKernelAttr> mKernelArgs;
+ llvm::StringMap<AMDILArrayMem> mArrayMems;
+ llvm::StringMap<AMDILConstPtr> mConstMems;
+ llvm::StringMap<AMDILLocalArg> mLocalArgs;
+ /// Function IDs keyed by function name (see getOrCreateFunctionID).
+ llvm::StringMap<uint32_t> mFuncNames;
+ /// Function IDs of unnamed values, keyed by pointer (see
+ /// getOrCreateFunctionID).
+ llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames;
+ llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;
+ std::set<llvm::StringRef> mByteStore;
+ std::set<llvm::StringRef> mIgnoreStr;
+ llvm::DenseMap<const Argument *, int32_t> mArgIDMap;
+ const TypeSymbolTable *symTab;
+ const AMDILSubtarget *mSTM;
+ size_t mOffset;
+ uint32_t mReservedBuffs;
+ // NOTE(review): presumably the total size of the constant-pool data placed
+ // at the start of constant buffer 0 -- confirm against calculateCPOffsets().
+ uint32_t mCurrentCPOffset;
+ /// Running total maintained by add_printf_offset().
+ uint32_t mPrintfOffset;
+ };
+
+
+
+} // end namespace llvm
+
+#endif // _AMDIL_MACHINE_MODULE_INFO_H_
+
diff --git a/src/gallium/drivers/radeon/AMDILMultiClass.td b/src/gallium/drivers/radeon/AMDILMultiClass.td
new file mode 100644
index 00000000000..1ac4a167faa
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILMultiClass.td
@@ -0,0 +1,1480 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// Multiclass that handles branch instructions.
+// Defines one IL_OP_IFC pseudo instruction per condition register class
+// (_i8, _i16, _i32, _f32, _i64, _f64). Each def has no outputs and takes the
+// branch target plus the condition value $src0, and matches
+// (Op bb:$target, $src0) for the SDNode Op the multiclass is instantiated with.
+// NOTE(review): the asm comment strings read "i32" for the _i8 and _i16 defs
+// and "f64" for the _i64 def. This looks like copy-paste; confirm whether the
+// text is relied on anywhere before changing it.
+multiclass BranchConditional<SDNode Op> {
+ def _i8 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRI8:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI8:$src0)]>;
+ def _i16 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRI16:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI16:$src0)]>;
+ def _i32 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+ def _i64 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRI64:$src0),
+ "; f64 Pseudo branch instruction",
+ [(Op bb:$target, (i64 GPRI64:$src0))]>;
+ def _f64 : ILFormat<IL_OP_IFC, (outs),
+ (ins brtarget:$target, GPRF64:$src0),
+ "; f64 Pseudo branch instruction",
+ [(Op bb:$target, (f64 GPRF64:$src0))]>;
+}
+// Multiclass that handles compare instructions.
+// When a definition is added here, a corresponding definition
+// needs to be added at:
+// AMDILISelLowering.cpp@EmitInstrWithCustomInserter
+//
+// Every def is an IL_OP_CMP that takes a condition-code immediate $cc and two
+// sources of the same register class as the destination, and matches IL_cmp.
+// Scalar, 2-element and 4-element variants are provided; there are no
+// _v4i64 / _v4f64 variants. The asm comment of a vector variant names only its
+// element type (e.g. "; i8 " for _v2i8 and _v4i8).
+multiclass Compare<string asm> {
+ def _i8 : ILFormat<IL_OP_CMP, (outs GPRI8:$dst),
+ (ins i32imm:$cc, GPRI8:$src0, GPRI8:$src1),
+ !strconcat("; i8 ", asm),
+ [(set GPRI8:$dst, (IL_cmp imm:$cc, GPRI8:$src0, GPRI8:$src1))]>;
+ def _i16 : ILFormat<IL_OP_CMP, (outs GPRI16:$dst),
+ (ins i32imm:$cc, GPRI16:$src0, GPRI16:$src1),
+ !strconcat("; i16 ", asm),
+ [(set GPRI16:$dst, (IL_cmp imm:$cc, GPRI16:$src0, GPRI16:$src1))]>;
+ def _i32 : ILFormat<IL_OP_CMP, (outs GPRI32:$dst),
+ (ins i32imm:$cc, GPRI32:$src0, GPRI32:$src1),
+ !strconcat("; i32 ", asm),
+ [(set GPRI32:$dst, (IL_cmp imm:$cc, GPRI32:$src0, GPRI32:$src1))]>;
+ def _i64 : ILFormat<IL_OP_CMP, (outs GPRI64:$dst),
+ (ins i32imm:$cc, GPRI64:$src0, GPRI64:$src1),
+ !strconcat("; i64 ", asm),
+ [(set GPRI64:$dst, (IL_cmp imm:$cc, GPRI64:$src0, GPRI64:$src1))]>;
+ def _f32 : ILFormat<IL_OP_CMP, (outs GPRF32:$dst),
+ (ins i32imm:$cc, GPRF32:$src0, GPRF32:$src1),
+ !strconcat("; f32 ", asm),
+ [(set GPRF32:$dst, (IL_cmp imm:$cc, GPRF32:$src0, GPRF32:$src1))]>;
+ def _f64 : ILFormat<IL_OP_CMP, (outs GPRF64:$dst),
+ (ins i32imm:$cc, GPRF64:$src0, GPRF64:$src1),
+ !strconcat("; f64 ", asm),
+ [(set GPRF64:$dst, (IL_cmp imm:$cc, GPRF64:$src0, GPRF64:$src1))]>;
+ def _v2i8 : ILFormat<IL_OP_CMP, (outs GPRV2I8:$dst),
+ (ins i32imm:$cc, GPRV2I8:$src0, GPRV2I8:$src1),
+ !strconcat("; i8 ", asm),
+ [(set GPRV2I8:$dst, (IL_cmp imm:$cc, GPRV2I8:$src0, GPRV2I8:$src1))]>;
+ def _v2i16 : ILFormat<IL_OP_CMP, (outs GPRV2I16:$dst),
+ (ins i32imm:$cc, GPRV2I16:$src0, GPRV2I16:$src1),
+ !strconcat("; i16 ", asm),
+ [(set GPRV2I16:$dst, (IL_cmp imm:$cc, GPRV2I16:$src0, GPRV2I16:$src1))]>;
+ def _v2i32 : ILFormat<IL_OP_CMP, (outs GPRV2I32:$dst),
+ (ins i32imm:$cc, GPRV2I32:$src0, GPRV2I32:$src1),
+ !strconcat("; i32 ", asm),
+ [(set GPRV2I32:$dst, (IL_cmp imm:$cc, GPRV2I32:$src0, GPRV2I32:$src1))]>;
+ def _v2i64 : ILFormat<IL_OP_CMP, (outs GPRV2I64:$dst),
+ (ins i32imm:$cc, GPRV2I64:$src0, GPRV2I64:$src1),
+ !strconcat("; i64 ", asm),
+ [(set GPRV2I64:$dst, (IL_cmp imm:$cc, GPRV2I64:$src0, GPRV2I64:$src1))]>;
+ def _v2f32 : ILFormat<IL_OP_CMP, (outs GPRV2F32:$dst),
+ (ins i32imm:$cc, GPRV2F32:$src0, GPRV2F32:$src1),
+ !strconcat("; f32 ", asm),
+ [(set GPRV2F32:$dst, (IL_cmp imm:$cc, GPRV2F32:$src0, GPRV2F32:$src1))]>;
+ def _v2f64 : ILFormat<IL_OP_CMP, (outs GPRV2F64:$dst),
+ (ins i32imm:$cc, GPRV2F64:$src0, GPRV2F64:$src1),
+ !strconcat("; f64 ", asm),
+ [(set GPRV2F64:$dst, (IL_cmp imm:$cc, GPRV2F64:$src0, GPRV2F64:$src1))]>;
+ def _v4i8 : ILFormat<IL_OP_CMP, (outs GPRV4I8:$dst),
+ (ins i32imm:$cc, GPRV4I8:$src0, GPRV4I8:$src1),
+ !strconcat("; i8 ", asm),
+ [(set GPRV4I8:$dst, (IL_cmp imm:$cc, GPRV4I8:$src0, GPRV4I8:$src1))]>;
+ def _v4i16 : ILFormat<IL_OP_CMP, (outs GPRV4I16:$dst),
+ (ins i32imm:$cc, GPRV4I16:$src0, GPRV4I16:$src1),
+ !strconcat("; i16 ", asm),
+ [(set GPRV4I16:$dst, (IL_cmp imm:$cc, GPRV4I16:$src0, GPRV4I16:$src1))]>;
+ def _v4i32 : ILFormat<IL_OP_CMP, (outs GPRV4I32:$dst),
+ (ins i32imm:$cc, GPRV4I32:$src0, GPRV4I32:$src1),
+ !strconcat("; i32 ", asm),
+ [(set GPRV4I32:$dst, (IL_cmp imm:$cc, GPRV4I32:$src0, GPRV4I32:$src1))]>;
+ def _v4f32 : ILFormat<IL_OP_CMP, (outs GPRV4F32:$dst),
+ (ins i32imm:$cc, GPRV4F32:$src0, GPRV4F32:$src1),
+ !strconcat("; f32 ", asm),
+ [(set GPRV4F32:$dst, (IL_cmp imm:$cc, GPRV4F32:$src0, GPRV4F32:$src1))]>;
+}
+
+// Multiclass that handles constant values.
+// Defines one IL_OP_MOV per scalar type (_i8, _i16, _i32, _i64, _f32, _f64)
+// that sets the destination register from an immediate operand $val: imm for
+// the integer variants, fpimm for the floating-point ones. All vector variants
+// are currently commented out.
+multiclass ILConstant<string asm> {
+ def _i8 : ILFormat<IL_OP_MOV, (outs GPRI8:$dst),
+ (ins i8imm:$val),
+ asm, [(set GPRI8:$dst, imm:$val)]>;
+
+ // def _v2i8 : ILFormat<IL_OP_MOV, (outs GPRV2I8:$dst),
+ // (ins i8imm:$val),
+ // asm, [(set GPRV2I8:$dst, GPRV2I8:$val)]>;
+
+ //def _v4i8 : ILFormat<IL_OP_MOV, (outs GPRV4I8:$dst),
+ //(ins i8imm:$val),
+ //asm, [(set GPRV4I8:$dst, GPRV4I8:$val)]>;
+
+ def _i16 : ILFormat<IL_OP_MOV, (outs GPRI16:$dst),
+ (ins i16imm:$val),
+ asm, [(set GPRI16:$dst, imm:$val)]>;
+
+ // def _v2i16 : ILFormat<IL_OP_MOV, (outs GPRV2I16:$dst),
+ // (ins i16imm:$val),
+ // asm, [(set GPRV2I16:$dst, GPRV2I16:$val)]>;
+
+ // def _v4i16 : ILFormat<IL_OP_MOV, (outs GPRV4I16:$dst),
+ // (ins i16imm:$val),
+ // asm, [(set GPRV4I16:$dst, GPRV4I16:$val)]>;
+
+ def _i32 : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+ (ins i32imm:$val),
+ asm, [(set GPRI32:$dst, imm:$val)]>;
+
+ // def _v2i32 : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
+ // (ins i32imm:$val),
+ // asm, [(set GPRV2I32:$dst, GPRV2I32:$val)]>;
+
+ // def _v4i32 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+ // (ins GPRV4I32:$val),
+ // asm, [(set GPRV4I32:$dst, GPRV4I32:$val)]>;
+
+ def _i64 : ILFormat<IL_OP_MOV, (outs GPRI64:$dst),
+ (ins i64imm:$val),
+ asm, [(set GPRI64:$dst, imm:$val)]>;
+
+ // def _v2i64 : ILFormat<IL_OP_MOV, (outs GPRV2I64:$dst),
+ // (ins i64imm:$val),
+ // asm, [(set GPRV2I64:$dst, GPRV2I64:$val)]>;
+
+ def _f32 : ILFormat<IL_OP_MOV, (outs GPRF32:$dst),
+ (ins f32imm:$val),
+ asm, [(set GPRF32:$dst, fpimm:$val)]>;
+
+ // def _v2f32 : ILFormat<IL_OP_MOV, (outs GPRV2F32:$dst),
+ // (ins f32imm:$val),
+ // asm, [(set GPRV2F32:$dst, GPRV2F32:$val)]>;
+
+ // def _v4f32 : ILFormat<IL_OP_MOV, (outs GPRV4F32:$dst),
+ // (ins f32imm:$val),
+ // asm, [(set GPRV4F32:$dst, GPRV4F32:$val)]>;
+
+ def _f64 : ILFormat<IL_OP_MOV, (outs GPRF64:$dst),
+ (ins f64imm:$val),
+ asm, [(set GPRF64:$dst, fpimm:$val)]>;
+
+ // def _v2f64 : ILFormat<IL_OP_MOV, (outs GPRV2F64:$dst),
+ // (ins f64imm:$val),
+ // asm, [(set GPRV2F64:$dst, GPRV2F64:$val)]>;
+
+}
+
+// Multiclass that handles truncating memory store operations matched by the
+// global_*trunc_store pattern fragments.
+// Each def takes the value to store ($val) and a MEMI32 address ($ptr) and has
+// no outputs. The def suffix is <value type><stored type>: for example _i32i8
+// takes a GPRI32 value and matches global_i8trunc_store, and _v2i64i32 takes a
+// GPRV2I64 value and matches global_v2i32trunc_store.
+multiclass GTRUNCSTORE<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Multiclass that handles truncating memory store operations matched by the
+// local_*trunc_store pattern fragments.
+// Each def takes the value to store ($val) and a MEMI32 address ($ptr) and has
+// no outputs. The def suffix is <value type><stored type>: for example _i32i8
+// takes a GPRI32 value and matches local_i8trunc_store, and _v2i64i32 takes a
+// GPRV2I64 value and matches local_v2i32trunc_store.
+multiclass LTRUNCSTORE<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Multiclass that handles truncating memory store operations matched by the
+// private_*trunc_store pattern fragments.
+// Each def takes the value to store ($val) and a MEMI32 address ($ptr) and has
+// no outputs. The def suffix is <value type><stored type>: for example _i32i8
+// takes a GPRI32 value and matches private_i8trunc_store, and _v2i64i32 takes
+// a GPRV2I64 value and matches private_v2i32trunc_store.
+multiclass PTRUNCSTORE<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Multiclass that defines truncating-store instruction records matched by the
+// region_*trunc_store pattern fragments (presumably the region address
+// space, going by the fragment names), addressed through a MEMI32 pointer
+// operand and an ADDR address pattern.
+//
+// asm: mnemonic text; " $val $ptr" is appended to form the assembly string.
+//
+// Each def suffix reads _<value type><stored element type>: for example
+// _i64i16 takes a GPRI64 value and is matched by region_i16trunc_store.
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass RTRUNCSTORE<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+
+// Multiclass that defines one store instruction record per value register
+// class, addressed through a MEMI32 pointer operand and an ADDR address
+// pattern.
+//
+// asm:    mnemonic text; " $val $ptr" is appended to form the assembly string.
+// OpNode: pattern fragment used to match the store of $val to $ptr.
+//
+// The def suffix names the value type (e.g. _v4f32 takes a GPRV4F32 value).
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass STORE<string asm, PatFrag OpNode> {
+ def _i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI8:$val, ADDR:$ptr)]>;
+ def _i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI16:$val, ADDR:$ptr)]>;
+ def _i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI32:$val, ADDR:$ptr)]>;
+ def _f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRF32:$val, ADDR:$ptr)]>;
+ def _i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI64:$val, ADDR:$ptr)]>;
+ def _f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRF64:$val, ADDR:$ptr)]>;
+ def _v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4F32:$val, ADDR:$ptr)]>;
+ def _v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2F32:$val, ADDR:$ptr)]>;
+ def _v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I32:$val, ADDR:$ptr)]>;
+ def _v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I8:$val, ADDR:$ptr)]>;
+ def _v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I16:$val, ADDR:$ptr)]>;
+ def _v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I8:$val, ADDR:$ptr)]>;
+ def _v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I16:$val, ADDR:$ptr)]>;
+ def _v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I32:$val, ADDR:$ptr)]>;
+ def _v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2F64:$val, ADDR:$ptr)]>;
+ def _v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI32:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Multiclass that defines one load instruction record per destination
+// register class, addressed through a MEMI32 pointer operand and an ADDR
+// address pattern.
+//
+// asm:    mnemonic text; " $dst $ptr" is appended to form the assembly string.
+// OpNode: pattern fragment applied to the address; its result is set into
+//         $dst.
+//
+// The def suffix names the loaded type (e.g. _v2i32 produces a GPRV2I32
+// value). Every record uses IL_OP_MOV.
+multiclass LOAD<string asm, PatFrag OpNode> {
+ def _i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI8:$dst, (OpNode ADDR:$ptr))]>;
+ def _i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI16:$dst, (OpNode ADDR:$ptr))]>;
+ def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI32:$dst, (OpNode ADDR:$ptr))]>;
+ def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRF32:$dst, (OpNode ADDR:$ptr))]>;
+ def _i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI64:$dst, (OpNode ADDR:$ptr))]>;
+ def _f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRF64:$dst, (OpNode ADDR:$ptr))]>;
+ def _v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4F32:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2F32:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2F64:$dst, (OpNode ADDR:$ptr))]>;
+ def _v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I32:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I8:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I16:$dst, (OpNode ADDR:$ptr))]>;
+ def _v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I8:$dst, (OpNode ADDR:$ptr))]>;
+ def _v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I16:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I32:$dst, (OpNode ADDR:$ptr))]>;
+ def _v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEMI32:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I64:$dst, (OpNode ADDR:$ptr))]>;
+}
+
+// Multiclass that defines truncating-store instruction records matched by the
+// global_*trunc_store pattern fragments (presumably the global address
+// space, going by the fragment names), addressed through a MEMI64 pointer
+// operand and an ADDR64 address pattern.
+//
+// asm: mnemonic text; " $val $ptr" is appended to form the assembly string.
+//
+// Each def suffix reads _<value type><stored element type>: for example
+// _i32i8 takes a GPRI32 value and is matched by global_i8trunc_store.
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass GTRUNCSTORE64<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(global_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Multiclass that defines truncating-store instruction records matched by the
+// local_*trunc_store pattern fragments (presumably the local address space,
+// going by the fragment names), addressed through a MEMI64 pointer operand
+// and an ADDR64 address pattern.
+//
+// asm: mnemonic text; " $val $ptr" is appended to form the assembly string.
+//
+// Each def suffix reads _<value type><stored element type>: for example
+// _i32i8 takes a GPRI32 value and is matched by local_i8trunc_store.
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass LTRUNCSTORE64<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(local_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Multiclass that defines truncating-store instruction records matched by the
+// private_*trunc_store pattern fragments (presumably the private address
+// space, going by the fragment names), addressed through a MEMI64 pointer
+// operand and an ADDR64 address pattern.
+//
+// asm: mnemonic text; " $val $ptr" is appended to form the assembly string.
+//
+// Each def suffix reads _<value type><stored element type>: for example
+// _i32i8 takes a GPRI32 value and is matched by private_i8trunc_store.
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass PTRUNCSTORE64<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(private_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Multiclass that defines truncating-store instruction records matched by the
+// region_*trunc_store pattern fragments (presumably the region address
+// space, going by the fragment names), addressed through a MEMI64 pointer
+// operand and an ADDR64 address pattern.
+//
+// asm: mnemonic text; " $val $ptr" is appended to form the assembly string.
+//
+// Each def suffix reads _<value type><stored element type>: for example
+// _i32i8 takes a GPRI32 value and is matched by region_i8trunc_store.
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass RTRUNCSTORE64<string asm> {
+ def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+ def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+ def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+ def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+ def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+ def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+ def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+ def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+ def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(region_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+
+// Multiclass that defines one store instruction record per value register
+// class, addressed through a MEMI64 pointer operand and an ADDR64 address
+// pattern.
+//
+// asm:    mnemonic text; " $val $ptr" is appended to form the assembly string.
+// OpNode: pattern fragment used to match the store of $val to $ptr.
+//
+// The def suffix names the value type (e.g. _v4f32 takes a GPRV4F32 value).
+// Every record uses IL_OP_MOV and declares no outputs.
+multiclass STORE64<string asm, PatFrag OpNode> {
+ def _i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI8:$val, ADDR64:$ptr)]>;
+ def _i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI16:$val, ADDR64:$ptr)]>;
+ def _i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI32:$val, ADDR64:$ptr)]>;
+ def _f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRF32:$val, ADDR64:$ptr)]>;
+ def _i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRI64:$val, ADDR64:$ptr)]>;
+ def _f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRF64:$val, ADDR64:$ptr)]>;
+ def _v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4F32:$val, ADDR64:$ptr)]>;
+ def _v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2F32:$val, ADDR64:$ptr)]>;
+ def _v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I32:$val, ADDR64:$ptr)]>;
+ def _v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I8:$val, ADDR64:$ptr)]>;
+ def _v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I16:$val, ADDR64:$ptr)]>;
+ def _v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I8:$val, ADDR64:$ptr)]>;
+ def _v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV4I16:$val, ADDR64:$ptr)]>;
+ def _v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I32:$val, ADDR64:$ptr)]>;
+ def _v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2F64:$val, ADDR64:$ptr)]>;
+ def _v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEMI64:$ptr),
+ !strconcat(asm, " $val $ptr"),
+ [(OpNode GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Multiclass that defines one load instruction record per destination
+// register class, addressed through a MEMI64 pointer operand and an ADDR64
+// address pattern.
+//
+// asm:    mnemonic text; " $dst $ptr" is appended to form the assembly string.
+// OpNode: pattern fragment applied to the address; its result is set into
+//         $dst.
+//
+// The def suffix names the loaded type (e.g. _v2i32 produces a GPRV2I32
+// value). Every record uses IL_OP_MOV.
+multiclass LOAD64<string asm, PatFrag OpNode> {
+ def _i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI8:$dst, (OpNode ADDR64:$ptr))]>;
+ def _i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI16:$dst, (OpNode ADDR64:$ptr))]>;
+ def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRF32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRI64:$dst, (OpNode ADDR64:$ptr))]>;
+ def _f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRF64:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4F32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2F32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2F64:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I8:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I16:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I8:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV4I16:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I32:$dst, (OpNode ADDR64:$ptr))]>;
+ def _v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEMI64:$ptr),
+ !strconcat(asm, " $dst $ptr"),
+ [(set GPRV2I64:$dst, (OpNode ADDR64:$ptr))]>;
+}
+
+// Flow-control records that take a single source register. Only scalar
+// register classes are instantiated, since only scalar types should generate
+// flow control. The assembly string is opc.Text followed by " $src"; no
+// selection pattern is attached.
+multiclass BranchInstr<ILOpCode opc> {
+  def _i8  : UnaryOpNoRet<opc, (outs), (ins GPRI8:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+  def _i16 : UnaryOpNoRet<opc, (outs), (ins GPRI16:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+  def _i32 : UnaryOpNoRet<opc, (outs), (ins GPRI32:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+  def _i64 : UnaryOpNoRet<opc, (outs), (ins GPRI64:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+  def _f32 : UnaryOpNoRet<opc, (outs), (ins GPRF32:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+  def _f64 : UnaryOpNoRet<opc, (outs), (ins GPRF64:$src),
+                          !strconcat(opc.Text, " $src"), []>;
+}
+// Two-source counterpart of the single-source flow-control multiclass: both
+// sources come from the same scalar register class. Only scalar types should
+// generate flow control. The assembly string is opc.Text followed by
+// " $src0, $src1"; no selection pattern is attached.
+multiclass BranchInstr2<ILOpCode opc> {
+  def _i8  : BinaryOpNoRet<opc, (outs), (ins GPRI8:$src0, GPRI8:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i16 : BinaryOpNoRet<opc, (outs), (ins GPRI16:$src0, GPRI16:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i32 : BinaryOpNoRet<opc, (outs), (ins GPRI32:$src0, GPRI32:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i64 : BinaryOpNoRet<opc, (outs), (ins GPRI64:$src0, GPRI64:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _f32 : BinaryOpNoRet<opc, (outs), (ins GPRF32:$src0, GPRF32:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _f64 : BinaryOpNoRet<opc, (outs), (ins GPRF64:$src0, GPRF64:$src1),
+                           !strconcat(opc.Text, " $src0, $src1"), []>;
+}
+
+// Multiclass covering the vector-extract patterns: one ExtractVectorClass
+// record per vector type, pairing the scalar element register class (first
+// argument) with the vector register class it is taken from (second).
+multiclass VectorExtract<SDNode OpNode> {
+  def _v2f64 : ExtractVectorClass<GPRF64, GPRV2F64, OpNode>;
+  def _v4f32 : ExtractVectorClass<GPRF32, GPRV4F32, OpNode>;
+  def _v2f32 : ExtractVectorClass<GPRF32, GPRV2F32, OpNode>;
+  def _v2i64 : ExtractVectorClass<GPRI64, GPRV2I64, OpNode>;
+  def _v4i8  : ExtractVectorClass<GPRI8,  GPRV4I8,  OpNode>;
+  def _v4i16 : ExtractVectorClass<GPRI16, GPRV4I16, OpNode>;
+  def _v4i32 : ExtractVectorClass<GPRI32, GPRV4I32, OpNode>;
+  def _v2i8  : ExtractVectorClass<GPRI8,  GPRV2I8,  OpNode>;
+  def _v2i16 : ExtractVectorClass<GPRI16, GPRV2I16, OpNode>;
+  def _v2i32 : ExtractVectorClass<GPRI32, GPRV2I32, OpNode>;
+}
+
+// Multiclass that instantiates VectorConcatClass once per supported vector
+// type. In every def the first register class is the wider vector class named
+// by the def suffix and the second is the narrower class it is assembled
+// from (argument roles are defined by VectorConcatClass, which is declared
+// elsewhere). OpNode is the SDNode forwarded to each record.
+multiclass VectorConcat<SDNode OpNode> {
+ def _v2f64 : VectorConcatClass<GPRV2F64, GPRF64, OpNode>;
+ // Pair the i64 pieces with the integer vector class; this def previously
+ // used GPRV2F64, mixing a float vector class with integer elements.
+ def _v2i64 : VectorConcatClass<GPRV2I64, GPRI64, OpNode>;
+ def _v4f32 : VectorConcatClass<GPRV4F32, GPRV2F32, OpNode>;
+ def _v4i32 : VectorConcatClass<GPRV4I32, GPRV2I32, OpNode>;
+ def _v4i16 : VectorConcatClass<GPRV4I16, GPRV2I16, OpNode>;
+ def _v4i8 : VectorConcatClass<GPRV4I8, GPRV2I8, OpNode>;
+ def _v2f32 : VectorConcatClass<GPRV2F32, GPRF32, OpNode>;
+ def _v2i32 : VectorConcatClass<GPRV2I32, GPRI32, OpNode>;
+ def _v2i16 : VectorConcatClass<GPRV2I16, GPRI16, OpNode>;
+ def _v2i8 : VectorConcatClass<GPRV2I8, GPRI8, OpNode>;
+}
+
+// Multiclass covering the vector-insert patterns: one InsertVectorClass
+// record per vector type, pairing the vector register class with its scalar
+// element register class. Every record is built with IL_OP_I_ADD and the
+// "iadd" string, regardless of element type.
+multiclass VectorInsert<SDNode OpNode> {
+  def _v2f64 : InsertVectorClass<IL_OP_I_ADD, GPRV2F64, GPRF64, OpNode, "iadd">;
+  def _v4f32 : InsertVectorClass<IL_OP_I_ADD, GPRV4F32, GPRF32, OpNode, "iadd">;
+  def _v2f32 : InsertVectorClass<IL_OP_I_ADD, GPRV2F32, GPRF32, OpNode, "iadd">;
+  def _v2i64 : InsertVectorClass<IL_OP_I_ADD, GPRV2I64, GPRI64, OpNode, "iadd">;
+  def _v4i8  : InsertVectorClass<IL_OP_I_ADD, GPRV4I8,  GPRI8,  OpNode, "iadd">;
+  def _v4i16 : InsertVectorClass<IL_OP_I_ADD, GPRV4I16, GPRI16, OpNode, "iadd">;
+  def _v4i32 : InsertVectorClass<IL_OP_I_ADD, GPRV4I32, GPRI32, OpNode, "iadd">;
+  def _v2i8  : InsertVectorClass<IL_OP_I_ADD, GPRV2I8,  GPRI8,  OpNode, "iadd">;
+  def _v2i16 : InsertVectorClass<IL_OP_I_ADD, GPRV2I16, GPRI16, OpNode, "iadd">;
+  def _v2i32 : InsertVectorClass<IL_OP_I_ADD, GPRV2I32, GPRI32, OpNode, "iadd">;
+}
+
+// Generic multiclass for one-input/one-output math instructions. It stamps
+// out a UnaryOp record for every scalar and vector register class, passing
+// the same register class for both UnaryOp register-class arguments.
+multiclass UnaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+  def _i8    : UnaryOp<OpCode, OpNode, GPRI8,    GPRI8>;
+  def _i16   : UnaryOp<OpCode, OpNode, GPRI16,   GPRI16>;
+  def _i32   : UnaryOp<OpCode, OpNode, GPRI32,   GPRI32>;
+  def _f32   : UnaryOp<OpCode, OpNode, GPRF32,   GPRF32>;
+  def _f64   : UnaryOp<OpCode, OpNode, GPRF64,   GPRF64>;
+  def _i64   : UnaryOp<OpCode, OpNode, GPRI64,   GPRI64>;
+  def _v4f32 : UnaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32>;
+  def _v4i16 : UnaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16>;
+  def _v4i8  : UnaryOp<OpCode, OpNode, GPRV4I8,  GPRV4I8>;
+  def _v4i32 : UnaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32>;
+  def _v2f32 : UnaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32>;
+  def _v2i16 : UnaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16>;
+  def _v2i8  : UnaryOp<OpCode, OpNode, GPRV2I8,  GPRV2I8>;
+  def _v2i32 : UnaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32>;
+  def _v2f64 : UnaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64>;
+  def _v2i64 : UnaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64>;
+}
+// Vector-only variant of the unary-op multiclass: each UnaryOp record pairs
+// a vector register class (first register-class argument) with its scalar
+// element register class (second). No scalar defs are produced.
+multiclass UnaryOpMCVec<ILOpCode OpCode, SDNode OpNode> {
+  def _v4f32 : UnaryOp<OpCode, OpNode, GPRV4F32, GPRF32>;
+  def _v4i16 : UnaryOp<OpCode, OpNode, GPRV4I16, GPRI16>;
+  def _v4i8  : UnaryOp<OpCode, OpNode, GPRV4I8,  GPRI8>;
+  def _v4i32 : UnaryOp<OpCode, OpNode, GPRV4I32, GPRI32>;
+  def _v2f32 : UnaryOp<OpCode, OpNode, GPRV2F32, GPRF32>;
+  def _v2i16 : UnaryOp<OpCode, OpNode, GPRV2I16, GPRI16>;
+  def _v2i8  : UnaryOp<OpCode, OpNode, GPRV2I8,  GPRI8>;
+  def _v2i32 : UnaryOp<OpCode, OpNode, GPRV2I32, GPRI32>;
+  def _v2f64 : UnaryOp<OpCode, OpNode, GPRV2F64, GPRF64>;
+  def _v2i64 : UnaryOp<OpCode, OpNode, GPRV2I64, GPRI64>;
+}
+
+// Unary multiclass restricted to 32-bit float register classes
+// (scalar f32, v2f32 and v4f32), all using the single f32OpCode.
+multiclass UnaryOpMCf32<
+ILOpCode f32OpCode,
+ SDNode OpNode> {
+ def _f32 : UnaryOp<f32OpCode, OpNode, GPRF32, GPRF32>;
+ def _v4f32: UnaryOp<f32OpCode, OpNode, GPRV4F32, GPRV4F32>;
+ def _v2f32 : UnaryOp<f32OpCode, OpNode, GPRV2F32, GPRV2F32>;
+ }
+
+// Unary multiclass for the 8-, 16- and 32-bit integer register classes
+// (scalar, 2-wide and 4-wide). Every def uses the same i32OpCode regardless
+// of the element width.
+multiclass UnaryOpMCi32<
+ILOpCode i32OpCode,
+ SDNode OpNode> {
+ def _i8 : UnaryOp<i32OpCode, OpNode, GPRI8, GPRI8>;
+ def _i16 : UnaryOp<i32OpCode, OpNode, GPRI16, GPRI16>;
+ def _i32 : UnaryOp<i32OpCode, OpNode, GPRI32, GPRI32>;
+ def _v4i16 : UnaryOp<i32OpCode, OpNode, GPRV4I16, GPRV4I16>;
+ def _v4i8 : UnaryOp<i32OpCode, OpNode, GPRV4I8, GPRV4I8>;
+ def _v4i32 : UnaryOp<i32OpCode, OpNode, GPRV4I32, GPRV4I32>;
+ def _v2i16 : UnaryOp<i32OpCode, OpNode, GPRV2I16, GPRV2I16>;
+ def _v2i8 : UnaryOp<i32OpCode, OpNode, GPRV2I8, GPRV2I8>;
+ def _v2i32 : UnaryOp<i32OpCode, OpNode, GPRV2I32, GPRV2I32>;
+ }
+
+
+// Generic two-operand multiclass: one BinaryOp def per scalar and vector
+// register class (integer and floating point), with all three register-class
+// arguments of each def being the same class.
+multiclass BinaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+ def _i8 : BinaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+
+ def _i16 : BinaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+ def _i32 : BinaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+ def _f32 : BinaryOp<OpCode, OpNode, GPRF32, GPRF32, GPRF32>;
+ def _f64 : BinaryOp<OpCode, OpNode, GPRF64, GPRF64, GPRF64>;
+ def _i64 : BinaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+ def _v4f32: BinaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32, GPRV4F32>;
+ def _v4i16 : BinaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16, GPRV4I16>;
+ def _v4i8 : BinaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8, GPRV4I8>;
+ def _v4i32 : BinaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32, GPRV4I32>;
+ def _v2f32 : BinaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32, GPRV2F32>;
+ def _v2i16 : BinaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16, GPRV2I16>;
+ def _v2i8 : BinaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8, GPRV2I8>;
+ def _v2i32 : BinaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32, GPRV2I32>;
+ def _v2f64 : BinaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64, GPRV2F64>;
+ def _v2i64 : BinaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64, GPRV2I64>;
+}
+
+// Integer-only counterpart of BinaryOpMC: the same per-register-class defs,
+// but without any of the f32/f64 register classes.
+multiclass BinaryOpMCInt<ILOpCode OpCode, SDNode OpNode> {
+ def _i8 : BinaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+
+ def _i16 : BinaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+ def _i32 : BinaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+ def _i64 : BinaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+ def _v4i16 : BinaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16, GPRV4I16>;
+ def _v4i8 : BinaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8, GPRV4I8>;
+ def _v4i32 : BinaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32, GPRV4I32>;
+ def _v2i16 : BinaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16, GPRV2I16>;
+ def _v2i8 : BinaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8, GPRV2I8>;
+ def _v2i32 : BinaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32, GPRV2I32>;
+ def _v2i64 : BinaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64, GPRV2I64>;
+}
+
+// generic class that handles math instruction for ThreeInOneOut
+// instruction patterns
+// Emits one TernaryOp def per scalar and vector register class; all four
+// register-class arguments of each def are the same class.
+multiclass TernaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+ def _i8 : TernaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8, GPRI8>;
+ def _i16 : TernaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16, GPRI16>;
+ def _i32 : TernaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32, GPRI32>;
+ def _f32 : TernaryOp<OpCode, OpNode, GPRF32, GPRF32, GPRF32, GPRF32>;
+ def _f64 : TernaryOp<OpCode, OpNode, GPRF64, GPRF64, GPRF64, GPRF64>;
+ def _i64 : TernaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64, GPRI64>;
+ def _v4f32: TernaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32,
+ GPRV4F32, GPRV4F32>;
+ def _v4i8 : TernaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8,
+ GPRV4I8, GPRV4I8>;
+ def _v4i16 : TernaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16,
+ GPRV4I16, GPRV4I16>;
+ def _v4i32 : TernaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32,
+ GPRV4I32, GPRV4I32>;
+ def _v2f32 : TernaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32,
+ GPRV2F32, GPRV2F32>;
+ def _v2i8 : TernaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8,
+ GPRV2I8, GPRV2I8>;
+ def _v2i16 : TernaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16,
+ GPRV2I16, GPRV2I16>;
+ def _v2i32 : TernaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32,
+ GPRV2I32, GPRV2I32>;
+ def _v2f64 : TernaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64,
+ GPRV2F64, GPRV2F64>;
+ def _v2i64 : TernaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64,
+ GPRV2I64, GPRV2I64>;
+}
+// Two-operand multiclass for the 8-, 16- and 32-bit integer register classes
+// (scalar, 2-wide and 4-wide). Every def uses the same i32OpCode; 64-bit
+// integers are handled separately by BinaryOpMCi64.
+multiclass BinaryOpMCi32<ILOpCode i32OpCode, SDNode OpNode> {
+ def _i8 : BinaryOp<i32OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+ def _i16 : BinaryOp<i32OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+ def _i32 : BinaryOp<i32OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+ def _v4i16 : BinaryOp<i32OpCode, OpNode, GPRV4I16,
+ GPRV4I16, GPRV4I16>;
+ def _v4i8 : BinaryOp<i32OpCode, OpNode, GPRV4I8,
+ GPRV4I8, GPRV4I8>;
+ def _v4i32 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+ GPRV4I32, GPRV4I32>;
+ def _v2i16 : BinaryOp<i32OpCode, OpNode, GPRV2I16,
+ GPRV2I16, GPRV2I16>;
+ def _v2i8 : BinaryOp<i32OpCode, OpNode, GPRV2I8,
+ GPRV2I8, GPRV2I8>;
+ def _v2i32 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+ GPRV2I32, GPRV2I32>;
+}
+// Two-operand multiclass for 64-bit integers only: scalar i64 and v2i64.
+multiclass BinaryOpMCi64<ILOpCode i64OpCode, SDNode OpNode> {
+ def _i64 : BinaryOp<i64OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+ def _v2i64 : BinaryOp<i64OpCode, OpNode, GPRV2I64,
+ GPRV2I64, GPRV2I64>;
+}
+// Two-operand multiclass whose last register-class argument is always the
+// scalar GPRI32, independent of the width of the other two arguments.
+// NOTE(review): the _v4i16/_v4i8 and _v2i16/_v2i8 defs use the *I32 vector
+// register classes (GPRV4I32 / GPRV2I32), making them identical to the
+// _v4i32 / _v2i32 defs apart from the name. This looks like a copy-paste
+// slip (GPRV4I16, GPRV4I8, GPRV2I16, GPRV2I8 may have been intended) --
+// confirm before relying on the 8/16-bit vector variants.
+multiclass BinaryOpMCi32Const<ILOpCode i32OpCode, SDNode OpNode> {
+ def _i8 : BinaryOp<i32OpCode, OpNode, GPRI8, GPRI8, GPRI32>;
+ def _i16 : BinaryOp<i32OpCode, OpNode, GPRI16, GPRI16, GPRI32>;
+ def _i32 : BinaryOp<i32OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+ def _v4i16 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+ GPRV4I32, GPRI32>;
+ def _v4i8 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+ GPRV4I32, GPRI32>;
+ def _v4i32 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+ GPRV4I32, GPRI32>;
+ def _v2i16 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+ GPRV2I32, GPRI32>;
+ def _v2i8 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+ GPRV2I32, GPRI32>;
+ def _v2i32 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+ GPRV2I32, GPRI32>;
+}
+// Two-operand multiclass restricted to 32-bit float register classes
+// (scalar f32, v2f32 and v4f32).
+multiclass BinaryOpMCf32<ILOpCode f32OpCode, SDNode OpNode> {
+ def _f32 : BinaryOp<f32OpCode, OpNode, GPRF32,
+ GPRF32, GPRF32>;
+ def _v4f32: BinaryOp<f32OpCode, OpNode, GPRV4F32,
+ GPRV4F32, GPRV4F32>;
+ def _v2f32 : BinaryOp<f32OpCode, OpNode, GPRV2F32,
+ GPRV2F32, GPRV2F32>;
+}
+
+// Three-operand multiclass for scalar f64 only (no v2f64 def is emitted).
+multiclass TernaryOpMCf64<ILOpCode f64OpCode, SDNode OpNode> {
+ def _f64 : TernaryOp<f64OpCode, OpNode, GPRF64,
+ GPRF64, GPRF64, GPRF64>;
+}
+
+// Three-operand multiclass restricted to 32-bit float register classes
+// (scalar f32, v2f32 and v4f32).
+multiclass TernaryOpMCf32<ILOpCode f32OpCode, SDNode OpNode> {
+ def _f32 : TernaryOp<f32OpCode, OpNode, GPRF32,
+ GPRF32, GPRF32, GPRF32>;
+ def _v4f32: TernaryOp<f32OpCode, OpNode, GPRV4F32,
+ GPRV4F32, GPRV4F32, GPRV4F32>;
+ def _v2f32 : TernaryOp<f32OpCode, OpNode, GPRV2F32,
+ GPRV2F32, GPRV2F32, GPRV2F32>;
+}
+// Two-operand floating-point multiclass taking separate opcodes per element
+// width: f64OpCode is used for the f64/v2f64 defs and f32OpCode for the
+// f32/v2f32/v4f32 defs.
+multiclass BinaryOpMCFloat<ILOpCode f32OpCode, ILOpCode f64OpCode,
+ SDNode OpNode> {
+ def _f64 : BinaryOp<f64OpCode, OpNode, GPRF64,
+ GPRF64, GPRF64>;
+ def _v2f64 : BinaryOp<f64OpCode, OpNode, GPRV2F64,
+ GPRV2F64, GPRV2F64>;
+ def _f32 : BinaryOp<f32OpCode, OpNode, GPRF32,
+ GPRF32, GPRF32>;
+ def _v2f32 : BinaryOp<f32OpCode, OpNode, GPRV2F32,
+ GPRV2F32, GPRV2F32>;
+ def _v4f32: BinaryOp<f32OpCode, OpNode, GPRV4F32,
+ GPRV4F32, GPRV4F32>;
+ }
+
+// Scalar-only three-operand multiclass. In every def the second
+// register-class argument is GPRI8 while the remaining three follow the def's
+// own type.
+// NOTE(review): the fixed GPRI8 argument is presumably a condition/selector
+// operand -- confirm against the TernaryOp class and the users of this
+// multiclass.
+multiclass TernaryOpMCScalar<ILOpCode opcode, SDNode node>
+{
+ def _i8: TernaryOp<opcode, node, GPRI8, GPRI8, GPRI8, GPRI8>;
+ def _i16: TernaryOp<opcode, node, GPRI16, GPRI8, GPRI16, GPRI16>;
+ def _i32: TernaryOp<opcode, node, GPRI32, GPRI8, GPRI32, GPRI32>;
+ def _i64: TernaryOp<opcode, node, GPRI64, GPRI8, GPRI64, GPRI64>;
+ def _f32: TernaryOp<opcode, node, GPRF32, GPRI8, GPRF32, GPRF32>;
+ def _f64: TernaryOp<opcode, node, GPRF64, GPRI8, GPRF64, GPRF64>;
+}
+
+
+// Conversion multiclass: pairs the caller-supplied register class Regs with
+// every scalar and vector register class in turn. The def suffix names the
+// register class passed as the last UnaryOp argument.
+multiclass BitConversion<ILOpCode opcode, RegisterClass Regs, SDNode OpNode>
+{
+ def _i8 : UnaryOp<opcode, OpNode, Regs, GPRI8>;
+ def _i16 : UnaryOp<opcode, OpNode, Regs, GPRI16>;
+ def _i32 : UnaryOp<opcode, OpNode, Regs, GPRI32>;
+ def _f32 : UnaryOp<opcode, OpNode, Regs, GPRF32>;
+ def _i64 : UnaryOp<opcode, OpNode, Regs, GPRI64>;
+ def _f64 : UnaryOp<opcode, OpNode, Regs, GPRF64>;
+ def _v2i8 : UnaryOp<opcode, OpNode, Regs, GPRV2I8>;
+ def _v2i16 : UnaryOp<opcode, OpNode, Regs, GPRV2I16>;
+ def _v2i32 : UnaryOp<opcode, OpNode, Regs, GPRV2I32>;
+ def _v2f32 : UnaryOp<opcode, OpNode, Regs, GPRV2F32>;
+ def _v2i64 : UnaryOp<opcode, OpNode, Regs, GPRV2I64>;
+ def _v2f64 : UnaryOp<opcode, OpNode, Regs, GPRV2F64>;
+ def _v4i8 : UnaryOp<opcode, OpNode, Regs, GPRV4I8>;
+ def _v4i16 : UnaryOp<opcode, OpNode, Regs, GPRV4I16>;
+ def _v4i32 : UnaryOp<opcode, OpNode, Regs, GPRV4I32>;
+ def _v4f32 : UnaryOp<opcode, OpNode, Regs, GPRV4F32>;
+}
+
+
+// One-source intrinsic on 32-bit integers (i32, v2i32, v4i32). Each def
+// matches (intr $src) and prints "<opcode.Text> $dst, $src".
+multiclass UnaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRI32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (intr GPRI32:$src))]>;
+def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (intr GPRV2I32:$src))]>;
+def _v4i32 : OneInOneOut<opcode, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4I32:$dst, (intr GPRV4I32:$src))]>;
+}
+
+// Conversion intrinsic taking f32 sources (scalar, 2-wide, 4-wide) and
+// writing the 16-bit integer register class of the same width. The def
+// suffix names the destination type.
+multiclass IntrConvertF32TOF16<ILOpCode opcode, Intrinsic intr>
+{
+def _i16 : OneInOneOut<opcode, (outs GPRI16:$dst),
+ (ins GPRF32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRI16:$dst, (intr GPRF32:$src))]>;
+def _v2i16 : OneInOneOut<opcode, (outs GPRV2I16:$dst),
+ (ins GPRV2F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2I16:$dst, (intr GPRV2F32:$src))]>;
+def _v4i16 : OneInOneOut<opcode, (outs GPRV4I16:$dst),
+ (ins GPRV4F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4I16:$dst, (intr GPRV4F32:$src))]>;
+}
+
+
+// Conversion intrinsic taking f32 sources (scalar, 2-wide, 4-wide) and
+// writing the i32 register class of the same width.
+multiclass IntrConvertF32TOI32<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRF32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (intr GPRF32:$src))]>;
+def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (intr GPRV2F32:$src))]>;
+def _v4i32 : OneInOneOut<opcode, (outs GPRV4I32:$dst),
+ (ins GPRV4F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4I32:$dst, (intr GPRV4F32:$src))]>;
+}
+
+// Conversion intrinsic taking f64 sources (scalar and 2-wide) and writing
+// the i32 register class of the same width.
+multiclass IntrConvertF64TOI32<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRF64:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRI32:$dst, (intr GPRF64:$src))]>;
+def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2F64:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2I32:$dst, (intr GPRV2F64:$src))]>;
+}
+
+// Conversion intrinsic taking 16-bit integer register-class sources (scalar,
+// 2-wide, 4-wide) and writing the f32 register class of the same width.
+multiclass IntrConvertF16TOF32<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRI16:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF32:$dst, (intr GPRI16:$src))]>;
+def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+ (ins GPRV2I16:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2F32:$dst, (intr GPRV2I16:$src))]>;
+def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+ (ins GPRV4I16:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst, (intr GPRV4I16:$src))]>;
+}
+
+
+// Conversion intrinsic taking i32 sources (scalar, 2-wide, 4-wide) and
+// writing the f32 register class of the same width.
+multiclass IntrConvertI32TOF32<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRI32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF32:$dst, (intr GPRI32:$src))]>;
+def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+ (ins GPRV2I32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2F32:$dst, (intr GPRV2I32:$src))]>;
+def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+ (ins GPRV4I32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst, (intr GPRV4I32:$src))]>;
+}
+
+// Two-source intrinsic on scalar i64 only; prints
+// "<opcode.Text> $dst, $src, $src2".
+multiclass BinaryIntrinsicLong<ILOpCode opcode, Intrinsic intr>
+{
+def _i64 : TwoInOneOut<opcode, (outs GPRI64:$dst),
+ (ins GPRI64:$src, GPRI64:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRI64:$dst,
+ (intr GPRI64:$src, GPRI64:$src2))]>;
+}
+
+
+// Two-source intrinsic on 32-bit integers (i32, v2i32, v4i32); sources and
+// destination share the same register class in each def.
+multiclass BinaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : TwoInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRI32:$src, GPRI32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRI32:$dst,
+ (intr GPRI32:$src, GPRI32:$src2))]>;
+def _v2i32 : TwoInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$src, GPRV2I32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRV2I32:$dst,
+ (intr GPRV2I32:$src, GPRV2I32:$src2))]>;
+def _v4i32 : TwoInOneOut<opcode, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$src, GPRV4I32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRV4I32:$dst,
+ (intr GPRV4I32:$src, GPRV4I32:$src2))]>;
+}
+
+// Three-source intrinsic on 32-bit integers (i32, v2i32, v4i32); prints
+// "<opcode.Text> $dst, $src, $src2, $src3".
+multiclass TernaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : ThreeInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRI32:$src, GPRI32:$src2, GPRI32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRI32:$dst,
+ (intr GPRI32:$src, GPRI32:$src2, GPRI32:$src3))]>;
+def _v2i32 : ThreeInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRV2I32:$dst,
+ (intr GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3))]>;
+def _v4i32 : ThreeInOneOut<opcode, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRV4I32:$dst,
+ (intr GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3))]>;
+}
+
+// Three-source intrinsic on 32-bit floats (f32, v2f32, v4f32); sources and
+// destination share the same register class in each def.
+multiclass TernaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : ThreeInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRF32:$src, GPRF32:$src2, GPRF32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRF32:$dst,
+ (intr GPRF32:$src, GPRF32:$src2, GPRF32:$src3))]>;
+def _v2f32 : ThreeInOneOut<opcode, (outs GPRV2F32:$dst),
+ (ins GPRV2F32:$src, GPRV2F32:$src2, GPRV2F32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRV2F32:$dst,
+ (intr GPRV2F32:$src, GPRV2F32:$src2, GPRV2F32:$src3))]>;
+def _v4f32 : ThreeInOneOut<opcode, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src, GPRV4F32:$src2, GPRV4F32:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRV4F32:$dst,
+ (intr GPRV4F32:$src, GPRV4F32:$src2, GPRV4F32:$src3))]>;
+}
+
+// Two-source intrinsic on scalar f64 only; prints
+// "<opcode.Text> $dst, $src, $src2".
+// The def takes exactly two source operands, so it uses the TwoInOneOut
+// format like every other two-source intrinsic multiclass in this file
+// (it previously named ThreeInOneOut while supplying only two sources).
+multiclass BinaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : TwoInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src, GPRF64:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRF64:$dst,
+ (intr GPRF64:$src, GPRF64:$src2))]>;
+}
+
+// Three-source intrinsic on scalar f64 only; prints
+// "<opcode.Text> $dst, $src, $src2, $src3".
+multiclass TernaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : ThreeInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src, GPRF64:$src2, GPRF64:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRF64:$dst,
+ (intr GPRF64:$src, GPRF64:$src2, GPRF64:$src3))]>;
+}
+
+
+// Three-source intrinsic on scalar i64 only; prints
+// "<opcode.Text> $dst, $src, $src2, $src3".
+multiclass TernaryIntrinsicLongScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _i64 : ThreeInOneOut<opcode, (outs GPRI64:$dst),
+ (ins GPRI64:$src, GPRI64:$src2, GPRI64:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRI64:$dst,
+ (intr GPRI64:$src, GPRI64:$src2, GPRI64:$src3))]>;
+}
+
+// Four-source intrinsic on 32-bit integers (i32, v2i32, v4i32); prints
+// "<opcode.Text> $dst, $src, $src2, $src3, $src4".
+multiclass QuaternaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+def _i32 : FourInOneOut<opcode, (outs GPRI32:$dst),
+ (ins GPRI32:$src, GPRI32:$src2, GPRI32:$src3, GPRI32:$src4),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+ [(set GPRI32:$dst,
+ (intr GPRI32:$src, GPRI32:$src2, GPRI32:$src3, GPRI32:$src4))]>;
+def _v2i32 : FourInOneOut<opcode, (outs GPRV2I32:$dst),
+ (ins GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3, GPRV2I32:$src4),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+ [(set GPRV2I32:$dst,
+ (intr GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3, GPRV2I32:$src4))]>;
+def _v4i32 : FourInOneOut<opcode, (outs GPRV4I32:$dst),
+ (ins GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3, GPRV4I32:$src4),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+ [(set GPRV4I32:$dst,
+ (intr GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3, GPRV4I32:$src4))]>;
+}
+
+// One-source intrinsic on scalar f32 only (no vector defs); see
+// UnaryIntrinsicFloat for the variant that also covers v2f32/v4f32.
+multiclass UnaryIntrinsicFloatScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRF32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF32:$dst, (intr GPRF32:$src))]>;
+}
+
+// One-source intrinsic on 32-bit floats (f32, v2f32, v4f32); source and
+// destination share the same register class in each def.
+multiclass UnaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRF32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF32:$dst, (intr GPRF32:$src))]>;
+def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+ (ins GPRV2F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2F32:$dst, (intr GPRV2F32:$src))]>;
+def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV4F32:$dst, (intr GPRV4F32:$src))]>;
+}
+
+// Two-source intrinsic on scalar f32 only (no vector defs); see
+// BinaryIntrinsicFloat for the variant that also covers v2f32/v4f32.
+multiclass BinaryIntrinsicFloatScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : TwoInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRF32:$src, GPRF32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRF32:$dst,
+ (intr GPRF32:$src, GPRF32:$src2))]>;
+}
+// Two-source intrinsic on 32-bit floats (f32, v2f32, v4f32); sources and
+// destination share the same register class in each def.
+multiclass BinaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+def _f32 : TwoInOneOut<opcode, (outs GPRF32:$dst),
+ (ins GPRF32:$src, GPRF32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRF32:$dst,
+ (intr GPRF32:$src, GPRF32:$src2))]>;
+def _v2f32 : TwoInOneOut<opcode, (outs GPRV2F32:$dst),
+ (ins GPRV2F32:$src, GPRV2F32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRV2F32:$dst,
+ (intr GPRV2F32:$src, GPRV2F32:$src2))]>;
+def _v4f32 : TwoInOneOut<opcode, (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src, GPRV4F32:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRV4F32:$dst,
+ (intr GPRV4F32:$src, GPRV4F32:$src2))]>;
+}
+
+// One-source intrinsic on scalar f64 only (no v2f64 def); see
+// UnaryIntrinsicDouble for the variant that also covers v2f64.
+multiclass UnaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : OneInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF64:$dst, (intr GPRF64:$src))]>;
+}
+
+// One-source intrinsic on 64-bit floats (f64 and v2f64); source and
+// destination share the same register class in each def.
+multiclass UnaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : OneInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRF64:$dst, (intr GPRF64:$src))]>;
+def _v2f64 : OneInOneOut<opcode, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src),
+ !strconcat(opcode.Text, " $dst, $src"),
+ [(set GPRV2F64:$dst, (intr GPRV2F64:$src))]>;
+}
+
+// Two-source intrinsic on 64-bit floats (f64 and v2f64); sources and
+// destination share the same register class in each def.
+multiclass BinaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : TwoInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src, GPRF64:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRF64:$dst,
+ (intr GPRF64:$src, GPRF64:$src2))]>;
+def _v2f64 : TwoInOneOut<opcode, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src, GPRV2F64:$src2),
+ !strconcat(opcode.Text, " $dst, $src, $src2"),
+ [(set GPRV2F64:$dst,
+ (intr GPRV2F64:$src, GPRV2F64:$src2))]>;
+}
+
+// Three-source intrinsic on 64-bit floats (f64 and v2f64); prints
+// "<opcode.Text> $dst, $src, $src2, $src3".
+// Both defs take three source operands, so they use the ThreeInOneOut format
+// like the other three-source intrinsic multiclasses in this file (they
+// previously named TwoInOneOut while supplying a third source, $src3).
+multiclass TernaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+def _f64 : ThreeInOneOut<opcode, (outs GPRF64:$dst),
+ (ins GPRF64:$src, GPRF64:$src2, GPRF64:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRF64:$dst,
+ (intr GPRF64:$src, GPRF64:$src2, GPRF64:$src3))]>;
+def _v2f64 : ThreeInOneOut<opcode, (outs GPRV2F64:$dst),
+ (ins GPRV2F64:$src, GPRV2F64:$src2, GPRV2F64:$src3),
+ !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+ [(set GPRV2F64:$dst,
+ (intr GPRV2F64:$src, GPRV2F64:$src2, GPRV2F64:$src3))]>;
+}
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
new file mode 100644
index 00000000000..d2650a8fffd
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
@@ -0,0 +1,110 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+using namespace llvm;
+
+AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  // Translate the subtarget's device name into the matching OCL_DEVICE_*
+  // flag. Any name other than "caicos", "turks" or "cayman" is treated as
+  // Barts.
+  const std::string devName = ST->getDeviceName();
+  if (devName == "cayman") {
+    mDeviceFlag = OCL_DEVICE_CAYMAN;
+  } else if (devName == "turks") {
+    mDeviceFlag = OCL_DEVICE_TURKS;
+  } else if (devName == "caicos") {
+    mDeviceFlag = OCL_DEVICE_CAICOS;
+  } else {
+    mDeviceFlag = OCL_DEVICE_BARTS;
+  }
+}
+// Destructor; the body is intentionally empty.
+AMDILNIDevice::~AMDILNIDevice()
+{
+}
+
+// Returns MAX_LDS_SIZE_900 when usesHardware(LocalMem) reports true, and 0
+// otherwise.
+size_t
+AMDILNIDevice::getMaxLDSSize() const
+{
+  return usesHardware(AMDILDeviceInfo::LocalMem) ? MAX_LDS_SIZE_900 : 0;
+}
+
+// Reports the hardware generation of this device: always
+// AMDILDeviceInfo::HD6XXX.
+uint32_t
+AMDILNIDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD6XXX;
+}
+
+
+// Builds the NI base device, then applies the Cayman capability bits.
+AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
+ : AMDILNIDevice(ST)
+{
+ // setCaps() is virtual, but a virtual call made from a constructor body
+ // resolves to the class being constructed, i.e.
+ // AMDILCaymanDevice::setCaps() below.
+ setCaps();
+}
+
+// Destructor; the body is intentionally empty.
+AMDILCaymanDevice::~AMDILCaymanDevice()
+{
+}
+
+// Sets the Cayman-specific capability bits on top of whatever the base
+// classes configured:
+//  - DoubleOps and FMA become hardware capabilities only when the subtarget
+//    reports an override for DoubleOps;
+//  - Signed24BitOps is moved from the software set to the hardware set;
+//  - ArenaSegment is enabled in the software set.
+void
+AMDILCaymanDevice::setCaps()
+{
+  const bool doubleOverride = mSTM->isOverride(AMDILDeviceInfo::DoubleOps);
+  if (doubleOverride) {
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+    mHWBits.set(AMDILDeviceInfo::FMA);
+  }
+
+  mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
+  mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);
+
+  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.h b/src/gallium/drivers/radeon/AMDILNIDevice.h
new file mode 100644
index 00000000000..d2eaf2a571e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILNIDevice.h
@@ -0,0 +1,99 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILNIDEVICE_H_
+#define _AMDILNIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm {
+ class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILNIDevice is the base class for all Northern Islands series of
+// cards. It is very similar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities. The
+// NI devices all have 64-wide wavefronts and also add support for signed
+// 24-bit integer operations.
+
+ class AMDILNIDevice : public AMDILEvergreenDevice {
+ public:
+ // Constructed from the owning subtarget.
+ AMDILNIDevice(AMDILSubtarget*);
+ virtual ~AMDILNIDevice();
+ // Maximum LDS size reported for this device.
+ virtual size_t getMaxLDSSize() const;
+ // Hardware generation identifier reported for this device.
+ virtual uint32_t getGeneration() const;
+ protected:
+ }; // AMDILNIDevice
+
+// Just as the AMDILCypressDevice is the double capable version of the
+// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of
+// the AMDILNIDevice. The other major difference, which is not as useful from
+// our standpoint, is that the Cayman device has 4-wide ALUs, whereas the rest
+// of the NI family is 5-wide.
+
+ class AMDILCaymanDevice: public AMDILNIDevice {
+ public:
+ // Constructed from the owning subtarget.
+ AMDILCaymanDevice(AMDILSubtarget*);
+ virtual ~AMDILCaymanDevice();
+ private:
+ // Applies the Cayman-specific hardware/software capability bits.
+ virtual void setCaps();
+ }; // AMDILCaymanDevice
+
+ // LDS size limit used by the NI devices; defined as the same value as
+ // AMDILDevice::MAX_LDS_SIZE_800.
+ static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // _AMDILNIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILNodes.td b/src/gallium/drivers/radeon/AMDILNodes.td
new file mode 100644
index 00000000000..d876222e634
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILNodes.td
@@ -0,0 +1,366 @@
+//===- AMDILNodes.td - AMD IL nodes ------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Conversion DAG Nodes
+//===----------------------------------------------------------------------===//
+// Double to Single conversion
+def IL_d2f : SDNode<"AMDILISD::DP_TO_FP" , SDTIL_DPToFPOp>;
+
+def IL_inttoany: SDNode<"AMDILISD::INTTOANY", SDTIL_IntToAny>;
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_cmp : SDNode<"AMDILISD::CMP", SDTIL_Cmp>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_callseq_start : SDNode<"ISD::CALLSEQ_START", SDTIL_CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def IL_callseq_end : SDNode<"ISD::CALLSEQ_END", SDTIL_CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def IL_call : SDNode<"AMDILISD::CALL", SDTIL_Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def IL_retflag : SDNode<"AMDILISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic DAG Nodes
+//===----------------------------------------------------------------------===//
+// Address modification nodes
+def IL_addaddrri : SDNode<"AMDILISD::ADDADDR", SDTIL_AddAddrri,
+ [SDNPCommutative, SDNPAssociative]>;
+def IL_addaddrir : SDNode<"AMDILISD::ADDADDR", SDTIL_AddAddrir,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", SDTIL_GenTernaryOp>;
+def IL_add : SDNode<"AMDILISD::ADD" , SDTIL_GenBinaryOp>;
+def IL_cmov : SDNode<"AMDILISD::CMOV" , SDTIL_GenBinaryOp>;
+def IL_or : SDNode<"AMDILISD::OR" ,SDTIL_GenBinaryOp>;
+def IL_and : SDNode<"AMDILISD::AND" ,SDTIL_GenBinaryOp>;
+def IL_xor : SDNode<"AMDILISD::XOR", SDTIL_GenBinaryOp>;
+def IL_not : SDNode<"AMDILISD::NOT", SDTIL_GenUnaryOp>;
+def IL_div_inf : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>;
+def IL_mad : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_inegate : SDNode<"AMDILISD::INEGATE" , SDTIntUnaryOp>;
+def IL_umul : SDNode<"AMDILISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def IL_mov : SDNode<"AMDILISD::MOVE", SDTIL_GenUnaryOp>;
+def IL_phimov : SDNode<"AMDILISD::PHIMOVE", SDTIL_GenUnaryOp>;
+def IL_bitconv : SDNode<"AMDILISD::BITCONV", SDTIL_GenBitConv>;
+def IL_ffb_hi : SDNode<"AMDILISD::IFFB_HI", SDTIL_GenUnaryOp>;
+def IL_ffb_lo : SDNode<"AMDILISD::IFFB_LO", SDTIL_GenUnaryOp>;
+def IL_smax : SDNode<"AMDILISD::SMAX", SDTIL_GenBinaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Double functions
+//===----------------------------------------------------------------------===//
+def IL_dcreate : SDNode<"AMDILISD::DCREATE" , SDTIL_DCreate>;
+def IL_dcomphi : SDNode<"AMDILISD::DCOMPHI" , SDTIL_DComp>;
+def IL_dcomplo : SDNode<"AMDILISD::DCOMPLO" , SDTIL_DComp>;
+def IL_dcreate2 : SDNode<"AMDILISD::DCREATE2" , SDTIL_DCreate2>;
+def IL_dcomphi2 : SDNode<"AMDILISD::DCOMPHI2" , SDTIL_DComp2>;
+def IL_dcomplo2 : SDNode<"AMDILISD::DCOMPLO2" , SDTIL_DComp2>;
+
+//===----------------------------------------------------------------------===//
+// Long functions
+//===----------------------------------------------------------------------===//
+def IL_lcreate : SDNode<"AMDILISD::LCREATE" , SDTIL_LCreate>;
+def IL_lcreate2 : SDNode<"AMDILISD::LCREATE2" , SDTIL_LCreate2>;
+def IL_lcomphi : SDNode<"AMDILISD::LCOMPHI" , SDTIL_LComp>;
+def IL_lcomphi2 : SDNode<"AMDILISD::LCOMPHI2" , SDTIL_LComp2>;
+def IL_lcomplo : SDNode<"AMDILISD::LCOMPLO" , SDTIL_LComp>;
+def IL_lcomplo2 : SDNode<"AMDILISD::LCOMPLO2" , SDTIL_LComp2>;
+
+//===----------------------------------------------------------------------===//
+// Vector functions
+//===----------------------------------------------------------------------===//
+def IL_vbuild : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild,
+ []>;
+def IL_vextract : SDNode<"AMDILISD::VEXTRACT", SDTIL_GenVecExtract,
+ []>;
+def IL_vinsert : SDNode<"AMDILISD::VINSERT", SDTIL_GenVecInsert,
+ []>;
+def IL_vconcat : SDNode<"AMDILISD::VCONCAT", SDTIL_GenVecConcat,
+ []>;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Image Custom SDNodes
+//===----------------------------------------------------------------------===//
+def image2d_read : SDNode<"AMDILISD::IMAGE2D_READ", SDTIL_ImageRead,
+ [SDNPHasChain, SDNPMayLoad]>;
+def image2d_write : SDNode<"AMDILISD::IMAGE2D_WRITE", SDTIL_ImageWrite,
+ [SDNPHasChain, SDNPMayStore]>;
+def image2d_info0 : SDNode<"AMDILISD::IMAGE2D_INFO0", SDTIL_ImageInfo, []>;
+def image2d_info1 : SDNode<"AMDILISD::IMAGE2D_INFO1", SDTIL_ImageInfo, []>;
+def image3d_read : SDNode<"AMDILISD::IMAGE3D_READ", SDTIL_ImageRead,
+ [SDNPHasChain, SDNPMayLoad]>;
+def image3d_write : SDNode<"AMDILISD::IMAGE3D_WRITE", SDTIL_ImageWrite3D,
+ [SDNPHasChain, SDNPMayStore]>;
+def image3d_info0 : SDNode<"AMDILISD::IMAGE3D_INFO0", SDTIL_ImageInfo, []>;
+def image3d_info1 : SDNode<"AMDILISD::IMAGE3D_INFO1", SDTIL_ImageInfo, []>;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Atomic Custom SDNodes
+//===----------------------------------------------------------------------===//
+//===-------------- 32 bit global atomics with return values --------------===//
+def atom_g_add : SDNode<"AMDILISD::ATOM_G_ADD", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_and : SDNode<"AMDILISD::ATOM_G_AND", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_cmpxchg : SDNode<"AMDILISD::ATOM_G_CMPXCHG", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_dec : SDNode<"AMDILISD::ATOM_G_DEC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_inc : SDNode<"AMDILISD::ATOM_G_INC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_max : SDNode<"AMDILISD::ATOM_G_MAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umax : SDNode<"AMDILISD::ATOM_G_UMAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_min : SDNode<"AMDILISD::ATOM_G_MIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umin : SDNode<"AMDILISD::ATOM_G_UMIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_or : SDNode<"AMDILISD::ATOM_G_OR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_sub : SDNode<"AMDILISD::ATOM_G_SUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_rsub : SDNode<"AMDILISD::ATOM_G_RSUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xchg : SDNode<"AMDILISD::ATOM_G_XCHG", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xor : SDNode<"AMDILISD::ATOM_G_XOR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===------------- 32 bit global atomics without return values ------------===//
+def atom_g_add_noret : SDNode<"AMDILISD::ATOM_G_ADD_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_and_noret : SDNode<"AMDILISD::ATOM_G_AND_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_cmpxchg_noret : SDNode<"AMDILISD::ATOM_G_CMPXCHG_NORET",
+ SDTIL_TriAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_cmp_noret : SDNode<"AMDILISD::ATOM_G_CMPXCHG_NORET",
+ SDTIL_TriAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_dec_noret : SDNode<"AMDILISD::ATOM_G_DEC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_inc_noret : SDNode<"AMDILISD::ATOM_G_INC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_max_noret : SDNode<"AMDILISD::ATOM_G_MAX_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umax_noret: SDNode<"AMDILISD::ATOM_G_UMAX_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_min_noret : SDNode<"AMDILISD::ATOM_G_MIN_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umin_noret: SDNode<"AMDILISD::ATOM_G_UMIN_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_or_noret : SDNode<"AMDILISD::ATOM_G_OR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_sub_noret : SDNode<"AMDILISD::ATOM_G_SUB_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_rsub_noret : SDNode<"AMDILISD::ATOM_G_RSUB_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xchg_noret: SDNode<"AMDILISD::ATOM_G_XCHG_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xor_noret : SDNode<"AMDILISD::ATOM_G_XOR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit local atomics with return values --------------===//
+def atom_l_add : SDNode<"AMDILISD::ATOM_L_ADD", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_and : SDNode<"AMDILISD::ATOM_L_AND", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_cmpxchg : SDNode<"AMDILISD::ATOM_L_CMPXCHG", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_dec : SDNode<"AMDILISD::ATOM_L_DEC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_inc : SDNode<"AMDILISD::ATOM_L_INC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_max : SDNode<"AMDILISD::ATOM_L_MAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umax : SDNode<"AMDILISD::ATOM_L_UMAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_min : SDNode<"AMDILISD::ATOM_L_MIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umin : SDNode<"AMDILISD::ATOM_L_UMIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_or : SDNode<"AMDILISD::ATOM_L_OR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_mskor : SDNode<"AMDILISD::ATOM_L_MSKOR", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_sub : SDNode<"AMDILISD::ATOM_L_SUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_rsub : SDNode<"AMDILISD::ATOM_L_RSUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xchg : SDNode<"AMDILISD::ATOM_L_XCHG", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xor : SDNode<"AMDILISD::ATOM_L_XOR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===-------------- 32 bit local atomics without return values ------------===//
+def atom_l_add_noret : SDNode<"AMDILISD::ATOM_L_ADD_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_and_noret : SDNode<"AMDILISD::ATOM_L_AND_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_cmpxchg_noret : SDNode<"AMDILISD::ATOM_L_CMPXCHG_NORET",
+ SDTIL_TriAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_dec_noret : SDNode<"AMDILISD::ATOM_L_DEC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_inc_noret : SDNode<"AMDILISD::ATOM_L_INC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_max_noret : SDNode<"AMDILISD::ATOM_L_MAX_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umax_noret: SDNode<"AMDILISD::ATOM_L_UMAX_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_min_noret : SDNode<"AMDILISD::ATOM_L_MIN_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umin_noret: SDNode<"AMDILISD::ATOM_L_UMIN_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_or_noret : SDNode<"AMDILISD::ATOM_L_OR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_mskor_noret : SDNode<"AMDILISD::ATOM_L_MSKOR_NORET",
+ SDTIL_TriAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_sub_noret : SDNode<"AMDILISD::ATOM_L_SUB_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_rsub_noret : SDNode<"AMDILISD::ATOM_L_RSUB_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xchg_noret: SDNode<"AMDILISD::ATOM_L_XCHG_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xor_noret : SDNode<"AMDILISD::ATOM_L_XOR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit region atomics with return values -------------===//
+def atom_r_add : SDNode<"AMDILISD::ATOM_R_ADD", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_and : SDNode<"AMDILISD::ATOM_R_AND", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_cmpxchg : SDNode<"AMDILISD::ATOM_R_CMPXCHG", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_dec : SDNode<"AMDILISD::ATOM_R_DEC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_inc : SDNode<"AMDILISD::ATOM_R_INC", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_max : SDNode<"AMDILISD::ATOM_R_MAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umax : SDNode<"AMDILISD::ATOM_R_UMAX", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_min : SDNode<"AMDILISD::ATOM_R_MIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umin : SDNode<"AMDILISD::ATOM_R_UMIN", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_or : SDNode<"AMDILISD::ATOM_R_OR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_mskor : SDNode<"AMDILISD::ATOM_R_MSKOR", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_sub : SDNode<"AMDILISD::ATOM_R_SUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_rsub : SDNode<"AMDILISD::ATOM_R_RSUB", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xchg : SDNode<"AMDILISD::ATOM_R_XCHG", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xor : SDNode<"AMDILISD::ATOM_R_XOR", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===-------------- 32 bit region atomics without return values -----------===//
+def atom_r_add_noret : SDNode<"AMDILISD::ATOM_R_ADD_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_and_noret : SDNode<"AMDILISD::ATOM_R_AND_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_cmpxchg_noret : SDNode<"AMDILISD::ATOM_R_CMPXCHG_NORET",
+ SDTIL_TriAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_dec_noret : SDNode<"AMDILISD::ATOM_R_DEC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_inc_noret : SDNode<"AMDILISD::ATOM_R_INC_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_max_noret : SDNode<"AMDILISD::ATOM_R_MAX_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umax_noret: SDNode<"AMDILISD::ATOM_R_UMAX_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_min_noret : SDNode<"AMDILISD::ATOM_R_MIN_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umin_noret: SDNode<"AMDILISD::ATOM_R_UMIN_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_or_noret : SDNode<"AMDILISD::ATOM_R_OR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_mskor_noret : SDNode<"AMDILISD::ATOM_R_MSKOR_NORET", SDTIL_TriAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_sub_noret : SDNode<"AMDILISD::ATOM_R_SUB_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_rsub_noret : SDNode<"AMDILISD::ATOM_R_RSUB_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xchg_noret: SDNode<"AMDILISD::ATOM_R_XCHG_NORET",
+ SDTIL_BinAtom, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xor_noret : SDNode<"AMDILISD::ATOM_R_XOR_NORET", SDTIL_BinAtom,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit atomic counter instructions -------------------===//
+def append_alloc : SDNode<"AMDILISD::APPEND_ALLOC", SDTIL_Append,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def append_consume : SDNode<"AMDILISD::APPEND_CONSUME", SDTIL_Append,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def append_alloc_noret : SDNode<"AMDILISD::APPEND_ALLOC_NORET", SDTIL_Append,
+ [SDNPHasChain, SDNPMayStore]>;
+def append_consume_noret : SDNode<"AMDILISD::APPEND_CONSUME_NORET",
+ SDTIL_Append, [SDNPHasChain, SDNPMayStore]>;
diff --git a/src/gallium/drivers/radeon/AMDILOperands.td b/src/gallium/drivers/radeon/AMDILOperands.td
new file mode 100644
index 00000000000..40fd8063694
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILOperands.td
@@ -0,0 +1,78 @@
+//===- AMDILOperands.td - AMD IL Operands ------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Custom memory operand
+//===----------------------------------------------------------------------===//
+
+def MEMI32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPRI32, GPRI32);
+}
+
+def MEMI64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPRI64, GPRI64);
+}
+
+// Call target types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+
+// def v2i8imm : Operand<v2i8>;
+// def v4i8imm : Operand<v4i8>;
+// def v2i16imm : Operand<v2i16>;
+// def v4i16imm : Operand<v4i16>;
+// def v2i32imm : Operand<v2i32>;
+// def v4i32imm : Operand<v4i32>;
+// def v2i64imm : Operand<v2i64>;
+// def v2f32imm : Operand<v2f32>;
+// def v4f32imm : Operand<v4f32>;
+// def v2f64imm : Operand<v2f64>;
+
diff --git a/src/gallium/drivers/radeon/AMDILPatterns.td b/src/gallium/drivers/radeon/AMDILPatterns.td
new file mode 100644
index 00000000000..9e83352b745
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILPatterns.td
@@ -0,0 +1,545 @@
+//===- AMDILPatterns.td - AMDIL Target Patterns------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Store pattern fragments
+//===----------------------------------------------------------------------===//
+// Truncating-store fragments keyed on the in-memory value type: each fragment
+// matches a truncstore whose StoreSDNode memory VT equals the type named in
+// the fragment (i64, v2i8, v2i16, ..., v4f32).
+def truncstorei64 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i8;
+}]>;
+def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16;
+}]>;
+def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i32;
+}]>;
+def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i64;
+}]>;
+def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f32;
+}]>;
+def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f64;
+}]>;
+def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8;
+}]>;
+def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i16;
+}]>;
+def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i32;
+}]>;
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+// Stores built on the plain `store` fragment, one per address-space
+// predicate: isGlobalStore, isPrivateStore, isLocalStore and isRegionStore.
+// The predicates receive the result of dyn_cast<StoreSDNode>(N).
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+// Truncating stores accepted by isGlobalStore(): one fragment per memory type,
+// each layered on the corresponding truncstore<type> fragment.
+def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei64 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref64 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i64 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f64 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4f32 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+// Truncating stores accepted by isPrivateStore(): a generic fragment on
+// truncstore, followed by one fragment per memory type layered on the
+// corresponding truncstore<type> fragment.
+def private_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei64 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref64 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i8 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i16 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i64 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f64 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i8 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i16 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4f32 node:$val, node:$ptr), [{
+ return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Truncating stores accepted by isLocalStore(): a generic fragment on
+// truncstore, followed by one fragment per memory type layered on the
+// corresponding truncstore<type> fragment.
+def local_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei64 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref64 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i64 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f64 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4f32 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Truncating stores accepted by isRegionStore(): a generic fragment on
+// truncstore, followed by one fragment per memory type layered on the
+// corresponding truncstore<type> fragment.
+def region_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei64 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstoref64 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i8 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i16 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2i64 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev2f64 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i8 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i16 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4i32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorev4f32 node:$val, node:$ptr), [{
+ return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+// One fragment per load kind (plain, sign-extending, any-extending and
+// zero-extending), each restricted by isGlobalLoad(). The any-extending
+// fragment is built on extload and the zero-extending one on zextload, the
+// same pairing the cp_*_load fragments use.
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Private address space loads
+// One fragment per load kind (plain, sign-extending, any-extending and
+// zero-extending), each restricted by isPrivateLoad(). The any-extending
+// fragment is built on extload and the zero-extending one on zextload, the
+// same pairing the cp_*_load fragments use.
+def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Local address space loads
+// One fragment per load kind (plain, sign-extending, any-extending and
+// zero-extending), each restricted by isLocalLoad(). The any-extending
+// fragment is built on extload and the zero-extending one on zextload, the
+// same pairing the cp_*_load fragments use.
+def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Region address space loads
+// One fragment per load kind (plain, sign-extending, any-extending and
+// zero-extending), each restricted by isRegionLoad(). The any-extending
+// fragment is built on extload and the zero-extending one on zextload, the
+// same pairing the cp_*_load fragments use.
+def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+// One fragment per load kind (plain, sign-extending, any-extending and
+// zero-extending), each restricted by isConstantLoad(..., -1). The
+// any-extending fragment is built on extload and the zero-extending one on
+// zextload, the same pairing the cp_*_load fragments use.
+// NOTE(review): the meaning of the -1 argument is not visible here -- confirm
+// against the isConstantLoad definition.
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+// Constant pool loads
+// One fragment per load kind, each restricted by isCPLoad(): plain (load),
+// sign-extending (sextload), zero-extending (zextload) and any-extending
+// (extload).
+def cp_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+// Two-operand complex addressing patterns. The i32 variants are selected by
+// SelectADDR and the i64 variants by SelectADDR64; the *F variants list
+// frameindex as a root node, the others list none.
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Instruction Pattern Leafs
+//===----------------------------------------------------------------------===//
+// Each IL_CC_* leaf matches the i32 immediate N that identifies one IL
+// comparison code. The values run consecutively from 0 to 63 and must stay
+// unique, otherwise two different condition codes match the same immediate.
+// NOTE(review): presumably these mirror a condition-code enum on the C++
+// side -- confirm the numbering against it.
+class IL_CC_Op<int N> : PatLeaf<(i32 N)>;
+def IL_CC_D_EQ : IL_CC_Op<0>;
+def IL_CC_D_GE : IL_CC_Op<1>;
+def IL_CC_D_LT : IL_CC_Op<2>;
+def IL_CC_D_NE : IL_CC_Op<3>;
+def IL_CC_F_EQ : IL_CC_Op<4>;
+def IL_CC_F_GE : IL_CC_Op<5>;
+def IL_CC_F_LT : IL_CC_Op<6>;
+def IL_CC_F_NE : IL_CC_Op<7>;
+def IL_CC_I_EQ : IL_CC_Op<8>;
+def IL_CC_I_GE : IL_CC_Op<9>;
+def IL_CC_I_LT : IL_CC_Op<10>;
+def IL_CC_I_NE : IL_CC_Op<11>;
+def IL_CC_U_GE : IL_CC_Op<12>;
+def IL_CC_U_LT : IL_CC_Op<13>;
+// Pseudo IL comparison instructions that aren't natively supported
+def IL_CC_F_GT : IL_CC_Op<14>;
+def IL_CC_U_GT : IL_CC_Op<15>;
+def IL_CC_I_GT : IL_CC_Op<16>;
+def IL_CC_D_GT : IL_CC_Op<17>;
+def IL_CC_F_LE : IL_CC_Op<18>;
+def IL_CC_U_LE : IL_CC_Op<19>;
+def IL_CC_I_LE : IL_CC_Op<20>;
+def IL_CC_D_LE : IL_CC_Op<21>;
+def IL_CC_F_UNE : IL_CC_Op<22>;
+def IL_CC_F_UEQ : IL_CC_Op<23>;
+def IL_CC_F_ULT : IL_CC_Op<24>;
+def IL_CC_F_UGT : IL_CC_Op<25>;
+def IL_CC_F_ULE : IL_CC_Op<26>;
+def IL_CC_F_UGE : IL_CC_Op<27>;
+def IL_CC_F_ONE : IL_CC_Op<28>;
+def IL_CC_F_OEQ : IL_CC_Op<29>;
+def IL_CC_F_OLT : IL_CC_Op<30>;
+def IL_CC_F_OGT : IL_CC_Op<31>;
+def IL_CC_F_OLE : IL_CC_Op<32>;
+def IL_CC_F_OGE : IL_CC_Op<33>;
+def IL_CC_D_UNE : IL_CC_Op<34>;
+def IL_CC_D_UEQ : IL_CC_Op<35>;
+def IL_CC_D_ULT : IL_CC_Op<36>;
+def IL_CC_D_UGT : IL_CC_Op<37>;
+def IL_CC_D_ULE : IL_CC_Op<38>;
+def IL_CC_D_UGE : IL_CC_Op<39>;
+// 40, not 30: 30 already belongs to IL_CC_F_OLT.
+def IL_CC_D_ONE : IL_CC_Op<40>;
+def IL_CC_D_OEQ : IL_CC_Op<41>;
+def IL_CC_D_OLT : IL_CC_Op<42>;
+def IL_CC_D_OGT : IL_CC_Op<43>;
+def IL_CC_D_OLE : IL_CC_Op<44>;
+def IL_CC_D_OGE : IL_CC_Op<45>;
+def IL_CC_U_EQ : IL_CC_Op<46>;
+def IL_CC_U_NE : IL_CC_Op<47>;
+def IL_CC_F_O : IL_CC_Op<48>;
+def IL_CC_D_O : IL_CC_Op<49>;
+def IL_CC_F_UO : IL_CC_Op<50>;
+def IL_CC_D_UO : IL_CC_Op<51>;
+def IL_CC_L_LE : IL_CC_Op<52>;
+def IL_CC_L_GE : IL_CC_Op<53>;
+def IL_CC_L_EQ : IL_CC_Op<54>;
+def IL_CC_L_NE : IL_CC_Op<55>;
+def IL_CC_L_LT : IL_CC_Op<56>;
+def IL_CC_L_GT : IL_CC_Op<57>;
+def IL_CC_UL_LE : IL_CC_Op<58>;
+def IL_CC_UL_GE : IL_CC_Op<59>;
+def IL_CC_UL_EQ : IL_CC_Op<60>;
+def IL_CC_UL_NE : IL_CC_Op<61>;
+def IL_CC_UL_LT : IL_CC_Op<62>;
+def IL_CC_UL_GT : IL_CC_Op<63>;
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 00000000000..87d714d077a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1412 @@
+//===-- AMDILPeepholeOptimizer.cpp - Peephole Optimization pass --===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+// DEBUG_TYPE names this pass for debug filtering. In DEBUG builds DEBUGME is
+// true when DebugFlag is set and isCurrentDebugType(DEBUG_TYPE) holds; in all
+// other builds it is the constant 0.
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <sstream>
+// The pointer statistics below are compiled out; only LocalFuncs is active.
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+ "assigments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed");
+
+using namespace llvm;
+// The Peephole optimization pass is used to do simple last minute optimizations
+// that are required for correct code or to remove redundant functions
+namespace {
+// Function-level pass that performs the AMDIL peephole optimizations. It is
+// constructed from a TargetMachine and a code generation optimization level.
+class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL);
+ ~AMDILPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+ // Quick check to see if we need to dump all of the pointers into the
+ // arena. If this is correct, then we set all pointers to exist in arena. This
+ // is a workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+ // To avoid invalidating any pointers while inside
+ // safeNestedForEachFunction, atomic conversions are pushed to a vector and
+ // handled later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+ // Because __amdil_is_constant cannot be properly evaluated if
+ // optimizations are disabled, the calls are placed in a vector
+ // and evaluated after the __amdil_image* functions are evaluated
+ // which should allow the __amdil_is_constant function to be
+ // evaluated correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ // Set from DEBUGME in the constructor.
+ bool mDebug;
+ bool mRWGOpt;
+ bool mConvertAtomics;
+ // Optimization level passed to the constructor.
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+ // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
+ // this case we need to expand them. These functions check for 24bit functions
+ // and then expand.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that if the required workgroup size is
+ // specified then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+ void expandRWGLocalOpt(CallInst *CI);
+ // On northern island cards, the division is slightly less accurate than on
+ // previous generations, so we need to utilize a more accurate division. So we
+ // can translate the accurate divide to a normal divide on all other cards.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no opt mode, then we need to make sure that
+ // local samplers are properly propagated as constant propagation
+ // doesn't occur and we need to know the value of kernel defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDILSubtarget *mSTM;
+ // Atomic calls queued for later conversion, each paired with a Function.
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ // Calls queued for doIsConstCallConversionIfNeeded().
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDILPeepholeOpt
+ char AMDILPeepholeOpt::ID = 0;
+} // anonymous namespace
+
+namespace llvm {
+  // Factory for the AMDIL peephole pass: allocates a new AMDILPeepholeOpt for
+  // the given target machine and optimization level and returns it as a
+  // FunctionPass pointer.
+  FunctionPass *
+  createAMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL)
+  {
+    FunctionPass *peepholePass = new AMDILPeepholeOpt(tm, OL);
+    return peepholePass;
+  }
+} // llvm namespace
+
+// Constructs the pass for target machine tm at optimization level OL.
+// Every data member gets a defined value here, so nothing is read
+// uninitialized if a member is consulted before it is assigned elsewhere.
+AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+  : FunctionPass(ID), TM(tm)
+#else
+  : FunctionPass((intptr_t)&ID), TM(tm)
+#endif
+{
+  mDebug = DEBUGME;
+  optLevel = OL;
+  // Defaults for the per-function state.
+  mChanged = false;
+  mRWGOpt = false;
+  mConvertAtomics = false;
+  mCTX = NULL;
+  mF = NULL;
+  mSTM = NULL;
+}
+
+// Nothing to release: the destructor body is intentionally empty.
+AMDILPeepholeOpt::~AMDILPeepholeOpt()
+{
+}
+
+// Returns the human-readable name of this pass.
+const char *
+AMDILPeepholeOpt::getPassName() const
+{
+  static const char *const passName = "AMDIL PeepHole Optimization Pass";
+  return passName;
+}
+
+// Returns true if Ty is a pointer type or is a struct, array or vector type
+// (or union, on LLVM_VERSION < 2500) that contains a pointer at any nesting
+// depth. A null Ty and every other kind of type yield false.
+bool
+containsPointerType(Type *Ty)
+{
+  if (!Ty) {
+    return false;
+  }
+  switch(Ty->getTypeID()) {
+  default:
+    return false;
+#if LLVM_VERSION < 2500
+  case Type::UnionTyID: {
+    // The type ID already identifies the class, so cast<> (which asserts)
+    // is used instead of dereferencing an unchecked dyn_cast<> result.
+    const UnionType *UT = cast<UnionType>(Ty);
+    for (UnionType::element_iterator elt = UT->element_begin(),
+         eltEnd = UT->element_end(); elt != eltEnd; ++elt) {
+      if (containsPointerType(*elt)) {
+        return true;
+      }
+    }
+    break;
+  }
+#endif
+  case Type::StructTyID: {
+    const StructType *ST = cast<StructType>(Ty);
+    for (StructType::element_iterator elt = ST->element_begin(),
+         eltEnd = ST->element_end(); elt != eltEnd; ++elt) {
+      if (containsPointerType(*elt)) {
+        return true;
+      }
+    }
+    break;
+  }
+  case Type::VectorTyID:
+  case Type::ArrayTyID:
+    return containsPointerType(cast<SequentialType>(Ty)->getElementType());
+  case Type::PointerTyID:
+    return true;
+  }
+  // Reached only for aggregates in which no member contains a pointer.
+  return false;
+}
+
+// Returns true if any argument of F is a pointer to a struct (or union on old
+// LLVM versions) that itself contains a pointer somewhere inside of it.
+bool
+AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
+{
+  for (Function::const_arg_iterator argIt = F.arg_begin(),
+       argEnd = F.arg_end(); argIt != argEnd; ++argIt) {
+    const Argument *arg = argIt;
+    const PointerType *ptrTy = dyn_cast<PointerType>(arg->getType());
+    if (ptrTy == NULL) {
+      continue;
+    }
+    Type *pointeeTy = ptrTy->getElementType();
+    bool isAggregate = dyn_cast<StructType>(pointeeTy) != NULL;
+#if LLVM_VERSION < 2500
+    isAggregate = isAggregate || dyn_cast<UnionType>(pointeeTy) != NULL;
+#endif
+    // FIXME: Because a pointer inside of a struct/union may be aliased to
+    // another pointer we need to take the conservative approach and place all
+    // pointers into the arena until more advanced detection is implemented.
+    if (isAggregate && containsPointerType(pointeeTy)) {
+      return true;
+    }
+  }
+  return false;
+}
+// Resolves every deferred __amdil_is_constant call collected in isConstVec:
+// each call is replaced by the i32 constant 1 when its argument is a
+// Constant and by 0 otherwise, and the call is erased. The list is emptied
+// afterwards.
+void
+AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
+{
+  if (isConstVec.empty()) {
+    return;
+  }
+#if LLVM_VERSION >= 2500
+  const unsigned argIdx = 0;
+#else
+  // Operand 0 is the callee on old LLVM versions.
+  const unsigned argIdx = 1;
+#endif
+  for (SmallVector<CallInst *, 16>::iterator it = isConstVec.begin(),
+       itEnd = isConstVec.end(); it != itEnd; ++it) {
+    CallInst *call = *it;
+    const bool argIsConstant =
+      dyn_cast<Constant>(call->getOperand(argIdx)) != NULL;
+    Type *int32Ty = Type::getInt32Ty(*mCTX);
+    Value *result = ConstantInt::get(int32Ty, argIsConstant ? 1 : 0);
+    call->replaceAllUsesWith(result);
+    call->eraseFromParent();
+  }
+  isConstVec.clear();
+}
+// Applies the callee replacements recorded in atomicFuncs and, when not all
+// atomics could be converted (mConvertAtomics is false), assigns every
+// function argument to the arena UAV if the device supports it, or to the
+// global resource ID otherwise.
+void
+AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+{
+  // Nothing to do without recorded atomic operations.
+  if (atomicFuncs.empty()) {
+    return;
+  }
+  // Retarget every recorded call to its replacement function.
+  typedef SmallVector< std::pair<CallInst *, Function *>, 16>::iterator
+    AtomicIter;
+  for (AtomicIter it = atomicFuncs.begin(), itEnd = atomicFuncs.end();
+       it != itEnd; ++it) {
+    CallInst *call = it->first;
+    Function *replacement = it->second;
+#if LLVM_VERSION >= 2500
+    // The callee is stored as the last operand of the call.
+    call->setOperand(call->getNumOperands()-1, replacement);
+#else
+    call->setOperand(0, replacement);
+#endif
+  }
+  mChanged = true;
+  if (mConvertAtomics) {
+    return;
+  }
+  // If we did not convert all of the atomics, then we need to make sure that
+  // the atomics that were not converted have their base pointers set to use the
+  // arena path.
+  AMDILKernelManager *kernelMgr =
+    (AMDILKernelManager *) mSTM->getKernelManager();
+  AMDILMachineFunctionInfo *funcInfo =
+    getAnalysis<MachineFunctionAnalysis>().getMF()
+    .getInfo<AMDILMachineFunctionInfo>();
+  for (Function::arg_iterator arg = F.arg_begin(), argEnd = F.arg_end();
+       arg != argEnd; ++arg) {
+    if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
+      kernelMgr->setUAVID(arg,
+          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
+      funcInfo->uav_insert(
+          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
+    } else {
+      kernelMgr->setUAVID(arg,
+          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+      funcInfo->uav_insert(
+          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+    }
+  }
+}
+
+// Pass entry point. Resets the per-function state, optionally forces all
+// arguments onto the global resource ID, runs the instruction level
+// optimizations over every instruction and finally applies the deferred
+// atomic and is_constant rewrites. Returns mChanged.
+bool
+AMDILPeepholeOpt::runOnFunction(Function &MF)
+{
+  // Per-function state.
+  mF = &MF;
+  mChanged = false;
+  mConvertAtomics = true;
+  mSTM = &TM.getSubtarget<AMDILSubtarget>();
+  if (mDebug) {
+    MF.dump();
+  }
+  mCTX = &MF.getType()->getContext();
+  if (dumpAllIntoArena(MF)) {
+    // dumpAllIntoArena only returns true when there is at least one argument.
+    AMDILKernelManager *kernelMgr =
+      (AMDILKernelManager *)mSTM->getKernelManager();
+    for (Function::const_arg_iterator argIt = MF.arg_begin(),
+         argEnd = MF.arg_end(); argIt != argEnd; ++argIt) {
+      const Argument *arg = argIt;
+      kernelMgr->setUAVID(getBasePointerValue(arg),
+          mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+    }
+  }
+  mRWGOpt = mSTM->getGlobalManager()->hasRWG(MF.getName());
+  // Visit every instruction; the callback may advance the iterator itself
+  // when it erases the current instruction.
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+      std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
+        this));
+
+  // Deferred rewrites that need the whole function to be scanned first.
+  doAtomicConversionIfNeeded(MF);
+  doIsConstCallConversionIfNeeded();
+
+  if (mDebug) {
+    MF.dump();
+  }
+  return mChanged;
+}
+#if 0
+// This currently isn't used, but might be used in the future,
+// so not removing it. Just commenting it out to fix linux warnings.
+//
+// Emits 'shl op, 8' followed by 'ashr <shl>, 8' before oldOp and returns the
+// result of the arithmetic shift. For vector operands a splat of 8 is used
+// as the shift amount.
+static Value *getSignExtend24(Value *op, Instruction *oldOp) {
+  Constant *s8val;
+  Type *opType = op->getType();
+  if (opType->isVectorTy()) {
+    std::vector<Constant *> consts;
+    for (size_t x = 0, y = dyn_cast<VectorType>(opType)->getNumElements();
+         x < y; ++x) {
+      consts.push_back(ConstantInt::get(Type::getInt32Ty(opType->getContext()),
+            8));
+    }
+#if LLVM_VERSION >= 3212
+    s8val = ConstantVector::get(consts);
+#else
+    s8val = ConstantVector::get(dyn_cast<VectorType>(opType), consts);
+#endif
+  } else {
+    s8val = ConstantInt::get(Type::getInt32Ty(opType->getContext()), 8);
+  }
+  BinaryOperator *res = BinaryOperator::Create(Instruction::Shl,
+      op, s8val, "bit24shl", oldOp);
+  res = BinaryOperator::Create(Instruction::AShr, res, s8val,
+      "bit24shr", oldOp);
+  return res;
+}
+#endif
+
+// Applies the call specific peephole rewrites to the instruction at *bbb.
+//
+// Returns true only when the call was erased; in that case *bbb has already
+// been advanced past it. Returns false when the instruction is not a call,
+// was left in place, or was only recorded for deferred processing (atomics
+// in atomicFuncs, __amdil_is_constant calls in isConstVec).
+bool
+AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
+{
+  Instruction *inst = (*bbb);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+  if (!CI) {
+    return false;
+  }
+  if (isSigned24BitOps(CI)) {
+    expandSigned24BitOps(CI);
+    ++(*bbb);
+    CI->eraseFromParent();
+    return true;
+  }
+  if (isRWGLocalOpt(CI)) {
+    expandRWGLocalOpt(CI);
+    return false;
+  }
+  if (propagateSamplerInst(CI)) {
+    return false;
+  }
+  if (expandBFI(CI) || expandBFM(CI)) {
+    ++(*bbb);
+    CI->eraseFromParent();
+    return true;
+  }
+  if (convertAccurateDivide(CI)) {
+    expandAccurateDivide(CI);
+    ++(*bbb);
+    CI->eraseFromParent();
+    return true;
+  }
+
+#if LLVM_VERSION >= 2500
+  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+#else
+  StringRef calleeName = CI->getOperand(0)->getName();
+#endif
+  if (calleeName.startswith("__amdil_is_constant")) {
+    // If we do not have optimizations, then this
+    // cannot be properly evaluated, so we add the
+    // call instruction to a vector and process
+    // them at the end of processing after the
+    // samplers have been correctly handled.
+    if (optLevel == CodeGenOpt::None) {
+      isConstVec.push_back(CI);
+      return false;
+    } else {
+#if LLVM_VERSION >= 2500
+      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+#else
+      Constant *CV = dyn_cast<Constant>(CI->getOperand(1));
+#endif
+      Type *aType = Type::getInt32Ty(*mCTX);
+      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+        : ConstantInt::get(aType, 0);
+      CI->replaceAllUsesWith(Val);
+      ++(*bbb);
+      CI->eraseFromParent();
+      return true;
+    }
+  }
+
+  if (calleeName.equals("__amdil_is_asic_id_i32")) {
+#if LLVM_VERSION >= 2500
+    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+#else
+    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(1));
+#endif
+    Type *aType = Type::getInt32Ty(*mCTX);
+    Value *Val = CV;
+    if (Val) {
+      // Fold to the device flag bits selected by the constant argument.
+      Val = ConstantInt::get(aType,
+          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+    } else {
+      Val = ConstantInt::get(aType, 0);
+    }
+    CI->replaceAllUsesWith(Val);
+    ++(*bbb);
+    CI->eraseFromParent();
+    return true;
+  }
+#if LLVM_VERSION >= 2500
+  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+#else
+  Function *F = dyn_cast<Function>(CI->getOperand(0));
+#endif
+  if (!F) {
+    return false;
+  }
+  // An atomic (other than xchg) whose result is unused can be retargeted to
+  // the '_noret' flavor of the same function.
+  if (F->getName().startswith("__atom") && !CI->getNumUses()
+      && F->getName().find("_xchg") == StringRef::npos) {
+    std::string buffer(F->getName().str() + "_noret");
+    Function *noRetFunc = dyn_cast<Function>(
+        F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+    // getOrInsertFunction may hand back something that is not a Function
+    // (the dyn_cast then yields NULL); never record a NULL replacement.
+    if (noRetFunc) {
+      F = noRetFunc;
+      atomicFuncs.push_back(std::make_pair(CI, F));
+    }
+  }
+
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+      && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+    return false;
+  }
+  if (!mConvertAtomics) {
+    return false;
+  }
+  StringRef name = F->getName();
+  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+#if LLVM_VERSION >= 2500
+    Value *ptr = CI->getOperand(0);
+#else
+    Value *ptr = CI->getOperand(1);
+#endif
+    const Value *basePtr = getBasePointerValue(ptr);
+    const Argument *Arg = dyn_cast<Argument>(basePtr);
+    if (Arg) {
+      AMDILGlobalManager *GM = (AMDILGlobalManager*)mSTM->getGlobalManager();
+      int32_t id = GM->getArgID(Arg);
+      if (id >= 0) {
+        // Build "<name>_<id>". The StringRef is converted with str() because
+        // its data() pointer is not guaranteed to be NUL terminated.
+        std::stringstream ss;
+        ss << name.str() << "_" << id;
+        Function *argFunc = dyn_cast<Function>(
+            F->getParent()->getOrInsertFunction(ss.str(),
+              F->getFunctionType()));
+        if (argFunc) {
+          F = argFunc;
+          atomicFuncs.push_back(std::make_pair(CI, F));
+        } else {
+          // Could not obtain a usable replacement; treat this atomic as not
+          // convertible so the conservative path is taken.
+          mConvertAtomics = false;
+        }
+      } else {
+        mConvertAtomics = false;
+      }
+    } else {
+      mConvertAtomics = false;
+    }
+  }
+  return false;
+}
+
+// Decomposes one operand of an 'or' into the pieces needed by the bit insert
+// optimization. 'base' must be a Shl or an And; on success 'src' is the value
+// being shifted/masked and 'mask'/'shift' hold whichever constant operands
+// were found (they are left untouched when not found). Returns false when
+// the operand does not fit the pattern.
+bool
+AMDILPeepholeOpt::setupBitInsert(Instruction *base,
+    Instruction *&src,
+    Constant *&mask,
+    Constant *&shift)
+{
+  if (base == NULL) {
+    if (mDebug) {
+      dbgs() << "Null pointer passed into function.\n";
+    }
+    return false;
+  }
+  const unsigned baseOpc = base->getOpcode();
+  const bool baseIsAnd = (baseOpc == Instruction::And);
+  if (baseOpc == Instruction::Shl) {
+    shift = dyn_cast<Constant>(base->getOperand(1));
+  } else if (baseIsAnd) {
+    mask = dyn_cast<Constant>(base->getOperand(1));
+  } else {
+    if (mDebug) {
+      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+    }
+    // Neither a Shl nor an And: none of the supported patterns apply.
+    return false;
+  }
+  src = dyn_cast<Instruction>(base->getOperand(0));
+  if (src == NULL) {
+    if (mDebug) {
+      dbgs() << "Failed setup since the base operand is not an instruction!\n";
+    }
+    return false;
+  }
+  // An 'and' at the top already tells us which bits are valid, so there is
+  // no need to look at the next operation.
+  if (baseIsAnd) {
+    return true;
+  }
+  // The base was a shift; look one level deeper for the missing piece.
+  const unsigned srcOpc = src->getOpcode();
+  if (srcOpc == Instruction::Shl && shift == NULL) {
+    shift = dyn_cast<Constant>(src->getOperand(1));
+    src = dyn_cast<Instruction>(src->getOperand(0));
+  } else if (srcOpc == Instruction::And && mask == NULL) {
+    mask = dyn_cast<Constant>(src->getOperand(1));
+  }
+  if (mask != NULL || shift != NULL) {
+    return true;
+  }
+  if (mDebug) {
+    dbgs() << "Failed setup since both mask and shift are NULL!\n";
+  }
+  // Did not find a constant mask or a shift.
+  return false;
+}
+// Tries to turn an 'or' of two shifted and/or masked 32bit values into a
+// single call to __amdil_ubit_insert(width, offset, src0, src1).
+//
+// Returns true when the call was created and all uses of 'inst' were
+// redirected to it ('inst' itself is left in place). Returns false when the
+// instruction does not match, optimizations are disabled, the device is
+// HD4XXX, or any of the pattern constraints fail.
+bool
+AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
+{
+  if (!inst) {
+    return false;
+  }
+  if (!inst->isBinaryOp()) {
+    return false;
+  }
+  if (inst->getOpcode() != Instruction::Or) {
+    return false;
+  }
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  // We want to do an optimization on a sequence of ops that in the end equals a
+  // single ISA instruction.
+  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+  // Some simplified versions of this pattern are as follows:
+  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+  // (A & B) | (D << F) when (1 << F) >= B
+  // (A << C) | (D & E) when (1 << C) >= E
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // The HD4XXX hardware doesn't support the ubit_insert instruction.
+    return false;
+  }
+  Type *aType = inst->getType();
+  bool isVector = aType->isVectorTy();
+  int numEle = 1;
+  // This optimization only works on 32bit integers.
+  if (aType->getScalarType()
+      != Type::getInt32Ty(inst->getContext())) {
+    return false;
+  }
+  if (isVector) {
+    const VectorType *VT = dyn_cast<VectorType>(aType);
+    numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in a intrinsic and we
+    // cannot support Vec3 types.
+    if (numEle > 4 || numEle == 3) {
+      return false;
+    }
+  }
+  // TODO: Handle vectors.
+  if (isVector) {
+    if (mDebug) {
+      dbgs() << "!!! Vectors are not supported yet!\n";
+    }
+    return false;
+  }
+  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+  Constant *LHSMask = NULL, *RHSMask = NULL;
+  Constant *LHSShift = NULL, *RHSShift = NULL;
+  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+    if (mDebug) {
+      dbgs() << "Found an OR Operation that failed setup!\n";
+      inst->dump();
+      if (LHS) { LHS->dump(); }
+      if (LHSSrc) { LHSSrc->dump(); }
+      if (LHSMask) { LHSMask->dump(); }
+      if (LHSShift) { LHSShift->dump(); }
+    }
+    // There was an issue with the setup for BitInsert.
+    return false;
+  }
+  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+    if (mDebug) {
+      dbgs() << "Found an OR Operation that failed setup!\n";
+      inst->dump();
+      if (RHS) { RHS->dump(); }
+      if (RHSSrc) { RHSSrc->dump(); }
+      if (RHSMask) { RHSMask->dump(); }
+      if (RHSShift) { RHSShift->dump(); }
+    }
+    // There was an issue with the setup for BitInsert.
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+    dbgs() << "Op:        "; inst->dump();
+    dbgs() << "LHS:       "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "LHS Src:   "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "LHS Mask:  "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "RHS:       "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "RHS Src:   "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "RHS Mask:  "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+  }
+  // setupBitInsert only guarantees Constant operands. Anything that is not a
+  // plain integer constant (e.g. a constant expression) has no known value,
+  // so the pattern cannot be analyzed and must be rejected instead of
+  // dereferencing a failed cast.
+  ConstantInt *LHSMaskCI = dyn_cast_or_null<ConstantInt>(LHSMask);
+  ConstantInt *RHSMaskCI = dyn_cast_or_null<ConstantInt>(RHSMask);
+  ConstantInt *LHSShiftCI = dyn_cast_or_null<ConstantInt>(LHSShift);
+  ConstantInt *RHSShiftCI = dyn_cast_or_null<ConstantInt>(RHSShift);
+  if ((LHSMask && !LHSMaskCI) || (RHSMask && !RHSMaskCI)
+      || (LHSShift && !LHSShiftCI) || (RHSShift && !RHSShiftCI)) {
+    if (mDebug) {
+      dbgs() << "Mask or shift is not a constant integer, failed detection!\n";
+    }
+    return false;
+  }
+  Constant *offset = NULL;
+  Constant *width = NULL;
+  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
+  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
+  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+  lhsMaskVal = (int32_t)(LHSMaskCI ? LHSMaskCI->getZExtValue() : 0);
+  rhsMaskVal = (int32_t)(RHSMaskCI ? RHSMaskCI->getZExtValue() : 0);
+  lhsShiftVal = (int32_t)(LHSShiftCI ? LHSShiftCI->getZExtValue() : 0);
+  rhsShiftVal = (int32_t)(RHSShiftCI ? RHSShiftCI->getZExtValue() : 0);
+  // Without a mask, the shift alone defines which bits can be set.
+  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
+  if (mDebug) {
+    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
+    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
+    dbgs() << (RHSMask ? " & E)" : ")");
+    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
+    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
+    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
+    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
+    dbgs() << "width(B) = " << lhsMaskWidth;
+    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
+    dbgs() << "offset(B) = " << lhsMaskOffset;
+    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
+    dbgs() << "Constraints: \n";
+    dbgs() << "\t(1) B ^ E == 0\n";
+    dbgs() << "\t(2-LHS) B is a mask\n";
+    dbgs() << "\t(2-RHS) E is a mask\n";
+    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
+    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
+  }
+  // NOTE(review): the check below rejects identical non-zero masks, i.e. it
+  // fails when B ^ E == 0, which reads opposite to the constraint text
+  // printed above. Behavior is kept as is; confirm the intended constraint.
+  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+    if (mDebug) {
+      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
+      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
+      dbgs() << "Failed constraint 1!\n";
+    }
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "LHS = " << lhsMaskOffset << "";
+    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
+    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
+    dbgs() << "\nRHS = " << rhsMaskOffset << "";
+    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
+    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
+    dbgs() << "\n";
+  }
+  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+    // The LHS bits sit entirely above the RHS bits: insert LHS into RHS.
+    offset = ConstantInt::get(aType, lhsMaskOffset, false);
+    width = ConstantInt::get(aType, lhsMaskWidth, false);
+    RHSSrc = RHS;
+    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+      if (mDebug) {
+        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
+        dbgs() << "Failed constraint 2!\n";
+      }
+      return false;
+    }
+    // setupBitInsert can succeed with a NULL source (shift of a shift whose
+    // innermost operand is not an instruction); nothing can be inserted then.
+    if (!LHSSrc) {
+      if (mDebug) {
+        dbgs() << "LHS source is not an instruction, failed detection!\n";
+      }
+      return false;
+    }
+    if (!LHSShift) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+          "MaskShr", LHS);
+    } else if (lhsShiftVal != lhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+          "MaskShr", LHS);
+    }
+    if (mDebug) {
+      dbgs() << "Optimizing LHS!\n";
+    }
+  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+    // The RHS bits sit entirely above the LHS bits: insert RHS into LHS.
+    offset = ConstantInt::get(aType, rhsMaskOffset, false);
+    width = ConstantInt::get(aType, rhsMaskWidth, false);
+    LHSSrc = RHSSrc;
+    RHSSrc = LHS;
+    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+      if (mDebug) {
+        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
+        dbgs() << "Failed constraint 2!\n";
+      }
+      return false;
+    }
+    // Same NULL source guard as for the LHS case above.
+    if (!LHSSrc) {
+      if (mDebug) {
+        dbgs() << "RHS source is not an instruction, failed detection!\n";
+      }
+      return false;
+    }
+    if (!RHSShift) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+          "MaskShr", RHS);
+    } else if (rhsShiftVal != rhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+          "MaskShr", RHS);
+    }
+    if (mDebug) {
+      dbgs() << "Optimizing RHS!\n";
+    }
+  } else {
+    if (mDebug) {
+      dbgs() << "Failed constraint 3!\n";
+    }
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "Width:  "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+  }
+  if (!offset || !width) {
+    if (mDebug) {
+      dbgs() << "Either width or offset are NULL, failed detection!\n";
+    }
+    return false;
+  }
+  // Lets create the function signature.
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  std::string name = "__amdil_ubit_insert";
+  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+  Function *Func =
+    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+        getOrInsertFunction(llvm::StringRef(name), funcType));
+  Value *Operands[4] = {
+    width,
+    offset,
+    LHSSrc,
+    RHSSrc
+  };
+  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+  if (mDebug) {
+    dbgs() << "Old Inst: ";
+    inst->dump();
+    dbgs() << "New Inst: ";
+    CI->dump();
+    dbgs() << "\n\n";
+  }
+  CI->insertBefore(inst);
+  inst->replaceAllUsesWith(CI);
+  return true;
+}
+
+// Tries to turn '(A >> B) & C', with constant B and a low-bit mask C, into a
+// call to __amdil_ubit_extract(width, offset, A) on 32bit integers (scalar
+// or 2/4 element vectors).
+//
+// Returns true when the call was created and all uses of 'inst' were
+// redirected to it ('inst' itself is left in place). Returns false when the
+// pattern does not match, optimizations are disabled or the device is HD4XXX.
+bool
+AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
+{
+  if (!inst) {
+    return false;
+  }
+  if (!inst->isBinaryOp()) {
+    return false;
+  }
+  if (inst->getOpcode() != Instruction::And) {
+    return false;
+  }
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  // We want to do some simple optimizations on Shift right/And patterns. The
+  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
+  // value smaller than 32 and C is a mask. If C is a constant value, then the
+  // following transformation can occur. For signed integers, it turns into the
+  // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
+  // integers, it turns into the function call dst =
+  // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
+  // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
+  // Evergreen hardware.
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // This does not work on HD4XXX hardware.
+    return false;
+  }
+  Type *aType = inst->getType();
+  bool isVector = aType->isVectorTy();
+  int numEle = 1;
+  // This only works on 32bit integers
+  if (aType->getScalarType()
+      != Type::getInt32Ty(inst->getContext())) {
+    return false;
+  }
+  if (isVector) {
+    const VectorType *VT = dyn_cast<VectorType>(aType);
+    numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in a intrinsic and we
+    // cannot support Vec3 types.
+    if (numEle > 4 || numEle == 3) {
+      return false;
+    }
+  }
+  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+  // If the first operand is not a shift instruction, then we can return as it
+  // doesn't match this pattern.
+  if (!ShiftInst || !ShiftInst->isShift()) {
+    return false;
+  }
+  // A shift left does not match this pattern.
+  if (ShiftInst->getOpcode() == Instruction::Shl) {
+    return false;
+  }
+  bool isSigned = ShiftInst->isArithmeticShift();
+  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+  // Lets make sure that the shift value and the and mask are constants.
+  if (!AndMask || !ShrVal) {
+    return false;
+  }
+  Constant *newMaskConst;
+  Constant *shiftValConst;
+  if (isVector) {
+    // Handle the vector case
+    std::vector<Constant *> maskVals;
+    std::vector<Constant *> shiftVals;
+    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+    // A vector constant is not necessarily a ConstantVector (for example
+    // zeroinitializer or undef are not); without per element values the
+    // pattern cannot be checked.
+    if (!AndMaskVec || !ShrValVec) {
+      return false;
+    }
+    Type *scalarType = AndMaskVec->getType()->getScalarType();
+    assert(AndMaskVec->getNumOperands() ==
+        ShrValVec->getNumOperands() && "cannot have a "
+        "combination where the number of elements to a "
+        "shift and an and are different!");
+    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+      if (!AndCI || !ShiftIC) {
+        return false;
+      }
+      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+      if (!isMask_32(maskVal)) {
+        return false;
+      }
+      // The number of trailing ones is the width of the extracted field.
+      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+      // If the mask or shiftval is greater than the bitcount, then break out.
+      if (maskVal >= 32 || shiftVal >= 32) {
+        return false;
+      }
+      // If the mask val is greater than the number of original bits left
+      // then this optimization is invalid.
+      if (maskVal > (32 - shiftVal)) {
+        return false;
+      }
+      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+    }
+    newMaskConst = ConstantVector::get(maskVals);
+    shiftValConst = ConstantVector::get(shiftVals);
+  } else {
+    // Handle the scalar case
+    ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMask);
+    ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrVal);
+    // A scalar Constant can still be something without a known value, such
+    // as a constant expression or undef.
+    if (!AndCI || !ShiftIC) {
+      return false;
+    }
+    uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+    // This must be a mask value where all lower bits are set to 1 and then any
+    // bit higher is set to 0.
+    if (!isMask_32(maskVal)) {
+      return false;
+    }
+    // Count the number of bits set in the mask, this is the width of the
+    // resulting bit set that is extracted from the source value.
+    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+    uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+    // If the mask or shift val is greater than the bitcount, then break out.
+    if (maskVal >= 32 || shiftVal >= 32) {
+      return false;
+    }
+    // If the mask val is greater than the number of original bits left then
+    // this optimization is invalid.
+    if (maskVal > (32 - shiftVal)) {
+      return false;
+    }
+    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+  }
+  // Lets create the function signature.
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  // The unsigned extract is used for both logical and arithmetic shifts: the
+  // width check above keeps the field within the original (unshifted) bits.
+  std::string name = "__amdil_ubit_extract";
+  if (isVector) {
+    name += "_v" + itostr(numEle) + "i32";
+  } else {
+    name += "_i32";
+  }
+  // Lets create the function.
+  Function *Func =
+    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+        getOrInsertFunction(llvm::StringRef(name), funcType));
+  Value *Operands[3] = {
+    newMaskConst,
+    shiftValConst,
+    ShiftInst->getOperand(0)
+  };
+  // Lets create the Call with the operands
+  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+  CI->insertBefore(inst);
+  inst->replaceAllUsesWith(CI);
+  return true;
+}
+
+// Expands a call to a function whose name starts with "__amdil_bfi" into
+// plain bitwise instructions inserted before the call, and redirects all
+// uses of the call to the result. Returns true when the expansion happened;
+// the caller is responsible for erasing the call. Nothing is done when CI is
+// NULL or the CAL version is not newer than CAL_VERSION_SC_150.
+bool
+AMDILPeepholeOpt::expandBFI(CallInst *CI)
+{
+  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+    return false;
+  }
+#if LLVM_VERSION >= 2500
+  Value *callee = CI->getOperand(CI->getNumOperands() - 1);
+#else
+  Value *callee = CI->getOperand(0);
+#endif
+  if (!callee->getName().startswith("__amdil_bfi")) {
+    return false;
+  }
+  Type *argTy = CI->getOperand(0)->getType();
+  // All-ones constant used to build the bitwise not; splat for vectors.
+  Constant *allOnes = ConstantInt::get(CI->getContext(),
+      APInt(32, StringRef("-1"), 10));
+  if (argTy->isVectorTy()) {
+    const size_t numElts = dyn_cast<VectorType>(argTy)->getNumElements();
+    std::vector<Constant *> splat;
+    for (size_t i = 0; i < numElts; ++i) {
+      splat.push_back(allOnes);
+    }
+    allOnes = ConstantVector::get(splat);
+  }
+  // __amdil_bfi => (A & B) | (~A & C)
+  BinaryOperator *selected =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+        CI->getOperand(1), "bfi_and", CI);
+  BinaryOperator *notMask =
+    BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), allOnes,
+        "bfi_not", CI);
+  BinaryOperator *kept =
+    BinaryOperator::Create(Instruction::And, notMask, CI->getOperand(2),
+        "bfi_and", CI);
+  BinaryOperator *merged =
+    BinaryOperator::Create(Instruction::Or, selected, kept, "bfi_or", CI);
+  CI->replaceAllUsesWith(merged);
+  return true;
+}
+
+// Expands a call to a function whose name starts with "__amdil_bfm" into
+// shift/mask instructions inserted before the call, and redirects all uses
+// of the call to the result. Returns true when the expansion happened; the
+// caller is responsible for erasing the call. Nothing is done when CI is
+// NULL or the CAL version is not newer than CAL_VERSION_SC_150.
+bool
+AMDILPeepholeOpt::expandBFM(CallInst *CI)
+{
+  if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) {
+    return false;
+  }
+#if LLVM_VERSION >= 2500
+  Value *callee = CI->getOperand(CI->getNumOperands() - 1);
+#else
+  Value *callee = CI->getOperand(0);
+#endif
+  if (!callee->getName().startswith("__amdil_bfm")) {
+    return false;
+  }
+  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
+  Constant *fiveBitMask = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+  Constant *one = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+  Type *argTy = CI->getOperand(0)->getType();
+  if (argTy->isVectorTy()) {
+    // Splat both constants across the vector width.
+    const size_t numElts = dyn_cast<VectorType>(argTy)->getNumElements();
+    std::vector<Constant*> maskSplat, oneSplat;
+    for (size_t i = 0; i < numElts; ++i) {
+      maskSplat.push_back(fiveBitMask);
+      oneSplat.push_back(one);
+    }
+    fiveBitMask = ConstantVector::get(maskSplat);
+    one = ConstantVector::get(oneSplat);
+  }
+  BinaryOperator *widthBits =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+        fiveBitMask, "bfm_mask", CI);
+  BinaryOperator *powerOfTwo =
+    BinaryOperator::Create(Instruction::Shl, one,
+        widthBits, "bfm_shl", CI);
+  BinaryOperator *lowOnes =
+    BinaryOperator::Create(Instruction::Sub, powerOfTwo,
+        one, "bfm_sub", CI);
+  BinaryOperator *offsetBits =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+        fiveBitMask, "bfm_mask", CI);
+  BinaryOperator *result =
+    BinaryOperator::Create(Instruction::Shl, lowOnes, offsetBits,
+        "bfm_shl", CI);
+  CI->replaceAllUsesWith(result);
+  return true;
+}
+
+// Runs the per instruction optimizations on the instruction at *bbb. The
+// call optimizations are tried first; if they report true, that value is
+// returned. Otherwise the bit extract, bit insert and misaligned memory
+// fixes are tried in that order, stopping at the first one that applies,
+// and false is returned.
+bool
+AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)
+{
+  // Grab the instruction before optimizeCallInst gets a chance to advance
+  // the iterator.
+  Instruction *current = (*bbb);
+  if (optimizeCallInst(bbb)) {
+    return true;
+  }
+  // Short circuit evaluation preserves the "first match wins" order.
+  (void)(optimizeBitExtract(current)
+      || optimizeBitInsert(current)
+      || correctMisalignedMemOp(current));
+  return false;
+}
+// Resets the alignment of a struct (or, on old LLVM versions, union) load or
+// store to 0 when the accessed type is larger than the recorded alignment
+// and that alignment is below 4. Returns true only when the alignment was
+// changed.
+bool
+AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+{
+  LoadInst *load = dyn_cast<LoadInst>(inst);
+  StoreInst *store = dyn_cast<StoreInst>(inst);
+  if (!load && !store) {
+    return false;
+  }
+  // For a load the accessed type is the result type, for a store it is the
+  // type of the stored value.
+  unsigned alignment;
+  Type *accessTy;
+  if (load) {
+    alignment = load->getAlignment();
+    accessTy = inst->getType();
+  } else {
+    alignment = store->getAlignment();
+    accessTy = store->getValueOperand()->getType();
+  }
+  unsigned size = getTypeSize(accessTy);
+  if (size <= alignment) {
+    return false;
+  }
+#if LLVM_VERSION >= 2500
+  const bool isAggregate = accessTy->isStructTy();
+#else
+  const bool isAggregate = accessTy->isStructTy() || accessTy->isUnionTy();
+#endif
+  if (!isAggregate || alignment >= 4) {
+    return false;
+  }
+  if (load) {
+    load->setAlignment(0);
+  } else {
+    store->setAlignment(0);
+  }
+  return true;
+}
+// Returns true when CI calls one of the signed 24bit helpers (imad24, imul24
+// or imul24_high) and the device does not implement signed 24bit operations
+// in hardware, i.e. when the call has to be expanded in software.
+bool
+AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
+{
+  if (!CI) {
+    return false;
+  }
+#if LLVM_VERSION >= 2500
+  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+#else
+  Value *LHS = CI->getOperand(0);
+#endif
+  StringRef calleeName = LHS->getName();
+  // "__amdil_imul24_high" also starts with "__amdil_imul24", so the two
+  // prefixes below cover all three helpers.
+  if (!calleeName.startswith("__amdil_imad24")
+      && !calleeName.startswith("__amdil_imul24")) {
+    return false;
+  }
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+    return false;
+  }
+  return true;
+}
+
+// Replaces a signed 24bit helper call with its 32bit equivalent, inserted
+// before the call; all uses of CI are redirected to the replacement. The
+// call itself is not erased here.
+//
+// On 7XX and 8XX we do not have signed 24bit, so we need to
+// expand it to the following:
+// imul24 turns into 32bit imul
+// imad24 turns into 32bit imad
+// imul24_high turns into 32bit imulhigh
+void
+AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+{
+  assert(isSigned24BitOps(CI) && "Must be a "
+      "signed 24 bit operation to call this function!");
+#if LLVM_VERSION >= 2500
+  // The callee is the last operand; call arguments start at operand 0.
+  const unsigned argBase = 0;
+  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+#else
+  // The callee is operand 0; call arguments start at operand 1.
+  const unsigned argBase = 1;
+  Value *LHS = CI->getOperand(0);
+#endif
+  StringRef calleeName = LHS->getName();
+  if (calleeName.startswith("__amdil_imad24")) {
+    Type *aType = CI->getOperand(argBase)->getType();
+    bool isVector = aType->isVectorTy();
+    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+    std::vector<Type*> callTypes;
+    callTypes.push_back(CI->getOperand(argBase)->getType());
+    callTypes.push_back(CI->getOperand(argBase + 1)->getType());
+    callTypes.push_back(CI->getOperand(argBase + 2)->getType());
+    FunctionType *funcType =
+      FunctionType::get(CI->getOperand(argBase)->getType(), callTypes, false);
+    std::string name = "__amdil_imad";
+    if (isVector) {
+      name += "_v" + itostr(numEle) + "i32";
+    } else {
+      name += "_i32";
+    }
+    Function *Func = dyn_cast<Function>(
+        CI->getParent()->getParent()->getParent()->
+        getOrInsertFunction(llvm::StringRef(name), funcType));
+    Value *Operands[3] = {
+      CI->getOperand(argBase),
+      CI->getOperand(argBase + 1),
+      CI->getOperand(argBase + 2)
+    };
+    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+    nCI->insertBefore(CI);
+    CI->replaceAllUsesWith(nCI);
+  } else if (calleeName.startswith("__amdil_imul24_high")) {
+    // This must be tested before the plain imul24 prefix, which it shares;
+    // otherwise the high-word multiply would be lowered to a low-word Mul.
+    Type *aType = CI->getOperand(argBase)->getType();
+    bool isVector = aType->isVectorTy();
+    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+    std::vector<Type*> callTypes;
+    callTypes.push_back(CI->getOperand(argBase)->getType());
+    callTypes.push_back(CI->getOperand(argBase + 1)->getType());
+    FunctionType *funcType =
+      FunctionType::get(CI->getOperand(argBase)->getType(), callTypes, false);
+    std::string name = "__amdil_imul_high";
+    if (isVector) {
+      name += "_v" + itostr(numEle) + "i32";
+    } else {
+      name += "_i32";
+    }
+    Function *Func = dyn_cast<Function>(
+        CI->getParent()->getParent()->getParent()->
+        getOrInsertFunction(llvm::StringRef(name), funcType));
+    Value *Operands[2] = {
+      CI->getOperand(argBase),
+      CI->getOperand(argBase + 1)
+    };
+    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+    nCI->insertBefore(CI);
+    CI->replaceAllUsesWith(nCI);
+  } else if (calleeName.startswith("__amdil_imul24")) {
+    BinaryOperator *mulOp =
+      BinaryOperator::Create(Instruction::Mul, CI->getOperand(argBase),
+          CI->getOperand(argBase + 1), "imul24", CI);
+    CI->replaceAllUsesWith(mulOp);
+  }
+}
+
+// Returns true when CI is a non-NULL call whose callee operand is named
+// "__amdil_get_local_size_int" and the mRWGOpt flag is set.
+bool
+AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+{
+  if (CI == NULL || !mRWGOpt) {
+    return false;
+  }
+  // The operand that carries the callee sits at a different index depending
+  // on the LLVM version this file is built against.
+#if LLVM_VERSION >= 2500
+  const unsigned calleeIdx = CI->getNumOperands() - 1;
+#else
+  const unsigned calleeIdx = 0;
+#endif
+  return CI->getOperand(calleeIdx)->getName() == "__amdil_get_local_size_int";
+}
+
+// Replaces every use of a get_local_size call with a constant vector of four
+// 32-bit integers: the three values the global manager's getLocal() reports
+// for the current function (indices 0..2), followed by a trailing zero.
+// The call instruction itself is left in place; only its uses are rewritten.
+void
+AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI)
+{
+  assert(isRWGLocalOpt(CI) &&
+      "This optmization only works when the call inst is get_local_size!");
+  std::vector<Constant *> elements;
+  uint32_t dim = 0;
+  while (dim < 3) {
+    uint32_t localSize = mSTM->getGlobalManager()->getLocal(mF->getName(), dim);
+    elements.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), localSize));
+    ++dim;
+  }
+  // Fourth lane is always zero.
+  elements.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0));
+  CI->replaceAllUsesWith(ConstantVector::get(elements));
+  ++LocalFuncs;
+}
+
+// Returns true when CI is a call whose callee name starts with
+// "__amdil_improved_div" and the target is one where the call should be
+// expanded.  A NULL call is rejected, as are HD6XXX-generation devices named
+// "cayman", "kauai" or "trinity".
+bool
+AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
+{
+  if (CI == NULL) {
+    return false;
+  }
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
+    if (mSTM->getDeviceName() == "cayman"
+        || mSTM->getDeviceName() == "kauai"
+        || mSTM->getDeviceName() == "trinity") {
+      return false;
+    }
+  }
+  // The callee operand index depends on the LLVM version.
+#if LLVM_VERSION >= 2500
+  const unsigned calleeIdx = CI->getNumOperands() - 1;
+#else
+  const unsigned calleeIdx = 0;
+#endif
+  return CI->getOperand(calleeIdx)->getName().substr(0, 20)
+    == "__amdil_improved_div";
+}
+
+// Replaces an "improved divide" call with a plain FDiv instruction named
+// "fdiv32", inserted immediately before the call, and redirects all uses of
+// the call to it.  The call itself is not erased here.
+void
+AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
+{
+  assert(convertAccurateDivide(CI)
+      && "expanding accurate divide can only happen if it is expandable!");
+  // The first two call arguments live at different operand indices depending
+  // on the LLVM version.
+#if LLVM_VERSION >= 2500
+  Value *dividend = CI->getOperand(0);
+  Value *divisor = CI->getOperand(1);
+#else
+  Value *dividend = CI->getOperand(1);
+  Value *divisor = CI->getOperand(2);
+#endif
+  BinaryOperator *fdiv =
+    BinaryOperator::Create(Instruction::FDiv, dividend, divisor, "fdiv32", CI);
+  CI->replaceAllUsesWith(fdiv);
+}
+
+// Tries to replace a sampler that is loaded from a global variable with the
+// global's constant initializer.
+//
+// This is only attempted when optLevel is CodeGenOpt::None and CI is a call to
+// one of the __amdil_image{2d,3d}_read_{norm,unnorm} functions.  The sampler
+// operand must be a load from the private address space whose pointer is a
+// GlobalVariable with a 32-bit integer initializer.  On success all uses of
+// the load are replaced with the initializer and true is returned; in every
+// other case nothing is changed and false is returned.
+bool
+AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
+{
+  if (optLevel != CodeGenOpt::None || !CI) {
+    return false;
+  }
+
+  // Callee and sampler operand positions depend on the LLVM version.
+#if LLVM_VERSION >= 2500
+  const unsigned calleeIdx = CI->getNumOperands() - 1;
+  const unsigned samplerOpIdx = 1;
+#else
+  const unsigned calleeIdx = 0;
+  const unsigned samplerOpIdx = 2;
+#endif
+
+  StringRef callee = CI->getOperand(calleeIdx)->getName();
+  const bool isImageRead = callee == "__amdil_image2d_read_norm"
+    || callee == "__amdil_image2d_read_unnorm"
+    || callee == "__amdil_image3d_read_norm"
+    || callee == "__amdil_image3d_read_unnorm";
+  if (!isImageRead) {
+    return false;
+  }
+
+  // The sampler has to come straight from a load out of private memory.
+  LoadInst *samplerLoad = dyn_cast<LoadInst>(CI->getOperand(samplerOpIdx));
+  if (!samplerLoad
+      || samplerLoad->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+    return false;
+  }
+
+  // Loading from anything that is not a global variable cannot be resolved.
+  GlobalVariable *gv =
+    dyn_cast<GlobalVariable>(samplerLoad->getPointerOperand());
+  if (!gv || !gv->hasInitializer()) {
+    return false;
+  }
+
+  // Only a 32-bit integer initializer can stand in for the sampler value.
+  Constant *samplerVal = gv->getInitializer();
+  if (!samplerVal->getType()->isIntegerTy(32)) {
+    return false;
+  }
+
+  // Substitute the constant for the load so that the __amdil_is_constant()
+  // function can be reparsed by the caller.
+  samplerLoad->replaceAllUsesWith(samplerVal);
+  return true;
+}
+
+bool
+AMDILPeepholeOpt::doInitialization(Module &M) // M is not used
+{
+ return false; // no per-module setup is performed; always returns false
+}
+
+bool
+AMDILPeepholeOpt::doFinalization(Module &M) // M is not used
+{
+ return false; // no per-module teardown is performed; always returns false
+}
+
+void
+AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const // fills AU with this pass's analysis requirements
+{
+ AU.addRequired<MachineFunctionAnalysis>(); // this pass requires MachineFunctionAnalysis
+ FunctionPass::getAnalysisUsage(AU); // let the base class add its own entries
+ AU.setPreservesAll(); // marks every analysis as preserved by this pass
+}
diff --git a/src/gallium/drivers/radeon/AMDILPointerManager.cpp b/src/gallium/drivers/radeon/AMDILPointerManager.cpp
new file mode 100644
index 00000000000..dcf3f4133c5
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILPointerManager.cpp
@@ -0,0 +1,2710 @@
+//===-------- AMDILPointerManager.cpp - Manage Pointers for HW-------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// Implementation of the AMDILPointerManager classes. See the header file for
+// more documentation of each class.
+// TODO: This fails when function calls are enabled; calls must always be inlined.
+//===----------------------------------------------------------------------===//
+#include "AMDILPointerManager.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include <iostream>
+#include <set>
+#include <map>
+#include <list>
+#include <queue>
+#include <stdio.h>
+using namespace llvm;
+char AMDILPointerManager::ID = 0; // pass identifier handed to the MachineFunctionPass constructor
+namespace llvm {
+ FunctionPass*
+ createAMDILPointerManager(TargetMachine &tm, CodeGenOpt::Level OL) // factory: returns the pointer-manager pass for tm
+ {
+ return tm.getSubtarget<AMDILSubtarget>() // the AMDIL subtarget's device object decides
+ .device()->getPointerManager(tm, OL); // which pointer-manager pass is created; tm and OL are forwarded
+ }
+}
+
+AMDILPointerManager::AMDILPointerManager(
+ TargetMachine &tm,
+ CodeGenOpt::Level OL) : // NOTE(review): OL is accepted but not used in this constructor
+#if LLVM_VERSION >= 2500
+ MachineFunctionPass(ID), // newer LLVM: the base class takes the ID object itself
+#else
+ MachineFunctionPass((intptr_t)&ID), // older LLVM: the base class takes the address of ID
+#endif
+ TM(tm)
+{
+ mDebug = DEBUGME; // debug tracing is controlled by the DEBUGME macro
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); // initialize the machine dominator tree pass in the global registry
+}
+
+AMDILPointerManager::~AMDILPointerManager()
+{ // nothing is released explicitly here
+}
+
+const char*
+AMDILPointerManager::getPassName() const // human-readable name of this pass
+{
+ return "AMD IL Default Pointer Manager Pass"; // string literal, so the caller must not free it
+}
+
+void
+AMDILPointerManager::getAnalysisUsage(AnalysisUsage &AU) const // fills AU with this pass's analysis requirements
+{
+ AU.setPreservesAll(); // marks every analysis as preserved by this pass
+ AU.addRequiredID(MachineDominatorsID); // the machine dominator tree must be available
+ MachineFunctionPass::getAnalysisUsage(AU); // let the base class add its own entries
+}
+
+AMDILEGPointerManager::AMDILEGPointerManager(
+ TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ AMDILPointerManager(tm, OL), // base class receives the same target machine and opt level
+ TM(tm) // NOTE(review): the base constructor also initializes a TM member from tm -- presumably this class declares its own; confirm in the header
+{
+}
+
+AMDILEGPointerManager::~AMDILEGPointerManager()
+{ // nothing is released explicitly here
+}
+// Looks up the name of the sampler used by the image read instruction MI.
+//
+// MI must have exactly five operands, with operand 3 a register (both are
+// asserted).  If lookupTable maps that register to a function Argument, the
+// argument's name is returned.  Otherwise the head of the register's use/def
+// list must belong to a private load from a frame index whose FIToPtrMap
+// entry holds an Argument; that argument's name is returned.  Every other
+// case trips an assertion and, when assertions are compiled out, yields
+// "unknown".
+std::string
+findSamplerName(MachineInstr* MI,
+    FIPMap &FIToPtrMap,
+    RVPVec &lookupTable)
+{
+  assert(MI->getNumOperands() == 5 && "Only an "
+      "image read instruction with 5 arguments can "
+      "have a sampler.");
+  assert(MI->getOperand(3).isReg() &&
+      "Argument 3 must be a register to call this function");
+  const unsigned samplerReg = MI->getOperand(3).getReg();
+
+  // Easy case: the register is known to hold a kernel argument.
+  const Value *regValue = lookupTable[samplerReg].second;
+  if (regValue && dyn_cast<Argument>(regValue)) {
+    return regValue->getName();
+  }
+
+  // Otherwise the sampler comes from memory.  The common situation is that
+  // optimizations (or mem2reg) are disabled and the sampler argument was
+  // spilled to a frame index, so the defining instruction is a private load.
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  assert(!MRI.def_empty(samplerReg)
+      && "We don't have any defs of this register, but we aren't an argument!");
+  MachineInstr *defMI = MRI.getRegUseDefListHead(samplerReg)->getParent();
+
+  if (!isPrivateInst(defMI) || !isLoadInst(defMI)) {
+    // FIXME: Handle the case where the def is not a private load.  This
+    // shouldn't occur, but the assertion makes sure that it doesn't.
+    assert(!"Found a case which we don't handle.");
+    return "unknown";
+  }
+
+  if (!defMI->getOperand(1).isFI()) {
+    // FIXME: Fix the case where someone dynamically loads a sampler value
+    // from private memory.  The sampler value must be known at compile time,
+    // which is impossible when it is loaded dynamically.
+    assert(!"Found a private load of a sampler that isn't from a frame index!");
+    return "unknown";
+  }
+
+  // NOTE(review): the map is named for frame indices but is indexed with the
+  // register number here -- confirm against the code that populates it.
+  RegValPair &fiEntry = FIToPtrMap[samplerReg];
+  if (fiEntry.second && dyn_cast<Argument>(fiEntry.second)) {
+    return fiEntry.second->getName();
+  }
+  // FIXME: Fix the case where the value stored is not a kernel argument.
+  assert(!"Found a private load of a sampler where the value isn't an argument!");
+  return "unknown";
+}
+
+const char*
+AMDILEGPointerManager::getPassName() const // human-readable name of this pass
+{
+ return "AMD IL EG Pointer Manager Pass"; // string literal, so the caller must not free it
+}
+
+// Reports whether MI is a region, local or private memory instruction whose
+// memory kind the device implements in hardware.  A NULL MI yields false.
+static bool
+isLRPInst(MachineInstr *MI,
+    const AMDILTargetMachine *ATM)
+{
+  const AMDILSubtarget *STM = ATM->getSubtargetImpl();
+  if (!MI) {
+    return false;
+  }
+  if (isRegionInst(MI)
+      && STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+    return true;
+  }
+  if (isLocalInst(MI)
+      && STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return true;
+  }
+  if (isPrivateInst(MI)
+      && STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+    return true;
+  }
+  return false;
+}
+
+/// Returns true when the I/O instruction MI uses global device memory: always for the GLOBAL* opcodes, and for
+/// REGION/LOCAL/CPOOL/CONSTANT/PRIVATE opcodes only when the device does not use hardware for that memory kind.
+static bool
+usesGlobal(
+ const AMDILTargetMachine *ATM,
+ MachineInstr *MI) {
+ const AMDILSubtarget *STM
+ = ATM->getSubtargetImpl();
+ switch(MI->getOpcode()) {
+ ExpandCaseToAllTypes(AMDIL::GLOBALSTORE); // presumably expands to case labels for each value type of the opcode -- macro defined elsewhere
+ ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE);
+ ExpandCaseToAllTypes(AMDIL::GLOBALLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::GLOBALAEXTLOAD);
+ return true; // global loads and stores always use global memory
+ ExpandCaseToAllTypes(AMDIL::REGIONLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONAEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::REGIONSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
+ return !STM->device()->usesHardware(AMDILDeviceInfo::RegionMem); // region access counts as global unless region memory is in hardware
+ ExpandCaseToAllTypes(AMDIL::LOCALLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::LOCALSTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
+ return !STM->device()->usesHardware(AMDILDeviceInfo::LocalMem); // local access counts as global unless local memory is in hardware
+ ExpandCaseToAllTypes(AMDIL::CPOOLLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTSEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTAEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::CONSTANTZEXTLOAD);
+ return !STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem); // constant-pool/constant loads: global unless constant memory is in hardware
+ ExpandCaseToAllTypes(AMDIL::PRIVATELOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD);
+ ExpandCaseToAllTypes(AMDIL::PRIVATESTORE);
+ ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
+ return !STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem); // private access counts as global unless private memory is in hardware
+ default:
+ return false; // any other opcode is not treated as a global memory access
+ }
+ return false; // not reached: every path through the switch returns
+}
+
+// Assigns the default resource ID for the kind of I/O that MI performs.
+//
+// The chosen ID is stored in curRes.bits.ResourceID and, for atomic
+// instructions, is also written into MI's last operand.  Depending on the
+// memory kind the function additionally records UAV, local or region usage
+// in the machine function info, and may set the HardwareInst or ByteStore
+// bits of curRes.  curRes is finally written back to MI through
+// setAsmPrinterFlags().  When mDebug is set, progress is printed to dbgs().
+static void
+allocateDefaultID(
+    const AMDILTargetMachine *ATM,
+    AMDILAS::InstrResEnc &curRes,
+    MachineInstr *MI,
+    bool mDebug)
+{
+  MachineFunction *MF = MI->getParent()->getParent();
+  AMDILMachineFunctionInfo *funcInfo = MF->getInfo<AMDILMachineFunctionInfo>();
+  const AMDILSubtarget *STM = ATM->getSubtargetImpl();
+  if (mDebug) {
+    dbgs() << "Assigning instruction to default ID. Inst:";
+    MI->dump();
+  }
+
+  if (usesGlobal(ATM, MI)) {
+    // Global memory: use the device's global resource ID.
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+    if (isAtomicInst(MI)) {
+      MI->getOperand(MI->getNumOperands()-1)
+        .setImm(curRes.bits.ResourceID);
+    }
+    AMDILKernelManager *kernelMgr =
+      (AMDILKernelManager*)STM->getKernelManager();
+    // NOTE(review): 8 is a hard-coded resource ID -- presumably the arena UAV;
+    // confirm against the device tables.
+    if (curRes.bits.ResourceID == 8
+        && !STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+      kernelMgr->setUAVID(NULL, curRes.bits.ResourceID);
+      funcInfo->uav_insert(curRes.bits.ResourceID);
+    }
+  } else if (isPrivateInst(MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+  } else if (isLocalInst(MI) || isLocalAtomic(MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::LDS_ID);
+    funcInfo->setUsesLocal();
+    if (isAtomicInst(MI)) {
+      assert(curRes.bits.ResourceID && "Atomic resource ID "
+          "cannot be zero!");
+      MI->getOperand(MI->getNumOperands()-1)
+        .setImm(curRes.bits.ResourceID);
+    }
+  } else if (isRegionInst(MI) || isRegionAtomic(MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::GDS_ID);
+    funcInfo->setUsesRegion();
+    if (isAtomicInst(MI)) {
+      assert(curRes.bits.ResourceID && "Atomic resource ID "
+          "cannot be zero!");
+      MI->getOperand(MI->getNumOperands()-1)
+        .setImm(curRes.bits.ResourceID);
+    }
+  } else if (isConstantInst(MI)) {
+    // For a constant instruction inside a kernel, a known base pointer picks
+    // the constant buffer; otherwise the default constant ID is used.
+    // FIXME: this should not require the base pointer to know what constant
+    // it is from.
+    AMDILGlobalManager *globalMgr =
+      (AMDILGlobalManager*)STM->getGlobalManager();
+    if (!globalMgr->isKernel(MF->getFunction()->getName())) {
+      // Not a kernel: fall back to the global resource and record the UAV.
+      if (isStoreInst(MI)) {
+        if (mDebug) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          MI->dump();
+        }
+        curRes.bits.ByteStore = 1;
+      }
+      curRes.bits.ResourceID =
+        STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+      AMDILKernelManager *kernelMgr =
+        (AMDILKernelManager*)STM->getKernelManager();
+      kernelMgr->setUAVID(NULL, curRes.bits.ResourceID);
+      funcInfo->uav_insert(curRes.bits.ResourceID);
+    } else {
+      const kernel &krnl =
+        globalMgr->getKernel(MF->getFunction()->getName());
+      const Value *basePtr = getBasePointerValue(MI);
+      if (!basePtr) {
+        if (isStoreInst(MI)) {
+          if (mDebug) {
+            dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+            MI->dump();
+          }
+          curRes.bits.ByteStore = 1;
+        }
+        curRes.bits.ResourceID =
+          STM->device()->getResourceID(AMDILDevice::CONSTANT_ID);
+      } else {
+        if (dyn_cast<AllocaInst>(basePtr)) {
+          // FIXME: Need a better way to fix this.  Requires a rewrite of how
+          // we lower global addresses to various address spaces.  For now,
+          // assume that only a single constant buffer can be accessed from a
+          // load that is derived from an alloca instruction.
+          curRes.bits.ResourceID = 2;
+        } else {
+          curRes.bits.ResourceID =
+            globalMgr->getConstPtrCB(krnl, basePtr->getName());
+        }
+        curRes.bits.HardwareInst = 1;
+      }
+    }
+  } else if (isAppendInst(MI)) {
+    // Append allocs use resource 1; every other append opcode uses 2.
+    const unsigned opc = MI->getOpcode();
+    const bool isAlloc = opc == AMDIL::APPEND_ALLOC
+      || opc == AMDIL::APPEND_ALLOC_NORET;
+    curRes.bits.ResourceID = isAlloc ? 1 : 2;
+  }
+  setAsmPrinterFlags(MI, curRes);
+}
+
+// Walks the arguments of MF's function and fills lookupTable with the
+// register -> (resource ID, argument) mapping, starting at register AMDIL::R1.
+//
+//  - Scalar and vector by-value arguments are recorded with a resource ID of
+//    ~0U, one table entry per register the argument occupies.
+//  - Opaque image struct pointers are added to imageSet; their table entry
+//    packs (cbNum << 16 | image index) so 'info' intrinsics can be encoded.
+//  - Opaque counter struct pointers are appended to counterSet.
+//  - Pointer arguments named in the "llvm.readonlypointer.annotations.<fn>"
+//    global are added to cacheablePtrs when the device supports cached memory.
+//  - Every other pointer is recorded with the device resource ID that matches
+//    its address space (unknown address spaces fall back to the global ID).
+//
+// When mDebug is set the decisions are printed to dbgs().
+// Returns the number of write-only images encountered.
+uint32_t
+parseArguments(MachineFunction &MF,
+    RVPVec &lookupTable,
+    const AMDILTargetMachine *ATM,
+    CacheableSet &cacheablePtrs,
+    ImageSet &imageSet,
+    AppendSet &counterSet,
+    bool mDebug)
+{
+  const AMDILSubtarget *STM
+    = ATM->getSubtargetImpl();
+  uint32_t writeOnlyImages = 0;
+  uint32_t readOnlyImages = 0;
+  std::string cachedKernelName = "llvm.readonlypointer.annotations.";
+  cachedKernelName.append(MF.getFunction()->getName());
+  GlobalVariable *GV = MF.getFunction()->getParent()
+    ->getGlobalVariable(cachedKernelName);
+  unsigned cbNum = 0;
+  unsigned regNum = AMDIL::R1;
+  AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  for (Function::const_arg_iterator I = MF.getFunction()->arg_begin(),
+      E = MF.getFunction()->arg_end(); I != E; ++I) {
+    const Argument *curArg = I;
+    if (mDebug) {
+      dbgs() << "Argument: ";
+      curArg->dump();
+    }
+    Type *curType = curArg->getType();
+    // We are either a scalar or vector type that
+    // is passed by value that is not a opaque/struct
+    // type. We just need to increment regNum
+    // the correct number of times to match the number
+    // of registers that it takes up.
+    if (curType->isFPOrFPVectorTy() ||
+        curType->isIntOrIntVectorTy()) {
+      // We are scalar, so increment once and
+      // move on
+      if (!curType->isVectorTy()) {
+        lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg);
+        ++regNum;
+        ++cbNum;
+        continue;
+      }
+      // The type was just checked to be a vector, so cast<> cannot fail.
+      VectorType *VT = cast<VectorType>(curType);
+      // We are a vector type. If we are 64bit type, then
+      // we increment length / 2 times, otherwise we
+      // increment length / 4 times. The only corner case
+      // is with vec3 where the vector gets scalarized and
+      // therefore we need a loop count of 3.
+      size_t loopCount = VT->getNumElements();
+      if (loopCount != 3) {
+        if (VT->getScalarSizeInBits() == 64) {
+          loopCount = loopCount >> 1;
+        } else {
+          loopCount = (loopCount + 2) >> 2;
+        }
+        cbNum += loopCount;
+      } else {
+        cbNum++;
+      }
+      while (loopCount--) {
+        lookupTable[regNum] = std::make_pair<unsigned, const Value*>(~0U, curArg);
+        ++regNum;
+      }
+    } else if (curType->isPointerTy()) {
+      // The type was just checked to be a pointer, so cast<> cannot fail.
+      Type *CT = cast<PointerType>(curType)->getElementType();
+      const StructType *ST = dyn_cast<StructType>(CT);
+      if (ST && ST->isOpaque()) {
+        StringRef name = ST->getName();
+        bool i1d_type  = name == "struct._image1d_t";
+        bool i1da_type = name == "struct._image1d_array_t";
+        bool i1db_type = name == "struct._image1d_buffer_t";
+        bool i2d_type  = name == "struct._image2d_t";
+        bool i2da_type = name == "struct._image2d_array_t";
+        bool i3d_type  = name == "struct._image3d_t";
+        bool c32_type  = name == "struct._counter32_t";
+        bool c64_type  = name == "struct._counter64_t";
+        if (i2d_type || i3d_type || i2da_type ||
+            i1d_type || i1db_type || i1da_type) {
+          imageSet.insert(I);
+          uint32_t imageNum = readOnlyImages + writeOnlyImages;
+          if (STM->getGlobalManager()
+              ->isReadOnlyImage(MF.getFunction()->getName(), imageNum)) {
+            if (mDebug) {
+              dbgs() << "Pointer: '" << curArg->getName()
+                << "' is a read only image # " << readOnlyImages << "!\n";
+            }
+            // We store the cbNum along with the image number so that we can
+            // correctly encode the 'info' intrinsics.
+            lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+              ((cbNum << 16 | readOnlyImages++), curArg);
+          } else if (STM->getGlobalManager()
+              ->isWriteOnlyImage(MF.getFunction()->getName(), imageNum)) {
+            if (mDebug) {
+              dbgs() << "Pointer: '" << curArg->getName()
+                << "' is a write only image # " << writeOnlyImages << "!\n";
+            }
+            // We store the cbNum along with the image number so that we can
+            // correctly encode the 'info' intrinsics.
+            lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+              ((cbNum << 16 | writeOnlyImages++), curArg);
+          } else {
+            assert(!"Read/Write images are not supported!");
+          }
+          ++regNum;
+          cbNum += 2;
+          continue;
+        } else if (c32_type || c64_type) {
+          if (mDebug) {
+            dbgs() << "Pointer: '" << curArg->getName()
+              << "' is a " << (c32_type ? "32" : "64")
+              << " bit atomic counter type!\n";
+          }
+          counterSet.push_back(I);
+        }
+      }
+
+      if (STM->device()->isSupported(AMDILDeviceInfo::CachedMem)
+          && GV && GV->hasInitializer()) {
+        const ConstantArray *nameArray
+          = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+        if (nameArray) {
+          for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) {
+            const GlobalVariable *gV = dyn_cast_or_null<GlobalVariable>(
+                nameArray->getOperand(x)->getOperand(0));
+            // An annotation entry that is not a global variable, or that has
+            // no initializer, carries no argument name; skip it instead of
+            // dereferencing a null pointer.
+            if (!gV || !gV->hasInitializer()) {
+              continue;
+            }
+            const ConstantArray *argName =
+              dyn_cast_or_null<ConstantArray>(gV->getInitializer());
+            if (!argName) {
+              continue;
+            }
+            std::string argStr = argName->getAsString();
+            std::string curStr = curArg->getNameStr();
+            // Compared as C strings so that anything after an embedded NUL in
+            // the annotation string is ignored.
+            if (!strcmp(argStr.data(), curStr.data())) {
+              if (mDebug) {
+                dbgs() << "Pointer: '" << curArg->getName()
+                  << "' is cacheable!\n";
+              }
+              cacheablePtrs.insert(curArg);
+            }
+          }
+        }
+      }
+      uint32_t as = cast<PointerType>(curType)->getAddressSpace();
+      // Handle the case where the kernel argument is a pointer
+      if (mDebug) {
+        dbgs() << "Pointer: " << curArg->getName() << " is assigned ";
+        if (as == AMDILAS::GLOBAL_ADDRESS) {
+          dbgs() << "uav " << STM->device()
+            ->getResourceID(AMDILDevice::GLOBAL_ID);
+        } else if (as == AMDILAS::PRIVATE_ADDRESS) {
+          dbgs() << "scratch " << STM->device()
+            ->getResourceID(AMDILDevice::SCRATCH_ID);
+        } else if (as == AMDILAS::LOCAL_ADDRESS) {
+          dbgs() << "lds " << STM->device()
+            ->getResourceID(AMDILDevice::LDS_ID);
+        } else if (as == AMDILAS::CONSTANT_ADDRESS) {
+          dbgs() << "cb " << STM->device()
+            ->getResourceID(AMDILDevice::CONSTANT_ID);
+        } else if (as == AMDILAS::REGION_ADDRESS) {
+          dbgs() << "gds " << STM->device()
+            ->getResourceID(AMDILDevice::GDS_ID);
+        } else {
+          assert(!"Found an address space that we don't support!");
+        }
+        dbgs() << " @ register " << regNum << ". Inst: ";
+        curArg->dump();
+      }
+      switch (as) {
+        default:
+          lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+            (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg);
+          break;
+        case AMDILAS::LOCAL_ADDRESS:
+          lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+            (STM->device()->getResourceID(AMDILDevice::LDS_ID), curArg);
+          mMFI->setHasLocalArg();
+          break;
+        case AMDILAS::REGION_ADDRESS:
+          lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+            (STM->device()->getResourceID(AMDILDevice::GDS_ID), curArg);
+          mMFI->setHasRegionArg();
+          break;
+        case AMDILAS::CONSTANT_ADDRESS:
+          lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+            (STM->device()->getResourceID(AMDILDevice::CONSTANT_ID), curArg);
+          break;
+        case AMDILAS::PRIVATE_ADDRESS:
+          lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+            (STM->device()->getResourceID(AMDILDevice::SCRATCH_ID), curArg);
+          break;
+      }
+      // In this case we need to increment it once.
+      ++regNum;
+      ++cbNum;
+    } else {
+      // Is anything missing that is legal in CL?
+      assert(0 && "Current type is not supported!");
+      lookupTable[regNum] = std::make_pair<unsigned, const Value*>
+        (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), curArg);
+      ++regNum;
+      ++cbNum;
+    }
+  }
+  return writeOnlyImages;
+}
+// Links the pointer information of a call's inputs to its return value.
+//
+// The call stack is interesting in that even in SSA form, it assigns
+// registers to the same values over and over again. So we need to
+// ignore the values that are assigned and just deal with the input
+// and return registers.
+//
+// On entry mBegin must point at the call instruction.  The instructions
+// before the call are scanned backwards for two-operand register copies that
+// set up the arguments; the instruction after the call is inspected for a
+// copy out of AMDIL::R1 (the return value).  If any input register has a
+// pointer recorded in lookupTable, the call's PointerPath bit is set and the
+// first such pointer is propagated to the return register and recorded in
+// InstToPtrMap.  On return mBegin points at the instruction following the
+// call.  ATM, PtrToInstMap and mEnd are not used.
+static void
+parseCall(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    RVPVec &lookupTable,
+    MachineBasicBlock::iterator &mBegin,
+    MachineBasicBlock::iterator mEnd,
+    bool mDebug)
+{
+  SmallVector<unsigned, 8> inputRegs;
+  AMDILAS::InstrResEnc curRes;
+  if (mDebug) {
+    dbgs() << "Parsing Call Stack Start.\n";
+  }
+  MachineBasicBlock::iterator callInst = mBegin;
+  MachineInstr *CallMI = callInst;
+  MachineBasicBlock *MBB = CallMI->getParent();
+  getAsmPrinterFlags(CallMI, curRes);
+  unsigned reg = AMDIL::R1;
+  // First we need to check the input registers.  There is nothing to scan
+  // when the call is the first instruction of its block, and stepping the
+  // iterator before begin() would be invalid.
+  if (callInst != MBB->begin()) {
+    MachineInstr *MI = --mBegin;
+    do {
+      // We stop if we hit the beginning of the call stack
+      // adjustment.
+      if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN
+          || MI->getOpcode() == AMDIL::ADJCALLSTACKUP
+          || MI->getNumOperands() != 2
+          || !MI->getOperand(0).isReg()) {
+        break;
+      }
+      reg = MI->getOperand(0).getReg();
+      if (MI->getOperand(1).isReg()) {
+        unsigned reg1 = MI->getOperand(1).getReg();
+        inputRegs.push_back(reg1);
+        if (lookupTable[reg1].second) {
+          curRes.bits.PointerPath = 1;
+        }
+      }
+      // The destination register is re-assigned for every call, so drop
+      // whatever was recorded for it.
+      lookupTable.erase(reg);
+      if ((signed)reg < 0
+          || mBegin == MBB->begin()) {
+        break;
+      }
+      MI = --mBegin;
+    } while (1);
+  }
+  mBegin = callInst;
+  ++mBegin;
+  // If the next instruction's operand 1 is not a register or that register
+  // is not R1, then we don't have any return values.  The same holds when the
+  // call is the last instruction of the block, in which case there is no next
+  // instruction to look at.
+  if (mBegin != MBB->end()) {
+    MachineInstr *MI = mBegin;
+    if (MI->getNumOperands() == 2
+        && MI->getOperand(1).isReg()
+        && MI->getOperand(1).getReg() == AMDIL::R1) {
+      // Next we check the output register.
+      reg = MI->getOperand(0).getReg();
+      // Now we link the inputs to the output.
+      for (unsigned x = 0; x < inputRegs.size(); ++x) {
+        if (lookupTable[inputRegs[x]].second) {
+          curRes.bits.PointerPath = 1;
+          lookupTable[reg] = lookupTable[inputRegs[x]];
+          InstToPtrMap[CallMI].insert(
+              lookupTable[reg].second);
+          break;
+        }
+      }
+      lookupTable.erase(MI->getOperand(1).getReg());
+    }
+  }
+  setAsmPrinterFlags(CallMI, curRes);
+  if (mDebug) {
+    dbgs() << "Parsing Call Stack End.\n";
+  }
+  return;
+}
+#if LLVM_VERSION < 2500
+// Legacy (LLVM_VERSION < 2500) call handling: walks [mBegin, mEnd) from the start of a call sequence until ADJCALLSTACKUP.
+// The call stack is interesting in that, even in SSA form, it assigns registers to the same values over and over again,
+// so the assigned values are ignored and only the input and return registers are tracked; pointers found in lookupTable
+// are recorded in InstToPtrMap and the call's PointerPath bit is set. NOTE(review): ATM and PtrToInstMap are unused here.
+static void
+parseCallStack(
+ const AMDILTargetMachine *ATM,
+ InstPMap &InstToPtrMap,
+ PtrIMap &PtrToInstMap,
+ RVPVec &lookupTable,
+ MachineBasicBlock::iterator &mBegin,
+ MachineBasicBlock::iterator mEnd,
+ bool mDebug)
+{
+ bool preCall = true; // true until the call instruction itself has been seen
+ SmallVector<unsigned, 8> inputRegs;
+ SmallVector<unsigned, 2> outputRegs;
+ MachineInstr *CallMI = NULL; // set when the call instruction is reached
+ AMDILAS::InstrResEnc curRes;
+ if (mDebug) {
+ dbgs() << "Parsing Call Stack Start.\n";
+ }
+ while (mBegin != mEnd) {
+ MachineInstr *MI = mBegin;
+ if (mDebug) {
+ MI->dump();
+ }
+ if (MI->getOpcode() == AMDIL::ADJCALLSTACKUP) { // end of the call sequence
+ ++mBegin;
+ MI = mBegin; // NOTE(review): dereferenced below without checking mBegin != mEnd
+ if (MI->getNumOperands() == 2
+ && MI->getOperand(1).isReg()
+ && MI->getOperand(1).getReg() < 1025) { // NOTE(review): 1025 is an unexplained register-number bound -- TODO confirm
+ // now that we have all the source registers and the call
+ // instructions, we need to get the output register and
+ // link them all together.
+ unsigned reg = MI->getOperand(0).getReg();
+ outputRegs.push_back(reg);
+ getAsmPrinterFlags(CallMI, curRes); // NOTE(review): CallMI is still NULL if no call preceded ADJCALLSTACKUP
+ for (unsigned x = 0; x < inputRegs.size(); ++x) {
+ if (lookupTable[inputRegs[x]].second) {
+ curRes.bits.PointerPath = 1;
+ InstToPtrMap[CallMI].insert(lookupTable[inputRegs[x]].second);
+ }
+ }
+ for (unsigned x = 0, y = 0; x < outputRegs.size(); ++x, ++y) { // x and y advance together, pairing output x with input x
+ if (lookupTable[outputRegs[x]].second) {
+ curRes.bits.PointerPath = 1;
+ InstToPtrMap[CallMI].insert(lookupTable[outputRegs[x]].second);
+ if (y < inputRegs.size()) {
+ lookupTable[outputRegs[x]] = lookupTable[inputRegs[y]];
+ }
+ }
+ }
+ lookupTable.erase(MI->getOperand(1).getReg());
+ setAsmPrinterFlags(CallMI, curRes);
+ if (mDebug) {
+ dbgs() << "Parsing Call Stack End.\n";
+ }
+ return; // mBegin is left on the instruction that copies the return value
+ }
+ // Otherwise there are no return values from this function call, so need
+ // to backup the iterator one instruction.
+ --mBegin;
+ if (mDebug) {
+ dbgs() << "Parsing Call Stack End.\n";
+ }
+ return; // mBegin is left on the ADJCALLSTACKUP instruction
+ }
+ if (MI->getDesc().isCall()) {
+ preCall = false; // instructions from here on are treated as return-value handling
+ CallMI = MI;
+ } else if (preCall) {
+ // We are handling the input registers
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg()) {
+ unsigned reg = MI->getOperand(1).getReg();
+ inputRegs.push_back(reg);
+ if (lookupTable[reg].second) {
+ InstToPtrMap[MI].insert(lookupTable[reg].second);
+ }
+ // We need to remove this register from the table
+ // as we don't want it linked to its original live-in
+ // register anymore.
+ lookupTable.erase(MI->getOperand(0).getReg());
+ }
+ } else {
+ // We are handling the return values
+ unsigned reg = MI->getOperand(0).getReg(); // NOTE(review): operand 0 is assumed to exist and be a register
+ outputRegs.push_back(reg);
+ if (lookupTable[reg].second) {
+ InstToPtrMap[MI].insert(lookupTable[reg].second);
+ }
+ }
+ ++mBegin;
+ }
+ assert(0 && "Should never reach here, call stack never ended!");
+ if (mDebug) {
+ dbgs() << "Parsing Call Stack End.\n";
+ }
+}
+#endif
+
+// Detect if the current instruction conflicts with another instruction
+// and add the instruction to the correct location accordingly.
+//
+// MI           - instruction being annotated.
+// curRes       - resource flags for MI; updated in place and always written
+//                back with setAsmPrinterFlags before returning.
+// lookupTable  - register table; lookupTable[reg].second is dereferenced on
+//                both paths below, so callers must only pass a reg whose
+//                entry has a non-NULL value.
+// InstToPtrMap - per-instruction pointer sets; only touched on the
+//                conflict path (PointerPath already set).
+// isLoadStore  - when true, the dstReg entry is neither overwritten nor
+//                added to the pointer set.
+// reg          - register whose table entry is being examined.
+// dstReg       - register that inherits reg's entry (non load/store only).
+// mDebug       - enables diagnostic output on dbgs().
+static void
+detectConflictInst(
+ MachineInstr *MI,
+ AMDILAS::InstrResEnc &curRes,
+ RVPVec &lookupTable,
+ InstPMap &InstToPtrMap,
+ bool isLoadStore,
+ unsigned reg,
+ unsigned dstReg,
+ bool mDebug)
+{
+ // If the instruction does not have a point path flag
+ // associated with it, then we know that no other pointer
+ // hits this instruction.
+ if (!curRes.bits.PointerPath) {
+ // Only a pointer-typed value puts the instruction on a pointer path.
+ if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
+ curRes.bits.PointerPath = 1;
+ }
+ // We don't want to transfer to the register number
+ // between load/store because the load dest can be completely
+ // different pointer path and the store doesn't have a real
+ // destination register.
+ if (!isLoadStore) {
+ if (mDebug) {
+ if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
+ dbgs() << "Pointer: " << lookupTable[reg].second->getName();
+ assert(dyn_cast<PointerType>(lookupTable[reg].second->getType())
+ && "Must be a pointer type for an instruction!");
+ // Print a label for the address space of the pointer.
+ switch (dyn_cast<PointerType>(
+ lookupTable[reg].second->getType())->getAddressSpace())
+ {
+ case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break;
+ case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break;
+ case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break;
+ case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break;
+ case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break;
+
+ }
+ dbgs() << lookupTable[reg].first << " Reg: " << reg
+ << " assigned to reg " << dstReg << ". Inst: ";
+ MI->dump();
+ }
+ }
+ // We don't want to do any copies if the register is not virtual
+ // as it is the result of a CALL. ParseCallInst handles the
+ // case where the input and output need to be linked up
+ // if it occurs. The easiest way to check for virtual
+ // is to check the top bit.
+ // NOTE(review): no virtual-register check is actually performed here;
+ // the copy below is unconditional. Either the comment above is stale
+ // or a check is missing -- confirm.
+ lookupTable[dstReg] = lookupTable[reg];
+ }
+ } else {
+ // The instruction is already on a pointer path: a second pointer-typed
+ // value reaching it is recorded as a conflict.
+ if (dyn_cast<PointerType>(lookupTable[reg].second->getType())) {
+ // Otherwise we have a conflict between two pointers somehow.
+ curRes.bits.ConflictPtr = 1;
+ if (mDebug) {
+ dbgs() << "Pointer: " << lookupTable[reg].second->getName();
+ assert(dyn_cast<PointerType>(lookupTable[reg].second->getType())
+ && "Must be a pointer type for a conflict instruction!");
+ switch (dyn_cast<PointerType>(
+ lookupTable[reg].second->getType())->getAddressSpace())
+ {
+ case AMDILAS::GLOBAL_ADDRESS: dbgs() << " UAV: "; break;
+ case AMDILAS::LOCAL_ADDRESS: dbgs() << " LDS: "; break;
+ case AMDILAS::REGION_ADDRESS: dbgs() << " GDS: "; break;
+ case AMDILAS::PRIVATE_ADDRESS: dbgs() << " SCRATCH: "; break;
+ case AMDILAS::CONSTANT_ADDRESS: dbgs() << " CB: "; break;
+
+ }
+ dbgs() << lookupTable[reg].first << " Reg: " << reg;
+ // List the values already recorded for this instruction.
+ if (InstToPtrMap[MI].size() > 1) {
+ dbgs() << " conflicts with:\n ";
+ for (PtrSet::iterator psib = InstToPtrMap[MI].begin(),
+ psie = InstToPtrMap[MI].end(); psib != psie; ++psib) {
+ dbgs() << "\t\tPointer: " << (*psib)->getName() << " ";
+ assert(dyn_cast<PointerType>((*psib)->getType())
+ && "Must be a pointer type for a conflict instruction!");
+ (*psib)->dump();
+ }
+ } else {
+ dbgs() << ".";
+ }
+ dbgs() << " Inst: ";
+ MI->dump();
+ }
+ }
+ // Add the conflicting values to the pointer set for the instruction
+ InstToPtrMap[MI].insert(lookupTable[reg].second);
+ // We don't want to add the destination register if
+ // we are a load or store.
+ // NOTE(review): lookupTable[dstReg].second is inserted without a NULL
+ // check -- confirm dstReg always has a known value on this path.
+ if (!isLoadStore) {
+ InstToPtrMap[MI].insert(lookupTable[dstReg].second);
+ }
+ }
+ setAsmPrinterFlags(MI, curRes);
+}
+
+// Handle a load instruction.
+//
+// The address operand (operand 1) decides what happens:
+//  - register:      its table entry supplies the base pointer value;
+//  - frame index:   the destination register inherits the entry that was
+//                   recorded for that frame index;
+//  - constant pool: the instruction is remembered in cpool.
+// Loads without a known base pointer, and loads for which isLRPInst()
+// is true, get the default resource ID and are not tracked any further.
+static void
+parseLoadInst(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    FIPMap &FIToPtrMap,
+    RVPVec &lookupTable,
+    CPoolSet &cpool,
+    BlockCacheableInfo &bci,
+    MachineInstr *MI,
+    bool mDebug)
+{
+  assert(isLoadInst(MI) && "Only a load instruction can be parsed by "
+      "the parseLoadInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+
+  const unsigned dstReg = MI->getOperand(0).getReg();
+  MachineOperand &addrOp = MI->getOperand(1);
+  unsigned idx = 0;
+  const Value *basePtr = NULL;
+  if (addrOp.isReg()) {
+    idx = addrOp.getReg();
+    basePtr = lookupTable[idx].second;
+  } else if (addrOp.isFI()) {
+    idx = addrOp.getIndex();
+    lookupTable[dstReg] = FIToPtrMap[idx];
+  } else if (addrOp.isCPI()) {
+    cpool.insert(MI);
+  }
+
+  // Hardware local / region / private accesses only ever use one resource
+  // ID, and an unknown base pointer cannot be tracked, so both cases are
+  // mapped to the default ID.
+  const bool useDefaultID = isLRPInst(MI, ATM) || !basePtr;
+  if (useDefaultID) {
+    allocateDefaultID(ATM, curRes, MI, mDebug);
+    return;
+  }
+
+  // Record the load against its base pointer in both directions.
+  InstToPtrMap[MI].insert(basePtr);
+  PtrToInstMap[basePtr].push_back(MI);
+
+  // For global loads, offer the instruction to the block's cacheable set;
+  // the block info decides whether it is really kept (an earlier store in
+  // the block prevents that).
+  if (isGlobalInst(MI)) {
+    bci.addPossiblyCacheableInst(MI);
+  }
+
+  if (mDebug) {
+    dbgs() << "Assigning instruction to pointer "
+      << basePtr->getName() << ". Inst: ";
+    MI->dump();
+  }
+  detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
+      idx, dstReg, mDebug);
+}
+
+// In this case we want to handle a store instruction.
+//
+// Operand 0 is treated as the data being stored and operand 1 as the
+// address. The function
+//  - flags the store (ConflictPtr) and records the value in conflictPtrs
+//    when a tracked pointer is itself being stored,
+//  - tells the block cacheable info that a store reaches the block exit
+//    unless isLRPInst() is true for the instruction,
+//  - records constant-pool / frame-index addressed stores and gives them
+//    the default resource ID,
+//  - otherwise links the store to the pointer found for the address
+//    register, marks sub-32-bit global stores as byte stores, and runs
+//    conflict detection.
+static void
+parseStoreInst(
+ const AMDILTargetMachine *ATM,
+ InstPMap &InstToPtrMap,
+ PtrIMap &PtrToInstMap,
+ FIPMap &FIToPtrMap,
+ RVPVec &lookupTable,
+ CPoolSet &cpool,
+ BlockCacheableInfo &bci,
+ MachineInstr *MI,
+ ByteSet &bytePtrs,
+ ConflictSet &conflictPtrs,
+ bool mDebug)
+{
+ assert(isStoreInst(MI) && "Only a store instruction can be parsed by "
+ "the parseStoreInst function.");
+ AMDILAS::InstrResEnc curRes;
+ getAsmPrinterFlags(MI, curRes);
+ // For a store, operand 0 is the value being written (see below).
+ unsigned dstReg = MI->getOperand(0).getReg();
+
+ // If the data part of the store instruction is known to
+ // be a pointer, then we need to mark this pointer as being
+ // a byte pointer. This is the conservative case that needs
+ // to be handled correctly.
+ // NOTE(review): ~0U in .first presumably marks an entry without a real
+ // resource ID -- confirm where the table is populated.
+ if (lookupTable[dstReg].second && lookupTable[dstReg].first != ~0U) {
+ curRes.bits.ConflictPtr = 1;
+ if (mDebug) {
+ dbgs() << "Found a case where the pointer is being stored!\n";
+ MI->dump();
+ dbgs() << "Pointer is ";
+ lookupTable[dstReg].second->print(dbgs());
+ dbgs() << "\n";
+ }
+ //PtrToInstMap[lookupTable[dstReg].second].push_back(MI);
+ if (lookupTable[dstReg].second->getType()->isPointerTy()) {
+ conflictPtrs.insert(lookupTable[dstReg].second);
+ }
+ }
+
+ // Before we go through the special cases, for the cacheable information
+ // all we care is if the store is global or not.
+ if (!isLRPInst(MI, ATM)) {
+ bci.setReachesExit();
+ }
+
+ // If the address is not a register address,
+ // then we need to lower it as an unknown id.
+ if (!MI->getOperand(1).isReg()) {
+ if (MI->getOperand(1).isCPI()) {
+ if (mDebug) {
+ dbgs() << "Found an instruction with a CPI index #"
+ << MI->getOperand(1).getIndex() << "!\n";
+ }
+ cpool.insert(MI);
+ } else if (MI->getOperand(1).isFI()) {
+ if (mDebug) {
+ dbgs() << "Found an instruction with a frame index #"
+ << MI->getOperand(1).getIndex() << "!\n";
+ }
+ // If we are a frame index and we are storing a pointer there, lets
+ // go ahead and assign the pointer to the location within the frame
+ // index map so that we can get the value out later.
+ FIToPtrMap[MI->getOperand(1).getIndex()] = lookupTable[dstReg];
+ }
+
+ allocateDefaultID(ATM, curRes, MI, mDebug);
+ return;
+ }
+ // From here on the address operand is known to be a register.
+ unsigned reg = MI->getOperand(1).getReg();
+ // If we don't know what value the register
+ // is assigned to, then we need to special case
+ // this instruction.
+ if (!lookupTable[reg].second) {
+ allocateDefaultID(ATM, curRes, MI, mDebug);
+ return;
+ }
+ // const Value *basePtr = lookupTable[reg].second;
+ // If we are a hardware local, then we don't need to track as there
+ // is only one resource ID that we need to know about, so we
+ // map it using allocateDefaultID, which maps it to the default.
+ // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
+ if (isLRPInst(MI, ATM)) {
+ allocateDefaultID(ATM, curRes, MI, mDebug);
+ return;
+ }
+
+ // We have a store instruction so we map this instruction
+ // to the pointer and insert it into the set of known
+ // store instructions.
+ InstToPtrMap[MI].insert(lookupTable[reg].second);
+ PtrToInstMap[lookupTable[reg].second].push_back(MI);
+ // Stores whose data operand uses an 8- or 16-bit register class and that
+ // target global memory are annotated as byte stores.
+ uint16_t RegClass = MI->getDesc().OpInfo[0].RegClass;
+ switch (RegClass) {
+ default:
+ break;
+ case AMDIL::GPRI8RegClassID:
+ case AMDIL::GPRV2I8RegClassID:
+ case AMDIL::GPRI16RegClassID:
+ if (usesGlobal(ATM, MI)) {
+ if (mDebug) {
+ dbgs() << "Annotating instruction as Byte Store. Inst: ";
+ MI->dump();
+ }
+ curRes.bits.ByteStore = 1;
+ setAsmPrinterFlags(MI, curRes);
+ const PointerType *PT = dyn_cast<PointerType>(
+ lookupTable[reg].second->getType());
+ if (PT) {
+ bytePtrs.insert(lookupTable[reg].second);
+ }
+ }
+ break;
+ };
+ // If we are a truncating store, then we need to determine the
+ // size of the pointer that we are truncating to, and if we
+ // are less than 32 bits, we need to mark the pointer as a
+ // byte store pointer.
+ // NOTE(review): unlike the register-class switch above, this path adds
+ // the value to bytePtrs without checking that it is pointer-typed --
+ // confirm the difference is intentional.
+ switch (MI->getOpcode()) {
+ case AMDIL::GLOBALTRUNCSTORE_i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+ case AMDIL::GLOBALTRUNCSTORE_i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+ case AMDIL::GLOBALTRUNCSTORE_i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE_i32i16:
+ case AMDIL::GLOBALTRUNCSTORE_i64i16:
+ case AMDIL::GLOBALSTORE_i8:
+ case AMDIL::GLOBALSTORE_i16:
+ curRes.bits.ByteStore = 1;
+ setAsmPrinterFlags(MI, curRes);
+ bytePtrs.insert(lookupTable[reg].second);
+ break;
+ default:
+ break;
+ }
+
+ if (mDebug) {
+ dbgs() << "Assigning instruction to pointer ";
+ dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
+ MI->dump();
+ }
+ // isLoadStore is true, so the table entry for dstReg is left untouched.
+ detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
+ reg, dstReg, mDebug);
+}
+
+// Handle an atomic instruction.
+//
+// Every operand except operand 0 is visited, last one first. Each register
+// operand that maps to a known value links the instruction to that value,
+// marks the block as having a store that reaches its exit, and is passed to
+// conflict detection. If no operand qualifies, the instruction receives the
+// default resource ID.
+static void
+parseAtomicInst(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    RVPVec &lookupTable,
+    BlockCacheableInfo &bci,
+    MachineInstr *MI,
+    ByteSet &bytePtrs,
+    bool mDebug)
+{
+  assert(isAtomicInst(MI) && "Only an atomic instruction can be parsed by "
+      "the parseAtomicInst function.");
+  AMDILAS::InstrResEnc curRes;
+  const unsigned dstReg = MI->getOperand(0).getReg();
+  getAsmPrinterFlags(MI, curRes);
+
+  bool found = false;
+  for (unsigned opIdx = MI->getNumOperands() - 1; opIdx != 0; --opIdx) {
+    MachineOperand &Op = MI->getOperand(opIdx);
+    if (!Op.isReg()) {
+      continue;
+    }
+    const unsigned reg = Op.getReg();
+    // Registers that are not known to hold a pointer are irrelevant.
+    const Value *ptr = lookupTable[reg].second;
+    if (!ptr) {
+      continue;
+    }
+    // Local, region and private accesses need no tracking. There are no
+    // private atomics, but the shared predicate is reused regardless.
+    if (isLRPInst(MI, ATM)) {
+      continue;
+    }
+    found = true;
+    InstToPtrMap[MI].insert(ptr);
+    PtrToInstMap[ptr].push_back(MI);
+
+    // An atomic on global memory counts as a store for the cacheable
+    // information.
+    bci.setReachesExit();
+
+    // Only do if have SC with arena atomic bug fix (EPR 326883).
+    // TODO: enable once SC with EPR 326883 has been promoted to CAL.
+    if (ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_150) {
+      // Pointers used by atomics are forced into the arena; if they could
+      // be accessed as RAW, all accesses would use the slow complete path.
+      if (mDebug) {
+        dbgs() << __LINE__ << ": Setting byte store bit on atomic instruction: ";
+        MI->dump();
+      }
+      curRes.bits.ByteStore = 1;
+      bytePtrs.insert(ptr);
+    }
+
+    if (mDebug) {
+      dbgs() << "Assigning instruction to pointer "
+        << ptr->getName() << ". Inst: ";
+      MI->dump();
+    }
+    detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
+        reg, dstReg, mDebug);
+  }
+
+  if (!found) {
+    allocateDefaultID(ATM, curRes, MI, mDebug);
+  }
+}
+// Handle an atomic counter (append) instruction.
+//
+// Operand 1 names the register to look up. When that register has no known
+// value the instruction gets the default resource ID; otherwise the
+// instruction and the value are linked in both maps and conflict detection
+// is run.
+static void
+parseAppendInst(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    RVPVec &lookupTable,
+    MachineInstr *MI,
+    bool mDebug)
+{
+  assert(isAppendInst(MI) && "Only an atomic counter instruction can be "
+      "parsed by the parseAppendInst function.");
+  AMDILAS::InstrResEnc curRes;
+  const unsigned dstReg = MI->getOperand(0).getReg();
+  const unsigned reg = MI->getOperand(1).getReg();
+  getAsmPrinterFlags(MI, curRes);
+
+  const Value *counter = lookupTable[reg].second;
+  if (counter) {
+    InstToPtrMap[MI].insert(counter);
+    PtrToInstMap[counter].push_back(MI);
+    if (mDebug) {
+      dbgs() << "Assigning instruction to pointer "
+        << counter->getName() << ". Inst: ";
+      MI->dump();
+    }
+    detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, true,
+        reg, dstReg, mDebug);
+    return;
+  }
+
+  // The register is not known to be owned by a pointer: use the default.
+  allocateDefaultID(ATM, curRes, MI, mDebug);
+}
+// In this case we want to handle an Image instruction.
+//
+// Image writes (IMAGE2D_WRITE / IMAGE3D_WRITE) look the image up through
+// the register in operand 0; all other image instructions use the register
+// in operand 1. The instruction is linked to the image value in both maps,
+// its ResourceID is derived from the table entry's .first field, and the
+// isImage flag is set. For image reads, operand 3 (the sampler) is replaced
+// by the sampler number returned by AMDILMachineFunctionInfo::addSampler.
+//
+// ATM          - target machine, forwarded to allocateDefaultID.
+// InstToPtrMap - instruction -> value set, updated for MI.
+// PtrToInstMap - value -> instruction list, updated for the image value.
+// FIToPtrMap   - frame-index table, forwarded to findSamplerName.
+// lookupTable  - register table used to find the image and sampler values.
+// MI           - the image instruction; operand 3 may be rewritten.
+// mDebug       - enables diagnostic output on dbgs().
+static void
+parseImageInst(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    FIPMap &FIToPtrMap,
+    RVPVec &lookupTable,
+    MachineInstr *MI,
+    bool mDebug)
+{
+  assert(isImageInst(MI) && "Only an image instruction can be "
+      "parsed by the parseImageInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  AMDILMachineFunctionInfo *mMFI = MI->getParent()->getParent()
+    ->getInfo<AMDILMachineFunctionInfo>();
+  if (MI->getOpcode() == AMDIL::IMAGE2D_WRITE
+      || MI->getOpcode() == AMDIL::IMAGE3D_WRITE) {
+    unsigned dstReg = MI->getOperand(0).getReg();
+    // The low 16 bits of .first are used as the resource ID for writes.
+    curRes.bits.ResourceID = lookupTable[dstReg].first & 0xFFFF;
+    curRes.bits.isImage = 1;
+    // NOTE(review): unlike the read path below, the value is not checked
+    // for NULL before it is inserted (and dereferenced in debug mode) --
+    // confirm image writes always have a known image value.
+    InstToPtrMap[MI].insert(lookupTable[dstReg].second);
+    PtrToInstMap[lookupTable[dstReg].second].push_back(MI);
+    if (mDebug) {
+      dbgs() << "Assigning instruction to pointer ";
+      dbgs() << lookupTable[dstReg].second->getName() << ". Inst: ";
+      MI->dump();
+    }
+  } else {
+    unsigned reg = MI->getOperand(1).getReg();
+
+    // If the register is not known to be owned by a pointer
+    // then we set it to the default
+    if (!lookupTable[reg].second) {
+      assert(!"This should not happen for images!");
+      allocateDefaultID(ATM, curRes, MI, mDebug);
+      return;
+    }
+    InstToPtrMap[MI].insert(lookupTable[reg].second);
+    PtrToInstMap[lookupTable[reg].second].push_back(MI);
+    if (mDebug) {
+      dbgs() << "Assigning instruction to pointer ";
+      dbgs() << lookupTable[reg].second->getName() << ". Inst: ";
+      MI->dump();
+    }
+    switch (MI->getOpcode()) {
+      case AMDIL::IMAGE2D_READ:
+      case AMDIL::IMAGE2D_READ_UNNORM:
+      case AMDIL::IMAGE3D_READ:
+      case AMDIL::IMAGE3D_READ_UNNORM:
+        curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF;
+        if (MI->getOperand(3).isReg()) {
+          // Our sampler is not a literal value. Prefer the name of the
+          // value the sampler register maps to and fall back to
+          // findSamplerName when that yields nothing.
+          std::string sampler_name = "";
+          unsigned samplerReg = MI->getOperand(3).getReg();
+          if (lookupTable[samplerReg].second) {
+            sampler_name = lookupTable[samplerReg].second->getName();
+          }
+          if (sampler_name.empty()) {
+            sampler_name = findSamplerName(MI, lookupTable, FIToPtrMap);
+          }
+          uint32_t val = mMFI->addSampler(sampler_name, ~0U);
+          if (mDebug) {
+            dbgs() << "Mapping kernel sampler " << sampler_name
+              << " to sampler number " << val << " for Inst:\n";
+            MI->dump();
+          }
+          MI->getOperand(3).ChangeToImmediate(val);
+        } else {
+          // Our sampler is known at runtime as a literal, lets make sure
+          // that the metadata for it is known.
+          char buffer[256];
+          memset(buffer, 0, sizeof(buffer));
+          sprintf(buffer,"_%d", (int32_t)MI->getOperand(3).getImm());
+          std::string sampler_name = std::string("unknown") + std::string(buffer);
+          uint32_t val = mMFI->addSampler(sampler_name, MI->getOperand(3).getImm());
+          if (mDebug) {
+            dbgs() << "Mapping internal sampler " << sampler_name
+              << " to sampler number " << val << " for Inst:\n";
+            MI->dump();
+          }
+          MI->getOperand(3).setImm(val);
+        }
+        break;
+      case AMDIL::IMAGE2D_INFO0:
+      case AMDIL::IMAGE3D_INFO0:
+        // The INFO queries take their resource ID from the upper 16 bits.
+        curRes.bits.ResourceID = lookupTable[reg].first >> 16;
+        break;
+      case AMDIL::IMAGE2D_INFO1:
+      case AMDIL::IMAGE2DA_INFO1:
+        curRes.bits.ResourceID = (lookupTable[reg].first >> 16) + 1;
+        break;
+      default:
+        // Other image opcodes keep whatever ResourceID the flags held.
+        break;
+    };
+    curRes.bits.isImage = 1;
+  }
+  setAsmPrinterFlags(MI, curRes);
+}
+// Handle every instruction that is not an atomic, load, store, append or
+// image instruction.
+//
+// Instructions with no operands, or whose operand 0 is not a register
+// (probably a branch or jump), are ignored. For LOADCONST_i32 the constant
+// is pushed into any image-read instruction that uses the defined register,
+// since it might be a sampler. Finally every operand except operand 0 is
+// visited, last one first: constant-pool operands register the instruction
+// in cpool, and register operands with a known value go through conflict
+// detection.
+static void
+parseInstruction(
+    const AMDILTargetMachine *ATM,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    RVPVec &lookupTable,
+    CPoolSet &cpool,
+    MachineInstr *MI,
+    bool mDebug)
+{
+  assert(!isAtomicInst(MI) && !isStoreInst(MI) && !isLoadInst(MI) &&
+      !isAppendInst(MI) && !isImageInst(MI) &&
+      "Atomic/Load/Store/Append/Image insts should not be handled here!");
+  const unsigned numOps = MI->getNumOperands();
+  if (!numOps || !MI->getOperand(0).isReg()) {
+    return;
+  }
+
+  if (MI->getOpcode() == AMDIL::LOADCONST_i32) {
+    uint32_t val = MI->getOperand(1).getImm();
+    // Walk the chain of operands that refer to the defined register. The
+    // successor is fetched before the current operand is rewritten to an
+    // immediate.
+    MachineOperand *next = NULL;
+    for (MachineOperand *use = MI->getOperand(0).getNextOperandForReg();
+        use; use = next) {
+      next = use->getNextOperandForReg();
+      switch (use->getParent()->getOpcode()) {
+        case AMDIL::IMAGE2D_READ:
+        case AMDIL::IMAGE2D_READ_UNNORM:
+        case AMDIL::IMAGE3D_READ:
+        case AMDIL::IMAGE3D_READ_UNNORM:
+          if (mDebug) {
+            dbgs() << "Found a constant sampler for image read inst: ";
+            use->getParent()->print(dbgs());
+          }
+          use->ChangeToImmediate(val);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  const unsigned dstReg = MI->getOperand(0).getReg();
+  for (unsigned opIdx = numOps - 1; opIdx != 0; --opIdx) {
+    MachineOperand &Op = MI->getOperand(opIdx);
+    if (Op.isReg()) {
+      const unsigned reg = Op.getReg();
+      // Only registers known to be owned by a pointer matter here.
+      if (lookupTable[reg].second) {
+        detectConflictInst(MI, curRes, lookupTable, InstToPtrMap, false,
+            reg, dstReg, mDebug);
+      }
+    } else if (Op.isCPI()) {
+      cpool.insert(MI);
+    }
+  }
+}
+
+// Walks one basic block and hands every instruction to the parser that
+// matches its kind. The call parsers receive the block iterator by
+// reference and may move it forward themselves.
+static void
+parseBasicBlock(
+    const AMDILTargetMachine *ATM,
+    MachineBasicBlock *MB,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    FIPMap &FIToPtrMap,
+    RVPVec &lookupTable,
+    ByteSet &bytePtrs,
+    ConflictSet &conflictPtrs,
+    CPoolSet &cpool,
+    BlockCacheableInfo &bci,
+    bool mDebug)
+{
+  MachineBasicBlock::iterator instIt = MB->begin();
+  MachineBasicBlock::iterator instEnd = MB->end();
+  for (; instIt != instEnd; ++instIt) {
+    MachineInstr *MI = instIt;
+    // Calls are recognised differently depending on the LLVM version.
+#if LLVM_VERSION < 2500
+    if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN) {
+      parseCallStack(ATM, InstToPtrMap, PtrToInstMap, lookupTable,
+          instIt, instEnd, mDebug);
+      continue;
+    }
+#else
+    if (MI->getOpcode() == AMDIL::CALL) {
+      parseCall(ATM, InstToPtrMap, PtrToInstMap, lookupTable,
+          instIt, instEnd, mDebug);
+      continue;
+    }
+#endif
+    if (isLoadInst(MI)) {
+      parseLoadInst(ATM, InstToPtrMap, PtrToInstMap,
+          FIToPtrMap, lookupTable, cpool, bci, MI, mDebug);
+      continue;
+    }
+    if (isStoreInst(MI)) {
+      parseStoreInst(ATM, InstToPtrMap, PtrToInstMap,
+          FIToPtrMap, lookupTable, cpool, bci, MI, bytePtrs, conflictPtrs, mDebug);
+      continue;
+    }
+    if (isAtomicInst(MI)) {
+      parseAtomicInst(ATM, InstToPtrMap, PtrToInstMap,
+          lookupTable, bci, MI, bytePtrs, mDebug);
+      continue;
+    }
+    if (isAppendInst(MI)) {
+      parseAppendInst(ATM, InstToPtrMap, PtrToInstMap,
+          lookupTable, MI, mDebug);
+      continue;
+    }
+    if (isImageInst(MI)) {
+      parseImageInst(ATM, InstToPtrMap, PtrToInstMap,
+          FIToPtrMap, lookupTable, MI, mDebug);
+      continue;
+    }
+    // Everything else.
+    parseInstruction(ATM, InstToPtrMap, PtrToInstMap,
+        lookupTable, cpool, MI, mDebug);
+  }
+}
+
+// Parses every basic block of the function in reverse post order and then
+// propagates the "store reaches" information across block boundaries.
+//
+// While visiting a block, its cacheable info is marked as reached from the
+// top as soon as one already-recorded predecessor has a store reaching its
+// exit. Blocks whose own store reaches the exit seed a worklist that is
+// drained afterwards to flood that flag into all successors.
+void
+parseFunction(
+    const AMDILPointerManager *PM,
+    const AMDILTargetMachine *ATM,
+    MachineFunction &MF,
+    InstPMap &InstToPtrMap,
+    PtrIMap &PtrToInstMap,
+    FIPMap &FIToPtrMap,
+    RVPVec &lookupTable,
+    ByteSet &bytePtrs,
+    ConflictSet &conflictPtrs,
+    CPoolSet &cpool,
+    MBBCacheableMap &mbbCacheable,
+    bool mDebug)
+{
+  typedef ReversePostOrderTraversal<MachineFunction*> BlockOrder;
+
+  if (mDebug) {
+    PM->getAnalysis<MachineDominatorTree>().dump();
+  }
+
+  std::list<MachineBasicBlock*> prop_worklist;
+
+  BlockOrder RPOT(&MF);
+  for (BlockOrder::rpo_iterator blockIt = RPOT.begin(),
+      blockEnd = RPOT.end(); blockIt != blockEnd; ++blockIt) {
+    MachineBasicBlock *MB = *blockIt;
+    BlockCacheableInfo &bci = mbbCacheable[MB];
+
+    // One predecessor with a store reaching its exit is enough.
+    for (MachineBasicBlock::pred_iterator predIt = MB->pred_begin(),
+        predEnd = MB->pred_end(); predIt != predEnd; ++predIt) {
+      MBBCacheableMap::const_iterator known = mbbCacheable.find(*predIt);
+      if (known == mbbCacheable.end()) {
+        continue;
+      }
+      if (!known->second.storeReachesExit()) {
+        continue;
+      }
+      bci.setReachesTop();
+      break;
+    }
+
+    if (mDebug) {
+      dbgs() << "[BlockOrdering] Parsing CurrentBlock: "
+        << MB->getNumber() << "\n";
+    }
+    parseBasicBlock(ATM, MB, InstToPtrMap, PtrToInstMap,
+        FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool, bci, mDebug);
+
+    if (bci.storeReachesExit()) {
+      prop_worklist.push_back(MB);
+    }
+
+    if (mDebug) {
+      dbgs() << "BCI info: Top: " << bci.storeReachesTop() << " Exit: "
+        << bci.storeReachesExit() << "\n Instructions:\n";
+      for (CacheableInstrSet::const_iterator instIt = bci.cacheableBegin(),
+          instEnd = bci.cacheableEnd(); instIt != instEnd; ++instIt) {
+        (*instIt)->dump();
+      }
+    }
+  }
+
+  // Push the flags into successor blocks until nothing changes any more, so
+  // that blocks reached by stores through loops are labelled as well.
+  while (!prop_worklist.empty()) {
+    MachineBasicBlock *wlb = prop_worklist.front();
+    prop_worklist.pop_front();
+    for (MachineBasicBlock::succ_iterator succIt = wlb->succ_begin(),
+        succEnd = wlb->succ_end(); succIt != succEnd; ++succIt) {
+      BlockCacheableInfo &blockCache = mbbCacheable[*succIt];
+      // A successor is queued only the first time its flag is set.
+      if (!blockCache.storeReachesTop()) {
+        blockCache.setReachesTop();
+        prop_worklist.push_back(*succIt);
+      }
+      if (mDebug) {
+        dbgs() << "BCI Prop info: " << (*succIt)->getNumber() << " Top: "
+          << blockCache.storeReachesTop() << " Exit: "
+          << blockCache.storeReachesExit()
+          << "\n";
+      }
+    }
+  }
+}
+
+// Debug helper: prints a "[Dump]<str> found:" header followed by a dump of
+// every element of the append set. Prints nothing for an empty set.
+void
+dumpPointers(AppendSet &Ptrs, const char *str)
+{
+  if (!Ptrs.empty()) {
+    dbgs() << "[Dump]" << str << " found: " << "\n";
+    AppendSet::iterator cur = Ptrs.begin();
+    AppendSet::iterator last = Ptrs.end();
+    while (cur != last) {
+      (*cur)->dump();
+      ++cur;
+    }
+    dbgs() << "\n";
+  }
+}
+// Debug helper: prints a "[Dump]<str> found:" header followed by a dump of
+// every element of the pointer set. Prints nothing for an empty set.
+void
+dumpPointers(PtrSet &Ptrs, const char *str)
+{
+  if (!Ptrs.empty()) {
+    dbgs() << "[Dump]" << str << " found: " << "\n";
+    PtrSet::iterator cur = Ptrs.begin();
+    PtrSet::iterator last = Ptrs.end();
+    while (cur != last) {
+      (*cur)->dump();
+      ++cur;
+    }
+    dbgs() << "\n";
+  }
+}
+// Function that detects all the conflicting pointers and adds
+// the pointers that are detected to the conflict set, otherwise
+// they are added to the raw or byte set based on their usage.
+//
+// For every instruction in InstToPtrMap (image instructions are skipped):
+//  - if any value in its set is already a byte pointer, every pointer-typed
+//    value in the set becomes a byte pointer;
+//  - otherwise each function Argument in the set is added to the raw set,
+//    to the conflict set as well when the instruction has more than one
+//    value, and to a local "aliased" set when the device does not support
+//    NoAlias and the argument lacks the noalias attribute.
+// Instructions for which isLRPInst() is true contribute nothing.
+// Finally, if any byte pointer exists, all aliased pointers are moved from
+// the raw set to the byte set.
+void
+detectConflictingPointers(
+ const AMDILTargetMachine *ATM,
+ InstPMap &InstToPtrMap,
+ ByteSet &bytePtrs,
+ RawSet &rawPtrs,
+ ConflictSet &conflictPtrs,
+ bool mDebug)
+{
+ if (InstToPtrMap.empty()) {
+ return;
+ }
+ // Arguments that may alias; resolved after the main loop.
+ PtrSet aliasedPtrs;
+ const AMDILSubtarget *STM = ATM->getSubtargetImpl();
+ for (InstPMap::iterator
+ mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end();
+ mapIter != iterEnd; ++mapIter) {
+ if (mDebug) {
+ dbgs() << "Instruction: ";
+ (mapIter)->first->dump();
+ }
+ MachineInstr* MI = mapIter->first;
+ AMDILAS::InstrResEnc curRes;
+ getAsmPrinterFlags(MI, curRes);
+ if (curRes.bits.isImage) {
+ continue;
+ }
+ bool byte = false;
+ // We might have a case where more than 1 pointers is going to the same
+ // I/O instruction
+ if (mDebug) {
+ dbgs() << "Base Pointer[s]:\n";
+ }
+ // First pass: is any value of this instruction already a byte pointer?
+ for (PtrSet::iterator cfIter = mapIter->second.begin(),
+ cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+ if (mDebug) {
+ (*cfIter)->dump();
+ }
+ if (bytePtrs.count(*cfIter)) {
+ if (mDebug) {
+ dbgs() << "Byte pointer found!\n";
+ }
+ byte = true;
+ break;
+ }
+ }
+ if (byte) {
+ // Byte-ness spreads to every pointer-typed value of the instruction.
+ // NOTE(review): ptr is dereferenced here without a NULL check, while
+ // the raw path below uses dyn_cast_or_null -- confirm the sets never
+ // contain NULL.
+ for (PtrSet::iterator cfIter = mapIter->second.begin(),
+ cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+ const Value *ptr = (*cfIter);
+ if (isLRPInst(mapIter->first, ATM)) {
+ // We don't need to deal with pointers to local/region/private
+ // memory regions
+ continue;
+ }
+ if (mDebug) {
+ dbgs() << "Adding pointer " << (ptr)->getName()
+ << " to byte set!\n";
+ }
+ const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+ if (PT) {
+ bytePtrs.insert(ptr);
+ }
+ }
+ } else {
+ for (PtrSet::iterator cfIter = mapIter->second.begin(),
+ cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+ const Value *ptr = (*cfIter);
+ // bool aliased = false;
+ if (isLRPInst(mapIter->first, ATM)) {
+ // We don't need to deal with pointers to local/region/private
+ // memory regions
+ continue;
+ }
+ // Only function arguments are classified on this path.
+ const Argument *arg = dyn_cast_or_null<Argument>(*cfIter);
+ if (!arg) {
+ continue;
+ }
+ if (!STM->device()->isSupported(AMDILDeviceInfo::NoAlias)
+ && !arg->hasNoAliasAttr()) {
+ if (mDebug) {
+ dbgs() << "Possible aliased pointer found!\n";
+ }
+ aliasedPtrs.insert(ptr);
+ }
+ // More than one value on the same instruction means a conflict.
+ if (mapIter->second.size() > 1) {
+ if (mDebug) {
+ dbgs() << "Adding pointer " << ptr->getName()
+ << " to conflict set!\n";
+ }
+ const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+ if (PT) {
+ conflictPtrs.insert(ptr);
+ }
+ }
+ if (mDebug) {
+ dbgs() << "Adding pointer " << ptr->getName()
+ << " to raw set!\n";
+ }
+ const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+ if (PT) {
+ rawPtrs.insert(ptr);
+ }
+ }
+ }
+ if (mDebug) {
+ dbgs() << "\n";
+ }
+ }
+ // If we have any aliased pointers and byte pointers exist,
+ // then make sure that all of the aliased pointers are
+ // part of the byte pointer set.
+ if (!bytePtrs.empty()) {
+ for (PtrSet::iterator aIter = aliasedPtrs.begin(),
+ aEnd = aliasedPtrs.end(); aIter != aEnd; ++aIter) {
+ if (mDebug) {
+ dbgs() << "Moving " << (*aIter)->getName()
+ << " from raw to byte.\n";
+ }
+ bytePtrs.insert(*aIter);
+ rawPtrs.erase(*aIter);
+ }
+ }
+}
+// Detects aliased constant pool operations.
+//
+// Starting from each instruction that has a constant-pool operand, the
+// instructions reachable through the operand chain of operand 0's register
+// are visited breadth first. A visited instruction that is both a load and
+// a private access is rewritten into the corresponding constant pool load;
+// any other instruction just contributes its users to the work queue.
+void
+detectAliasedCPoolOps(
+    TargetMachine &TM,
+    CPoolSet &cpool,
+    bool mDebug
+    )
+{
+  const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  if (mDebug && !cpool.empty()) {
+    dbgs() << "Instructions w/ CPool Ops: \n";
+  }
+  for (CPoolSet::iterator rootIt = cpool.begin(), rootEnd = cpool.end();
+      rootIt != rootEnd; ++rootIt) {
+    if (mDebug) {
+      (*rootIt)->dump();
+    }
+    std::queue<MachineInstr*> worklist;
+    std::set<MachineInstr*> visited;
+    worklist.push(*rootIt);
+    while (!worklist.empty()) {
+      MachineInstr *cur = worklist.front();
+      worklist.pop();
+      if (visited.count(cur)) {
+        continue;
+      }
+      if (isLoadInst(cur) && isPrivateInst(cur)) {
+        // A private load fed by a constant-pool value has to become a
+        // constant pool load.
+        if (mDebug) {
+          dbgs() << "Found an instruction that is a private load "
+            << "but should be a constant pool load.\n";
+          cur->print(dbgs());
+          dbgs() << "\n";
+        }
+        AMDILAS::InstrResEnc curRes;
+        getAsmPrinterFlags(cur, curRes);
+        curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+        curRes.bits.ConflictPtr = 1;
+        setAsmPrinterFlags(cur, curRes);
+        // The new opcode keeps the same offset from CPOOLAEXTLOAD_f32 that
+        // the old one had from PRIVATEAEXTLOAD_f32.
+        cur->setDesc(TM.getInstrInfo()->get(
+              (cur->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32)
+              + AMDIL::CPOOLAEXTLOAD_f32));
+      } else if (cur->getOperand(0).isReg()) {
+        // Queue the parents of the operands chained after operand 0,
+        // stopping at the first one that is a def or not a register.
+        for (MachineOperand *use = cur->getOperand(0).getNextOperandForReg();
+            use && !use->isDef() && use->isReg();
+            use = use->getNextOperandForReg()) {
+          worklist.push(use->getParent());
+        }
+      }
+      visited.insert(cur);
+    }
+  }
+}
+// Detects fully cacheable pointers: pointers that are never used by a
+// store, image or atomic instruction, that are not in the conflict set,
+// and only when the device reports NoAlias support.
+//
+// Nothing is done on HD4XXX devices or on devices without CachedMem
+// support.
+void
+detectFullyCacheablePointers(
+    const AMDILTargetMachine *ATM,
+    PtrIMap &PtrToInstMap,
+    RawSet &rawPtrs,
+    CacheableSet &cacheablePtrs,
+    ConflictSet &conflictPtrs,
+    bool mDebug
+    )
+{
+  if (PtrToInstMap.empty()) {
+    return;
+  }
+  const AMDILSubtarget *STM = ATM->getSubtargetImpl();
+  // 4XXX hardware doesn't support cached uav opcodes and we assume
+  // no aliasing for this to work.
+  if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
+      || !STM->device()->isSupported(AMDILDeviceInfo::CachedMem)) {
+    return;
+  }
+  if (!STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) {
+    return;
+  }
+
+  for (PtrIMap::iterator entry = PtrToInstMap.begin(),
+      entryEnd = PtrToInstMap.end(); entry != entryEnd; ++entry) {
+    if (mDebug) {
+      dbgs() << "Instruction: ";
+      entry->first->dump();
+    }
+    // Pointers that were already detected need no second look.
+    if (cacheablePtrs.count(entry->first)) {
+      continue;
+    }
+
+    // A single store, image or atomic user rules the pointer out.
+    bool cacheable = true;
+    std::vector<MachineInstr*>::iterator userIt = entry->second.begin();
+    std::vector<MachineInstr*>::iterator userEnd = entry->second.end();
+    while (cacheable && userIt != userEnd) {
+      if (isStoreInst(*userIt) ||
+          isImageInst(*userIt) ||
+          isAtomicInst(*userIt)) {
+        cacheable = false;
+      } else {
+        ++userIt;
+      }
+    }
+    if (!cacheable) {
+      continue;
+    }
+
+    // Pointers in the conflict set are skipped as well.
+    // FIXME: we need to check to see if the pointers that conflict with
+    // the current pointer are also cacheable. If they are, then add them
+    // to the cacheable list and not fail.
+    if (conflictPtrs.count(entry->first)) {
+      continue;
+    }
+
+    // No stores and no conflicts: the pointer can be cached.
+    if (mDebug) {
+      dbgs() << "Adding pointer " << entry->first->getName();
+      dbgs() << " to cached set!\n";
+    }
+    if (dyn_cast<PointerType>(entry->first->getType())) {
+      cacheablePtrs.insert(entry->first);
+    }
+  }
+}
+
+// Returns true when at least one pointer in cacheSet is also a member of
+// bytePtrs or of cacheablePtrs; returns false otherwise, including when
+// cacheSet is empty.
+static bool
+ptrSetIntersectsByteOrCache(
+    PtrSet &cacheSet,
+    ByteSet &bytePtrs,
+    CacheableSet &cacheablePtrs
+    )
+{
+  PtrSet::const_iterator cur = cacheSet.begin();
+  const PtrSet::const_iterator last = cacheSet.end();
+  while (cur != last) {
+    if (bytePtrs.count(*cur) || cacheablePtrs.count(*cur)) {
+      return true;
+    }
+    ++cur;
+  }
+  return false;
+}
+
+// Collects individually cacheable instructions into cacheableSet.
+//
+// Every instruction in the cacheable range of each entry of bbCacheable is
+// looked up in InstToPtrMap; it is inserted into cacheableSet unless one of
+// its pointers is in cacheablePtrs (those pointers are assumed to be marked
+// cacheable elsewhere) or in bytePtrs (byte pointers are not cacheable).
+// mDebug is accepted for signature symmetry and is not used.
+void
+detectCacheableInstrs(
+    MBBCacheableMap &bbCacheable,
+    InstPMap &InstToPtrMap,
+    CacheableSet &cacheablePtrs,
+    ByteSet &bytePtrs,
+    CacheableInstrSet &cacheableSet,
+    bool mDebug
+    )
+
+{
+  for (MBBCacheableMap::const_iterator block = bbCacheable.begin(),
+       blockEnd = bbCacheable.end(); block != blockEnd; ++block) {
+    CacheableInstrSet::const_iterator inst
+      = block->second.cacheableBegin();
+    const CacheableInstrSet::const_iterator instEnd
+      = block->second.cacheableEnd();
+    for (; inst != instEnd; ++inst) {
+      // Skip instructions that touch a byte pointer or a pointer that is
+      // already fully cacheable.
+      if (ptrSetIntersectsByteOrCache(InstToPtrMap[*inst],
+                                      bytePtrs,
+                                      cacheablePtrs)) {
+        continue;
+      }
+      cacheableSet.insert(*inst);
+    }
+  }
+}
+// Annotates the instructions of every cacheable pointer with the
+// CacheableRead bit.
+//
+// For each instruction recorded in PtrToInstMap for a pointer in
+// cacheablePtrs, the bit is set (and the resource ID forced to 11) only when
+// the device reports 11 as its RAW_UAV_ID; the used UAV is then recorded in
+// the machine function info. Instructions are left untouched otherwise.
+// numWriteImages is not used by this function.
+void
+annotateCacheablePtrs(
+    TargetMachine &TM,
+    PtrIMap &PtrToInstMap,
+    CacheableSet &cacheablePtrs,
+    ByteSet &bytePtrs,
+    uint32_t numWriteImages,
+    bool mDebug)
+{
+  const AMDILSubtarget *subtarget = &TM.getSubtarget<AMDILSubtarget>();
+  // Looked up lazily from the first instruction that needs it.
+  AMDILMachineFunctionInfo *funcInfo = NULL;
+  for (PtrSet::iterator ptr = cacheablePtrs.begin(),
+       ptrEnd = cacheablePtrs.end(); ptr != ptrEnd; ++ptr) {
+    assert(!bytePtrs.count(*ptr) && "Found a cacheable pointer "
+        "that also exists as a byte pointer!");
+    std::vector<MachineInstr*> &users = PtrToInstMap[*ptr];
+    for (std::vector<MachineInstr*>::iterator mi = users.begin(),
+         miLast = users.end(); mi != miLast; ++mi) {
+      if (mDebug) {
+        dbgs() << "Annotating pointer as cacheable. Inst: ";
+        (*mi)->dump();
+      }
+      AMDILAS::InstrResEnc flags;
+      getAsmPrinterFlags(*mi, flags);
+      assert(!flags.bits.ByteStore && "No cacheable pointers should have the "
+          "byte Store flag set!");
+      // Cached reads are only enabled when UAV11 is the raw UAV.
+      if (subtarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID) != 11) {
+        continue;
+      }
+      flags.bits.CacheableRead = 1;
+      flags.bits.ResourceID = 11;
+      setAsmPrinterFlags(*mi, flags);
+      if (!funcInfo) {
+        funcInfo = (*mi)->getParent()->getParent()
+          ->getInfo<AMDILMachineFunctionInfo>();
+      }
+      funcInfo->uav_insert(flags.bits.ResourceID);
+    }
+  }
+}
+
+// A byte pointer is a pointer that along the pointer path has a
+// byte store assigned to it.
+//
+// For every instruction recorded in PtrToInstMap for a pointer in bytePtrs
+// this picks a resource ID, stores it in the instruction's asm printer
+// flags, registers it with the kernel manager and records it in the machine
+// function info. The ID depends on the pointer's address space:
+//   - constant/local/region/private with hardware support: the matching
+//     device (or per-kernel constant buffer) resource ID;
+//   - anything else: the ByteStore bit is set and an arena UAV is used.
+// Entries of bytePtrs whose value is not of pointer type are skipped.
+// rawPtrs is only consulted by an assertion.
+void
+annotateBytePtrs(
+    TargetMachine &TM,
+    PtrIMap &PtrToInstMap,
+    ByteSet &bytePtrs,
+    RawSet &rawPtrs,
+    bool mDebug
+    )
+{
+  const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  // Arena UAV handed to noalias arguments. With arena segments it starts
+  // right after the reserved UAVs and advances once per byte pointer.
+  uint32_t arenaID = STM->device()
+    ->getResourceID(AMDILDevice::ARENA_UAV_ID);
+  if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+    arenaID = ARENA_SEGMENT_RESERVED_UAVS + 1;
+  }
+  // Looked up lazily from the first annotated instruction.
+  AMDILMachineFunctionInfo *mMFI = NULL;
+  for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end();
+      siBegin != siEnd; ++siBegin) {
+    const Value* val = (*siBegin);
+    const PointerType *PT = dyn_cast<PointerType>(val->getType());
+    if (!PT) {
+      continue;
+    }
+    // Non-null only when the pointer is a function argument.
+    const Argument *curArg = dyn_cast<Argument>(val);
+    assert(!rawPtrs.count(*siBegin) && "Found a byte pointer "
+        "that also exists as a raw pointer!");
+    bool arenaInc = false;
+    for (miBegin = PtrToInstMap[*siBegin].begin(),
+        miEnd = PtrToInstMap[*siBegin].end();
+        miBegin != miEnd; ++miBegin) {
+      if (mDebug) {
+        dbgs() << "Annotating pointer as arena. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        AMDILGlobalManager *GM = (AMDILGlobalManager*)STM->getGlobalManager();
+        const StringRef funcName = (*miBegin)->getParent()->getParent()
+          ->getFunction()->getName();
+        if (GM->isKernel(funcName)) {
+          const kernel &krnl = GM->getKernel(funcName);
+          curRes.bits.ResourceID = GM->getConstPtrCB(krnl,
+              (*siBegin)->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+            ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+          && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(*miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+              "cannot be zero!");
+          // The last operand of an atomic carries the resource ID.
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+          && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(*miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+              "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+          && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::SCRATCH_ID);
+      } else {
+        if (mDebug) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        curRes.bits.ByteStore = 1;
+        // noalias arguments get the per-pointer arena ID, everything else
+        // shares the device's default arena UAV.
+        curRes.bits.ResourceID = (curArg && curArg->hasNoAliasAttr()) ? arenaID
+          : STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+          arenaInc = true;
+        }
+        if (isAtomicInst(*miBegin) &&
+            STM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+          // If we are an arena instruction, we need to switch the atomic opcode
+          // from the global version to the arena version.
+          MachineInstr *MI = *miBegin;
+          MI->setDesc(
+              TM.getInstrInfo()->get(
+                (MI->getOpcode() - AMDIL::ATOM_G_ADD) + AMDIL::ATOM_A_ADD));
+        }
+        if (mDebug) {
+          dbgs() << "Annotating pointer as arena. Inst: ";
+          (*miBegin)->dump();
+        }
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(*siBegin, curRes.bits.ResourceID);
+      if (!mMFI) {
+        mMFI = (*miBegin)->getParent()->getParent()
+          ->getInfo<AMDILMachineFunctionInfo>();
+      }
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+    // Move on to the next arena segment for the next byte pointer.
+    if (arenaInc) {
+      ++arenaID;
+    }
+  }
+}
+// An append pointer is an opaque object that has append instructions
+// in its path.
+//
+// Each pointer in appendPtrs is given the next counter index, starting at 0.
+// Operand 1 of every APPEND_ALLOC(_NORET) and APPEND_CONSUME(_NORET)
+// instruction recorded for the pointer is rewritten to that index; other
+// instructions are skipped. A counter that is used by both alloc and consume
+// instructions adds an INCORRECT_COUNTER_USAGE error to the machine function
+// info. TM is not used by this function.
+void
+annotateAppendPtrs(
+    TargetMachine &TM,
+    PtrIMap &PtrToInstMap,
+    AppendSet &appendPtrs,
+    bool mDebug)
+{
+  unsigned currentCounter = 0;
+  // Taken from the first instruction seen; needed only to report errors.
+  MachineFunction *MF = NULL;
+  for (AppendSet::iterator asBegin = appendPtrs.begin(),
+      asEnd = appendPtrs.end(); asBegin != asEnd; ++asBegin)
+  {
+    bool usesWrite = false;
+    bool usesRead = false;
+    const Value* curVal = *asBegin;
+    if (mDebug) {
+      dbgs() << "Counter: " << curVal->getName()
+        << " assigned the counter " << currentCounter << "\n";
+    }
+    for (std::vector<MachineInstr*>::iterator
+        miBegin = PtrToInstMap[curVal].begin(),
+        miEnd = PtrToInstMap[curVal].end(); miBegin != miEnd; ++miBegin) {
+      MachineInstr *MI = *miBegin;
+      if (!MF) {
+        MF = MI->getParent()->getParent();
+      }
+      unsigned opcode = MI->getOpcode();
+      switch (opcode) {
+        default:
+          if (mDebug) {
+            dbgs() << "Skipping instruction: ";
+            MI->dump();
+          }
+          break;
+        case AMDIL::APPEND_ALLOC:
+        case AMDIL::APPEND_ALLOC_NORET:
+          usesWrite = true;
+          MI->getOperand(1).ChangeToImmediate(currentCounter);
+          if (mDebug) {
+            dbgs() << "Assigning to counter " << currentCounter << " Inst: ";
+            MI->dump();
+          }
+          break;
+        case AMDIL::APPEND_CONSUME:
+        case AMDIL::APPEND_CONSUME_NORET:
+          usesRead = true;
+          MI->getOperand(1).ChangeToImmediate(currentCounter);
+          if (mDebug) {
+            dbgs() << "Assigning to counter " << currentCounter << " Inst: ";
+            MI->dump();
+          }
+          break;
+      }
+    }
+    // A single counter may not be both appended to and consumed from.
+    if (usesWrite && usesRead && MF) {
+      MF->getInfo<AMDILMachineFunctionInfo>()->addErrorMsg(
+          amd::CompilerErrorMessage[INCORRECT_COUNTER_USAGE]);
+    }
+    ++currentCounter;
+  }
+}
+// A raw pointer is any pointer that does not have byte store in its path.
+//
+// For every instruction recorded in PtrToInstMap for a pointer in rawPtrs
+// this picks a resource ID and stores it in the instruction's asm printer
+// flags. The ID depends on the pointer's address space:
+//   - constant/local/region/private with hardware support: the matching
+//     device (or per-kernel constant buffer) resource ID;
+//   - otherwise, only when the device does NOT support MultiUAV: a raw UAV
+//     (or the arena UAV as a last resort), which is also registered with the
+//     kernel manager and the machine function info.
+// When MultiUAV is supported and no hardware address space matches, the
+// flags are written back with the resource ID unchanged.
+// Entries of rawPtrs whose value is not of pointer type are skipped.
+// bytePtrs is only consulted by an assertion.
+static void
+annotateRawPtrs(
+    TargetMachine &TM,
+    PtrIMap &PtrToInstMap,
+    RawSet &rawPtrs,
+    ByteSet &bytePtrs,
+    uint32_t numWriteImages,
+    bool mDebug
+    )
+{
+  const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  // Looked up lazily from the first instruction that needs it.
+  AMDILMachineFunctionInfo *mMFI = NULL;
+
+  // Now all of the raw pointers will go to the raw uav.
+  for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
+      siBegin != siEnd; ++siBegin) {
+    const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
+    if (!PT) {
+      continue;
+    }
+    assert(!bytePtrs.count(*siBegin) && "Found a raw pointer "
+        " that also exists as a byte pointers!");
+    for (miBegin = PtrToInstMap[*siBegin].begin(),
+        miEnd = PtrToInstMap[*siBegin].end();
+        miBegin != miEnd; ++miBegin) {
+      if (mDebug) {
+        dbgs() << "Annotating pointer as raw. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      if (curRes.bits.ConflictPtr) {
+        // A conflicting pointer may carry a stale byte store bit; drop it.
+        curRes.bits.ByteStore = 0;
+      } else {
+        assert(!curRes.bits.ByteStore
+            && "Found a instruction that is marked as "
+            "raw but has a byte store bit set!");
+      }
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        AMDILGlobalManager *GM = (AMDILGlobalManager*)STM->getGlobalManager();
+        const StringRef funcName = (*miBegin)->getParent()->getParent()
+          ->getFunction()->getName();
+        if (GM->isKernel(funcName)) {
+          const kernel &krnl = GM->getKernel(funcName);
+          curRes.bits.ResourceID = GM->getConstPtrCB(krnl,
+              (*siBegin)->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+            ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+          && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(*miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+              "cannot be zero!");
+          // The last operand of an atomic carries the resource ID.
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+          && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(*miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+              "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+          && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+          ->getResourceID(AMDILDevice::SCRATCH_ID);
+      } else if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+        // Multi uav is NOT supported here, so a single UAV is chosen:
+        //  - the raw uav when its ID is above the arena uav ID;
+        //  - else, while write-image slots remain, the larger of
+        //    numWriteImages and the raw uav ID;
+        //  - else the arena uav, with the byte store bit set.
+        if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
+            STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          curRes.bits.ResourceID = STM->device()
+            ->getResourceID(AMDILDevice::RAW_UAV_ID);
+        } else if (numWriteImages != OPENCL_MAX_WRITE_IMAGES) {
+          if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+              < numWriteImages) {
+            curRes.bits.ResourceID = numWriteImages;
+          } else {
+            curRes.bits.ResourceID = STM->device()
+              ->getResourceID(AMDILDevice::RAW_UAV_ID);
+          }
+        } else {
+          if (mDebug) {
+            dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+            (*miBegin)->print(dbgs());
+          }
+          curRes.bits.ByteStore = 1;
+          curRes.bits.ResourceID = STM->device()
+            ->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        }
+        if (isAtomicInst(*miBegin)) {
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+            .setImm(curRes.bits.ResourceID);
+          if (curRes.bits.ResourceID
+              == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+            assert(0 && "Found an atomic instruction that has "
+                "an arena uav id!");
+          }
+        }
+        KM->setUAVID(*siBegin, curRes.bits.ResourceID);
+        if (!mMFI) {
+          mMFI = (*miBegin)->getParent()->getParent()
+            ->getInfo<AMDILMachineFunctionInfo>();
+        }
+        mMFI->uav_insert(curRes.bits.ResourceID);
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+    }
+  }
+
+}
+
+// Marks each instruction in cacheableSet as a cacheable read.
+//
+// The CacheableRead bit is set, and the resource ID forced to 11, only when
+// the device reports 11 as its RAW_UAV_ID; otherwise the instruction's flags
+// are left as they are.
+void
+annotateCacheableInstrs(
+    TargetMachine &TM,
+    CacheableInstrSet &cacheableSet,
+    bool mDebug)
+{
+  const AMDILSubtarget *subtarget = &TM.getSubtarget<AMDILSubtarget>();
+
+  for (CacheableInstrSet::iterator inst = cacheableSet.begin(),
+       instEnd = cacheableSet.end(); inst != instEnd; ++inst) {
+    if (mDebug) {
+      dbgs() << "Annotating instr as cacheable. Inst: ";
+      (*inst)->dump();
+    }
+    AMDILAS::InstrResEnc flags;
+    getAsmPrinterFlags(*inst, flags);
+    // Cached reads are only enabled when UAV11 is the raw UAV.
+    if (subtarget->device()->getResourceID(AMDILDevice::RAW_UAV_ID) != 11) {
+      continue;
+    }
+    flags.bits.CacheableRead = 1;
+    flags.bits.ResourceID = 11;
+    setAsmPrinterFlags(*inst, flags);
+  }
+}
+
+// Annotates the instructions along the cacheable, byte and raw pointer
+// paths, in that order. Does nothing when PtrToInstMap is empty.
+static void
+annotatePtrPath(
+    TargetMachine &TM,
+    PtrIMap &PtrToInstMap,
+    RawSet &rawPtrs,
+    ByteSet &bytePtrs,
+    CacheableSet &cacheablePtrs,
+    uint32_t numWriteImages,
+    bool mDebug
+    )
+{
+  if (!PtrToInstMap.empty()) {
+    // 1. cacheable pointers
+    annotateCacheablePtrs(TM, PtrToInstMap, cacheablePtrs, bytePtrs,
+                          numWriteImages, mDebug);
+    // 2. byte pointers
+    annotateBytePtrs(TM, PtrToInstMap, bytePtrs, rawPtrs, mDebug);
+    // 3. raw pointers
+    annotateRawPtrs(TM, PtrToInstMap, rawPtrs, bytePtrs, numWriteImages,
+                    mDebug);
+  }
+}
+// Allocate MultiUAV pointer ID's for the raw/conflict pointers.
+//
+// Raw pointers in the global address space that are neither conflicting nor
+// cacheable each receive a UAV, starting at numWriteImages and counting up
+// while the ID stays below OPENCL_MAX_WRITE_IMAGES - 1; after that, or when
+// no write-image slot is free to begin with, the device RAW_UAV_ID is used.
+// Raw pointers in software-emulated local/constant/region/private address
+// spaces are moved into conflictPtrs; hardware private pointers get the
+// device SCRATCH_ID. Finally every global conflict pointer is assigned the
+// UAV that is current after the raw pointers were processed.
+// Each assignment is written to the asm printer flags of the pointer's
+// instructions, registered with the kernel manager and recorded in the
+// machine function info. Does nothing when PtrToInstMap is empty.
+static void
+allocateMultiUAVPointers(
+    MachineFunction &MF,
+    const AMDILTargetMachine *ATM,
+    PtrIMap &PtrToInstMap,
+    RawSet &rawPtrs,
+    ConflictSet &conflictPtrs,
+    CacheableSet &cacheablePtrs,
+    uint32_t numWriteImages,
+    bool mDebug)
+{
+  if (PtrToInstMap.empty()) {
+    return;
+  }
+  AMDILMachineFunctionInfo *mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  uint32_t curUAV = numWriteImages;
+  bool increment = true;
+  const AMDILSubtarget *STM
+    = ATM->getSubtargetImpl();
+  // If the write images already occupy every slot, there is nothing left to
+  // hand out, so every pointer goes to the device's raw UAV.
+  if (numWriteImages >= OPENCL_MAX_WRITE_IMAGES) {
+    curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+    increment = false;
+  }
+  AMDILKernelManager *KM = (AMDILKernelManager*)STM->getKernelManager();
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  // First lets handle the raw pointers.
+  for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
+      siBegin != siEnd; ++siBegin) {
+    assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type "
+        "to be processed at this point!");
+    const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
+    if (conflictPtrs.count(*siBegin) || !PT) {
+      continue;
+    }
+    // We only want to process global address space pointers
+    if (PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+      if ((PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
+            && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+          || (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
+            && STM->device()->usesSoftware(AMDILDeviceInfo::ConstantMem))
+          || (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS
+            && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))) {
+        // If we are using software emulated hardware features, then
+        // we need to specify that they use the raw uav and not
+        // zero-copy uav. The easiest way to do this is to assume they
+        // conflict with another pointer. Any pointer that conflicts
+        // with another pointer is assigned to the raw uav or the
+        // arena uav if no raw uav exists.
+        // PT is known to be non-null here, so no re-check is needed.
+        conflictPtrs.insert(*siBegin);
+      }
+      if (PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        if (STM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
+          // Software private memory is treated like a conflict pointer too.
+          conflictPtrs.insert(*siBegin);
+        } else {
+          if (mDebug) {
+            dbgs() << "Scratch Pointer '" << (*siBegin)->getName()
+              << "' being assigned uav "<<
+              STM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "\n";
+          }
+          for (miBegin = PtrToInstMap[*siBegin].begin(),
+              miEnd = PtrToInstMap[*siBegin].end();
+              miBegin != miEnd; ++miBegin) {
+            AMDILAS::InstrResEnc curRes;
+            getAsmPrinterFlags(*miBegin, curRes);
+            curRes.bits.ResourceID = STM->device()
+              ->getResourceID(AMDILDevice::SCRATCH_ID);
+            if (mDebug) {
+              dbgs() << "Updated instruction to bitmask ";
+              dbgs().write_hex(curRes.u16all);
+              dbgs() << " with ResID " << curRes.bits.ResourceID;
+              dbgs() << ". Inst: ";
+              (*miBegin)->dump();
+            }
+            setAsmPrinterFlags((*miBegin), curRes);
+            KM->setUAVID(*siBegin, curRes.bits.ResourceID);
+            mMFI->uav_insert(curRes.bits.ResourceID);
+          }
+        }
+      }
+      continue;
+    }
+    // If more than just UAV 11 is cacheable, then we can remove
+    // this check.
+    if (cacheablePtrs.count(*siBegin)) {
+      if (mDebug) {
+        dbgs() << "Raw Pointer '" << (*siBegin)->getName()
+          << "' is cacheable, not allocating a multi-uav for it!\n";
+      }
+      continue;
+    }
+    if (mDebug) {
+      dbgs() << "Raw Pointer '" << (*siBegin)->getName()
+        << "' being assigned uav " << curUAV << "\n";
+    }
+    // A pointer without instructions still has its UAV registered.
+    if (PtrToInstMap[*siBegin].empty()) {
+      KM->setUAVID(*siBegin, curUAV);
+      mMFI->uav_insert(curUAV);
+    }
+    // For all instructions here, we are going to set the new UAV to the curUAV
+    // number and not the value that it currently is set to.
+    for (miBegin = PtrToInstMap[*siBegin].begin(),
+        miEnd = PtrToInstMap[*siBegin].end();
+        miBegin != miEnd; ++miBegin) {
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      curRes.bits.ResourceID = curUAV;
+      if (isAtomicInst(*miBegin)) {
+        // The last operand of an atomic carries the resource ID.
+        (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        if (curRes.bits.ResourceID
+            == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          assert(0 && "Found an atomic instruction that has "
+              "an arena uav id!");
+        }
+      }
+      if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+        if (mDebug) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        // Accesses that end up on the arena UAV are byte stores and are
+        // never cached.
+        curRes.bits.ByteStore = 1;
+        curRes.bits.CacheableRead = 0;
+      }
+      if (mDebug) {
+        dbgs() << "Updated instruction to bitmask ";
+        dbgs().write_hex(curRes.u16all);
+        dbgs() << " with ResID " << curRes.bits.ResourceID;
+        dbgs() << ". Inst: ";
+        (*miBegin)->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(*siBegin, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+    // If we make it here, we can increment the uav counter if we are less
+    // than the max write image count. Otherwise we set it to the default
+    // UAV and leave it.
+    if (increment && curUAV < (OPENCL_MAX_WRITE_IMAGES - 1)) {
+      ++curUAV;
+    } else {
+      curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+      increment = false;
+    }
+  }
+  // NOTE(review): the literal 8 presumably mirrors OPENCL_MAX_WRITE_IMAGES;
+  // confirm before replacing it with the named constant.
+  if (numWriteImages == 8) {
+    curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+  }
+  // Now lets handle the conflict pointers
+  for (siBegin = conflictPtrs.begin(), siEnd = conflictPtrs.end();
+      siBegin != siEnd; ++siBegin) {
+    assert((*siBegin)->getType()->isPointerTy() && "We must be a pointer type "
+        "to be processed at this point!");
+    const PointerType *PT = dyn_cast<PointerType>((*siBegin)->getType());
+    // We only want to process global address space pointers
+    if (!PT || PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+      continue;
+    }
+    if (mDebug) {
+      dbgs() << "Conflict Pointer '" << (*siBegin)->getName()
+        << "' being assigned uav " << curUAV << "\n";
+    }
+    // A pointer without instructions still has its UAV registered.
+    if (PtrToInstMap[*siBegin].empty()) {
+      KM->setUAVID(*siBegin, curUAV);
+      mMFI->uav_insert(curUAV);
+    }
+    for (miBegin = PtrToInstMap[*siBegin].begin(),
+        miEnd = PtrToInstMap[*siBegin].end();
+        miBegin != miEnd; ++miBegin) {
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      curRes.bits.ResourceID = curUAV;
+      if (isAtomicInst(*miBegin)) {
+        (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        if (curRes.bits.ResourceID
+            == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          assert(0 && "Found an atomic instruction that has "
+              "an arena uav id!");
+        }
+      }
+      if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+        if (mDebug) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        curRes.bits.ByteStore = 1;
+      }
+      if (mDebug) {
+        dbgs() << "Updated instruction to bitmask ";
+        dbgs().write_hex(curRes.u16all);
+        dbgs() << " with ResID " << curRes.bits.ResourceID;
+        dbgs() << ". Inst: ";
+        (*miBegin)->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(*siBegin, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+  }
+}
+// Gives every load, store and atomic instruction in MF its default resource
+// ID by handing it, together with its current asm printer flags, to
+// allocateDefaultID. Everything that runs after this is just an
+// optimization to allocate resource ID's more efficiently.
+void
+allocateDefaultIDs(
+    const AMDILTargetMachine *ATM,
+    MachineFunction &MF,
+    bool mDebug)
+{
+  for (MachineFunction::iterator block = MF.begin(),
+       blockEnd = MF.end(); block != blockEnd; ++block) {
+    for (MachineBasicBlock::iterator inst = block->begin(),
+         instEnd = block->end(); inst != instEnd; ++inst) {
+      MachineInstr *MI = &*inst;
+      // Only memory-like instructions carry a resource ID.
+      if (!isLoadInst(MI) && !isStoreInst(MI) && !isAtomicInst(MI)) {
+        continue;
+      }
+      AMDILAS::InstrResEnc flags;
+      getAsmPrinterFlags(MI, flags);
+      allocateDefaultID(ATM, flags, MI, mDebug);
+    }
+  }
+}
+
+ bool
+AMDILEGPointerManager::runOnMachineFunction(MachineFunction &MF)
+{
+  // Pointer manager pass: assigns a resource (UAV) ID to every
+  // load/store/atomic in MF, then refines the assignment by classifying
+  // pointers as byte, raw, cacheable, conflicting, image or counter
+  // pointers. Returns true because the annotation steps below rewrite
+  // instruction flags, opcodes and operands.
+  const AMDILTargetMachine *ATM
+    = reinterpret_cast<const AMDILTargetMachine*>(&TM);
+  AMDILMachineFunctionInfo *mMFI =
+    MF.getInfo<AMDILMachineFunctionInfo>();
+  if (mDebug) {
+    dbgs() << getPassName() << "\n";
+    dbgs() << MF.getFunction()->getName() << "\n";
+    MF.dump();
+  }
+  // Start out by allocating the default ID's to all instructions in the
+  // function.
+  allocateDefaultIDs(ATM, MF, mDebug);
+
+  // A set of all pointers are tracked in this map and
+  // if multiple pointers are detected, they go to the same
+  // set.
+  PtrIMap PtrToInstMap;
+
+  // All of the instructions that are loads, stores or pointer
+  // conflicts are tracked in the map with a set of all values
+  // that reference the instruction stored.
+  InstPMap InstToPtrMap;
+
+  // In order to track across stack entries, we need a map between a
+  // frame index and a pointer. That way when we load from a frame
+  // index, we know what pointer was stored to the frame index.
+  FIPMap FIToPtrMap;
+
+  // Set of all the pointers that are byte pointers. Byte pointers
+  // are required to have their instructions go to the arena.
+  ByteSet bytePtrs;
+
+  // Set of all the pointers that are cacheable. All of the cache pointers
+  // are required to go to a raw uav and cannot go to arena.
+  CacheableSet cacheablePtrs;
+
+  // Set of all the pointers that go into a raw buffer. A pointer can
+  // exist in either rawPtrs or bytePtrs but not both.
+  RawSet rawPtrs;
+
+  // Set of all the pointers that end up having a conflicting instruction
+  // somewhere in the pointer path.
+  ConflictSet conflictPtrs;
+
+  // Set of all pointers that are images
+  ImageSet images;
+
+  // Set of all pointers that are counters
+  AppendSet counters;
+
+  // Set of all pointers that load from a constant pool
+  CPoolSet cpool;
+
+  // Mapping from BB to information about the cacheability of the
+  // global load instructions in it.
+  MBBCacheableMap bbCacheable;
+
+  // A set of load instructions that are cacheable
+  // even if all the load instructions of the ptr are not.
+  CacheableInstrSet cacheableSet;
+
+  // The lookup table holds all of the registers that
+  // are used as we assign pointers values to them.
+  // If two pointers collide on the lookup table, then
+  // we assign them to the same UAV. If one of the
+  // pointers is byte addressable, then we assign
+  // them to arena, otherwise we assign them to raw.
+  RVPVec lookupTable;
+
+  // First we need to go through all of the arguments and assign the
+  // live in registers to the lookup table and the pointer mapping.
+  uint32_t numWriteImages = parseArguments(MF, lookupTable, ATM,
+      cacheablePtrs, images, counters, mDebug);
+
+  // Lets do some error checking on the results of the parsing.
+  if (counters.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS) {
+    mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INSUFFICIENT_COUNTER_RESOURCES]);
+  }
+  // NOTE(review): the subtraction is unsigned, so this assumes
+  // numWriteImages <= images.size() -- confirm against parseArguments.
+  if (numWriteImages > OPENCL_MAX_WRITE_IMAGES
+      || (images.size() - numWriteImages > OPENCL_MAX_READ_IMAGES)) {
+    mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INSUFFICIENT_IMAGE_RESOURCES]);
+  }
+
+  // Now lets parse all of the instructions and update our
+  // lookup tables.
+  parseFunction(this, ATM, MF, InstToPtrMap, PtrToInstMap,
+      FIToPtrMap, lookupTable, bytePtrs, conflictPtrs, cpool,
+      bbCacheable, mDebug);
+
+  // We need to go over our pointer map and find all the conflicting
+  // pointers that have byte stores and put them in the bytePtr map.
+  // All conflicting pointers that don't have byte stores go into
+  // the rawPtr map.
+  detectConflictingPointers(ATM, InstToPtrMap, bytePtrs, rawPtrs,
+      conflictPtrs, mDebug);
+
+  // The next step is to detect whether the pointer should be added to
+  // the fully cacheable set or not. A pointer is marked as cacheable if
+  // no store instruction exists.
+  detectFullyCacheablePointers(ATM, PtrToInstMap, rawPtrs,
+      cacheablePtrs, conflictPtrs, mDebug);
+
+  // Disable partially cacheable for now when multiUAV is on.
+  // SC versions before SC139 have a bug that generates incorrect
+  // addressing for some cached accesses.
+  if (!ATM->getSubtargetImpl()
+      ->device()->isSupported(AMDILDeviceInfo::MultiUAV) &&
+      ATM->getSubtargetImpl()->calVersion() >= CAL_VERSION_SC_139) {
+    // Now we take the set of loads that have no reachable stores and
+    // create a list of additional instructions (those that aren't already
+    // in a cacheablePtr set) that are safe to mark as cacheable.
+    detectCacheableInstrs(bbCacheable, InstToPtrMap, cacheablePtrs,
+        bytePtrs, cacheableSet, mDebug);
+
+    // Annotate the additional instructions computed above as cacheable.
+    // Note that this should not touch any instructions annotated in
+    // annotatePtrPath.
+    annotateCacheableInstrs(TM, cacheableSet, mDebug);
+  }
+
+  // Now that we have detected everything we need to detect, lets go through
+  // and annotate the instructions along the pointer path for each of the
+  // various pointer types.
+  annotatePtrPath(TM, PtrToInstMap, rawPtrs, bytePtrs,
+      cacheablePtrs, numWriteImages, mDebug);
+
+  // Annotate the atomic counter path if any exists.
+  annotateAppendPtrs(TM, PtrToInstMap, counters, mDebug);
+
+  // If we support MultiUAV, then we need to determine how
+  // many write images exist so that way we know how many UAV are
+  // left to allocate to buffers.
+  if (ATM->getSubtargetImpl()
+      ->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+    // We now have (OPENCL_MAX_WRITE_IMAGES - numPtrs) buffers open for
+    // multi-uav allocation.
+    allocateMultiUAVPointers(MF, ATM, PtrToInstMap, rawPtrs,
+        conflictPtrs, cacheablePtrs, numWriteImages, mDebug);
+  }
+
+  // The last step is to detect if we have any alias constant pool operations.
+  // This is not likely, but does happen on occasion with double precision
+  // operations.
+  detectAliasedCPoolOps(TM, cpool, mDebug);
+  if (mDebug) {
+    dumpPointers(bytePtrs, "Byte Store Ptrs");
+    dumpPointers(rawPtrs, "Raw Ptrs");
+    dumpPointers(cacheablePtrs, "Cache Load Ptrs");
+    dumpPointers(counters, "Atomic Counters");
+    dumpPointers(images, "Images");
+  }
+  // Instructions may have been re-annotated or had their opcode/operands
+  // rewritten above, so conservatively report the function as modified.
+  return true;
+}
+
+// Default pointer manager, used for the 7XX series of cards: it only
+// assigns the default ID's to each load/store/atomic instruction and does
+// no further pointer analysis.
+bool
+AMDILPointerManager::runOnMachineFunction(MachineFunction &MF)
+{
+  if (mDebug) {
+    dbgs() << getPassName() << "\n";
+    dbgs() << MF.getFunction()->getName() << "\n";
+    MF.dump();
+  }
+  const AMDILTargetMachine *targetMachine
+    = reinterpret_cast<const AMDILTargetMachine*>(&TM);
+  // No special processing is needed on the 7XX; the default ID is enough.
+  allocateDefaultIDs(targetMachine, MF, mDebug);
+  // NOTE(review): always reports "not modified", although
+  // allocateDefaultIDs presumably updates instruction flags -- confirm
+  // whether this should return true.
+  return false;
+}
diff --git a/src/gallium/drivers/radeon/AMDILPointerManager.h b/src/gallium/drivers/radeon/AMDILPointerManager.h
new file mode 100644
index 00000000000..bdfe1d42b32
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILPointerManager.h
@@ -0,0 +1,249 @@
+//===-------- AMDILPointerManager.h - Manage Pointers for HW ------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL Pointer Manager is a class that does all the checking for
+// different pointer characteristics. Pointers have attributes that need
+// to be attached to them in order to correctly codegen them efficiently.
+// This class will analyze the pointers of a function and then traverse the uses
+// of the pointers and determine if a pointer can be cached, should belong in
+// the arena, and what UAV it should belong to. There are separate classes for
+// each unique generation of devices. This pass only works in SSA form.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_POINTER_MANAGER_H_
+#define _AMDIL_POINTER_MANAGER_H_
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "PointerManager"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include <set>
+#include <map>
+#include <list>
+#include <queue>
+
+namespace llvm {
+ class Value;
+ class MachineBasicBlock;
+ // Typedefing the multiple different set types so that it is
+ // easier to read what each set is supposed to handle. This
+ // also makes it easier to track which set goes to which
+ // argument in a function call.
+ typedef std::set<const Value*> PtrSet;
+
+ // A Byte set is the set of all base pointers that must
+ // be allocated to the arena path.
+ typedef PtrSet ByteSet;
+
+ // A Raw set is the set of all base pointers that can be
+ // allocated to the raw path.
+ typedef PtrSet RawSet;
+
+ // A cacheable set is the set of all base pointers that
+ // are deemed cacheable based on annotations or
+ // compiler options.
+ typedef PtrSet CacheableSet;
+
+ // A conflict set is a set of all base pointers whose
+ // use/def chains conflict with another base pointer.
+ typedef PtrSet ConflictSet;
+
+ // An image set is a set of all read/write only image pointers.
+ typedef PtrSet ImageSet;
+
+ // An append set holds atomic counter base pointers (a std::vector, not a std::set).
+ typedef std::vector<const Value*> AppendSet;
+
+ // A CPoolSet is a set of constant pool instructions.
+ typedef std::set<MachineInstr*> CPoolSet;
+
+ // A CacheableInstrSet is a set of instructions that are cacheable
+ // even if the pointer is not generally cacheable.
+ typedef std::set<MachineInstr*> CacheableInstrSet;
+
+ // A pair that maps a virtual register to the equivalent base
+ // pointer value that it was derived from.
+ typedef std::pair<unsigned, const Value*> RegValPair;
+
+ // A map that maps between the base pointer value and an array
+ // of instructions that are part of the pointer chain. A pointer
+ // chain is a recursive def/use chain of all instructions that don't
+ // store data to memory unless the pointer is the data being stored.
+ typedef std::map<const Value*, std::vector<MachineInstr*> > PtrIMap;
+
+ // A map that holds a set of all base pointers that are used in a machine
+ // instruction. This helps to detect when conflict pointers are found
+ // such as when pointer subtraction occurs.
+ typedef std::map<MachineInstr*, PtrSet> InstPMap;
+
+ // A map that holds the frame index to RegValPair so that writes of
+ // pointers to the stack can be tracked.
+ typedef std::map<unsigned, RegValPair > FIPMap;
+
+ // A map (despite the "Vec" name) that holds all of the register to base
+ // pointer mappings for a given function; key presumably the register - confirm.
+ typedef std::map<unsigned, RegValPair> RVPVec;
+
+
+
+ // Baseline pointer manager pass. It performs pointer resource
+ // allocation for the default IDs only, with no special processing.
+ class AMDILPointerManager : public MachineFunctionPass
+ {
+   public:
+     AMDILPointerManager(TargetMachine &tm, CodeGenOpt::Level OL);
+     virtual ~AMDILPointerManager();
+
+     // MachineFunctionPass interface.
+     virtual const char* getPassName() const;
+     virtual bool runOnMachineFunction(MachineFunction &F);
+     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+     // Pass identifier.
+     static char ID;
+
+   protected:
+     // When set, runOnMachineFunction prints debug output.
+     bool mDebug;
+
+   private:
+     // Target machine this pass was created for.
+     TargetMachine &TM;
+ }; // class AMDILPointerManager
+
+ // Pointer manager for Evergreen and Northern Islands devices.
+ // It allocates and tracks cached memory, arena resources and raw
+ // resources, and whether multi-UAV is utilized or not.
+ class AMDILEGPointerManager : public AMDILPointerManager
+ {
+   public:
+     AMDILEGPointerManager(TargetMachine &tm, CodeGenOpt::Level OL);
+     virtual ~AMDILEGPointerManager();
+
+     // Overrides of the base pointer manager.
+     virtual const char* getPassName() const;
+     virtual bool runOnMachineFunction(MachineFunction &F);
+
+   private:
+     // Own target machine reference; the base class member of the same
+     // name is private and therefore not accessible from here.
+     TargetMachine &TM;
+ }; // class AMDILEGPointerManager
+
+ // Per-basic-block cacheability bookkeeping. It is used during the parse
+ // phase of the pointer algorithm to track how far stores reach within
+ // a basic block.
+ class BlockCacheableInfo {
+   public:
+     // Starts with no store reaching the block and no candidate loads.
+     BlockCacheableInfo()
+       : mStoreReachesTop(false),
+         mStoreReachesExit(false),
+         mCacheableSet()
+     {}
+
+     // Flag accessors.
+     bool storeReachesTop() const { return mStoreReachesTop; }
+     bool storeReachesExit() const { return mStoreReachesExit; }
+
+     // Iteration over the loads currently recorded as cacheable.
+     CacheableInstrSet::const_iterator cacheableBegin() const
+     {
+       return mCacheableSet.begin();
+     }
+     CacheableInstrSet::const_iterator cacheableEnd() const
+     {
+       return mCacheableSet.end();
+     }
+
+     // Records that a global store reaches the top of this block. This
+     // also sets the "store reaches exit" flag. The first time the top
+     // flag is raised, the recorded loads are discarded (they are now
+     // reachable by a store).
+     // Returns true when the exit flag was not yet set before this call.
+     bool setReachesTop() {
+       const bool exitFlagWasClear = !mStoreReachesExit;
+
+       if (!mStoreReachesTop) {
+         mCacheableSet.clear();
+         mStoreReachesTop = true;
+       }
+       mStoreReachesExit = true;
+       return exitFlagWasClear;
+     }
+
+     // Records that a store reaches the exit of this block.
+     void setReachesExit() {
+       mStoreReachesExit = true;
+     }
+
+     // Adds the load to the cacheable set unless a store reaches the
+     // exit of the block or the load is volatile. A store reaching the
+     // top implies it reaches the exit, so only the exit flag is tested.
+     void addPossiblyCacheableInst(MachineInstr *load) {
+       const bool cacheable = !mStoreReachesExit && !isVolatileInst(load);
+       if (cacheable) {
+         mCacheableSet.insert(load);
+       }
+     }
+
+   private:
+     // Does a global store reach the top of this block?
+     bool mStoreReachesTop;
+     // Does a global store reach the exit of this block?
+     bool mStoreReachesExit;
+     // The loads in the block that are not reachable by a global store.
+     CacheableInstrSet mCacheableSet;
+ };
+ // Map from MachineBasicBlock to its cacheable load info.
+ typedef std::map<MachineBasicBlock*, BlockCacheableInfo> MBBCacheableMap;
+} // end llvm namespace
+#endif // _AMDIL_POINTER_MANAGER_H_
diff --git a/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp
new file mode 100644
index 00000000000..093f41449f7
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILPrintfConvert.cpp
@@ -0,0 +1,357 @@
+//===-- AMDILPrintfConvert.cpp - Printf Conversion pass --===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PrintfConvert"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include <cstdio>
+using namespace llvm;
+namespace
+{
+  // Function pass that rewrites calls to "printf" into calls to the
+  // ___dumpStringID / ___dumpBytes_* helper functions.
+  class LLVM_LIBRARY_VISIBILITY AMDILPrintfConvert : public FunctionPass
+  {
+    public:
+      // Target machine this pass was created for.
+      TargetMachine &TM;
+      // Pass identifier.
+      static char ID;
+
+      AMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL);
+      ~AMDILPrintfConvert();
+
+      // FunctionPass interface.
+      const char* getPassName() const;
+      bool runOnFunction(Function &F);
+      bool doInitialization(Module &M);
+      bool doFinalization(Module &M);
+      void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    private:
+      // Rewrites the instruction under the iterator if it is a printf call.
+      bool expandPrintf(BasicBlock::iterator *bbb);
+
+      // Refreshed at the start of every runOnFunction call.
+      AMDILMachineFunctionInfo *mMFI;
+      AMDILKernelManager *mKM;
+      // Set once a printf call has been rewritten in the current function.
+      bool mChanged;
+      // Cleared at the start of every runOnFunction call.
+      SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+  };
+  char AMDILPrintfConvert::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+  // Factory for the printf conversion pass. Returns a newly allocated
+  // pass object built from the given target machine and optimization level.
+  FunctionPass*
+  createAMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL)
+  {
+    FunctionPass *printfPass = new AMDILPrintfConvert(tm, OL);
+    return printfPass;
+  }
+} // llvm namespace
+// Builds the pass for the given target machine. OL is accepted for
+// interface compatibility but is not used here.
+// The pointer members and the change flag are given defined initial values
+// so that they never hold indeterminate values before runOnFunction
+// assigns them.
+AMDILPrintfConvert::AMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL)
+#if LLVM_VERSION >= 2500
+  : FunctionPass(ID), TM(tm),
+#else
+  : FunctionPass((intptr_t)&ID), TM(tm),
+#endif
+    mMFI(NULL), mKM(NULL), mChanged(false)
+{
+}
+// Nothing to release: the destructor body is intentionally empty.
+AMDILPrintfConvert::~AMDILPrintfConvert() {}
+// Rewrites one call to "printf" into calls to the ___dumpStringID and
+// ___dumpBytes_* helper functions, then replaces the original call with
+// a constant and erases it.
+//
+// bbb: iterator to the instruction to inspect. When the call is
+//      rewritten, the iterator is advanced past it before it is erased.
+// Returns false when the instruction is not a call, has no operands or
+// does not name "printf"; otherwise returns mChanged, which is set to
+// true here.
+bool
+AMDILPrintfConvert::expandPrintf(BasicBlock::iterator *bbb)
+{
+  Instruction *inst = (*bbb);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+  if (!CI) {
+    return false;
+  }
+  int num_ops = CI->getNumOperands();
+  if (!num_ops) {
+    return false;
+  }
+  // Which operand holds the callee depends on the LLVM version in use.
+#if LLVM_VERSION >= 2500
+  if (CI->getOperand(num_ops - 1)->getName() != "printf") {
+#else
+  if (CI->getOperand(0)->getName() != "printf") {
+#endif
+    return false;
+  }
+
+  Function *mF = inst->getParent()->getParent();
+  uint64_t bytes = 0;
+  mChanged = true;
+  // With a single operand there is nothing to print: replace the call
+  // with the constant 0 and drop it.
+  if (num_ops == 1) {
+    ++(*bbb);
+    Constant *newConst = ConstantInt::getSigned(CI->getType(), bytes);
+    CI->replaceAllUsesWith(newConst);
+    CI->eraseFromParent();
+    return mChanged;
+  }
+  // Deal with the string here
+#if LLVM_VERSION >= 2500
+  Value *op = CI->getOperand(0);
+#else
+  Value *op = CI->getOperand(1);
+#endif
+  ConstantExpr *GEPinst = dyn_cast<ConstantExpr>(op);
+  if (GEPinst) {
+    GlobalVariable *GVar
+      = dyn_cast<GlobalVariable>(GEPinst->getOperand(0));
+    std::string str = "unknown";
+    if (GVar && GVar->hasInitializer()) {
+      ConstantArray *CA
+        = dyn_cast<ConstantArray>(GVar->getInitializer());
+      // dyn_cast yields NULL when the initializer is not a ConstantArray;
+      // keep the "unknown" placeholder in that case instead of
+      // dereferencing a null pointer.
+      if (CA && CA->isString()) {
+        str = CA->getAsString();
+      }
+    }
+    // Register the format string and emit a call that dumps its ID.
+    uint64_t id = (uint64_t)mMFI->addPrintfString(str,
+        getAnalysis<MachineFunctionAnalysis>().getMF()
+        .getMMI().getObjFileInfo<AMDILModuleInfo>().get_printf_offset());
+    std::string name = "___dumpStringID";
+    Function *nF = NULL;
+    std::vector<Type*> types;
+    types.push_back(Type::getInt32Ty(mF->getContext()));
+    nF = mF->getParent()->getFunction(name);
+    if (!nF) {
+      nF = Function::Create(
+          FunctionType::get(
+            Type::getVoidTy(mF->getContext()), types, false),
+          GlobalValue::ExternalLinkage,
+          name, mF->getParent());
+    }
+    Constant *C = ConstantInt::get(
+        Type::getInt32Ty(mF->getContext()), id, false);
+    CallInst *nCI = CallInst::Create(nF, C);
+    nCI->insertBefore(CI);
+    bytes = strlen(str.data());
+    // Record the size of every printf argument against the format string.
+#if LLVM_VERSION >= 2500
+    for (uint32_t x = 1, y = num_ops - 1; x < y; ++x) {
+#else
+    for (uint32_t x = 2, y = num_ops; x < y; ++x) {
+#endif
+      op = CI->getOperand(x);
+      Type *oType = op->getType();
+      uint32_t eleCount = getNumElements(oType);
+      uint32_t eleSize = (uint32_t)GET_SCALAR_SIZE(oType);
+      if (!eleSize) {
+        // Default size is 32bits.
+        eleSize = 32;
+      }
+      if (!eleCount) {
+        // Default num elements is 1.
+        eleCount = 1;
+      }
+      uint32_t totalSize = eleCount * eleSize;
+#if LLVM_VERSION >= 2500
+      mMFI->addPrintfOperand(str, (x - 1),
+#else
+      mMFI->addPrintfOperand(str, (x - 2),
+#endif
+          (uint32_t)totalSize);
+    }
+  }
+  // Emit one ___dumpBytes_* call per printf argument.
+#if LLVM_VERSION >= 2500
+  for (uint32_t x = 1, y = num_ops - 1; x < y; ++x) {
+#else
+  for (uint32_t x = 2, y = num_ops; x < y; ++x) {
+#endif
+    op = CI->getOperand(x);
+    Type *oType = op->getType();
+    if (oType->isFPOrFPVectorTy()
+        && (oType->getTypeID() != Type::VectorTyID)) {
+      // Scalar floating point: bitcast to i32 (float) or i64 (otherwise).
+      Type *iType = NULL;
+      if (oType->isFloatTy()) {
+        iType = dyn_cast<Type>(
+            Type::getInt32Ty(oType->getContext()));
+      } else {
+        iType = dyn_cast<Type>(
+            Type::getInt64Ty(oType->getContext()));
+      }
+      op = new BitCastInst(op, iType, "printfBitCast", CI);
+    } else if (oType->getTypeID() == Type::VectorTyID) {
+      // Vectors: bitcast to an integer (or integer vector) type chosen
+      // from the element size and element count.
+      Type *iType = NULL;
+      uint32_t eleCount = getNumElements(oType);
+      uint32_t eleSize = (uint32_t)GET_SCALAR_SIZE(oType);
+      uint32_t totalSize = eleCount * eleSize;
+      switch (eleSize) {
+        default:
+          eleCount = totalSize / 64;
+          iType = dyn_cast<Type>(
+              Type::getInt64Ty(oType->getContext()));
+          break;
+        case 8:
+          if (eleCount >= 8) {
+            eleCount = totalSize / 64;
+            iType = dyn_cast<Type>(
+                Type::getInt64Ty(oType->getContext()));
+          } else if (eleCount >= 4) {
+            eleCount = 1;
+            iType = dyn_cast<Type>(
+                Type::getInt32Ty(oType->getContext()));
+          } else {
+            eleCount = 1;
+            iType = dyn_cast<Type>(
+                Type::getInt16Ty(oType->getContext()));
+          }
+          break;
+        case 16:
+          if (eleCount >= 4) {
+            eleCount = totalSize / 64;
+            iType = dyn_cast<Type>(
+                Type::getInt64Ty(oType->getContext()));
+          } else {
+            eleCount = 1;
+            iType = dyn_cast<Type>(
+                Type::getInt32Ty(oType->getContext()));
+          }
+          break;
+      }
+      if (eleCount > 1) {
+        iType = dyn_cast<Type>(
+            VectorType::get(iType, eleCount));
+      }
+      op = new BitCastInst(op, iType, "printfBitCast", CI);
+    }
+    char buffer[256];
+    uint32_t size = (uint32_t)GET_SCALAR_SIZE(oType);
+    if (size) {
+      sprintf(buffer, "___dumpBytes_v%db%u",
+          1,
+          (uint32_t)getNumElements(oType) * (uint32_t)size);
+    } else {
+      // NOTE(review): a zero scalar size is treated as "pointer" here and
+      // PT is dereferenced without a null check - confirm that no other
+      // zero-sized type can reach this branch.
+      const PointerType *PT = dyn_cast<PointerType>(oType);
+      if (PT->getAddressSpace() == 0 &&
+          GET_SCALAR_SIZE(PT->getContainedType(0)) == 8
+          && getNumElements(PT->getContainedType(0)) == 1) {
+        op = new BitCastInst(op,
+            Type::getInt8PtrTy(oType->getContext(),
+              AMDILAS::CONSTANT_ADDRESS),
+            "printfPtrCast", CI);
+
+        sprintf(buffer, "___dumpBytes_v%dbs", 1);
+      } else {
+        op = new PtrToIntInst(op,
+            Type::getInt32Ty(oType->getContext()),
+            "printfPtrCast", CI);
+        sprintf(buffer, "___dumpBytes_v1b32");
+      }
+    }
+    // Declare the helper on first use, then call it with the argument.
+    std::vector<Type*> types;
+    types.push_back(op->getType());
+    std::string name = buffer;
+    Function *nF = NULL;
+    nF = mF->getParent()->getFunction(name);
+    if (!nF) {
+      nF = Function::Create(
+          FunctionType::get(
+            Type::getVoidTy(mF->getContext()), types, false),
+          GlobalValue::ExternalLinkage,
+          name, mF->getParent());
+    }
+    CallInst *nCI = CallInst::Create(nF, op);
+    nCI->insertBefore(CI);
+    // NOTE(review): size is unsigned, so (size - 4) wraps around when
+    // size < 4 (e.g. the pointer case, where size is 0) - confirm intent.
+    bytes += (size - 4);
+  }
+  // Step past the call before erasing it so the caller's iterator stays
+  // valid, then replace all uses of the call with the byte count.
+  ++(*bbb);
+  Constant *newConst = ConstantInt::getSigned(CI->getType(), bytes);
+  CI->replaceAllUsesWith(newConst);
+  CI->eraseFromParent();
+  return mChanged;
+}
+// Pass entry point: refreshes the per-function state, then applies
+// expandPrintf across the function via safeNestedForEach.
+// Returns mChanged, which expandPrintf sets when it rewrites a call.
+bool
+AMDILPrintfConvert::runOnFunction(Function &MF)
+{
+  mChanged = false;
+
+  // Kernel manager comes from the AMDIL subtarget.
+  const AMDILSubtarget &subtarget = TM.getSubtarget<AMDILSubtarget>();
+  mKM = (AMDILKernelManager*)(subtarget.getKernelManager());
+
+  // Function info comes from the machine function analysis.
+  MachineFunction &machineFn
+    = getAnalysis<MachineFunctionAnalysis>().getMF();
+  mMFI = machineFn.getInfo<AMDILMachineFunctionInfo>();
+
+  bVecMap.clear();
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+      std::bind1st(
+        std::mem_fun(&AMDILPrintfConvert::expandPrintf),
+        this));
+  return mChanged;
+}
+
+// Returns the human-readable name of this pass.
+const char* AMDILPrintfConvert::getPassName() const
+{
+  return "AMDIL Printf Conversion Pass";
+}
+// No per-module setup is performed; always returns false.
+bool AMDILPrintfConvert::doInitialization(Module &M)
+{
+  return false;
+}
+
+// No per-module teardown is performed; always returns false.
+bool AMDILPrintfConvert::doFinalization(Module &M)
+{
+  return false;
+}
+
+// Declares the analyses this pass depends on: MachineFunctionAnalysis is
+// required, the FunctionPass defaults are applied, and all analyses are
+// marked as preserved.
+void AMDILPrintfConvert::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+}
diff --git a/src/gallium/drivers/radeon/AMDILProfiles.td b/src/gallium/drivers/radeon/AMDILProfiles.td
new file mode 100644
index 00000000000..1ba4ac088be
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILProfiles.td
@@ -0,0 +1,215 @@
+//===- AMDILProfiles.td - AMD IL Profiles ------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// These are used for custom selection dag type profiles
+
+//===----------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===----------------------------------------------------------------------===//
+// SDTCisDP - The specified operand has double type
+// Tablegen needs to be hacked to get this constraint to work
+//class SDTCisDP<int OpNum> : SDTypeConstraint<OpNum>;
+
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [ // 1 result, 1 operand, same type
+ SDTCisSameAs<0, 1>
+ ]>;
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ // 1 result, 2 operands, all the same type
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ // 1 result, 3 operands, all the same type
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenCMovLog : SDTypeProfile<1, 3, [ // 1 result, 3 operands: #1 is an integer; #0, #2, #3 share a type
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ // 1 result, 1 operand: #1 is the element type of vector #0
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+def SDTIL_GenVecExtract : SDTypeProfile<1, 2, [ // 1 result, 2 operands: #0 is the element type of vector #1; #2 is i32
+ SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32>
+ ]>;
+
+def SDTIL_GenVecInsert : SDTypeProfile<1, 4, [ // 1 result, 4 operands: #0 and #1 share a vector type, #2 is its element type; #3, #4 are i32
+ SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>
+ ]>;
+
+def SDTIL_GenVecShuffle : SDTypeProfile <1, 2, [ // 1 result, 2 operands: #0 and #1 share a type; #2 is i32
+ SDTCisSameAs<0, 1>, SDTCisVT<2, i32>
+ ]>;
+
+def SDTIL_GenVecConcat : SDTypeProfile <1, 2, [ // 1 result, 2 operands: both operands share a type
+ SDTCisSameAs<1, 2>
+ ]>;
+//===----------------------------------------------------------------------===//
+// Conversion Profile Types
+//===----------------------------------------------------------------------===//
+def SDTIL_DPToFPOp : SDTypeProfile<1, 1, [ // 1 result, 1 operand: both floating point, result narrower than operand
+ SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
+ ]>; // d2f
+
+def SDTIL_AnyToInt : SDTypeProfile<1, 1, [ // 1 result, 1 operand: result is an integer, operand unconstrained
+ SDTCisInt<0>
+ ]>;
+def SDTIL_IntToAny : SDTypeProfile<1, 1, [ // 1 result, 1 operand: operand is an integer, result unconstrained
+ SDTCisInt<1>
+ ]>;
+def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>; // 1 result, 1 operand, no type constraints
+//===----------------------------------------------------------------------===//
+// Scalar Profile Types
+//===----------------------------------------------------------------------===//
+
+// Add instruction pattern to handle offsets of memory operations
+def SDTIL_AddAddrri: SDTypeProfile<1, 2, [ // 1 result, 2 operands: #0 integer, #1 pointer, #2 same type as #0
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2>
+ ]>;
+def SDTIL_AddAddrir : SDTypeProfile<1, 2, [ // 1 result, 2 operands: #0 integer, #2 pointer, #1 same type as #0
+ SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1>
+ ]>;
+
+def SDTIL_LCreate : SDTypeProfile<1, 2, [ // i64 result from two i32 operands
+ SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_LCreate2 : SDTypeProfile<1, 2, [ // v2i64 result from two v2i32 operands
+ SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_LComp : SDTypeProfile<1, 1, [ // i32 result from one i64 operand
+ SDTCisVT<0, i32>, SDTCisVT<1, i64>
+ ]>;
+def SDTIL_LComp2 : SDTypeProfile<1, 1, [ // v2i32 result from one v2i64 operand
+ SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64>
+ ]>;
+def SDTIL_DCreate : SDTypeProfile<1, 2, [ // f64 result from two i32 operands
+ SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_DComp : SDTypeProfile<1, 1, [ // i32 result from one f64 operand
+ SDTCisVT<0, i32>, SDTCisVT<1, f64>
+ ]>;
+def SDTIL_DCreate2 : SDTypeProfile<1, 2, [ // v2f64 result from two v2i32 operands
+ SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_DComp2 : SDTypeProfile<1, 1, [ // v2i32 result from one v2f64 operand
+ SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64>
+ ]>;
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Profile for Normal Call: no result, one i32 operand
+def SDTIL_Call : SDTypeProfile<0, 1, [
+ SDTCisVT<0, i32>
+ ]>;
+// Branch instruction. NOTE(review): this said "second and third are basic blocks", but the profile has 2 operands and only constrains #0 (OtherVT) - confirm
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+// Comparison instruction: 1 result, 3 operands; #1 is i32 and #0, #2, #3 share a type
+def SDTIL_Cmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>, SDTCisVT<1, i32>
+ ]>;
+
+
+//===----------------------------------------------------------------------===//
+// Call Sequence Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_CallSeqStart : SDCallSeqStart< [ // call-sequence start: index 0 is constrained to i32
+ SDTCisVT<0, i32>
+ ]>;
+def SDTIL_CallSeqEnd : SDCallSeqEnd< [ // call-sequence end: indices 0 and 1 are constrained to i32
+ SDTCisVT<0, i32>, SDTCisVT<1, i32>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Image Operation Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_ImageRead : SDTypeProfile<1, 3, // v4i32 result; operands: pointer, i32, v4f32
+ [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>]>;
+def SDTIL_ImageWrite : SDTypeProfile<0, 3, // no result; operands: pointer, v2i32, v4i32
+ [SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>]>;
+def SDTIL_ImageWrite3D : SDTypeProfile<0, 3, // no result; operands: pointer, v4i32, v4i32
+ [SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>]>;
+def SDTIL_ImageInfo : SDTypeProfile<1, 1, // v4i32 result; operand: pointer
+ [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>]>;
+//===----------------------------------------------------------------------===//
+// Atomic Operation Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_UniAtomNoRet : SDTypeProfile<0, 2, [ // no result; operands: pointer, i32
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+ ]>;
+def SDTIL_BinAtomNoRet : SDTypeProfile<0, 3, [ // no result; operands: pointer, i32, i32
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>
+ ]>;
+def SDTIL_TriAtomNoRet : SDTypeProfile<0, 4, [ // no result; operands: pointer, i32, i32, i32
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
+ ]>;
+def SDTIL_UniAtom : SDTypeProfile<1, 2, [ // i32 result; operands: pointer, i32
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+ ]>;
+def SDTIL_BinAtom : SDTypeProfile<1, 3, [ // i32 result; operands: pointer, i32, i32
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>
+ ]>;
+def SDTIL_TriAtom : SDTypeProfile<1, 4, [ // i32 result; operands: pointer, i32, i32, i32
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>
+ ]>;
+
+def SDTIL_BinAtomFloat : SDTypeProfile<1, 3, [ // i32 result; operands: pointer, f32, f32
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, f32>, SDTCisVT<3, f32>
+ ]>;
+def SDTIL_BinAtomNoRetFloat : SDTypeProfile<0, 3, [ // no result; operands: pointer, f32, f32
+ SDTCisPtrTy<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>
+ ]>;
+
+def SDTIL_Append : SDTypeProfile<1, 1, [ // i32 result; operand: pointer
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>
+ ]>;
diff --git a/src/gallium/drivers/radeon/AMDILRegisterDefsScalar.td b/src/gallium/drivers/radeon/AMDILRegisterDefsScalar.td
new file mode 100644
index 00000000000..f08c175b3d3
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterDefsScalar.td
@@ -0,0 +1,816 @@
+def Rx1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>; // RxN/RyN/RzN/RwN all share the name string "rN" (presumably per-component views of one register - TODO confirm).
+def Ry1 : AMDILReg<2, "r1">, DwarfRegNum<[2]>; // In the rows of this table, AMDILReg's first argument and the DWARF number are N+0/1/2/3 for x/y/z/w,
+def Rz1 : AMDILReg<3, "r1">, DwarfRegNum<[3]>; // so the numbers repeat across neighbouring registers: Ry1 and Rx2 both use 2; Rz1, Ry2 and Rx3 all use 3.
+def Rw1 : AMDILReg<4, "r1">, DwarfRegNum<[4]>; // NOTE(review): confirm nothing depends on these numbers being unique per def.
+def Rx2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def Ry2 : AMDILReg<3, "r2">, DwarfRegNum<[3]>;
+def Rz2 : AMDILReg<4, "r2">, DwarfRegNum<[4]>;
+def Rw2 : AMDILReg<5, "r2">, DwarfRegNum<[5]>;
+def Rx3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def Ry3 : AMDILReg<4, "r3">, DwarfRegNum<[4]>;
+def Rz3 : AMDILReg<5, "r3">, DwarfRegNum<[5]>;
+def Rw3 : AMDILReg<6, "r3">, DwarfRegNum<[6]>;
+def Rx4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def Ry4 : AMDILReg<5, "r4">, DwarfRegNum<[5]>;
+def Rz4 : AMDILReg<6, "r4">, DwarfRegNum<[6]>;
+def Rw4 : AMDILReg<7, "r4">, DwarfRegNum<[7]>;
+def Rx5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def Ry5 : AMDILReg<6, "r5">, DwarfRegNum<[6]>;
+def Rz5 : AMDILReg<7, "r5">, DwarfRegNum<[7]>;
+def Rw5 : AMDILReg<8, "r5">, DwarfRegNum<[8]>;
+def Rx6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def Ry6 : AMDILReg<7, "r6">, DwarfRegNum<[7]>;
+def Rz6 : AMDILReg<8, "r6">, DwarfRegNum<[8]>;
+def Rw6 : AMDILReg<9, "r6">, DwarfRegNum<[9]>;
+def Rx7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def Ry7 : AMDILReg<8, "r7">, DwarfRegNum<[8]>;
+def Rz7 : AMDILReg<9, "r7">, DwarfRegNum<[9]>;
+def Rw7 : AMDILReg<10, "r7">, DwarfRegNum<[10]>;
+def Rx8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def Ry8 : AMDILReg<9, "r8">, DwarfRegNum<[9]>;
+def Rz8 : AMDILReg<10, "r8">, DwarfRegNum<[10]>;
+def Rw8 : AMDILReg<11, "r8">, DwarfRegNum<[11]>;
+def Rx9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def Ry9 : AMDILReg<10, "r9">, DwarfRegNum<[10]>;
+def Rz9 : AMDILReg<11, "r9">, DwarfRegNum<[11]>;
+def Rw9 : AMDILReg<12, "r9">, DwarfRegNum<[12]>;
+def Rx10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def Ry10 : AMDILReg<11, "r10">, DwarfRegNum<[11]>;
+def Rz10 : AMDILReg<12, "r10">, DwarfRegNum<[12]>;
+def Rw10 : AMDILReg<13, "r10">, DwarfRegNum<[13]>;
+def Rx11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def Ry11 : AMDILReg<12, "r11">, DwarfRegNum<[12]>;
+def Rz11 : AMDILReg<13, "r11">, DwarfRegNum<[13]>;
+def Rw11 : AMDILReg<14, "r11">, DwarfRegNum<[14]>;
+def Rx12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def Ry12 : AMDILReg<13, "r12">, DwarfRegNum<[13]>;
+def Rz12 : AMDILReg<14, "r12">, DwarfRegNum<[14]>;
+def Rw12 : AMDILReg<15, "r12">, DwarfRegNum<[15]>;
+def Rx13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def Ry13 : AMDILReg<14, "r13">, DwarfRegNum<[14]>;
+def Rz13 : AMDILReg<15, "r13">, DwarfRegNum<[15]>;
+def Rw13 : AMDILReg<16, "r13">, DwarfRegNum<[16]>;
+def Rx14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def Ry14 : AMDILReg<15, "r14">, DwarfRegNum<[15]>;
+def Rz14 : AMDILReg<16, "r14">, DwarfRegNum<[16]>;
+def Rw14 : AMDILReg<17, "r14">, DwarfRegNum<[17]>;
+def Rx15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def Ry15 : AMDILReg<16, "r15">, DwarfRegNum<[16]>;
+def Rz15 : AMDILReg<17, "r15">, DwarfRegNum<[17]>;
+def Rw15 : AMDILReg<18, "r15">, DwarfRegNum<[18]>;
+def Rx16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def Ry16 : AMDILReg<17, "r16">, DwarfRegNum<[17]>;
+def Rz16 : AMDILReg<18, "r16">, DwarfRegNum<[18]>;
+def Rw16 : AMDILReg<19, "r16">, DwarfRegNum<[19]>;
+def Rx17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def Ry17 : AMDILReg<18, "r17">, DwarfRegNum<[18]>;
+def Rz17 : AMDILReg<19, "r17">, DwarfRegNum<[19]>;
+def Rw17 : AMDILReg<20, "r17">, DwarfRegNum<[20]>;
+def Rx18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def Ry18 : AMDILReg<19, "r18">, DwarfRegNum<[19]>;
+def Rz18 : AMDILReg<20, "r18">, DwarfRegNum<[20]>;
+def Rw18 : AMDILReg<21, "r18">, DwarfRegNum<[21]>;
+def Rx19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def Ry19 : AMDILReg<20, "r19">, DwarfRegNum<[20]>;
+def Rz19 : AMDILReg<21, "r19">, DwarfRegNum<[21]>;
+def Rw19 : AMDILReg<22, "r19">, DwarfRegNum<[22]>;
+def Rx20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+def Ry20 : AMDILReg<21, "r20">, DwarfRegNum<[21]>;
+def Rz20 : AMDILReg<22, "r20">, DwarfRegNum<[22]>;
+def Rw20 : AMDILReg<23, "r20">, DwarfRegNum<[23]>;
+def Rx21 : AMDILReg<21, "r21">, DwarfRegNum<[21]>;
+def Ry21 : AMDILReg<22, "r21">, DwarfRegNum<[22]>;
+def Rz21 : AMDILReg<23, "r21">, DwarfRegNum<[23]>;
+def Rw21 : AMDILReg<24, "r21">, DwarfRegNum<[24]>;
+def Rx22 : AMDILReg<22, "r22">, DwarfRegNum<[22]>;
+def Ry22 : AMDILReg<23, "r22">, DwarfRegNum<[23]>;
+def Rz22 : AMDILReg<24, "r22">, DwarfRegNum<[24]>;
+def Rw22 : AMDILReg<25, "r22">, DwarfRegNum<[25]>;
+def Rx23 : AMDILReg<23, "r23">, DwarfRegNum<[23]>;
+def Ry23 : AMDILReg<24, "r23">, DwarfRegNum<[24]>;
+def Rz23 : AMDILReg<25, "r23">, DwarfRegNum<[25]>;
+def Rw23 : AMDILReg<26, "r23">, DwarfRegNum<[26]>;
+def Rx24 : AMDILReg<24, "r24">, DwarfRegNum<[24]>;
+def Ry24 : AMDILReg<25, "r24">, DwarfRegNum<[25]>;
+def Rz24 : AMDILReg<26, "r24">, DwarfRegNum<[26]>;
+def Rw24 : AMDILReg<27, "r24">, DwarfRegNum<[27]>;
+def Rx25 : AMDILReg<25, "r25">, DwarfRegNum<[25]>;
+def Ry25 : AMDILReg<26, "r25">, DwarfRegNum<[26]>;
+def Rz25 : AMDILReg<27, "r25">, DwarfRegNum<[27]>;
+def Rw25 : AMDILReg<28, "r25">, DwarfRegNum<[28]>;
+def Rx26 : AMDILReg<26, "r26">, DwarfRegNum<[26]>;
+def Ry26 : AMDILReg<27, "r26">, DwarfRegNum<[27]>;
+def Rz26 : AMDILReg<28, "r26">, DwarfRegNum<[28]>;
+def Rw26 : AMDILReg<29, "r26">, DwarfRegNum<[29]>;
+def Rx27 : AMDILReg<27, "r27">, DwarfRegNum<[27]>;
+def Ry27 : AMDILReg<28, "r27">, DwarfRegNum<[28]>;
+def Rz27 : AMDILReg<29, "r27">, DwarfRegNum<[29]>;
+def Rw27 : AMDILReg<30, "r27">, DwarfRegNum<[30]>;
+def Rx28 : AMDILReg<28, "r28">, DwarfRegNum<[28]>;
+def Ry28 : AMDILReg<29, "r28">, DwarfRegNum<[29]>;
+def Rz28 : AMDILReg<30, "r28">, DwarfRegNum<[30]>;
+def Rw28 : AMDILReg<31, "r28">, DwarfRegNum<[31]>;
+def Rx29 : AMDILReg<29, "r29">, DwarfRegNum<[29]>;
+def Ry29 : AMDILReg<30, "r29">, DwarfRegNum<[30]>;
+def Rz29 : AMDILReg<31, "r29">, DwarfRegNum<[31]>;
+def Rw29 : AMDILReg<32, "r29">, DwarfRegNum<[32]>;
+def Rx30 : AMDILReg<30, "r30">, DwarfRegNum<[30]>;
+def Ry30 : AMDILReg<31, "r30">, DwarfRegNum<[31]>;
+def Rz30 : AMDILReg<32, "r30">, DwarfRegNum<[32]>;
+def Rw30 : AMDILReg<33, "r30">, DwarfRegNum<[33]>;
+def Rx31 : AMDILReg<31, "r31">, DwarfRegNum<[31]>;
+def Ry31 : AMDILReg<32, "r31">, DwarfRegNum<[32]>;
+def Rz31 : AMDILReg<33, "r31">, DwarfRegNum<[33]>;
+def Rw31 : AMDILReg<34, "r31">, DwarfRegNum<[34]>;
+def Rx32 : AMDILReg<32, "r32">, DwarfRegNum<[32]>;
+def Ry32 : AMDILReg<33, "r32">, DwarfRegNum<[33]>;
+def Rz32 : AMDILReg<34, "r32">, DwarfRegNum<[34]>;
+def Rw32 : AMDILReg<35, "r32">, DwarfRegNum<[35]>;
+def Rx33 : AMDILReg<33, "r33">, DwarfRegNum<[33]>;
+def Ry33 : AMDILReg<34, "r33">, DwarfRegNum<[34]>;
+def Rz33 : AMDILReg<35, "r33">, DwarfRegNum<[35]>;
+def Rw33 : AMDILReg<36, "r33">, DwarfRegNum<[36]>;
+def Rx34 : AMDILReg<34, "r34">, DwarfRegNum<[34]>;
+def Ry34 : AMDILReg<35, "r34">, DwarfRegNum<[35]>;
+def Rz34 : AMDILReg<36, "r34">, DwarfRegNum<[36]>;
+def Rw34 : AMDILReg<37, "r34">, DwarfRegNum<[37]>;
+def Rx35 : AMDILReg<35, "r35">, DwarfRegNum<[35]>;
+def Ry35 : AMDILReg<36, "r35">, DwarfRegNum<[36]>;
+def Rz35 : AMDILReg<37, "r35">, DwarfRegNum<[37]>;
+def Rw35 : AMDILReg<38, "r35">, DwarfRegNum<[38]>;
+def Rx36 : AMDILReg<36, "r36">, DwarfRegNum<[36]>;
+def Ry36 : AMDILReg<37, "r36">, DwarfRegNum<[37]>;
+def Rz36 : AMDILReg<38, "r36">, DwarfRegNum<[38]>;
+def Rw36 : AMDILReg<39, "r36">, DwarfRegNum<[39]>;
+def Rx37 : AMDILReg<37, "r37">, DwarfRegNum<[37]>;
+def Ry37 : AMDILReg<38, "r37">, DwarfRegNum<[38]>;
+def Rz37 : AMDILReg<39, "r37">, DwarfRegNum<[39]>;
+def Rw37 : AMDILReg<40, "r37">, DwarfRegNum<[40]>;
+def Rx38 : AMDILReg<38, "r38">, DwarfRegNum<[38]>;
+def Ry38 : AMDILReg<39, "r38">, DwarfRegNum<[39]>;
+def Rz38 : AMDILReg<40, "r38">, DwarfRegNum<[40]>;
+def Rw38 : AMDILReg<41, "r38">, DwarfRegNum<[41]>;
+def Rx39 : AMDILReg<39, "r39">, DwarfRegNum<[39]>;
+def Ry39 : AMDILReg<40, "r39">, DwarfRegNum<[40]>;
+def Rz39 : AMDILReg<41, "r39">, DwarfRegNum<[41]>;
+def Rw39 : AMDILReg<42, "r39">, DwarfRegNum<[42]>;
+def Rx40 : AMDILReg<40, "r40">, DwarfRegNum<[40]>;
+def Ry40 : AMDILReg<41, "r40">, DwarfRegNum<[41]>;
+def Rz40 : AMDILReg<42, "r40">, DwarfRegNum<[42]>;
+def Rw40 : AMDILReg<43, "r40">, DwarfRegNum<[43]>;
+def Rx41 : AMDILReg<41, "r41">, DwarfRegNum<[41]>;
+def Ry41 : AMDILReg<42, "r41">, DwarfRegNum<[42]>;
+def Rz41 : AMDILReg<43, "r41">, DwarfRegNum<[43]>;
+def Rw41 : AMDILReg<44, "r41">, DwarfRegNum<[44]>;
+def Rx42 : AMDILReg<42, "r42">, DwarfRegNum<[42]>;
+def Ry42 : AMDILReg<43, "r42">, DwarfRegNum<[43]>;
+def Rz42 : AMDILReg<44, "r42">, DwarfRegNum<[44]>;
+def Rw42 : AMDILReg<45, "r42">, DwarfRegNum<[45]>;
+def Rx43 : AMDILReg<43, "r43">, DwarfRegNum<[43]>;
+def Ry43 : AMDILReg<44, "r43">, DwarfRegNum<[44]>;
+def Rz43 : AMDILReg<45, "r43">, DwarfRegNum<[45]>;
+def Rw43 : AMDILReg<46, "r43">, DwarfRegNum<[46]>;
+def Rx44 : AMDILReg<44, "r44">, DwarfRegNum<[44]>;
+def Ry44 : AMDILReg<45, "r44">, DwarfRegNum<[45]>;
+def Rz44 : AMDILReg<46, "r44">, DwarfRegNum<[46]>;
+def Rw44 : AMDILReg<47, "r44">, DwarfRegNum<[47]>;
+def Rx45 : AMDILReg<45, "r45">, DwarfRegNum<[45]>;
+def Ry45 : AMDILReg<46, "r45">, DwarfRegNum<[46]>;
+def Rz45 : AMDILReg<47, "r45">, DwarfRegNum<[47]>;
+def Rw45 : AMDILReg<48, "r45">, DwarfRegNum<[48]>;
+def Rx46 : AMDILReg<46, "r46">, DwarfRegNum<[46]>;
+def Ry46 : AMDILReg<47, "r46">, DwarfRegNum<[47]>;
+def Rz46 : AMDILReg<48, "r46">, DwarfRegNum<[48]>;
+def Rw46 : AMDILReg<49, "r46">, DwarfRegNum<[49]>;
+def Rx47 : AMDILReg<47, "r47">, DwarfRegNum<[47]>;
+def Ry47 : AMDILReg<48, "r47">, DwarfRegNum<[48]>;
+def Rz47 : AMDILReg<49, "r47">, DwarfRegNum<[49]>;
+def Rw47 : AMDILReg<50, "r47">, DwarfRegNum<[50]>;
+def Rx48 : AMDILReg<48, "r48">, DwarfRegNum<[48]>;
+def Ry48 : AMDILReg<49, "r48">, DwarfRegNum<[49]>;
+def Rz48 : AMDILReg<50, "r48">, DwarfRegNum<[50]>;
+def Rw48 : AMDILReg<51, "r48">, DwarfRegNum<[51]>;
+def Rx49 : AMDILReg<49, "r49">, DwarfRegNum<[49]>;
+def Ry49 : AMDILReg<50, "r49">, DwarfRegNum<[50]>;
+def Rz49 : AMDILReg<51, "r49">, DwarfRegNum<[51]>;
+def Rw49 : AMDILReg<52, "r49">, DwarfRegNum<[52]>;
+def Rx50 : AMDILReg<50, "r50">, DwarfRegNum<[50]>;
+def Ry50 : AMDILReg<51, "r50">, DwarfRegNum<[51]>;
+def Rz50 : AMDILReg<52, "r50">, DwarfRegNum<[52]>;
+def Rw50 : AMDILReg<53, "r50">, DwarfRegNum<[53]>;
+def Rx51 : AMDILReg<51, "r51">, DwarfRegNum<[51]>;
+def Ry51 : AMDILReg<52, "r51">, DwarfRegNum<[52]>;
+def Rz51 : AMDILReg<53, "r51">, DwarfRegNum<[53]>;
+def Rw51 : AMDILReg<54, "r51">, DwarfRegNum<[54]>;
+def Rx52 : AMDILReg<52, "r52">, DwarfRegNum<[52]>;
+def Ry52 : AMDILReg<53, "r52">, DwarfRegNum<[53]>;
+def Rz52 : AMDILReg<54, "r52">, DwarfRegNum<[54]>;
+def Rw52 : AMDILReg<55, "r52">, DwarfRegNum<[55]>;
+def Rx53 : AMDILReg<53, "r53">, DwarfRegNum<[53]>;
+def Ry53 : AMDILReg<54, "r53">, DwarfRegNum<[54]>;
+def Rz53 : AMDILReg<55, "r53">, DwarfRegNum<[55]>;
+def Rw53 : AMDILReg<56, "r53">, DwarfRegNum<[56]>;
+def Rx54 : AMDILReg<54, "r54">, DwarfRegNum<[54]>;
+def Ry54 : AMDILReg<55, "r54">, DwarfRegNum<[55]>;
+def Rz54 : AMDILReg<56, "r54">, DwarfRegNum<[56]>;
+def Rw54 : AMDILReg<57, "r54">, DwarfRegNum<[57]>;
+def Rx55 : AMDILReg<55, "r55">, DwarfRegNum<[55]>;
+def Ry55 : AMDILReg<56, "r55">, DwarfRegNum<[56]>;
+def Rz55 : AMDILReg<57, "r55">, DwarfRegNum<[57]>;
+def Rw55 : AMDILReg<58, "r55">, DwarfRegNum<[58]>;
+def Rx56 : AMDILReg<56, "r56">, DwarfRegNum<[56]>;
+def Ry56 : AMDILReg<57, "r56">, DwarfRegNum<[57]>;
+def Rz56 : AMDILReg<58, "r56">, DwarfRegNum<[58]>;
+def Rw56 : AMDILReg<59, "r56">, DwarfRegNum<[59]>;
+def Rx57 : AMDILReg<57, "r57">, DwarfRegNum<[57]>;
+def Ry57 : AMDILReg<58, "r57">, DwarfRegNum<[58]>;
+def Rz57 : AMDILReg<59, "r57">, DwarfRegNum<[59]>;
+def Rw57 : AMDILReg<60, "r57">, DwarfRegNum<[60]>;
+def Rx58 : AMDILReg<58, "r58">, DwarfRegNum<[58]>;
+def Ry58 : AMDILReg<59, "r58">, DwarfRegNum<[59]>;
+def Rz58 : AMDILReg<60, "r58">, DwarfRegNum<[60]>;
+def Rw58 : AMDILReg<61, "r58">, DwarfRegNum<[61]>;
+def Rx59 : AMDILReg<59, "r59">, DwarfRegNum<[59]>;
+def Ry59 : AMDILReg<60, "r59">, DwarfRegNum<[60]>;
+def Rz59 : AMDILReg<61, "r59">, DwarfRegNum<[61]>;
+def Rw59 : AMDILReg<62, "r59">, DwarfRegNum<[62]>;
+def Rx60 : AMDILReg<60, "r60">, DwarfRegNum<[60]>;
+def Ry60 : AMDILReg<61, "r60">, DwarfRegNum<[61]>;
+def Rz60 : AMDILReg<62, "r60">, DwarfRegNum<[62]>;
+def Rw60 : AMDILReg<63, "r60">, DwarfRegNum<[63]>;
+def Rx61 : AMDILReg<61, "r61">, DwarfRegNum<[61]>;
+def Ry61 : AMDILReg<62, "r61">, DwarfRegNum<[62]>;
+def Rz61 : AMDILReg<63, "r61">, DwarfRegNum<[63]>;
+def Rw61 : AMDILReg<64, "r61">, DwarfRegNum<[64]>;
+def Rx62 : AMDILReg<62, "r62">, DwarfRegNum<[62]>;
+def Ry62 : AMDILReg<63, "r62">, DwarfRegNum<[63]>;
+def Rz62 : AMDILReg<64, "r62">, DwarfRegNum<[64]>;
+def Rw62 : AMDILReg<65, "r62">, DwarfRegNum<[65]>;
+def Rx63 : AMDILReg<63, "r63">, DwarfRegNum<[63]>;
+def Ry63 : AMDILReg<64, "r63">, DwarfRegNum<[64]>;
+def Rz63 : AMDILReg<65, "r63">, DwarfRegNum<[65]>;
+def Rw63 : AMDILReg<66, "r63">, DwarfRegNum<[66]>;
+def Rx64 : AMDILReg<64, "r64">, DwarfRegNum<[64]>;
+def Ry64 : AMDILReg<65, "r64">, DwarfRegNum<[65]>;
+def Rz64 : AMDILReg<66, "r64">, DwarfRegNum<[66]>;
+def Rw64 : AMDILReg<67, "r64">, DwarfRegNum<[67]>;
+def Rx65 : AMDILReg<65, "r65">, DwarfRegNum<[65]>;
+def Ry65 : AMDILReg<66, "r65">, DwarfRegNum<[66]>;
+def Rz65 : AMDILReg<67, "r65">, DwarfRegNum<[67]>;
+def Rw65 : AMDILReg<68, "r65">, DwarfRegNum<[68]>;
+def Rx66 : AMDILReg<66, "r66">, DwarfRegNum<[66]>;
+def Ry66 : AMDILReg<67, "r66">, DwarfRegNum<[67]>;
+def Rz66 : AMDILReg<68, "r66">, DwarfRegNum<[68]>;
+def Rw66 : AMDILReg<69, "r66">, DwarfRegNum<[69]>;
+def Rx67 : AMDILReg<67, "r67">, DwarfRegNum<[67]>;
+def Ry67 : AMDILReg<68, "r67">, DwarfRegNum<[68]>;
+def Rz67 : AMDILReg<69, "r67">, DwarfRegNum<[69]>;
+def Rw67 : AMDILReg<70, "r67">, DwarfRegNum<[70]>;
+def Rx68 : AMDILReg<68, "r68">, DwarfRegNum<[68]>;
+def Ry68 : AMDILReg<69, "r68">, DwarfRegNum<[69]>;
+def Rz68 : AMDILReg<70, "r68">, DwarfRegNum<[70]>;
+def Rw68 : AMDILReg<71, "r68">, DwarfRegNum<[71]>;
+def Rx69 : AMDILReg<69, "r69">, DwarfRegNum<[69]>;
+def Ry69 : AMDILReg<70, "r69">, DwarfRegNum<[70]>;
+def Rz69 : AMDILReg<71, "r69">, DwarfRegNum<[71]>;
+def Rw69 : AMDILReg<72, "r69">, DwarfRegNum<[72]>;
+def Rx70 : AMDILReg<70, "r70">, DwarfRegNum<[70]>;
+def Ry70 : AMDILReg<71, "r70">, DwarfRegNum<[71]>;
+def Rz70 : AMDILReg<72, "r70">, DwarfRegNum<[72]>;
+def Rw70 : AMDILReg<73, "r70">, DwarfRegNum<[73]>;
+def Rx71 : AMDILReg<71, "r71">, DwarfRegNum<[71]>;
+def Ry71 : AMDILReg<72, "r71">, DwarfRegNum<[72]>;
+def Rz71 : AMDILReg<73, "r71">, DwarfRegNum<[73]>;
+def Rw71 : AMDILReg<74, "r71">, DwarfRegNum<[74]>;
+def Rx72 : AMDILReg<72, "r72">, DwarfRegNum<[72]>;
+def Ry72 : AMDILReg<73, "r72">, DwarfRegNum<[73]>;
+def Rz72 : AMDILReg<74, "r72">, DwarfRegNum<[74]>;
+def Rw72 : AMDILReg<75, "r72">, DwarfRegNum<[75]>;
+def Rx73 : AMDILReg<73, "r73">, DwarfRegNum<[73]>;
+def Ry73 : AMDILReg<74, "r73">, DwarfRegNum<[74]>;
+def Rz73 : AMDILReg<75, "r73">, DwarfRegNum<[75]>;
+def Rw73 : AMDILReg<76, "r73">, DwarfRegNum<[76]>;
+def Rx74 : AMDILReg<74, "r74">, DwarfRegNum<[74]>;
+def Ry74 : AMDILReg<75, "r74">, DwarfRegNum<[75]>;
+def Rz74 : AMDILReg<76, "r74">, DwarfRegNum<[76]>;
+def Rw74 : AMDILReg<77, "r74">, DwarfRegNum<[77]>;
+def Rx75 : AMDILReg<75, "r75">, DwarfRegNum<[75]>;
+def Ry75 : AMDILReg<76, "r75">, DwarfRegNum<[76]>;
+def Rz75 : AMDILReg<77, "r75">, DwarfRegNum<[77]>;
+def Rw75 : AMDILReg<78, "r75">, DwarfRegNum<[78]>;
+def Rx76 : AMDILReg<76, "r76">, DwarfRegNum<[76]>;
+def Ry76 : AMDILReg<77, "r76">, DwarfRegNum<[77]>;
+def Rz76 : AMDILReg<78, "r76">, DwarfRegNum<[78]>;
+def Rw76 : AMDILReg<79, "r76">, DwarfRegNum<[79]>;
+def Rx77 : AMDILReg<77, "r77">, DwarfRegNum<[77]>;
+def Ry77 : AMDILReg<78, "r77">, DwarfRegNum<[78]>;
+def Rz77 : AMDILReg<79, "r77">, DwarfRegNum<[79]>;
+def Rw77 : AMDILReg<80, "r77">, DwarfRegNum<[80]>;
+def Rx78 : AMDILReg<78, "r78">, DwarfRegNum<[78]>;
+def Ry78 : AMDILReg<79, "r78">, DwarfRegNum<[79]>;
+def Rz78 : AMDILReg<80, "r78">, DwarfRegNum<[80]>;
+def Rw78 : AMDILReg<81, "r78">, DwarfRegNum<[81]>;
+def Rx79 : AMDILReg<79, "r79">, DwarfRegNum<[79]>;
+def Ry79 : AMDILReg<80, "r79">, DwarfRegNum<[80]>;
+def Rz79 : AMDILReg<81, "r79">, DwarfRegNum<[81]>;
+def Rw79 : AMDILReg<82, "r79">, DwarfRegNum<[82]>;
+def Rx80 : AMDILReg<80, "r80">, DwarfRegNum<[80]>;
+def Ry80 : AMDILReg<81, "r80">, DwarfRegNum<[81]>;
+def Rz80 : AMDILReg<82, "r80">, DwarfRegNum<[82]>;
+def Rw80 : AMDILReg<83, "r80">, DwarfRegNum<[83]>;
+def Rx81 : AMDILReg<81, "r81">, DwarfRegNum<[81]>;
+def Ry81 : AMDILReg<82, "r81">, DwarfRegNum<[82]>;
+def Rz81 : AMDILReg<83, "r81">, DwarfRegNum<[83]>;
+def Rw81 : AMDILReg<84, "r81">, DwarfRegNum<[84]>;
+def Rx82 : AMDILReg<82, "r82">, DwarfRegNum<[82]>;
+def Ry82 : AMDILReg<83, "r82">, DwarfRegNum<[83]>;
+def Rz82 : AMDILReg<84, "r82">, DwarfRegNum<[84]>;
+def Rw82 : AMDILReg<85, "r82">, DwarfRegNum<[85]>;
+def Rx83 : AMDILReg<83, "r83">, DwarfRegNum<[83]>;
+def Ry83 : AMDILReg<84, "r83">, DwarfRegNum<[84]>;
+def Rz83 : AMDILReg<85, "r83">, DwarfRegNum<[85]>;
+def Rw83 : AMDILReg<86, "r83">, DwarfRegNum<[86]>;
+def Rx84 : AMDILReg<84, "r84">, DwarfRegNum<[84]>;
+def Ry84 : AMDILReg<85, "r84">, DwarfRegNum<[85]>;
+def Rz84 : AMDILReg<86, "r84">, DwarfRegNum<[86]>;
+def Rw84 : AMDILReg<87, "r84">, DwarfRegNum<[87]>;
+def Rx85 : AMDILReg<85, "r85">, DwarfRegNum<[85]>;
+def Ry85 : AMDILReg<86, "r85">, DwarfRegNum<[86]>;
+def Rz85 : AMDILReg<87, "r85">, DwarfRegNum<[87]>;
+def Rw85 : AMDILReg<88, "r85">, DwarfRegNum<[88]>;
+def Rx86 : AMDILReg<86, "r86">, DwarfRegNum<[86]>;
+def Ry86 : AMDILReg<87, "r86">, DwarfRegNum<[87]>;
+def Rz86 : AMDILReg<88, "r86">, DwarfRegNum<[88]>;
+def Rw86 : AMDILReg<89, "r86">, DwarfRegNum<[89]>;
+def Rx87 : AMDILReg<87, "r87">, DwarfRegNum<[87]>;
+def Ry87 : AMDILReg<88, "r87">, DwarfRegNum<[88]>;
+def Rz87 : AMDILReg<89, "r87">, DwarfRegNum<[89]>;
+def Rw87 : AMDILReg<90, "r87">, DwarfRegNum<[90]>;
+def Rx88 : AMDILReg<88, "r88">, DwarfRegNum<[88]>;
+def Ry88 : AMDILReg<89, "r88">, DwarfRegNum<[89]>;
+def Rz88 : AMDILReg<90, "r88">, DwarfRegNum<[90]>;
+def Rw88 : AMDILReg<91, "r88">, DwarfRegNum<[91]>;
+def Rx89 : AMDILReg<89, "r89">, DwarfRegNum<[89]>;
+def Ry89 : AMDILReg<90, "r89">, DwarfRegNum<[90]>;
+def Rz89 : AMDILReg<91, "r89">, DwarfRegNum<[91]>;
+def Rw89 : AMDILReg<92, "r89">, DwarfRegNum<[92]>;
+def Rx90 : AMDILReg<90, "r90">, DwarfRegNum<[90]>;
+def Ry90 : AMDILReg<91, "r90">, DwarfRegNum<[91]>;
+def Rz90 : AMDILReg<92, "r90">, DwarfRegNum<[92]>;
+def Rw90 : AMDILReg<93, "r90">, DwarfRegNum<[93]>;
+def Rx91 : AMDILReg<91, "r91">, DwarfRegNum<[91]>;
+def Ry91 : AMDILReg<92, "r91">, DwarfRegNum<[92]>;
+def Rz91 : AMDILReg<93, "r91">, DwarfRegNum<[93]>;
+def Rw91 : AMDILReg<94, "r91">, DwarfRegNum<[94]>;
+def Rx92 : AMDILReg<92, "r92">, DwarfRegNum<[92]>;
+def Ry92 : AMDILReg<93, "r92">, DwarfRegNum<[93]>;
+def Rz92 : AMDILReg<94, "r92">, DwarfRegNum<[94]>;
+def Rw92 : AMDILReg<95, "r92">, DwarfRegNum<[95]>;
+def Rx93 : AMDILReg<93, "r93">, DwarfRegNum<[93]>;
+def Ry93 : AMDILReg<94, "r93">, DwarfRegNum<[94]>;
+def Rz93 : AMDILReg<95, "r93">, DwarfRegNum<[95]>;
+def Rw93 : AMDILReg<96, "r93">, DwarfRegNum<[96]>;
+def Rx94 : AMDILReg<94, "r94">, DwarfRegNum<[94]>;
+def Ry94 : AMDILReg<95, "r94">, DwarfRegNum<[95]>;
+def Rz94 : AMDILReg<96, "r94">, DwarfRegNum<[96]>;
+def Rw94 : AMDILReg<97, "r94">, DwarfRegNum<[97]>;
+def Rx95 : AMDILReg<95, "r95">, DwarfRegNum<[95]>;
+def Ry95 : AMDILReg<96, "r95">, DwarfRegNum<[96]>;
+def Rz95 : AMDILReg<97, "r95">, DwarfRegNum<[97]>;
+def Rw95 : AMDILReg<98, "r95">, DwarfRegNum<[98]>;
+def Rx96 : AMDILReg<96, "r96">, DwarfRegNum<[96]>;
+def Ry96 : AMDILReg<97, "r96">, DwarfRegNum<[97]>;
+def Rz96 : AMDILReg<98, "r96">, DwarfRegNum<[98]>;
+def Rw96 : AMDILReg<99, "r96">, DwarfRegNum<[99]>;
+def Rx97 : AMDILReg<97, "r97">, DwarfRegNum<[97]>;
+def Ry97 : AMDILReg<98, "r97">, DwarfRegNum<[98]>;
+def Rz97 : AMDILReg<99, "r97">, DwarfRegNum<[99]>;
+def Rw97 : AMDILReg<100, "r97">, DwarfRegNum<[100]>;
+def Rx98 : AMDILReg<98, "r98">, DwarfRegNum<[98]>;
+def Ry98 : AMDILReg<99, "r98">, DwarfRegNum<[99]>;
+def Rz98 : AMDILReg<100, "r98">, DwarfRegNum<[100]>;
+def Rw98 : AMDILReg<101, "r98">, DwarfRegNum<[101]>;
+def Rx99 : AMDILReg<99, "r99">, DwarfRegNum<[99]>;
+def Ry99 : AMDILReg<100, "r99">, DwarfRegNum<[100]>;
+def Rz99 : AMDILReg<101, "r99">, DwarfRegNum<[101]>;
+def Rw99 : AMDILReg<102, "r99">, DwarfRegNum<[102]>;
+def Rx100 : AMDILReg<100, "r100">, DwarfRegNum<[100]>;
+def Ry100 : AMDILReg<101, "r100">, DwarfRegNum<[101]>;
+def Rz100 : AMDILReg<102, "r100">, DwarfRegNum<[102]>;
+def Rw100 : AMDILReg<103, "r100">, DwarfRegNum<[103]>;
+def Rx101 : AMDILReg<101, "r101">, DwarfRegNum<[101]>;
+def Ry101 : AMDILReg<102, "r101">, DwarfRegNum<[102]>;
+def Rz101 : AMDILReg<103, "r101">, DwarfRegNum<[103]>;
+def Rw101 : AMDILReg<104, "r101">, DwarfRegNum<[104]>;
+def Rx102 : AMDILReg<102, "r102">, DwarfRegNum<[102]>;
+def Ry102 : AMDILReg<103, "r102">, DwarfRegNum<[103]>;
+def Rz102 : AMDILReg<104, "r102">, DwarfRegNum<[104]>;
+def Rw102 : AMDILReg<105, "r102">, DwarfRegNum<[105]>;
+def Rx103 : AMDILReg<103, "r103">, DwarfRegNum<[103]>;
+def Ry103 : AMDILReg<104, "r103">, DwarfRegNum<[104]>;
+def Rz103 : AMDILReg<105, "r103">, DwarfRegNum<[105]>;
+def Rw103 : AMDILReg<106, "r103">, DwarfRegNum<[106]>;
+def Rx104 : AMDILReg<104, "r104">, DwarfRegNum<[104]>;
+def Ry104 : AMDILReg<105, "r104">, DwarfRegNum<[105]>;
+def Rz104 : AMDILReg<106, "r104">, DwarfRegNum<[106]>;
+def Rw104 : AMDILReg<107, "r104">, DwarfRegNum<[107]>;
+def Rx105 : AMDILReg<105, "r105">, DwarfRegNum<[105]>;
+def Ry105 : AMDILReg<106, "r105">, DwarfRegNum<[106]>;
+def Rz105 : AMDILReg<107, "r105">, DwarfRegNum<[107]>;
+def Rw105 : AMDILReg<108, "r105">, DwarfRegNum<[108]>;
+def Rx106 : AMDILReg<106, "r106">, DwarfRegNum<[106]>;
+def Ry106 : AMDILReg<107, "r106">, DwarfRegNum<[107]>;
+def Rz106 : AMDILReg<108, "r106">, DwarfRegNum<[108]>;
+def Rw106 : AMDILReg<109, "r106">, DwarfRegNum<[109]>;
+def Rx107 : AMDILReg<107, "r107">, DwarfRegNum<[107]>;
+def Ry107 : AMDILReg<108, "r107">, DwarfRegNum<[108]>;
+def Rz107 : AMDILReg<109, "r107">, DwarfRegNum<[109]>;
+def Rw107 : AMDILReg<110, "r107">, DwarfRegNum<[110]>;
+def Rx108 : AMDILReg<108, "r108">, DwarfRegNum<[108]>;
+def Ry108 : AMDILReg<109, "r108">, DwarfRegNum<[109]>;
+def Rz108 : AMDILReg<110, "r108">, DwarfRegNum<[110]>;
+def Rw108 : AMDILReg<111, "r108">, DwarfRegNum<[111]>;
+def Rx109 : AMDILReg<109, "r109">, DwarfRegNum<[109]>;
+def Ry109 : AMDILReg<110, "r109">, DwarfRegNum<[110]>;
+def Rz109 : AMDILReg<111, "r109">, DwarfRegNum<[111]>;
+def Rw109 : AMDILReg<112, "r109">, DwarfRegNum<[112]>;
+def Rx110 : AMDILReg<110, "r110">, DwarfRegNum<[110]>;
+def Ry110 : AMDILReg<111, "r110">, DwarfRegNum<[111]>;
+def Rz110 : AMDILReg<112, "r110">, DwarfRegNum<[112]>;
+def Rw110 : AMDILReg<113, "r110">, DwarfRegNum<[113]>;
+def Rx111 : AMDILReg<111, "r111">, DwarfRegNum<[111]>;
+def Ry111 : AMDILReg<112, "r111">, DwarfRegNum<[112]>;
+def Rz111 : AMDILReg<113, "r111">, DwarfRegNum<[113]>;
+def Rw111 : AMDILReg<114, "r111">, DwarfRegNum<[114]>;
+def Rx112 : AMDILReg<112, "r112">, DwarfRegNum<[112]>;
+def Ry112 : AMDILReg<113, "r112">, DwarfRegNum<[113]>;
+def Rz112 : AMDILReg<114, "r112">, DwarfRegNum<[114]>;
+def Rw112 : AMDILReg<115, "r112">, DwarfRegNum<[115]>;
+def Rx113 : AMDILReg<113, "r113">, DwarfRegNum<[113]>;
+def Ry113 : AMDILReg<114, "r113">, DwarfRegNum<[114]>;
+def Rz113 : AMDILReg<115, "r113">, DwarfRegNum<[115]>;
+def Rw113 : AMDILReg<116, "r113">, DwarfRegNum<[116]>;
+def Rx114 : AMDILReg<114, "r114">, DwarfRegNum<[114]>;
+def Ry114 : AMDILReg<115, "r114">, DwarfRegNum<[115]>;
+def Rz114 : AMDILReg<116, "r114">, DwarfRegNum<[116]>;
+def Rw114 : AMDILReg<117, "r114">, DwarfRegNum<[117]>;
+def Rx115 : AMDILReg<115, "r115">, DwarfRegNum<[115]>;
+def Ry115 : AMDILReg<116, "r115">, DwarfRegNum<[116]>;
+def Rz115 : AMDILReg<117, "r115">, DwarfRegNum<[117]>;
+def Rw115 : AMDILReg<118, "r115">, DwarfRegNum<[118]>;
+def Rx116 : AMDILReg<116, "r116">, DwarfRegNum<[116]>;
+def Ry116 : AMDILReg<117, "r116">, DwarfRegNum<[117]>;
+def Rz116 : AMDILReg<118, "r116">, DwarfRegNum<[118]>;
+def Rw116 : AMDILReg<119, "r116">, DwarfRegNum<[119]>;
+def Rx117 : AMDILReg<117, "r117">, DwarfRegNum<[117]>;
+def Ry117 : AMDILReg<118, "r117">, DwarfRegNum<[118]>;
+def Rz117 : AMDILReg<119, "r117">, DwarfRegNum<[119]>;
+def Rw117 : AMDILReg<120, "r117">, DwarfRegNum<[120]>;
+def Rx118 : AMDILReg<118, "r118">, DwarfRegNum<[118]>;
+def Ry118 : AMDILReg<119, "r118">, DwarfRegNum<[119]>;
+def Rz118 : AMDILReg<120, "r118">, DwarfRegNum<[120]>;
+def Rw118 : AMDILReg<121, "r118">, DwarfRegNum<[121]>;
+def Rx119 : AMDILReg<119, "r119">, DwarfRegNum<[119]>;
+def Ry119 : AMDILReg<120, "r119">, DwarfRegNum<[120]>;
+def Rz119 : AMDILReg<121, "r119">, DwarfRegNum<[121]>;
+def Rw119 : AMDILReg<122, "r119">, DwarfRegNum<[122]>;
+def Rx120 : AMDILReg<120, "r120">, DwarfRegNum<[120]>;
+def Ry120 : AMDILReg<121, "r120">, DwarfRegNum<[121]>;
+def Rz120 : AMDILReg<122, "r120">, DwarfRegNum<[122]>;
+def Rw120 : AMDILReg<123, "r120">, DwarfRegNum<[123]>;
+def Rx121 : AMDILReg<121, "r121">, DwarfRegNum<[121]>;
+def Ry121 : AMDILReg<122, "r121">, DwarfRegNum<[122]>;
+def Rz121 : AMDILReg<123, "r121">, DwarfRegNum<[123]>;
+def Rw121 : AMDILReg<124, "r121">, DwarfRegNum<[124]>;
+def Rx122 : AMDILReg<122, "r122">, DwarfRegNum<[122]>;
+def Ry122 : AMDILReg<123, "r122">, DwarfRegNum<[123]>;
+def Rz122 : AMDILReg<124, "r122">, DwarfRegNum<[124]>;
+def Rw122 : AMDILReg<125, "r122">, DwarfRegNum<[125]>;
+def Rx123 : AMDILReg<123, "r123">, DwarfRegNum<[123]>;
+def Ry123 : AMDILReg<124, "r123">, DwarfRegNum<[124]>;
+def Rz123 : AMDILReg<125, "r123">, DwarfRegNum<[125]>;
+def Rw123 : AMDILReg<126, "r123">, DwarfRegNum<[126]>;
+def Rx124 : AMDILReg<124, "r124">, DwarfRegNum<[124]>;
+def Ry124 : AMDILReg<125, "r124">, DwarfRegNum<[125]>;
+def Rz124 : AMDILReg<126, "r124">, DwarfRegNum<[126]>;
+def Rw124 : AMDILReg<127, "r124">, DwarfRegNum<[127]>;
+def Rx125 : AMDILReg<125, "r125">, DwarfRegNum<[125]>;
+def Ry125 : AMDILReg<126, "r125">, DwarfRegNum<[126]>;
+def Rz125 : AMDILReg<127, "r125">, DwarfRegNum<[127]>;
+def Rw125 : AMDILReg<128, "r125">, DwarfRegNum<[128]>;
+def Rx126 : AMDILReg<126, "r126">, DwarfRegNum<[126]>;
+def Ry126 : AMDILReg<127, "r126">, DwarfRegNum<[127]>;
+def Rz126 : AMDILReg<128, "r126">, DwarfRegNum<[128]>;
+def Rw126 : AMDILReg<129, "r126">, DwarfRegNum<[129]>;
+def Rx127 : AMDILReg<127, "r127">, DwarfRegNum<[127]>;
+def Ry127 : AMDILReg<128, "r127">, DwarfRegNum<[128]>;
+def Rz127 : AMDILReg<129, "r127">, DwarfRegNum<[129]>;
+def Rw127 : AMDILReg<130, "r127">, DwarfRegNum<[130]>;
+def Rx128 : AMDILReg<128, "r128">, DwarfRegNum<[128]>;
+def Ry128 : AMDILReg<129, "r128">, DwarfRegNum<[129]>;
+def Rz128 : AMDILReg<130, "r128">, DwarfRegNum<[130]>;
+def Rw128 : AMDILReg<131, "r128">, DwarfRegNum<[131]>;
+def Rx129 : AMDILReg<129, "r129">, DwarfRegNum<[129]>;
+def Ry129 : AMDILReg<130, "r129">, DwarfRegNum<[130]>;
+def Rz129 : AMDILReg<131, "r129">, DwarfRegNum<[131]>;
+def Rw129 : AMDILReg<132, "r129">, DwarfRegNum<[132]>;
+def Rx130 : AMDILReg<130, "r130">, DwarfRegNum<[130]>;
+def Ry130 : AMDILReg<131, "r130">, DwarfRegNum<[131]>;
+def Rz130 : AMDILReg<132, "r130">, DwarfRegNum<[132]>;
+def Rw130 : AMDILReg<133, "r130">, DwarfRegNum<[133]>;
+def Rx131 : AMDILReg<131, "r131">, DwarfRegNum<[131]>;
+def Ry131 : AMDILReg<132, "r131">, DwarfRegNum<[132]>;
+def Rz131 : AMDILReg<133, "r131">, DwarfRegNum<[133]>;
+def Rw131 : AMDILReg<134, "r131">, DwarfRegNum<[134]>;
+def Rx132 : AMDILReg<132, "r132">, DwarfRegNum<[132]>;
+def Ry132 : AMDILReg<133, "r132">, DwarfRegNum<[133]>;
+def Rz132 : AMDILReg<134, "r132">, DwarfRegNum<[134]>;
+def Rw132 : AMDILReg<135, "r132">, DwarfRegNum<[135]>;
+def Rx133 : AMDILReg<133, "r133">, DwarfRegNum<[133]>;
+def Ry133 : AMDILReg<134, "r133">, DwarfRegNum<[134]>;
+def Rz133 : AMDILReg<135, "r133">, DwarfRegNum<[135]>;
+def Rw133 : AMDILReg<136, "r133">, DwarfRegNum<[136]>;
+def Rx134 : AMDILReg<134, "r134">, DwarfRegNum<[134]>;
+def Ry134 : AMDILReg<135, "r134">, DwarfRegNum<[135]>;
+def Rz134 : AMDILReg<136, "r134">, DwarfRegNum<[136]>;
+def Rw134 : AMDILReg<137, "r134">, DwarfRegNum<[137]>;
+def Rx135 : AMDILReg<135, "r135">, DwarfRegNum<[135]>;
+def Ry135 : AMDILReg<136, "r135">, DwarfRegNum<[136]>;
+def Rz135 : AMDILReg<137, "r135">, DwarfRegNum<[137]>;
+def Rw135 : AMDILReg<138, "r135">, DwarfRegNum<[138]>;
+def Rx136 : AMDILReg<136, "r136">, DwarfRegNum<[136]>;
+def Ry136 : AMDILReg<137, "r136">, DwarfRegNum<[137]>;
+def Rz136 : AMDILReg<138, "r136">, DwarfRegNum<[138]>;
+def Rw136 : AMDILReg<139, "r136">, DwarfRegNum<[139]>;
+def Rx137 : AMDILReg<137, "r137">, DwarfRegNum<[137]>;
+def Ry137 : AMDILReg<138, "r137">, DwarfRegNum<[138]>;
+def Rz137 : AMDILReg<139, "r137">, DwarfRegNum<[139]>;
+def Rw137 : AMDILReg<140, "r137">, DwarfRegNum<[140]>;
+def Rx138 : AMDILReg<138, "r138">, DwarfRegNum<[138]>;
+def Ry138 : AMDILReg<139, "r138">, DwarfRegNum<[139]>;
+def Rz138 : AMDILReg<140, "r138">, DwarfRegNum<[140]>;
+def Rw138 : AMDILReg<141, "r138">, DwarfRegNum<[141]>;
+def Rx139 : AMDILReg<139, "r139">, DwarfRegNum<[139]>;
+def Ry139 : AMDILReg<140, "r139">, DwarfRegNum<[140]>;
+def Rz139 : AMDILReg<141, "r139">, DwarfRegNum<[141]>;
+def Rw139 : AMDILReg<142, "r139">, DwarfRegNum<[142]>;
+def Rx140 : AMDILReg<140, "r140">, DwarfRegNum<[140]>;
+def Ry140 : AMDILReg<141, "r140">, DwarfRegNum<[141]>;
+def Rz140 : AMDILReg<142, "r140">, DwarfRegNum<[142]>;
+def Rw140 : AMDILReg<143, "r140">, DwarfRegNum<[143]>;
+def Rx141 : AMDILReg<141, "r141">, DwarfRegNum<[141]>;
+def Ry141 : AMDILReg<142, "r141">, DwarfRegNum<[142]>;
+def Rz141 : AMDILReg<143, "r141">, DwarfRegNum<[143]>;
+def Rw141 : AMDILReg<144, "r141">, DwarfRegNum<[144]>;
+def Rx142 : AMDILReg<142, "r142">, DwarfRegNum<[142]>;
+def Ry142 : AMDILReg<143, "r142">, DwarfRegNum<[143]>;
+def Rz142 : AMDILReg<144, "r142">, DwarfRegNum<[144]>;
+def Rw142 : AMDILReg<145, "r142">, DwarfRegNum<[145]>;
+def Rx143 : AMDILReg<143, "r143">, DwarfRegNum<[143]>;
+def Ry143 : AMDILReg<144, "r143">, DwarfRegNum<[144]>;
+def Rz143 : AMDILReg<145, "r143">, DwarfRegNum<[145]>;
+def Rw143 : AMDILReg<146, "r143">, DwarfRegNum<[146]>;
+def Rx144 : AMDILReg<144, "r144">, DwarfRegNum<[144]>;
+def Ry144 : AMDILReg<145, "r144">, DwarfRegNum<[145]>;
+def Rz144 : AMDILReg<146, "r144">, DwarfRegNum<[146]>;
+def Rw144 : AMDILReg<147, "r144">, DwarfRegNum<[147]>;
+def Rx145 : AMDILReg<145, "r145">, DwarfRegNum<[145]>;
+def Ry145 : AMDILReg<146, "r145">, DwarfRegNum<[146]>;
+def Rz145 : AMDILReg<147, "r145">, DwarfRegNum<[147]>;
+def Rw145 : AMDILReg<148, "r145">, DwarfRegNum<[148]>;
+def Rx146 : AMDILReg<146, "r146">, DwarfRegNum<[146]>;
+def Ry146 : AMDILReg<147, "r146">, DwarfRegNum<[147]>;
+def Rz146 : AMDILReg<148, "r146">, DwarfRegNum<[148]>;
+def Rw146 : AMDILReg<149, "r146">, DwarfRegNum<[149]>;
+def Rx147 : AMDILReg<147, "r147">, DwarfRegNum<[147]>;
+def Ry147 : AMDILReg<148, "r147">, DwarfRegNum<[148]>;
+def Rz147 : AMDILReg<149, "r147">, DwarfRegNum<[149]>;
+def Rw147 : AMDILReg<150, "r147">, DwarfRegNum<[150]>;
+def Rx148 : AMDILReg<148, "r148">, DwarfRegNum<[148]>;
+def Ry148 : AMDILReg<149, "r148">, DwarfRegNum<[149]>;
+def Rz148 : AMDILReg<150, "r148">, DwarfRegNum<[150]>;
+def Rw148 : AMDILReg<151, "r148">, DwarfRegNum<[151]>;
+def Rx149 : AMDILReg<149, "r149">, DwarfRegNum<[149]>;
+def Ry149 : AMDILReg<150, "r149">, DwarfRegNum<[150]>;
+def Rz149 : AMDILReg<151, "r149">, DwarfRegNum<[151]>;
+def Rw149 : AMDILReg<152, "r149">, DwarfRegNum<[152]>;
+def Rx150 : AMDILReg<150, "r150">, DwarfRegNum<[150]>;
+def Ry150 : AMDILReg<151, "r150">, DwarfRegNum<[151]>;
+def Rz150 : AMDILReg<152, "r150">, DwarfRegNum<[152]>;
+def Rw150 : AMDILReg<153, "r150">, DwarfRegNum<[153]>;
+def Rx151 : AMDILReg<151, "r151">, DwarfRegNum<[151]>;
+def Ry151 : AMDILReg<152, "r151">, DwarfRegNum<[152]>;
+def Rz151 : AMDILReg<153, "r151">, DwarfRegNum<[153]>;
+def Rw151 : AMDILReg<154, "r151">, DwarfRegNum<[154]>;
+def Rx152 : AMDILReg<152, "r152">, DwarfRegNum<[152]>;
+def Ry152 : AMDILReg<153, "r152">, DwarfRegNum<[153]>;
+def Rz152 : AMDILReg<154, "r152">, DwarfRegNum<[154]>;
+def Rw152 : AMDILReg<155, "r152">, DwarfRegNum<[155]>;
+def Rx153 : AMDILReg<153, "r153">, DwarfRegNum<[153]>;
+def Ry153 : AMDILReg<154, "r153">, DwarfRegNum<[154]>;
+def Rz153 : AMDILReg<155, "r153">, DwarfRegNum<[155]>;
+def Rw153 : AMDILReg<156, "r153">, DwarfRegNum<[156]>;
+def Rx154 : AMDILReg<154, "r154">, DwarfRegNum<[154]>;
+def Ry154 : AMDILReg<155, "r154">, DwarfRegNum<[155]>;
+def Rz154 : AMDILReg<156, "r154">, DwarfRegNum<[156]>;
+def Rw154 : AMDILReg<157, "r154">, DwarfRegNum<[157]>;
+def Rx155 : AMDILReg<155, "r155">, DwarfRegNum<[155]>;
+def Ry155 : AMDILReg<156, "r155">, DwarfRegNum<[156]>;
+def Rz155 : AMDILReg<157, "r155">, DwarfRegNum<[157]>;
+def Rw155 : AMDILReg<158, "r155">, DwarfRegNum<[158]>;
+def Rx156 : AMDILReg<156, "r156">, DwarfRegNum<[156]>;
+def Ry156 : AMDILReg<157, "r156">, DwarfRegNum<[157]>;
+def Rz156 : AMDILReg<158, "r156">, DwarfRegNum<[158]>;
+def Rw156 : AMDILReg<159, "r156">, DwarfRegNum<[159]>;
+def Rx157 : AMDILReg<157, "r157">, DwarfRegNum<[157]>;
+def Ry157 : AMDILReg<158, "r157">, DwarfRegNum<[158]>;
+def Rz157 : AMDILReg<159, "r157">, DwarfRegNum<[159]>;
+def Rw157 : AMDILReg<160, "r157">, DwarfRegNum<[160]>;
+def Rx158 : AMDILReg<158, "r158">, DwarfRegNum<[158]>;
+def Ry158 : AMDILReg<159, "r158">, DwarfRegNum<[159]>;
+def Rz158 : AMDILReg<160, "r158">, DwarfRegNum<[160]>;
+def Rw158 : AMDILReg<161, "r158">, DwarfRegNum<[161]>;
+def Rx159 : AMDILReg<159, "r159">, DwarfRegNum<[159]>;
+def Ry159 : AMDILReg<160, "r159">, DwarfRegNum<[160]>;
+def Rz159 : AMDILReg<161, "r159">, DwarfRegNum<[161]>;
+def Rw159 : AMDILReg<162, "r159">, DwarfRegNum<[162]>;
+def Rx160 : AMDILReg<160, "r160">, DwarfRegNum<[160]>;
+def Ry160 : AMDILReg<161, "r160">, DwarfRegNum<[161]>;
+def Rz160 : AMDILReg<162, "r160">, DwarfRegNum<[162]>;
+def Rw160 : AMDILReg<163, "r160">, DwarfRegNum<[163]>;
+def Rx161 : AMDILReg<161, "r161">, DwarfRegNum<[161]>;
+def Ry161 : AMDILReg<162, "r161">, DwarfRegNum<[162]>;
+def Rz161 : AMDILReg<163, "r161">, DwarfRegNum<[163]>;
+def Rw161 : AMDILReg<164, "r161">, DwarfRegNum<[164]>;
+def Rx162 : AMDILReg<162, "r162">, DwarfRegNum<[162]>;
+def Ry162 : AMDILReg<163, "r162">, DwarfRegNum<[163]>;
+def Rz162 : AMDILReg<164, "r162">, DwarfRegNum<[164]>;
+def Rw162 : AMDILReg<165, "r162">, DwarfRegNum<[165]>;
+def Rx163 : AMDILReg<163, "r163">, DwarfRegNum<[163]>;
+def Ry163 : AMDILReg<164, "r163">, DwarfRegNum<[164]>;
+def Rz163 : AMDILReg<165, "r163">, DwarfRegNum<[165]>;
+def Rw163 : AMDILReg<166, "r163">, DwarfRegNum<[166]>;
+def Rx164 : AMDILReg<164, "r164">, DwarfRegNum<[164]>;
+def Ry164 : AMDILReg<165, "r164">, DwarfRegNum<[165]>;
+def Rz164 : AMDILReg<166, "r164">, DwarfRegNum<[166]>;
+def Rw164 : AMDILReg<167, "r164">, DwarfRegNum<[167]>;
+def Rx165 : AMDILReg<165, "r165">, DwarfRegNum<[165]>;
+def Ry165 : AMDILReg<166, "r165">, DwarfRegNum<[166]>;
+def Rz165 : AMDILReg<167, "r165">, DwarfRegNum<[167]>;
+def Rw165 : AMDILReg<168, "r165">, DwarfRegNum<[168]>;
+def Rx166 : AMDILReg<166, "r166">, DwarfRegNum<[166]>;
+def Ry166 : AMDILReg<167, "r166">, DwarfRegNum<[167]>;
+def Rz166 : AMDILReg<168, "r166">, DwarfRegNum<[168]>;
+def Rw166 : AMDILReg<169, "r166">, DwarfRegNum<[169]>;
+def Rx167 : AMDILReg<167, "r167">, DwarfRegNum<[167]>;
+def Ry167 : AMDILReg<168, "r167">, DwarfRegNum<[168]>;
+def Rz167 : AMDILReg<169, "r167">, DwarfRegNum<[169]>;
+def Rw167 : AMDILReg<170, "r167">, DwarfRegNum<[170]>;
+def Rx168 : AMDILReg<168, "r168">, DwarfRegNum<[168]>;
+def Ry168 : AMDILReg<169, "r168">, DwarfRegNum<[169]>;
+def Rz168 : AMDILReg<170, "r168">, DwarfRegNum<[170]>;
+def Rw168 : AMDILReg<171, "r168">, DwarfRegNum<[171]>;
+def Rx169 : AMDILReg<169, "r169">, DwarfRegNum<[169]>;
+def Ry169 : AMDILReg<170, "r169">, DwarfRegNum<[170]>;
+def Rz169 : AMDILReg<171, "r169">, DwarfRegNum<[171]>;
+def Rw169 : AMDILReg<172, "r169">, DwarfRegNum<[172]>;
+def Rx170 : AMDILReg<170, "r170">, DwarfRegNum<[170]>;
+def Ry170 : AMDILReg<171, "r170">, DwarfRegNum<[171]>;
+def Rz170 : AMDILReg<172, "r170">, DwarfRegNum<[172]>;
+def Rw170 : AMDILReg<173, "r170">, DwarfRegNum<[173]>;
+def Rx171 : AMDILReg<171, "r171">, DwarfRegNum<[171]>;
+def Ry171 : AMDILReg<172, "r171">, DwarfRegNum<[172]>;
+def Rz171 : AMDILReg<173, "r171">, DwarfRegNum<[173]>;
+def Rw171 : AMDILReg<174, "r171">, DwarfRegNum<[174]>;
+def Rx172 : AMDILReg<172, "r172">, DwarfRegNum<[172]>;
+def Ry172 : AMDILReg<173, "r172">, DwarfRegNum<[173]>;
+def Rz172 : AMDILReg<174, "r172">, DwarfRegNum<[174]>;
+def Rw172 : AMDILReg<175, "r172">, DwarfRegNum<[175]>;
+def Rx173 : AMDILReg<173, "r173">, DwarfRegNum<[173]>;
+def Ry173 : AMDILReg<174, "r173">, DwarfRegNum<[174]>;
+def Rz173 : AMDILReg<175, "r173">, DwarfRegNum<[175]>;
+def Rw173 : AMDILReg<176, "r173">, DwarfRegNum<[176]>;
+def Rx174 : AMDILReg<174, "r174">, DwarfRegNum<[174]>;
+def Ry174 : AMDILReg<175, "r174">, DwarfRegNum<[175]>;
+def Rz174 : AMDILReg<176, "r174">, DwarfRegNum<[176]>;
+def Rw174 : AMDILReg<177, "r174">, DwarfRegNum<[177]>;
+def Rx175 : AMDILReg<175, "r175">, DwarfRegNum<[175]>;
+def Ry175 : AMDILReg<176, "r175">, DwarfRegNum<[176]>;
+def Rz175 : AMDILReg<177, "r175">, DwarfRegNum<[177]>;
+def Rw175 : AMDILReg<178, "r175">, DwarfRegNum<[178]>;
+def Rx176 : AMDILReg<176, "r176">, DwarfRegNum<[176]>;
+def Ry176 : AMDILReg<177, "r176">, DwarfRegNum<[177]>;
+def Rz176 : AMDILReg<178, "r176">, DwarfRegNum<[178]>;
+def Rw176 : AMDILReg<179, "r176">, DwarfRegNum<[179]>;
+def Rx177 : AMDILReg<177, "r177">, DwarfRegNum<[177]>;
+def Ry177 : AMDILReg<178, "r177">, DwarfRegNum<[178]>;
+def Rz177 : AMDILReg<179, "r177">, DwarfRegNum<[179]>;
+def Rw177 : AMDILReg<180, "r177">, DwarfRegNum<[180]>;
+def Rx178 : AMDILReg<178, "r178">, DwarfRegNum<[178]>;
+def Ry178 : AMDILReg<179, "r178">, DwarfRegNum<[179]>;
+def Rz178 : AMDILReg<180, "r178">, DwarfRegNum<[180]>;
+def Rw178 : AMDILReg<181, "r178">, DwarfRegNum<[181]>;
+def Rx179 : AMDILReg<179, "r179">, DwarfRegNum<[179]>;
+def Ry179 : AMDILReg<180, "r179">, DwarfRegNum<[180]>;
+def Rz179 : AMDILReg<181, "r179">, DwarfRegNum<[181]>;
+def Rw179 : AMDILReg<182, "r179">, DwarfRegNum<[182]>;
+def Rx180 : AMDILReg<180, "r180">, DwarfRegNum<[180]>;
+def Ry180 : AMDILReg<181, "r180">, DwarfRegNum<[181]>;
+def Rz180 : AMDILReg<182, "r180">, DwarfRegNum<[182]>;
+def Rw180 : AMDILReg<183, "r180">, DwarfRegNum<[183]>;
+def Rx181 : AMDILReg<181, "r181">, DwarfRegNum<[181]>;
+def Ry181 : AMDILReg<182, "r181">, DwarfRegNum<[182]>;
+def Rz181 : AMDILReg<183, "r181">, DwarfRegNum<[183]>;
+def Rw181 : AMDILReg<184, "r181">, DwarfRegNum<[184]>;
+def Rx182 : AMDILReg<182, "r182">, DwarfRegNum<[182]>;
+def Ry182 : AMDILReg<183, "r182">, DwarfRegNum<[183]>;
+def Rz182 : AMDILReg<184, "r182">, DwarfRegNum<[184]>;
+def Rw182 : AMDILReg<185, "r182">, DwarfRegNum<[185]>;
+def Rx183 : AMDILReg<183, "r183">, DwarfRegNum<[183]>;
+def Ry183 : AMDILReg<184, "r183">, DwarfRegNum<[184]>;
+def Rz183 : AMDILReg<185, "r183">, DwarfRegNum<[185]>;
+def Rw183 : AMDILReg<186, "r183">, DwarfRegNum<[186]>;
+def Rx184 : AMDILReg<184, "r184">, DwarfRegNum<[184]>;
+def Ry184 : AMDILReg<185, "r184">, DwarfRegNum<[185]>;
+def Rz184 : AMDILReg<186, "r184">, DwarfRegNum<[186]>;
+def Rw184 : AMDILReg<187, "r184">, DwarfRegNum<[187]>;
+def Rx185 : AMDILReg<185, "r185">, DwarfRegNum<[185]>;
+def Ry185 : AMDILReg<186, "r185">, DwarfRegNum<[186]>;
+def Rz185 : AMDILReg<187, "r185">, DwarfRegNum<[187]>;
+def Rw185 : AMDILReg<188, "r185">, DwarfRegNum<[188]>;
+def Rx186 : AMDILReg<186, "r186">, DwarfRegNum<[186]>;
+def Ry186 : AMDILReg<187, "r186">, DwarfRegNum<[187]>;
+def Rz186 : AMDILReg<188, "r186">, DwarfRegNum<[188]>;
+def Rw186 : AMDILReg<189, "r186">, DwarfRegNum<[189]>;
+def Rx187 : AMDILReg<187, "r187">, DwarfRegNum<[187]>;
+def Ry187 : AMDILReg<188, "r187">, DwarfRegNum<[188]>;
+def Rz187 : AMDILReg<189, "r187">, DwarfRegNum<[189]>;
+def Rw187 : AMDILReg<190, "r187">, DwarfRegNum<[190]>;
+def Rx188 : AMDILReg<188, "r188">, DwarfRegNum<[188]>;
+def Ry188 : AMDILReg<189, "r188">, DwarfRegNum<[189]>;
+def Rz188 : AMDILReg<190, "r188">, DwarfRegNum<[190]>;
+def Rw188 : AMDILReg<191, "r188">, DwarfRegNum<[191]>;
+def Rx189 : AMDILReg<189, "r189">, DwarfRegNum<[189]>;
+def Ry189 : AMDILReg<190, "r189">, DwarfRegNum<[190]>;
+def Rz189 : AMDILReg<191, "r189">, DwarfRegNum<[191]>;
+def Rw189 : AMDILReg<192, "r189">, DwarfRegNum<[192]>;
+def Rx190 : AMDILReg<190, "r190">, DwarfRegNum<[190]>;
+def Ry190 : AMDILReg<191, "r190">, DwarfRegNum<[191]>;
+def Rz190 : AMDILReg<192, "r190">, DwarfRegNum<[192]>;
+def Rw190 : AMDILReg<193, "r190">, DwarfRegNum<[193]>;
+def Rx191 : AMDILReg<191, "r191">, DwarfRegNum<[191]>;
+def Ry191 : AMDILReg<192, "r191">, DwarfRegNum<[192]>;
+def Rz191 : AMDILReg<193, "r191">, DwarfRegNum<[193]>;
+def Rw191 : AMDILReg<194, "r191">, DwarfRegNum<[194]>;
+def Rx1000 : AMDILReg<1000, "r1000">, DwarfRegNum<[1000]>;
+def Ry1000 : AMDILReg<1001, "r1000">, DwarfRegNum<[1001]>;
+def Rz1000 : AMDILReg<1002, "r1000">, DwarfRegNum<[1002]>;
+def Rw1000 : AMDILReg<1003, "r1000">, DwarfRegNum<[1003]>;
+def Rx1001 : AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def Ry1001 : AMDILReg<1002, "r1001">, DwarfRegNum<[1002]>;
+def Rz1001 : AMDILReg<1003, "r1001">, DwarfRegNum<[1003]>;
+def Rw1001 : AMDILReg<1004, "r1001">, DwarfRegNum<[1004]>;
+def Rx1002 : AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def Ry1002 : AMDILReg<1003, "r1002">, DwarfRegNum<[1003]>;
+def Rz1002 : AMDILReg<1004, "r1002">, DwarfRegNum<[1004]>;
+def Rw1002 : AMDILReg<1005, "r1002">, DwarfRegNum<[1005]>;
+def Rx1003 : AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def Ry1003 : AMDILReg<1004, "r1003">, DwarfRegNum<[1004]>;
+def Rz1003 : AMDILReg<1005, "r1003">, DwarfRegNum<[1005]>;
+def Rw1003 : AMDILReg<1006, "r1003">, DwarfRegNum<[1006]>;
+def Rx1004 : AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def Ry1004 : AMDILReg<1005, "r1004">, DwarfRegNum<[1005]>;
+def Rz1004 : AMDILReg<1006, "r1004">, DwarfRegNum<[1006]>;
+def Rw1004 : AMDILReg<1007, "r1004">, DwarfRegNum<[1007]>;
+def Rx1005 : AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def Ry1005 : AMDILReg<1006, "r1005">, DwarfRegNum<[1006]>;
+def Rz1005 : AMDILReg<1007, "r1005">, DwarfRegNum<[1007]>;
+def Rw1005 : AMDILReg<1008, "r1005">, DwarfRegNum<[1008]>;
+def Rx1006 : AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def Ry1006 : AMDILReg<1007, "r1006">, DwarfRegNum<[1007]>;
+def Rz1006 : AMDILReg<1008, "r1006">, DwarfRegNum<[1008]>;
+def Rw1006 : AMDILReg<1009, "r1006">, DwarfRegNum<[1009]>;
+def Rx1007 : AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def Ry1007 : AMDILReg<1008, "r1007">, DwarfRegNum<[1008]>;
+def Rz1007 : AMDILReg<1009, "r1007">, DwarfRegNum<[1009]>;
+def Rw1007 : AMDILReg<1010, "r1007">, DwarfRegNum<[1010]>;
+def Rx1008 : AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def Ry1008 : AMDILReg<1009, "r1008">, DwarfRegNum<[1009]>;
+def Rz1008 : AMDILReg<1010, "r1008">, DwarfRegNum<[1010]>;
+def Rw1008 : AMDILReg<1011, "r1008">, DwarfRegNum<[1011]>;
+def Rx1009 : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def Ry1009 : AMDILReg<1010, "r1009">, DwarfRegNum<[1010]>;
+def Rz1009 : AMDILReg<1011, "r1009">, DwarfRegNum<[1011]>;
+def Rw1009 : AMDILReg<1012, "r1009">, DwarfRegNum<[1012]>;
+def Rx1010 : AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def Ry1010 : AMDILReg<1011, "r1010">, DwarfRegNum<[1011]>;
+def Rz1010 : AMDILReg<1012, "r1010">, DwarfRegNum<[1012]>;
+def Rw1010 : AMDILReg<1013, "r1010">, DwarfRegNum<[1013]>;
+def Rx1011 : AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def Ry1011 : AMDILReg<1012, "r1011">, DwarfRegNum<[1012]>;
+def Rz1011 : AMDILReg<1013, "r1011">, DwarfRegNum<[1013]>;
+def Rw1011 : AMDILReg<1014, "r1011">, DwarfRegNum<[1014]>;
+def Rx1012 : AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def Ry1012 : AMDILReg<1013, "r1012">, DwarfRegNum<[1013]>;
+def Rz1012 : AMDILReg<1014, "r1012">, DwarfRegNum<[1014]>;
+def Rw1012 : AMDILReg<1015, "r1012">, DwarfRegNum<[1015]>;
diff --git a/src/gallium/drivers/radeon/AMDILRegisterDefsV2.td b/src/gallium/drivers/radeon/AMDILRegisterDefsV2.td
new file mode 100644
index 00000000000..e286e74fa6f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterDefsV2.td
@@ -0,0 +1,408 @@
+def Rxy1 : AMDILRegWithSubReg<1, "r1", [Rx1, Ry1], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1]>;
+def Rzw1 : AMDILRegWithSubReg<1, "r1", [Rz1, Rw1], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1]>;
+def Rxy2 : AMDILRegWithSubReg<2, "r2", [Rx2, Ry2], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[2]>;
+def Rzw2 : AMDILRegWithSubReg<2, "r2", [Rz2, Rw2], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[2]>;
+def Rxy3 : AMDILRegWithSubReg<3, "r3", [Rx3, Ry3], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[3]>;
+def Rzw3 : AMDILRegWithSubReg<3, "r3", [Rz3, Rw3], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[3]>;
+def Rxy4 : AMDILRegWithSubReg<4, "r4", [Rx4, Ry4], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[4]>;
+def Rzw4 : AMDILRegWithSubReg<4, "r4", [Rz4, Rw4], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[4]>;
+def Rxy5 : AMDILRegWithSubReg<5, "r5", [Rx5, Ry5], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[5]>;
+def Rzw5 : AMDILRegWithSubReg<5, "r5", [Rz5, Rw5], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[5]>;
+def Rxy6 : AMDILRegWithSubReg<6, "r6", [Rx6, Ry6], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[6]>;
+def Rzw6 : AMDILRegWithSubReg<6, "r6", [Rz6, Rw6], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[6]>;
+def Rxy7 : AMDILRegWithSubReg<7, "r7", [Rx7, Ry7], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[7]>;
+def Rzw7 : AMDILRegWithSubReg<7, "r7", [Rz7, Rw7], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[7]>;
+def Rxy8 : AMDILRegWithSubReg<8, "r8", [Rx8, Ry8], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[8]>;
+def Rzw8 : AMDILRegWithSubReg<8, "r8", [Rz8, Rw8], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[8]>;
+def Rxy9 : AMDILRegWithSubReg<9, "r9", [Rx9, Ry9], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[9]>;
+def Rzw9 : AMDILRegWithSubReg<9, "r9", [Rz9, Rw9], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[9]>;
+def Rxy10 : AMDILRegWithSubReg<10, "r10", [Rx10, Ry10], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[10]>;
+def Rzw10 : AMDILRegWithSubReg<10, "r10", [Rz10, Rw10], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[10]>;
+def Rxy11 : AMDILRegWithSubReg<11, "r11", [Rx11, Ry11], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[11]>;
+def Rzw11 : AMDILRegWithSubReg<11, "r11", [Rz11, Rw11], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[11]>;
+def Rxy12 : AMDILRegWithSubReg<12, "r12", [Rx12, Ry12], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[12]>;
+def Rzw12 : AMDILRegWithSubReg<12, "r12", [Rz12, Rw12], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[12]>;
+def Rxy13 : AMDILRegWithSubReg<13, "r13", [Rx13, Ry13], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[13]>;
+def Rzw13 : AMDILRegWithSubReg<13, "r13", [Rz13, Rw13], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[13]>;
+def Rxy14 : AMDILRegWithSubReg<14, "r14", [Rx14, Ry14], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[14]>;
+def Rzw14 : AMDILRegWithSubReg<14, "r14", [Rz14, Rw14], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[14]>;
+def Rxy15 : AMDILRegWithSubReg<15, "r15", [Rx15, Ry15], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[15]>;
+def Rzw15 : AMDILRegWithSubReg<15, "r15", [Rz15, Rw15], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[15]>;
+def Rxy16 : AMDILRegWithSubReg<16, "r16", [Rx16, Ry16], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[16]>;
+def Rzw16 : AMDILRegWithSubReg<16, "r16", [Rz16, Rw16], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[16]>;
+def Rxy17 : AMDILRegWithSubReg<17, "r17", [Rx17, Ry17], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[17]>;
+def Rzw17 : AMDILRegWithSubReg<17, "r17", [Rz17, Rw17], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[17]>;
+def Rxy18 : AMDILRegWithSubReg<18, "r18", [Rx18, Ry18], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[18]>;
+def Rzw18 : AMDILRegWithSubReg<18, "r18", [Rz18, Rw18], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[18]>;
+def Rxy19 : AMDILRegWithSubReg<19, "r19", [Rx19, Ry19], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[19]>;
+def Rzw19 : AMDILRegWithSubReg<19, "r19", [Rz19, Rw19], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[19]>;
+def Rxy20 : AMDILRegWithSubReg<20, "r20", [Rx20, Ry20], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[20]>;
+def Rzw20 : AMDILRegWithSubReg<20, "r20", [Rz20, Rw20], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[20]>;
+def Rxy21 : AMDILRegWithSubReg<21, "r21", [Rx21, Ry21], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[21]>;
+def Rzw21 : AMDILRegWithSubReg<21, "r21", [Rz21, Rw21], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[21]>;
+def Rxy22 : AMDILRegWithSubReg<22, "r22", [Rx22, Ry22], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[22]>;
+def Rzw22 : AMDILRegWithSubReg<22, "r22", [Rz22, Rw22], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[22]>;
+def Rxy23 : AMDILRegWithSubReg<23, "r23", [Rx23, Ry23], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[23]>;
+def Rzw23 : AMDILRegWithSubReg<23, "r23", [Rz23, Rw23], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[23]>;
+def Rxy24 : AMDILRegWithSubReg<24, "r24", [Rx24, Ry24], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[24]>;
+def Rzw24 : AMDILRegWithSubReg<24, "r24", [Rz24, Rw24], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[24]>;
+def Rxy25 : AMDILRegWithSubReg<25, "r25", [Rx25, Ry25], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[25]>;
+def Rzw25 : AMDILRegWithSubReg<25, "r25", [Rz25, Rw25], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[25]>;
+def Rxy26 : AMDILRegWithSubReg<26, "r26", [Rx26, Ry26], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[26]>;
+def Rzw26 : AMDILRegWithSubReg<26, "r26", [Rz26, Rw26], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[26]>;
+def Rxy27 : AMDILRegWithSubReg<27, "r27", [Rx27, Ry27], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[27]>;
+def Rzw27 : AMDILRegWithSubReg<27, "r27", [Rz27, Rw27], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[27]>;
+def Rxy28 : AMDILRegWithSubReg<28, "r28", [Rx28, Ry28], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[28]>;
+def Rzw28 : AMDILRegWithSubReg<28, "r28", [Rz28, Rw28], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[28]>;
+def Rxy29 : AMDILRegWithSubReg<29, "r29", [Rx29, Ry29], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[29]>;
+def Rzw29 : AMDILRegWithSubReg<29, "r29", [Rz29, Rw29], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[29]>;
+def Rxy30 : AMDILRegWithSubReg<30, "r30", [Rx30, Ry30], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[30]>;
+def Rzw30 : AMDILRegWithSubReg<30, "r30", [Rz30, Rw30], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[30]>;
+def Rxy31 : AMDILRegWithSubReg<31, "r31", [Rx31, Ry31], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[31]>;
+def Rzw31 : AMDILRegWithSubReg<31, "r31", [Rz31, Rw31], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[31]>;
+def Rxy32 : AMDILRegWithSubReg<32, "r32", [Rx32, Ry32], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[32]>;
+def Rzw32 : AMDILRegWithSubReg<32, "r32", [Rz32, Rw32], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[32]>;
+def Rxy33 : AMDILRegWithSubReg<33, "r33", [Rx33, Ry33], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[33]>;
+def Rzw33 : AMDILRegWithSubReg<33, "r33", [Rz33, Rw33], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[33]>;
+def Rxy34 : AMDILRegWithSubReg<34, "r34", [Rx34, Ry34], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[34]>;
+def Rzw34 : AMDILRegWithSubReg<34, "r34", [Rz34, Rw34], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[34]>;
+def Rxy35 : AMDILRegWithSubReg<35, "r35", [Rx35, Ry35], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[35]>;
+def Rzw35 : AMDILRegWithSubReg<35, "r35", [Rz35, Rw35], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[35]>;
+def Rxy36 : AMDILRegWithSubReg<36, "r36", [Rx36, Ry36], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[36]>;
+def Rzw36 : AMDILRegWithSubReg<36, "r36", [Rz36, Rw36], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[36]>;
+def Rxy37 : AMDILRegWithSubReg<37, "r37", [Rx37, Ry37], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[37]>;
+def Rzw37 : AMDILRegWithSubReg<37, "r37", [Rz37, Rw37], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[37]>;
+def Rxy38 : AMDILRegWithSubReg<38, "r38", [Rx38, Ry38], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[38]>;
+def Rzw38 : AMDILRegWithSubReg<38, "r38", [Rz38, Rw38], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[38]>;
+def Rxy39 : AMDILRegWithSubReg<39, "r39", [Rx39, Ry39], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[39]>;
+def Rzw39 : AMDILRegWithSubReg<39, "r39", [Rz39, Rw39], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[39]>;
+def Rxy40 : AMDILRegWithSubReg<40, "r40", [Rx40, Ry40], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[40]>;
+def Rzw40 : AMDILRegWithSubReg<40, "r40", [Rz40, Rw40], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[40]>;
+def Rxy41 : AMDILRegWithSubReg<41, "r41", [Rx41, Ry41], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[41]>;
+def Rzw41 : AMDILRegWithSubReg<41, "r41", [Rz41, Rw41], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[41]>;
+def Rxy42 : AMDILRegWithSubReg<42, "r42", [Rx42, Ry42], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[42]>;
+def Rzw42 : AMDILRegWithSubReg<42, "r42", [Rz42, Rw42], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[42]>;
+def Rxy43 : AMDILRegWithSubReg<43, "r43", [Rx43, Ry43], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[43]>;
+def Rzw43 : AMDILRegWithSubReg<43, "r43", [Rz43, Rw43], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[43]>;
+def Rxy44 : AMDILRegWithSubReg<44, "r44", [Rx44, Ry44], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[44]>;
+def Rzw44 : AMDILRegWithSubReg<44, "r44", [Rz44, Rw44], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[44]>;
+def Rxy45 : AMDILRegWithSubReg<45, "r45", [Rx45, Ry45], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[45]>;
+def Rzw45 : AMDILRegWithSubReg<45, "r45", [Rz45, Rw45], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[45]>;
+def Rxy46 : AMDILRegWithSubReg<46, "r46", [Rx46, Ry46], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[46]>;
+def Rzw46 : AMDILRegWithSubReg<46, "r46", [Rz46, Rw46], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[46]>;
+def Rxy47 : AMDILRegWithSubReg<47, "r47", [Rx47, Ry47], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[47]>;
+def Rzw47 : AMDILRegWithSubReg<47, "r47", [Rz47, Rw47], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[47]>;
+def Rxy48 : AMDILRegWithSubReg<48, "r48", [Rx48, Ry48], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[48]>;
+def Rzw48 : AMDILRegWithSubReg<48, "r48", [Rz48, Rw48], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[48]>;
+def Rxy49 : AMDILRegWithSubReg<49, "r49", [Rx49, Ry49], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[49]>;
+def Rzw49 : AMDILRegWithSubReg<49, "r49", [Rz49, Rw49], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[49]>;
+def Rxy50 : AMDILRegWithSubReg<50, "r50", [Rx50, Ry50], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[50]>;
+def Rzw50 : AMDILRegWithSubReg<50, "r50", [Rz50, Rw50], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[50]>;
+def Rxy51 : AMDILRegWithSubReg<51, "r51", [Rx51, Ry51], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[51]>;
+def Rzw51 : AMDILRegWithSubReg<51, "r51", [Rz51, Rw51], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[51]>;
+def Rxy52 : AMDILRegWithSubReg<52, "r52", [Rx52, Ry52], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[52]>;
+def Rzw52 : AMDILRegWithSubReg<52, "r52", [Rz52, Rw52], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[52]>;
+def Rxy53 : AMDILRegWithSubReg<53, "r53", [Rx53, Ry53], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[53]>;
+def Rzw53 : AMDILRegWithSubReg<53, "r53", [Rz53, Rw53], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[53]>;
+def Rxy54 : AMDILRegWithSubReg<54, "r54", [Rx54, Ry54], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[54]>;
+def Rzw54 : AMDILRegWithSubReg<54, "r54", [Rz54, Rw54], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[54]>;
+def Rxy55 : AMDILRegWithSubReg<55, "r55", [Rx55, Ry55], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[55]>;
+def Rzw55 : AMDILRegWithSubReg<55, "r55", [Rz55, Rw55], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[55]>;
+def Rxy56 : AMDILRegWithSubReg<56, "r56", [Rx56, Ry56], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[56]>;
+def Rzw56 : AMDILRegWithSubReg<56, "r56", [Rz56, Rw56], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[56]>;
+def Rxy57 : AMDILRegWithSubReg<57, "r57", [Rx57, Ry57], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[57]>;
+def Rzw57 : AMDILRegWithSubReg<57, "r57", [Rz57, Rw57], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[57]>;
+def Rxy58 : AMDILRegWithSubReg<58, "r58", [Rx58, Ry58], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[58]>;
+def Rzw58 : AMDILRegWithSubReg<58, "r58", [Rz58, Rw58], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[58]>;
+def Rxy59 : AMDILRegWithSubReg<59, "r59", [Rx59, Ry59], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[59]>;
+def Rzw59 : AMDILRegWithSubReg<59, "r59", [Rz59, Rw59], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[59]>;
+def Rxy60 : AMDILRegWithSubReg<60, "r60", [Rx60, Ry60], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[60]>;
+def Rzw60 : AMDILRegWithSubReg<60, "r60", [Rz60, Rw60], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[60]>;
+def Rxy61 : AMDILRegWithSubReg<61, "r61", [Rx61, Ry61], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[61]>;
+def Rzw61 : AMDILRegWithSubReg<61, "r61", [Rz61, Rw61], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[61]>;
+def Rxy62 : AMDILRegWithSubReg<62, "r62", [Rx62, Ry62], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[62]>;
+def Rzw62 : AMDILRegWithSubReg<62, "r62", [Rz62, Rw62], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[62]>;
+def Rxy63 : AMDILRegWithSubReg<63, "r63", [Rx63, Ry63], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[63]>;
+def Rzw63 : AMDILRegWithSubReg<63, "r63", [Rz63, Rw63], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[63]>;
+def Rxy64 : AMDILRegWithSubReg<64, "r64", [Rx64, Ry64], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[64]>;
+def Rzw64 : AMDILRegWithSubReg<64, "r64", [Rz64, Rw64], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[64]>;
+def Rxy65 : AMDILRegWithSubReg<65, "r65", [Rx65, Ry65], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[65]>;
+def Rzw65 : AMDILRegWithSubReg<65, "r65", [Rz65, Rw65], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[65]>;
+def Rxy66 : AMDILRegWithSubReg<66, "r66", [Rx66, Ry66], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[66]>;
+def Rzw66 : AMDILRegWithSubReg<66, "r66", [Rz66, Rw66], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[66]>;
+def Rxy67 : AMDILRegWithSubReg<67, "r67", [Rx67, Ry67], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[67]>;
+def Rzw67 : AMDILRegWithSubReg<67, "r67", [Rz67, Rw67], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[67]>;
+def Rxy68 : AMDILRegWithSubReg<68, "r68", [Rx68, Ry68], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[68]>;
+def Rzw68 : AMDILRegWithSubReg<68, "r68", [Rz68, Rw68], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[68]>;
+def Rxy69 : AMDILRegWithSubReg<69, "r69", [Rx69, Ry69], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[69]>;
+def Rzw69 : AMDILRegWithSubReg<69, "r69", [Rz69, Rw69], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[69]>;
+def Rxy70 : AMDILRegWithSubReg<70, "r70", [Rx70, Ry70], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[70]>;
+def Rzw70 : AMDILRegWithSubReg<70, "r70", [Rz70, Rw70], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[70]>;
+def Rxy71 : AMDILRegWithSubReg<71, "r71", [Rx71, Ry71], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[71]>;
+def Rzw71 : AMDILRegWithSubReg<71, "r71", [Rz71, Rw71], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[71]>;
+def Rxy72 : AMDILRegWithSubReg<72, "r72", [Rx72, Ry72], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[72]>;
+def Rzw72 : AMDILRegWithSubReg<72, "r72", [Rz72, Rw72], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[72]>;
+def Rxy73 : AMDILRegWithSubReg<73, "r73", [Rx73, Ry73], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[73]>;
+def Rzw73 : AMDILRegWithSubReg<73, "r73", [Rz73, Rw73], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[73]>;
+def Rxy74 : AMDILRegWithSubReg<74, "r74", [Rx74, Ry74], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[74]>;
+def Rzw74 : AMDILRegWithSubReg<74, "r74", [Rz74, Rw74], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[74]>;
+def Rxy75 : AMDILRegWithSubReg<75, "r75", [Rx75, Ry75], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[75]>;
+def Rzw75 : AMDILRegWithSubReg<75, "r75", [Rz75, Rw75], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[75]>;
+def Rxy76 : AMDILRegWithSubReg<76, "r76", [Rx76, Ry76], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[76]>;
+def Rzw76 : AMDILRegWithSubReg<76, "r76", [Rz76, Rw76], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[76]>;
+def Rxy77 : AMDILRegWithSubReg<77, "r77", [Rx77, Ry77], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[77]>;
+def Rzw77 : AMDILRegWithSubReg<77, "r77", [Rz77, Rw77], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[77]>;
+def Rxy78 : AMDILRegWithSubReg<78, "r78", [Rx78, Ry78], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[78]>;
+def Rzw78 : AMDILRegWithSubReg<78, "r78", [Rz78, Rw78], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[78]>;
+def Rxy79 : AMDILRegWithSubReg<79, "r79", [Rx79, Ry79], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[79]>;
+def Rzw79 : AMDILRegWithSubReg<79, "r79", [Rz79, Rw79], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[79]>;
+def Rxy80 : AMDILRegWithSubReg<80, "r80", [Rx80, Ry80], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[80]>;
+def Rzw80 : AMDILRegWithSubReg<80, "r80", [Rz80, Rw80], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[80]>;
+def Rxy81 : AMDILRegWithSubReg<81, "r81", [Rx81, Ry81], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[81]>;
+def Rzw81 : AMDILRegWithSubReg<81, "r81", [Rz81, Rw81], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[81]>;
+def Rxy82 : AMDILRegWithSubReg<82, "r82", [Rx82, Ry82], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[82]>;
+def Rzw82 : AMDILRegWithSubReg<82, "r82", [Rz82, Rw82], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[82]>;
+def Rxy83 : AMDILRegWithSubReg<83, "r83", [Rx83, Ry83], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[83]>;
+def Rzw83 : AMDILRegWithSubReg<83, "r83", [Rz83, Rw83], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[83]>;
+def Rxy84 : AMDILRegWithSubReg<84, "r84", [Rx84, Ry84], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[84]>;
+def Rzw84 : AMDILRegWithSubReg<84, "r84", [Rz84, Rw84], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[84]>;
+def Rxy85 : AMDILRegWithSubReg<85, "r85", [Rx85, Ry85], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[85]>;
+def Rzw85 : AMDILRegWithSubReg<85, "r85", [Rz85, Rw85], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[85]>;
+def Rxy86 : AMDILRegWithSubReg<86, "r86", [Rx86, Ry86], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[86]>;
+def Rzw86 : AMDILRegWithSubReg<86, "r86", [Rz86, Rw86], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[86]>;
+def Rxy87 : AMDILRegWithSubReg<87, "r87", [Rx87, Ry87], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[87]>;
+def Rzw87 : AMDILRegWithSubReg<87, "r87", [Rz87, Rw87], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[87]>;
+def Rxy88 : AMDILRegWithSubReg<88, "r88", [Rx88, Ry88], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[88]>;
+def Rzw88 : AMDILRegWithSubReg<88, "r88", [Rz88, Rw88], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[88]>;
+def Rxy89 : AMDILRegWithSubReg<89, "r89", [Rx89, Ry89], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[89]>;
+def Rzw89 : AMDILRegWithSubReg<89, "r89", [Rz89, Rw89], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[89]>;
+def Rxy90 : AMDILRegWithSubReg<90, "r90", [Rx90, Ry90], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[90]>;
+def Rzw90 : AMDILRegWithSubReg<90, "r90", [Rz90, Rw90], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[90]>;
+def Rxy91 : AMDILRegWithSubReg<91, "r91", [Rx91, Ry91], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[91]>;
+def Rzw91 : AMDILRegWithSubReg<91, "r91", [Rz91, Rw91], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[91]>;
+def Rxy92 : AMDILRegWithSubReg<92, "r92", [Rx92, Ry92], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[92]>;
+def Rzw92 : AMDILRegWithSubReg<92, "r92", [Rz92, Rw92], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[92]>;
+def Rxy93 : AMDILRegWithSubReg<93, "r93", [Rx93, Ry93], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[93]>;
+def Rzw93 : AMDILRegWithSubReg<93, "r93", [Rz93, Rw93], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[93]>;
+def Rxy94 : AMDILRegWithSubReg<94, "r94", [Rx94, Ry94], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[94]>;
+def Rzw94 : AMDILRegWithSubReg<94, "r94", [Rz94, Rw94], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[94]>;
+def Rxy95 : AMDILRegWithSubReg<95, "r95", [Rx95, Ry95], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[95]>;
+def Rzw95 : AMDILRegWithSubReg<95, "r95", [Rz95, Rw95], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[95]>;
+def Rxy96 : AMDILRegWithSubReg<96, "r96", [Rx96, Ry96], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[96]>;
+def Rzw96 : AMDILRegWithSubReg<96, "r96", [Rz96, Rw96], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[96]>;
+def Rxy97 : AMDILRegWithSubReg<97, "r97", [Rx97, Ry97], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[97]>;
+def Rzw97 : AMDILRegWithSubReg<97, "r97", [Rz97, Rw97], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[97]>;
+def Rxy98 : AMDILRegWithSubReg<98, "r98", [Rx98, Ry98], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[98]>;
+def Rzw98 : AMDILRegWithSubReg<98, "r98", [Rz98, Rw98], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[98]>;
+def Rxy99 : AMDILRegWithSubReg<99, "r99", [Rx99, Ry99], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[99]>;
+def Rzw99 : AMDILRegWithSubReg<99, "r99", [Rz99, Rw99], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[99]>;
+def Rxy100 : AMDILRegWithSubReg<100, "r100", [Rx100, Ry100], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[100]>;
+def Rzw100 : AMDILRegWithSubReg<100, "r100", [Rz100, Rw100], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[100]>;
+def Rxy101 : AMDILRegWithSubReg<101, "r101", [Rx101, Ry101], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[101]>;
+def Rzw101 : AMDILRegWithSubReg<101, "r101", [Rz101, Rw101], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[101]>;
+def Rxy102 : AMDILRegWithSubReg<102, "r102", [Rx102, Ry102], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[102]>;
+def Rzw102 : AMDILRegWithSubReg<102, "r102", [Rz102, Rw102], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[102]>;
+def Rxy103 : AMDILRegWithSubReg<103, "r103", [Rx103, Ry103], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[103]>;
+def Rzw103 : AMDILRegWithSubReg<103, "r103", [Rz103, Rw103], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[103]>;
+def Rxy104 : AMDILRegWithSubReg<104, "r104", [Rx104, Ry104], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[104]>;
+def Rzw104 : AMDILRegWithSubReg<104, "r104", [Rz104, Rw104], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[104]>;
+def Rxy105 : AMDILRegWithSubReg<105, "r105", [Rx105, Ry105], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[105]>;
+def Rzw105 : AMDILRegWithSubReg<105, "r105", [Rz105, Rw105], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[105]>;
+def Rxy106 : AMDILRegWithSubReg<106, "r106", [Rx106, Ry106], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[106]>;
+def Rzw106 : AMDILRegWithSubReg<106, "r106", [Rz106, Rw106], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[106]>;
+def Rxy107 : AMDILRegWithSubReg<107, "r107", [Rx107, Ry107], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[107]>;
+def Rzw107 : AMDILRegWithSubReg<107, "r107", [Rz107, Rw107], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[107]>;
+def Rxy108 : AMDILRegWithSubReg<108, "r108", [Rx108, Ry108], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[108]>;
+def Rzw108 : AMDILRegWithSubReg<108, "r108", [Rz108, Rw108], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[108]>;
+def Rxy109 : AMDILRegWithSubReg<109, "r109", [Rx109, Ry109], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[109]>;
+def Rzw109 : AMDILRegWithSubReg<109, "r109", [Rz109, Rw109], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[109]>;
+def Rxy110 : AMDILRegWithSubReg<110, "r110", [Rx110, Ry110], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[110]>;
+def Rzw110 : AMDILRegWithSubReg<110, "r110", [Rz110, Rw110], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[110]>;
+def Rxy111 : AMDILRegWithSubReg<111, "r111", [Rx111, Ry111], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[111]>;
+def Rzw111 : AMDILRegWithSubReg<111, "r111", [Rz111, Rw111], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[111]>;
+def Rxy112 : AMDILRegWithSubReg<112, "r112", [Rx112, Ry112], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[112]>;
+def Rzw112 : AMDILRegWithSubReg<112, "r112", [Rz112, Rw112], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[112]>;
+def Rxy113 : AMDILRegWithSubReg<113, "r113", [Rx113, Ry113], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[113]>;
+def Rzw113 : AMDILRegWithSubReg<113, "r113", [Rz113, Rw113], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[113]>;
+def Rxy114 : AMDILRegWithSubReg<114, "r114", [Rx114, Ry114], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[114]>;
+def Rzw114 : AMDILRegWithSubReg<114, "r114", [Rz114, Rw114], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[114]>;
+def Rxy115 : AMDILRegWithSubReg<115, "r115", [Rx115, Ry115], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[115]>;
+def Rzw115 : AMDILRegWithSubReg<115, "r115", [Rz115, Rw115], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[115]>;
+def Rxy116 : AMDILRegWithSubReg<116, "r116", [Rx116, Ry116], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[116]>;
+def Rzw116 : AMDILRegWithSubReg<116, "r116", [Rz116, Rw116], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[116]>;
+def Rxy117 : AMDILRegWithSubReg<117, "r117", [Rx117, Ry117], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[117]>;
+def Rzw117 : AMDILRegWithSubReg<117, "r117", [Rz117, Rw117], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[117]>;
+def Rxy118 : AMDILRegWithSubReg<118, "r118", [Rx118, Ry118], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[118]>;
+def Rzw118 : AMDILRegWithSubReg<118, "r118", [Rz118, Rw118], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[118]>;
+def Rxy119 : AMDILRegWithSubReg<119, "r119", [Rx119, Ry119], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[119]>;
+def Rzw119 : AMDILRegWithSubReg<119, "r119", [Rz119, Rw119], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[119]>;
+def Rxy120 : AMDILRegWithSubReg<120, "r120", [Rx120, Ry120], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[120]>;
+def Rzw120 : AMDILRegWithSubReg<120, "r120", [Rz120, Rw120], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[120]>;
+def Rxy121 : AMDILRegWithSubReg<121, "r121", [Rx121, Ry121], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[121]>;
+def Rzw121 : AMDILRegWithSubReg<121, "r121", [Rz121, Rw121], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[121]>;
+def Rxy122 : AMDILRegWithSubReg<122, "r122", [Rx122, Ry122], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[122]>;
+def Rzw122 : AMDILRegWithSubReg<122, "r122", [Rz122, Rw122], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[122]>;
+def Rxy123 : AMDILRegWithSubReg<123, "r123", [Rx123, Ry123], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[123]>;
+def Rzw123 : AMDILRegWithSubReg<123, "r123", [Rz123, Rw123], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[123]>;
+def Rxy124 : AMDILRegWithSubReg<124, "r124", [Rx124, Ry124], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[124]>;
+def Rzw124 : AMDILRegWithSubReg<124, "r124", [Rz124, Rw124], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[124]>;
+def Rxy125 : AMDILRegWithSubReg<125, "r125", [Rx125, Ry125], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[125]>;
+def Rzw125 : AMDILRegWithSubReg<125, "r125", [Rz125, Rw125], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[125]>;
+def Rxy126 : AMDILRegWithSubReg<126, "r126", [Rx126, Ry126], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[126]>;
+def Rzw126 : AMDILRegWithSubReg<126, "r126", [Rz126, Rw126], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[126]>;
+def Rxy127 : AMDILRegWithSubReg<127, "r127", [Rx127, Ry127], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[127]>;
+def Rzw127 : AMDILRegWithSubReg<127, "r127", [Rz127, Rw127], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[127]>;
+def Rxy128 : AMDILRegWithSubReg<128, "r128", [Rx128, Ry128], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[128]>;
+def Rzw128 : AMDILRegWithSubReg<128, "r128", [Rz128, Rw128], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[128]>;
+def Rxy129 : AMDILRegWithSubReg<129, "r129", [Rx129, Ry129], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[129]>;
+def Rzw129 : AMDILRegWithSubReg<129, "r129", [Rz129, Rw129], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[129]>;
+def Rxy130 : AMDILRegWithSubReg<130, "r130", [Rx130, Ry130], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[130]>;
+def Rzw130 : AMDILRegWithSubReg<130, "r130", [Rz130, Rw130], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[130]>;
+def Rxy131 : AMDILRegWithSubReg<131, "r131", [Rx131, Ry131], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[131]>;
+def Rzw131 : AMDILRegWithSubReg<131, "r131", [Rz131, Rw131], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[131]>;
+def Rxy132 : AMDILRegWithSubReg<132, "r132", [Rx132, Ry132], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[132]>;
+def Rzw132 : AMDILRegWithSubReg<132, "r132", [Rz132, Rw132], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[132]>;
+def Rxy133 : AMDILRegWithSubReg<133, "r133", [Rx133, Ry133], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[133]>;
+def Rzw133 : AMDILRegWithSubReg<133, "r133", [Rz133, Rw133], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[133]>;
+def Rxy134 : AMDILRegWithSubReg<134, "r134", [Rx134, Ry134], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[134]>;
+def Rzw134 : AMDILRegWithSubReg<134, "r134", [Rz134, Rw134], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[134]>;
+def Rxy135 : AMDILRegWithSubReg<135, "r135", [Rx135, Ry135], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[135]>;
+def Rzw135 : AMDILRegWithSubReg<135, "r135", [Rz135, Rw135], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[135]>;
+def Rxy136 : AMDILRegWithSubReg<136, "r136", [Rx136, Ry136], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[136]>;
+def Rzw136 : AMDILRegWithSubReg<136, "r136", [Rz136, Rw136], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[136]>;
+def Rxy137 : AMDILRegWithSubReg<137, "r137", [Rx137, Ry137], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[137]>;
+def Rzw137 : AMDILRegWithSubReg<137, "r137", [Rz137, Rw137], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[137]>;
+def Rxy138 : AMDILRegWithSubReg<138, "r138", [Rx138, Ry138], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[138]>;
+def Rzw138 : AMDILRegWithSubReg<138, "r138", [Rz138, Rw138], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[138]>;
+def Rxy139 : AMDILRegWithSubReg<139, "r139", [Rx139, Ry139], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[139]>;
+def Rzw139 : AMDILRegWithSubReg<139, "r139", [Rz139, Rw139], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[139]>;
+def Rxy140 : AMDILRegWithSubReg<140, "r140", [Rx140, Ry140], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[140]>;
+def Rzw140 : AMDILRegWithSubReg<140, "r140", [Rz140, Rw140], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[140]>;
+def Rxy141 : AMDILRegWithSubReg<141, "r141", [Rx141, Ry141], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[141]>;
+def Rzw141 : AMDILRegWithSubReg<141, "r141", [Rz141, Rw141], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[141]>;
+def Rxy142 : AMDILRegWithSubReg<142, "r142", [Rx142, Ry142], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[142]>;
+def Rzw142 : AMDILRegWithSubReg<142, "r142", [Rz142, Rw142], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[142]>;
+def Rxy143 : AMDILRegWithSubReg<143, "r143", [Rx143, Ry143], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[143]>;
+def Rzw143 : AMDILRegWithSubReg<143, "r143", [Rz143, Rw143], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[143]>;
+def Rxy144 : AMDILRegWithSubReg<144, "r144", [Rx144, Ry144], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[144]>;
+def Rzw144 : AMDILRegWithSubReg<144, "r144", [Rz144, Rw144], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[144]>;
+def Rxy145 : AMDILRegWithSubReg<145, "r145", [Rx145, Ry145], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[145]>;
+def Rzw145 : AMDILRegWithSubReg<145, "r145", [Rz145, Rw145], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[145]>;
+def Rxy146 : AMDILRegWithSubReg<146, "r146", [Rx146, Ry146], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[146]>;
+def Rzw146 : AMDILRegWithSubReg<146, "r146", [Rz146, Rw146], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[146]>;
+def Rxy147 : AMDILRegWithSubReg<147, "r147", [Rx147, Ry147], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[147]>;
+def Rzw147 : AMDILRegWithSubReg<147, "r147", [Rz147, Rw147], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[147]>;
+def Rxy148 : AMDILRegWithSubReg<148, "r148", [Rx148, Ry148], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[148]>;
+def Rzw148 : AMDILRegWithSubReg<148, "r148", [Rz148, Rw148], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[148]>;
+def Rxy149 : AMDILRegWithSubReg<149, "r149", [Rx149, Ry149], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[149]>;
+def Rzw149 : AMDILRegWithSubReg<149, "r149", [Rz149, Rw149], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[149]>;
+def Rxy150 : AMDILRegWithSubReg<150, "r150", [Rx150, Ry150], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[150]>;
+def Rzw150 : AMDILRegWithSubReg<150, "r150", [Rz150, Rw150], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[150]>;
+def Rxy151 : AMDILRegWithSubReg<151, "r151", [Rx151, Ry151], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[151]>;
+def Rzw151 : AMDILRegWithSubReg<151, "r151", [Rz151, Rw151], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[151]>;
+def Rxy152 : AMDILRegWithSubReg<152, "r152", [Rx152, Ry152], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[152]>;
+def Rzw152 : AMDILRegWithSubReg<152, "r152", [Rz152, Rw152], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[152]>;
+def Rxy153 : AMDILRegWithSubReg<153, "r153", [Rx153, Ry153], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[153]>;
+def Rzw153 : AMDILRegWithSubReg<153, "r153", [Rz153, Rw153], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[153]>;
+def Rxy154 : AMDILRegWithSubReg<154, "r154", [Rx154, Ry154], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[154]>;
+def Rzw154 : AMDILRegWithSubReg<154, "r154", [Rz154, Rw154], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[154]>;
+def Rxy155 : AMDILRegWithSubReg<155, "r155", [Rx155, Ry155], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[155]>;
+def Rzw155 : AMDILRegWithSubReg<155, "r155", [Rz155, Rw155], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[155]>;
+def Rxy156 : AMDILRegWithSubReg<156, "r156", [Rx156, Ry156], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[156]>;
+def Rzw156 : AMDILRegWithSubReg<156, "r156", [Rz156, Rw156], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[156]>;
+def Rxy157 : AMDILRegWithSubReg<157, "r157", [Rx157, Ry157], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[157]>;
+def Rzw157 : AMDILRegWithSubReg<157, "r157", [Rz157, Rw157], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[157]>;
+def Rxy158 : AMDILRegWithSubReg<158, "r158", [Rx158, Ry158], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[158]>;
+def Rzw158 : AMDILRegWithSubReg<158, "r158", [Rz158, Rw158], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[158]>;
+def Rxy159 : AMDILRegWithSubReg<159, "r159", [Rx159, Ry159], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[159]>;
+def Rzw159 : AMDILRegWithSubReg<159, "r159", [Rz159, Rw159], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[159]>;
+def Rxy160 : AMDILRegWithSubReg<160, "r160", [Rx160, Ry160], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[160]>;
+def Rzw160 : AMDILRegWithSubReg<160, "r160", [Rz160, Rw160], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[160]>;
+def Rxy161 : AMDILRegWithSubReg<161, "r161", [Rx161, Ry161], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[161]>;
+def Rzw161 : AMDILRegWithSubReg<161, "r161", [Rz161, Rw161], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[161]>;
+def Rxy162 : AMDILRegWithSubReg<162, "r162", [Rx162, Ry162], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[162]>;
+def Rzw162 : AMDILRegWithSubReg<162, "r162", [Rz162, Rw162], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[162]>;
+def Rxy163 : AMDILRegWithSubReg<163, "r163", [Rx163, Ry163], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[163]>;
+def Rzw163 : AMDILRegWithSubReg<163, "r163", [Rz163, Rw163], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[163]>;
+def Rxy164 : AMDILRegWithSubReg<164, "r164", [Rx164, Ry164], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[164]>;
+def Rzw164 : AMDILRegWithSubReg<164, "r164", [Rz164, Rw164], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[164]>;
+def Rxy165 : AMDILRegWithSubReg<165, "r165", [Rx165, Ry165], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[165]>;
+def Rzw165 : AMDILRegWithSubReg<165, "r165", [Rz165, Rw165], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[165]>;
+def Rxy166 : AMDILRegWithSubReg<166, "r166", [Rx166, Ry166], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[166]>;
+def Rzw166 : AMDILRegWithSubReg<166, "r166", [Rz166, Rw166], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[166]>;
+def Rxy167 : AMDILRegWithSubReg<167, "r167", [Rx167, Ry167], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[167]>;
+def Rzw167 : AMDILRegWithSubReg<167, "r167", [Rz167, Rw167], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[167]>;
+def Rxy168 : AMDILRegWithSubReg<168, "r168", [Rx168, Ry168], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[168]>;
+def Rzw168 : AMDILRegWithSubReg<168, "r168", [Rz168, Rw168], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[168]>;
+def Rxy169 : AMDILRegWithSubReg<169, "r169", [Rx169, Ry169], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[169]>;
+def Rzw169 : AMDILRegWithSubReg<169, "r169", [Rz169, Rw169], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[169]>;
+def Rxy170 : AMDILRegWithSubReg<170, "r170", [Rx170, Ry170], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[170]>;
+def Rzw170 : AMDILRegWithSubReg<170, "r170", [Rz170, Rw170], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[170]>;
+def Rxy171 : AMDILRegWithSubReg<171, "r171", [Rx171, Ry171], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[171]>;
+def Rzw171 : AMDILRegWithSubReg<171, "r171", [Rz171, Rw171], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[171]>;
+def Rxy172 : AMDILRegWithSubReg<172, "r172", [Rx172, Ry172], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[172]>;
+def Rzw172 : AMDILRegWithSubReg<172, "r172", [Rz172, Rw172], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[172]>;
+def Rxy173 : AMDILRegWithSubReg<173, "r173", [Rx173, Ry173], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[173]>;
+def Rzw173 : AMDILRegWithSubReg<173, "r173", [Rz173, Rw173], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[173]>;
+def Rxy174 : AMDILRegWithSubReg<174, "r174", [Rx174, Ry174], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[174]>;
+def Rzw174 : AMDILRegWithSubReg<174, "r174", [Rz174, Rw174], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[174]>;
+def Rxy175 : AMDILRegWithSubReg<175, "r175", [Rx175, Ry175], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[175]>;
+def Rzw175 : AMDILRegWithSubReg<175, "r175", [Rz175, Rw175], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[175]>;
+def Rxy176 : AMDILRegWithSubReg<176, "r176", [Rx176, Ry176], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[176]>;
+def Rzw176 : AMDILRegWithSubReg<176, "r176", [Rz176, Rw176], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[176]>;
+def Rxy177 : AMDILRegWithSubReg<177, "r177", [Rx177, Ry177], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[177]>;
+def Rzw177 : AMDILRegWithSubReg<177, "r177", [Rz177, Rw177], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[177]>;
+def Rxy178 : AMDILRegWithSubReg<178, "r178", [Rx178, Ry178], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[178]>;
+def Rzw178 : AMDILRegWithSubReg<178, "r178", [Rz178, Rw178], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[178]>;
+def Rxy179 : AMDILRegWithSubReg<179, "r179", [Rx179, Ry179], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[179]>;
+def Rzw179 : AMDILRegWithSubReg<179, "r179", [Rz179, Rw179], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[179]>;
+def Rxy180 : AMDILRegWithSubReg<180, "r180", [Rx180, Ry180], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[180]>;
+def Rzw180 : AMDILRegWithSubReg<180, "r180", [Rz180, Rw180], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[180]>;
+def Rxy181 : AMDILRegWithSubReg<181, "r181", [Rx181, Ry181], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[181]>;
+def Rzw181 : AMDILRegWithSubReg<181, "r181", [Rz181, Rw181], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[181]>;
+def Rxy182 : AMDILRegWithSubReg<182, "r182", [Rx182, Ry182], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[182]>;
+def Rzw182 : AMDILRegWithSubReg<182, "r182", [Rz182, Rw182], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[182]>;
+def Rxy183 : AMDILRegWithSubReg<183, "r183", [Rx183, Ry183], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[183]>;
+def Rzw183 : AMDILRegWithSubReg<183, "r183", [Rz183, Rw183], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[183]>;
+def Rxy184 : AMDILRegWithSubReg<184, "r184", [Rx184, Ry184], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[184]>;
+def Rzw184 : AMDILRegWithSubReg<184, "r184", [Rz184, Rw184], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[184]>;
+def Rxy185 : AMDILRegWithSubReg<185, "r185", [Rx185, Ry185], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[185]>;
+def Rzw185 : AMDILRegWithSubReg<185, "r185", [Rz185, Rw185], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[185]>;
+def Rxy186 : AMDILRegWithSubReg<186, "r186", [Rx186, Ry186], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[186]>;
+def Rzw186 : AMDILRegWithSubReg<186, "r186", [Rz186, Rw186], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[186]>;
+def Rxy187 : AMDILRegWithSubReg<187, "r187", [Rx187, Ry187], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[187]>;
+def Rzw187 : AMDILRegWithSubReg<187, "r187", [Rz187, Rw187], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[187]>;
+def Rxy188 : AMDILRegWithSubReg<188, "r188", [Rx188, Ry188], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[188]>;
+def Rzw188 : AMDILRegWithSubReg<188, "r188", [Rz188, Rw188], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[188]>;
+def Rxy189 : AMDILRegWithSubReg<189, "r189", [Rx189, Ry189], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[189]>;
+def Rzw189 : AMDILRegWithSubReg<189, "r189", [Rz189, Rw189], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[189]>;
+def Rxy190 : AMDILRegWithSubReg<190, "r190", [Rx190, Ry190], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[190]>;
+def Rzw190 : AMDILRegWithSubReg<190, "r190", [Rz190, Rw190], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[190]>;
+def Rxy191 : AMDILRegWithSubReg<191, "r191", [Rx191, Ry191], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[191]>;
+def Rzw191 : AMDILRegWithSubReg<191, "r191", [Rz191, Rw191], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[191]>;
+def Rxy1000 : AMDILRegWithSubReg<1000, "r1000", [Rx1000, Ry1000], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1000]>;
+def Rzw1000 : AMDILRegWithSubReg<1000, "r1000", [Rz1000, Rw1000], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1000]>;
+def Rxy1001 : AMDILRegWithSubReg<1001, "r1001", [Rx1001, Ry1001], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1001]>;
+def Rzw1001 : AMDILRegWithSubReg<1001, "r1001", [Rz1001, Rw1001], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1001]>;
+def Rxy1002 : AMDILRegWithSubReg<1002, "r1002", [Rx1002, Ry1002], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1002]>;
+def Rzw1002 : AMDILRegWithSubReg<1002, "r1002", [Rz1002, Rw1002], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1002]>;
+def Rxy1003 : AMDILRegWithSubReg<1003, "r1003", [Rx1003, Ry1003], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1003]>;
+def Rzw1003 : AMDILRegWithSubReg<1003, "r1003", [Rz1003, Rw1003], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1003]>;
+def Rxy1004 : AMDILRegWithSubReg<1004, "r1004", [Rx1004, Ry1004], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1004]>;
+def Rzw1004 : AMDILRegWithSubReg<1004, "r1004", [Rz1004, Rw1004], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1004]>;
+def Rxy1005 : AMDILRegWithSubReg<1005, "r1005", [Rx1005, Ry1005], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1005]>;
+def Rzw1005 : AMDILRegWithSubReg<1005, "r1005", [Rz1005, Rw1005], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1005]>;
+def Rxy1006 : AMDILRegWithSubReg<1006, "r1006", [Rx1006, Ry1006], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1006]>;
+def Rzw1006 : AMDILRegWithSubReg<1006, "r1006", [Rz1006, Rw1006], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1006]>;
+def Rxy1007 : AMDILRegWithSubReg<1007, "r1007", [Rx1007, Ry1007], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1007]>;
+def Rzw1007 : AMDILRegWithSubReg<1007, "r1007", [Rz1007, Rw1007], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1007]>;
+def Rxy1008 : AMDILRegWithSubReg<1008, "r1008", [Rx1008, Ry1008], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1008]>;
+def Rzw1008 : AMDILRegWithSubReg<1008, "r1008", [Rz1008, Rw1008], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1008]>;
+def Rxy1009 : AMDILRegWithSubReg<1009, "r1009", [Rx1009, Ry1009], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1009]>;
+def Rzw1009 : AMDILRegWithSubReg<1009, "r1009", [Rz1009, Rw1009], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1009]>;
+def Rxy1010 : AMDILRegWithSubReg<1010, "r1010", [Rx1010, Ry1010], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1010]>;
+def Rzw1010 : AMDILRegWithSubReg<1010, "r1010", [Rz1010, Rw1010], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1010]>;
+def Rxy1011 : AMDILRegWithSubReg<1011, "r1011", [Rx1011, Ry1011], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1011]>;
+def Rzw1011 : AMDILRegWithSubReg<1011, "r1011", [Rz1011, Rw1011], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1011]>;
+def Rxy1012 : AMDILRegWithSubReg<1012, "r1012", [Rx1012, Ry1012], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[1012]>;
+def Rzw1012 : AMDILRegWithSubReg<1012, "r1012", [Rz1012, Rw1012], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[1012]>;
diff --git a/src/gallium/drivers/radeon/AMDILRegisterDefsV4.td b/src/gallium/drivers/radeon/AMDILRegisterDefsV4.td
new file mode 100644
index 00000000000..a778c7d14df
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterDefsV4.td
@@ -0,0 +1,204 @@
+def R1 : AMDILRegWithSubReg<1, "r1", [Rxy1, Rzw1], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1]>;
+def R2 : AMDILRegWithSubReg<2, "r2", [Rxy2, Rzw2], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[2]>;
+def R3 : AMDILRegWithSubReg<3, "r3", [Rxy3, Rzw3], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[3]>;
+def R4 : AMDILRegWithSubReg<4, "r4", [Rxy4, Rzw4], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[4]>;
+def R5 : AMDILRegWithSubReg<5, "r5", [Rxy5, Rzw5], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[5]>;
+def R6 : AMDILRegWithSubReg<6, "r6", [Rxy6, Rzw6], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[6]>;
+def R7 : AMDILRegWithSubReg<7, "r7", [Rxy7, Rzw7], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[7]>;
+def R8 : AMDILRegWithSubReg<8, "r8", [Rxy8, Rzw8], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[8]>;
+def R9 : AMDILRegWithSubReg<9, "r9", [Rxy9, Rzw9], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[9]>;
+def R10 : AMDILRegWithSubReg<10, "r10", [Rxy10, Rzw10], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[10]>;
+def R11 : AMDILRegWithSubReg<11, "r11", [Rxy11, Rzw11], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[11]>;
+def R12 : AMDILRegWithSubReg<12, "r12", [Rxy12, Rzw12], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[12]>;
+def R13 : AMDILRegWithSubReg<13, "r13", [Rxy13, Rzw13], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[13]>;
+def R14 : AMDILRegWithSubReg<14, "r14", [Rxy14, Rzw14], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[14]>;
+def R15 : AMDILRegWithSubReg<15, "r15", [Rxy15, Rzw15], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[15]>;
+def R16 : AMDILRegWithSubReg<16, "r16", [Rxy16, Rzw16], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[16]>;
+def R17 : AMDILRegWithSubReg<17, "r17", [Rxy17, Rzw17], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[17]>;
+def R18 : AMDILRegWithSubReg<18, "r18", [Rxy18, Rzw18], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[18]>;
+def R19 : AMDILRegWithSubReg<19, "r19", [Rxy19, Rzw19], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[19]>;
+def R20 : AMDILRegWithSubReg<20, "r20", [Rxy20, Rzw20], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[20]>;
+def R21 : AMDILRegWithSubReg<21, "r21", [Rxy21, Rzw21], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[21]>;
+def R22 : AMDILRegWithSubReg<22, "r22", [Rxy22, Rzw22], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[22]>;
+def R23 : AMDILRegWithSubReg<23, "r23", [Rxy23, Rzw23], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[23]>;
+def R24 : AMDILRegWithSubReg<24, "r24", [Rxy24, Rzw24], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[24]>;
+def R25 : AMDILRegWithSubReg<25, "r25", [Rxy25, Rzw25], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[25]>;
+def R26 : AMDILRegWithSubReg<26, "r26", [Rxy26, Rzw26], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[26]>;
+def R27 : AMDILRegWithSubReg<27, "r27", [Rxy27, Rzw27], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[27]>;
+def R28 : AMDILRegWithSubReg<28, "r28", [Rxy28, Rzw28], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[28]>;
+def R29 : AMDILRegWithSubReg<29, "r29", [Rxy29, Rzw29], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[29]>;
+def R30 : AMDILRegWithSubReg<30, "r30", [Rxy30, Rzw30], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[30]>;
+def R31 : AMDILRegWithSubReg<31, "r31", [Rxy31, Rzw31], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[31]>;
+def R32 : AMDILRegWithSubReg<32, "r32", [Rxy32, Rzw32], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[32]>;
+def R33 : AMDILRegWithSubReg<33, "r33", [Rxy33, Rzw33], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[33]>;
+def R34 : AMDILRegWithSubReg<34, "r34", [Rxy34, Rzw34], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[34]>;
+def R35 : AMDILRegWithSubReg<35, "r35", [Rxy35, Rzw35], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[35]>;
+def R36 : AMDILRegWithSubReg<36, "r36", [Rxy36, Rzw36], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[36]>;
+def R37 : AMDILRegWithSubReg<37, "r37", [Rxy37, Rzw37], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[37]>;
+def R38 : AMDILRegWithSubReg<38, "r38", [Rxy38, Rzw38], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[38]>;
+def R39 : AMDILRegWithSubReg<39, "r39", [Rxy39, Rzw39], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[39]>;
+def R40 : AMDILRegWithSubReg<40, "r40", [Rxy40, Rzw40], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[40]>;
+def R41 : AMDILRegWithSubReg<41, "r41", [Rxy41, Rzw41], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[41]>;
+def R42 : AMDILRegWithSubReg<42, "r42", [Rxy42, Rzw42], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[42]>;
+def R43 : AMDILRegWithSubReg<43, "r43", [Rxy43, Rzw43], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[43]>;
+def R44 : AMDILRegWithSubReg<44, "r44", [Rxy44, Rzw44], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[44]>;
+def R45 : AMDILRegWithSubReg<45, "r45", [Rxy45, Rzw45], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[45]>;
+def R46 : AMDILRegWithSubReg<46, "r46", [Rxy46, Rzw46], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[46]>;
+def R47 : AMDILRegWithSubReg<47, "r47", [Rxy47, Rzw47], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[47]>;
+def R48 : AMDILRegWithSubReg<48, "r48", [Rxy48, Rzw48], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[48]>;
+def R49 : AMDILRegWithSubReg<49, "r49", [Rxy49, Rzw49], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[49]>;
+def R50 : AMDILRegWithSubReg<50, "r50", [Rxy50, Rzw50], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[50]>;
+def R51 : AMDILRegWithSubReg<51, "r51", [Rxy51, Rzw51], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[51]>;
+def R52 : AMDILRegWithSubReg<52, "r52", [Rxy52, Rzw52], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[52]>;
+def R53 : AMDILRegWithSubReg<53, "r53", [Rxy53, Rzw53], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[53]>;
+def R54 : AMDILRegWithSubReg<54, "r54", [Rxy54, Rzw54], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[54]>;
+def R55 : AMDILRegWithSubReg<55, "r55", [Rxy55, Rzw55], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[55]>;
+def R56 : AMDILRegWithSubReg<56, "r56", [Rxy56, Rzw56], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[56]>;
+def R57 : AMDILRegWithSubReg<57, "r57", [Rxy57, Rzw57], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[57]>;
+def R58 : AMDILRegWithSubReg<58, "r58", [Rxy58, Rzw58], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[58]>;
+def R59 : AMDILRegWithSubReg<59, "r59", [Rxy59, Rzw59], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[59]>;
+def R60 : AMDILRegWithSubReg<60, "r60", [Rxy60, Rzw60], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[60]>;
+def R61 : AMDILRegWithSubReg<61, "r61", [Rxy61, Rzw61], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[61]>;
+def R62 : AMDILRegWithSubReg<62, "r62", [Rxy62, Rzw62], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[62]>;
+def R63 : AMDILRegWithSubReg<63, "r63", [Rxy63, Rzw63], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[63]>;
+def R64 : AMDILRegWithSubReg<64, "r64", [Rxy64, Rzw64], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[64]>;
+def R65 : AMDILRegWithSubReg<65, "r65", [Rxy65, Rzw65], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[65]>;
+def R66 : AMDILRegWithSubReg<66, "r66", [Rxy66, Rzw66], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[66]>;
+def R67 : AMDILRegWithSubReg<67, "r67", [Rxy67, Rzw67], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[67]>;
+def R68 : AMDILRegWithSubReg<68, "r68", [Rxy68, Rzw68], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[68]>;
+def R69 : AMDILRegWithSubReg<69, "r69", [Rxy69, Rzw69], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[69]>;
+def R70 : AMDILRegWithSubReg<70, "r70", [Rxy70, Rzw70], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[70]>;
+def R71 : AMDILRegWithSubReg<71, "r71", [Rxy71, Rzw71], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[71]>;
+def R72 : AMDILRegWithSubReg<72, "r72", [Rxy72, Rzw72], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[72]>;
+def R73 : AMDILRegWithSubReg<73, "r73", [Rxy73, Rzw73], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[73]>;
+def R74 : AMDILRegWithSubReg<74, "r74", [Rxy74, Rzw74], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[74]>;
+def R75 : AMDILRegWithSubReg<75, "r75", [Rxy75, Rzw75], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[75]>;
+def R76 : AMDILRegWithSubReg<76, "r76", [Rxy76, Rzw76], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[76]>;
+def R77 : AMDILRegWithSubReg<77, "r77", [Rxy77, Rzw77], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[77]>;
+def R78 : AMDILRegWithSubReg<78, "r78", [Rxy78, Rzw78], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[78]>;
+def R79 : AMDILRegWithSubReg<79, "r79", [Rxy79, Rzw79], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[79]>;
+def R80 : AMDILRegWithSubReg<80, "r80", [Rxy80, Rzw80], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[80]>;
+def R81 : AMDILRegWithSubReg<81, "r81", [Rxy81, Rzw81], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[81]>;
+def R82 : AMDILRegWithSubReg<82, "r82", [Rxy82, Rzw82], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[82]>;
+def R83 : AMDILRegWithSubReg<83, "r83", [Rxy83, Rzw83], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[83]>;
+def R84 : AMDILRegWithSubReg<84, "r84", [Rxy84, Rzw84], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[84]>;
+def R85 : AMDILRegWithSubReg<85, "r85", [Rxy85, Rzw85], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[85]>;
+def R86 : AMDILRegWithSubReg<86, "r86", [Rxy86, Rzw86], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[86]>;
+def R87 : AMDILRegWithSubReg<87, "r87", [Rxy87, Rzw87], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[87]>;
+def R88 : AMDILRegWithSubReg<88, "r88", [Rxy88, Rzw88], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[88]>;
+def R89 : AMDILRegWithSubReg<89, "r89", [Rxy89, Rzw89], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[89]>;
+def R90 : AMDILRegWithSubReg<90, "r90", [Rxy90, Rzw90], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[90]>;
+def R91 : AMDILRegWithSubReg<91, "r91", [Rxy91, Rzw91], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[91]>;
+def R92 : AMDILRegWithSubReg<92, "r92", [Rxy92, Rzw92], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[92]>;
+def R93 : AMDILRegWithSubReg<93, "r93", [Rxy93, Rzw93], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[93]>;
+def R94 : AMDILRegWithSubReg<94, "r94", [Rxy94, Rzw94], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[94]>;
+def R95 : AMDILRegWithSubReg<95, "r95", [Rxy95, Rzw95], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[95]>;
+def R96 : AMDILRegWithSubReg<96, "r96", [Rxy96, Rzw96], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[96]>;
+def R97 : AMDILRegWithSubReg<97, "r97", [Rxy97, Rzw97], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[97]>;
+def R98 : AMDILRegWithSubReg<98, "r98", [Rxy98, Rzw98], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[98]>;
+def R99 : AMDILRegWithSubReg<99, "r99", [Rxy99, Rzw99], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[99]>;
+def R100 : AMDILRegWithSubReg<100, "r100", [Rxy100, Rzw100], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[100]>;
+def R101 : AMDILRegWithSubReg<101, "r101", [Rxy101, Rzw101], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[101]>;
+def R102 : AMDILRegWithSubReg<102, "r102", [Rxy102, Rzw102], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[102]>;
+def R103 : AMDILRegWithSubReg<103, "r103", [Rxy103, Rzw103], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[103]>;
+def R104 : AMDILRegWithSubReg<104, "r104", [Rxy104, Rzw104], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[104]>;
+def R105 : AMDILRegWithSubReg<105, "r105", [Rxy105, Rzw105], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[105]>;
+def R106 : AMDILRegWithSubReg<106, "r106", [Rxy106, Rzw106], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[106]>;
+def R107 : AMDILRegWithSubReg<107, "r107", [Rxy107, Rzw107], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[107]>;
+def R108 : AMDILRegWithSubReg<108, "r108", [Rxy108, Rzw108], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[108]>;
+def R109 : AMDILRegWithSubReg<109, "r109", [Rxy109, Rzw109], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[109]>;
+def R110 : AMDILRegWithSubReg<110, "r110", [Rxy110, Rzw110], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[110]>;
+def R111 : AMDILRegWithSubReg<111, "r111", [Rxy111, Rzw111], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[111]>;
+def R112 : AMDILRegWithSubReg<112, "r112", [Rxy112, Rzw112], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[112]>;
+def R113 : AMDILRegWithSubReg<113, "r113", [Rxy113, Rzw113], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[113]>;
+def R114 : AMDILRegWithSubReg<114, "r114", [Rxy114, Rzw114], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[114]>;
+def R115 : AMDILRegWithSubReg<115, "r115", [Rxy115, Rzw115], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[115]>;
+def R116 : AMDILRegWithSubReg<116, "r116", [Rxy116, Rzw116], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[116]>;
+def R117 : AMDILRegWithSubReg<117, "r117", [Rxy117, Rzw117], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[117]>;
+def R118 : AMDILRegWithSubReg<118, "r118", [Rxy118, Rzw118], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[118]>;
+def R119 : AMDILRegWithSubReg<119, "r119", [Rxy119, Rzw119], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[119]>;
+def R120 : AMDILRegWithSubReg<120, "r120", [Rxy120, Rzw120], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[120]>;
+def R121 : AMDILRegWithSubReg<121, "r121", [Rxy121, Rzw121], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[121]>;
+def R122 : AMDILRegWithSubReg<122, "r122", [Rxy122, Rzw122], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[122]>;
+def R123 : AMDILRegWithSubReg<123, "r123", [Rxy123, Rzw123], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[123]>;
+def R124 : AMDILRegWithSubReg<124, "r124", [Rxy124, Rzw124], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[124]>;
+def R125 : AMDILRegWithSubReg<125, "r125", [Rxy125, Rzw125], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[125]>;
+def R126 : AMDILRegWithSubReg<126, "r126", [Rxy126, Rzw126], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[126]>;
+def R127 : AMDILRegWithSubReg<127, "r127", [Rxy127, Rzw127], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[127]>;
+def R128 : AMDILRegWithSubReg<128, "r128", [Rxy128, Rzw128], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[128]>;
+def R129 : AMDILRegWithSubReg<129, "r129", [Rxy129, Rzw129], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[129]>;
+def R130 : AMDILRegWithSubReg<130, "r130", [Rxy130, Rzw130], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[130]>;
+def R131 : AMDILRegWithSubReg<131, "r131", [Rxy131, Rzw131], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[131]>;
+def R132 : AMDILRegWithSubReg<132, "r132", [Rxy132, Rzw132], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[132]>;
+def R133 : AMDILRegWithSubReg<133, "r133", [Rxy133, Rzw133], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[133]>;
+def R134 : AMDILRegWithSubReg<134, "r134", [Rxy134, Rzw134], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[134]>;
+def R135 : AMDILRegWithSubReg<135, "r135", [Rxy135, Rzw135], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[135]>;
+def R136 : AMDILRegWithSubReg<136, "r136", [Rxy136, Rzw136], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[136]>;
+def R137 : AMDILRegWithSubReg<137, "r137", [Rxy137, Rzw137], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[137]>;
+def R138 : AMDILRegWithSubReg<138, "r138", [Rxy138, Rzw138], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[138]>;
+def R139 : AMDILRegWithSubReg<139, "r139", [Rxy139, Rzw139], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[139]>;
+def R140 : AMDILRegWithSubReg<140, "r140", [Rxy140, Rzw140], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[140]>;
+def R141 : AMDILRegWithSubReg<141, "r141", [Rxy141, Rzw141], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[141]>;
+def R142 : AMDILRegWithSubReg<142, "r142", [Rxy142, Rzw142], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[142]>;
+def R143 : AMDILRegWithSubReg<143, "r143", [Rxy143, Rzw143], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[143]>;
+def R144 : AMDILRegWithSubReg<144, "r144", [Rxy144, Rzw144], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[144]>;
+def R145 : AMDILRegWithSubReg<145, "r145", [Rxy145, Rzw145], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[145]>;
+def R146 : AMDILRegWithSubReg<146, "r146", [Rxy146, Rzw146], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[146]>;
+def R147 : AMDILRegWithSubReg<147, "r147", [Rxy147, Rzw147], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[147]>;
+def R148 : AMDILRegWithSubReg<148, "r148", [Rxy148, Rzw148], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[148]>;
+def R149 : AMDILRegWithSubReg<149, "r149", [Rxy149, Rzw149], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[149]>;
+def R150 : AMDILRegWithSubReg<150, "r150", [Rxy150, Rzw150], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[150]>;
+def R151 : AMDILRegWithSubReg<151, "r151", [Rxy151, Rzw151], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[151]>;
+def R152 : AMDILRegWithSubReg<152, "r152", [Rxy152, Rzw152], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[152]>;
+def R153 : AMDILRegWithSubReg<153, "r153", [Rxy153, Rzw153], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[153]>;
+def R154 : AMDILRegWithSubReg<154, "r154", [Rxy154, Rzw154], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[154]>;
+def R155 : AMDILRegWithSubReg<155, "r155", [Rxy155, Rzw155], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[155]>;
+def R156 : AMDILRegWithSubReg<156, "r156", [Rxy156, Rzw156], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[156]>;
+def R157 : AMDILRegWithSubReg<157, "r157", [Rxy157, Rzw157], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[157]>;
+def R158 : AMDILRegWithSubReg<158, "r158", [Rxy158, Rzw158], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[158]>;
+def R159 : AMDILRegWithSubReg<159, "r159", [Rxy159, Rzw159], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[159]>;
+def R160 : AMDILRegWithSubReg<160, "r160", [Rxy160, Rzw160], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[160]>;
+def R161 : AMDILRegWithSubReg<161, "r161", [Rxy161, Rzw161], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[161]>;
+def R162 : AMDILRegWithSubReg<162, "r162", [Rxy162, Rzw162], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[162]>;
+def R163 : AMDILRegWithSubReg<163, "r163", [Rxy163, Rzw163], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[163]>;
+def R164 : AMDILRegWithSubReg<164, "r164", [Rxy164, Rzw164], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[164]>;
+def R165 : AMDILRegWithSubReg<165, "r165", [Rxy165, Rzw165], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[165]>;
+def R166 : AMDILRegWithSubReg<166, "r166", [Rxy166, Rzw166], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[166]>;
+def R167 : AMDILRegWithSubReg<167, "r167", [Rxy167, Rzw167], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[167]>;
+def R168 : AMDILRegWithSubReg<168, "r168", [Rxy168, Rzw168], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[168]>;
+def R169 : AMDILRegWithSubReg<169, "r169", [Rxy169, Rzw169], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[169]>;
+def R170 : AMDILRegWithSubReg<170, "r170", [Rxy170, Rzw170], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[170]>;
+def R171 : AMDILRegWithSubReg<171, "r171", [Rxy171, Rzw171], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[171]>;
+def R172 : AMDILRegWithSubReg<172, "r172", [Rxy172, Rzw172], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[172]>;
+def R173 : AMDILRegWithSubReg<173, "r173", [Rxy173, Rzw173], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[173]>;
+def R174 : AMDILRegWithSubReg<174, "r174", [Rxy174, Rzw174], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[174]>;
+def R175 : AMDILRegWithSubReg<175, "r175", [Rxy175, Rzw175], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[175]>;
+def R176 : AMDILRegWithSubReg<176, "r176", [Rxy176, Rzw176], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[176]>;
+def R177 : AMDILRegWithSubReg<177, "r177", [Rxy177, Rzw177], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[177]>;
+def R178 : AMDILRegWithSubReg<178, "r178", [Rxy178, Rzw178], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[178]>;
+def R179 : AMDILRegWithSubReg<179, "r179", [Rxy179, Rzw179], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[179]>;
+def R180 : AMDILRegWithSubReg<180, "r180", [Rxy180, Rzw180], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[180]>;
+def R181 : AMDILRegWithSubReg<181, "r181", [Rxy181, Rzw181], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[181]>;
+def R182 : AMDILRegWithSubReg<182, "r182", [Rxy182, Rzw182], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[182]>;
+def R183 : AMDILRegWithSubReg<183, "r183", [Rxy183, Rzw183], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[183]>;
+def R184 : AMDILRegWithSubReg<184, "r184", [Rxy184, Rzw184], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[184]>;
+def R185 : AMDILRegWithSubReg<185, "r185", [Rxy185, Rzw185], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[185]>;
+def R186 : AMDILRegWithSubReg<186, "r186", [Rxy186, Rzw186], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[186]>;
+def R187 : AMDILRegWithSubReg<187, "r187", [Rxy187, Rzw187], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[187]>;
+def R188 : AMDILRegWithSubReg<188, "r188", [Rxy188, Rzw188], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[188]>;
+def R189 : AMDILRegWithSubReg<189, "r189", [Rxy189, Rzw189], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[189]>;
+def R190 : AMDILRegWithSubReg<190, "r190", [Rxy190, Rzw190], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[190]>;
+def R191 : AMDILRegWithSubReg<191, "r191", [Rxy191, Rzw191], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[191]>;
+def R1000 : AMDILRegWithSubReg<1000, "r1000", [Rxy1000, Rzw1000], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1000]>;
+def R1001 : AMDILRegWithSubReg<1001, "r1001", [Rxy1001, Rzw1001], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1001]>;
+def R1002 : AMDILRegWithSubReg<1002, "r1002", [Rxy1002, Rzw1002], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1002]>;
+def R1003 : AMDILRegWithSubReg<1003, "r1003", [Rxy1003, Rzw1003], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1003]>;
+def R1004 : AMDILRegWithSubReg<1004, "r1004", [Rxy1004, Rzw1004], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1004]>;
+def R1005 : AMDILRegWithSubReg<1005, "r1005", [Rxy1005, Rzw1005], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1005]>;
+def R1006 : AMDILRegWithSubReg<1006, "r1006", [Rxy1006, Rzw1006], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1006]>;
+def R1007 : AMDILRegWithSubReg<1007, "r1007", [Rxy1007, Rzw1007], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1007]>;
+def R1008 : AMDILRegWithSubReg<1008, "r1008", [Rxy1008, Rzw1008], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1008]>;
+def R1009 : AMDILRegWithSubReg<1009, "r1009", [Rxy1009, Rzw1009], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1009]>;
+def R1010 : AMDILRegWithSubReg<1010, "r1010", [Rxy1010, Rzw1010], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1010]>;
+def R1011 : AMDILRegWithSubReg<1011, "r1011", [Rxy1011, Rzw1011], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1011]>;
+def R1012 : AMDILRegWithSubReg<1012, "r1012", [Rxy1012, Rzw1012], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[1012]>;
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
new file mode 100644
index 00000000000..5afc848512a
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
@@ -0,0 +1,275 @@
+//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILRegisterInfo.h"
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDILRegisterInfo::AMDILRegisterInfo(AMDILTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDILGenRegisterInfo(0), // NOTE(review): 0 is passed to the generated base class; the original author marked this "RA???" - confirm whether the RA register belongs here
+ TM(tm), TII(tii)
+{
+ baseOffset = 0; // both stack-tracking offsets start at zero
+ nextFuncOffset = 0;
+}
+
+const unsigned*
+AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ // The list holds only a single 0 entry; MF is not consulted.
+ // TODO: Does IL actually need any callee-saved registers? Probably not,
+ // since we can just use sequential registers. This might be easier if
+ // every function call were inlined first, because then there would be
+ // no callee issues to deal with. //TODO(getCalleeSavedRegs);
+ return CalleeSavedRegs;
+}
+
+BitVector
+AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ // Reserve register numbers 0 through 255 (the loop bound below is the
+ // literal 256, not getNumRegs() as this comment used to say). These are
+ // the registers passed in live-in/live-out and therefore cannot be killed
+ // by the scheduler. This works around a bug that was causing the
+ // linearscan register allocator to kill registers inside of the function
+ // that were also passed as LiveIn registers.
+ // NOTE(review): relies on getNumRegs() >= 256 so set(x) stays in range.
+ for (unsigned int x = 0, y = 256; x < y; ++x) {
+ Reserved.set(x);
+ }
+ return Reserved;
+}
+
+BitVector
+AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC = NULL) const
+{
+ BitVector Allocatable(getNumRegs()); // MF and RC are not consulted
+ Allocatable.clear(); // NOTE(review): llvm::BitVector::clear() may shrink the vector to size 0 rather than only zeroing the bits - confirm this is intended
+ return Allocatable;
+}
+
+const TargetRegisterClass* const*
+AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ // TODO: Keep in sync with getCalleeSavedRegs (both lists hold only a 0 entry).
+ //TODO(getCalleeSavedRegClasses);
+ return CalleeSavedRegClasses;
+}
+#if LLVM_VERSION < 2500
+bool
+AMDILRegisterInfo::hasFP(const MachineFunction &MF) const
+{
+ // TODO(hasFP): always returns false; only compiled when LLVM_VERSION < 2500.
+ return false;
+}
+#endif
+void
+AMDILRegisterInfo::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const
+{
+ MBB.erase(I); // drop the pseudo instruction; nothing is emitted in its place
+}
+
+// For each frame-index operand of the instruction, emit a LOADCONST_i32 of
+// the object's frame offset into DFP followed by an ADD_i32 of DFP and FP
+// into DFP, then rewrite the operand to refer to DFP. The rewritten operand
+// is flagged as a def when isStoreInst() reports the instruction as a store.
+#if LLVM_VERSION < 2500
+unsigned int
+#else
+void
+#endif
+AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+#if LLVM_VERSION < 2500
+ FrameIndexValue *Value,
+#endif
+ RegScavenger *RS) const
+{
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned int y = MI.getNumOperands();
+ for (unsigned int x = 0; x < y; ++x) {
+ if (!MI.getOperand(x).isFI()) {
+ continue;
+ }
+ bool def = isStoreInst(&MI);
+ int FrameIndex = MI.getOperand(x).getIndex();
+ int64_t Offset = MFI->getObjectOffset(FrameIndex);
+ //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex);
+ // Possible optimization (not implemented): only use the offsets if the
+ // size is larger than 4, which means we are storing an array instead of
+ // just a pointer. If the size is 4 we could just do register copies,
+ // since we would not need to worry about indexing dynamically.
+ // The code below always takes the offset path.
+ MachineInstr *nMI = MF.CreateMachineInstr(
+ TII.get(AMDIL::LOADCONST_i32), MI.getDebugLoc());
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
+ nMI->addOperand(
+ MachineOperand::CreateImm(Offset));
+ MI.getParent()->insert(II, nMI); // LOADCONST_i32: DFP (def), Offset
+ nMI = MF.CreateMachineInstr(
+ TII.get(AMDIL::ADD_i32), MI.getDebugLoc());
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, false));
+ nMI->addOperand(MachineOperand::CreateReg(AMDIL::FP, false));
+
+ MI.getParent()->insert(II, nMI); // ADD_i32: DFP (def), DFP, FP
+ if (MI.getOperand(x).isReg() == false) { // operand is still a frame index here
+ MI.getOperand(x).ChangeToRegister(
+ nMI->getOperand(0).getReg(), def);
+ } else { // not reachable given the isFI() guard above
+ MI.getOperand(x).setReg(
+ nMI->getOperand(0).getReg());
+ }
+ }
+#if LLVM_VERSION < 2500
+ return 0;
+#endif
+}
+
+void
+AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF) const
+{
+ //TODO(processFunctionBeforeFrameFinalized);
+ // Keep track of the amount of stack that the current function uses so
+ // that the offset can be set to the end of the stack and any other
+ // function call will not overwrite any stack variables. Each frame
+ // object adds its own size when that size is a positive multiple of 4;
+ // every other object adds a fixed 16.
+ // baseOffset = nextFuncOffset;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) {
+ int64_t size = MFI->getObjectSize(x);
+ if (!(size % 4) && size > 1) {
+ nextFuncOffset += size;
+ } else {
+ nextFuncOffset += 16;
+ }
+ }
+}
+#if LLVM_VERSION < 2500
+void
+AMDILRegisterInfo::emitPrologue(MachineFunction &MF) const
+{
+ // TODO(emitPrologue): no prologue code is emitted.
+}
+
+void
+AMDILRegisterInfo::emitEpilogue(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB) const
+{
+ // TODO(emitEpilogue): no epilogue code is emitted.
+}
+#endif
+unsigned int
+AMDILRegisterInfo::getRARegister() const
+{
+ return AMDIL::RA; // fixed register, independent of any function state
+}
+
+unsigned int
+AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return AMDIL::FP; // MF is not consulted; FP is also the register added to DFP in eliminateFrameIndex
+}
+
+unsigned int
+AMDILRegisterInfo::getEHExceptionRegister() const
+{
+ assert(0 && "What is the exception register"); // no exception register is defined; always fails when asserts are enabled
+ return 0; // reached only when asserts are compiled out
+}
+
+unsigned int
+AMDILRegisterInfo::getEHHandlerRegister() const
+{
+ assert(0 && "What is the exception handler register"); // no handler register is defined; always fails when asserts are enabled
+ return 0; // reached only when asserts are compiled out
+}
+
+int64_t
+AMDILRegisterInfo::getStackSize() const
+{
+ return nextFuncOffset - baseOffset; // total accumulated by processFunctionBeforeFrameFinalized; baseOffset is only ever set to 0 in this file
+}
+
+#define GET_REGINFO_MC_DESC
+#define GET_REGINFO_TARGET_DESC
+#include "AMDILGenRegisterInfo.inc"
+
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.h b/src/gallium/drivers/radeon/AMDILRegisterInfo.h
new file mode 100644
index 00000000000..6b43bb59c9f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.h
@@ -0,0 +1,152 @@
+//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILREGISTERINFO_H_
+#define AMDILREGISTERINFO_H_
+
+#include "AMDILLLVMPC.h"
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "AMDILGenRegisterInfo.inc"
+// See header file for explanation
+
+namespace llvm
+{
+
+ // Forward declarations; only references/pointers to these types are used
+ // in this header.
+ class AMDILTargetMachine;
+ class TargetInstrInfo;
+ class Type;
+
+ /// DWARFFlavour - Flavour of dwarf regnumbers
+ ///
+ /// Only one flavour, AMDIL_Generic (0), is defined.
+ namespace DWARFFlavour {
+ enum {
+ AMDIL_Generic = 0
+ };
+ }
+
+ /// AMDIL register information. Derives from AMDILGenRegisterInfo, whose
+ /// declaration comes from the AMDILGenRegisterInfo.inc include above
+ /// (GET_REGINFO_HEADER section).
+ ///
+ /// Several members are declared only for, or change shape with,
+ /// LLVM_VERSION < 2500; see the individual #if blocks below.
+ struct AMDILRegisterInfo : public AMDILGenRegisterInfo
+ {
+ // References held for the lifetime of this object.
+ // NOTE(review): presumably bound from the constructor arguments of the
+ // same names; the constructor body is not in this header - confirm.
+ AMDILTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDILRegisterInfo(AMDILTargetMachine &tm, const TargetInstrInfo &tii);
+ /// Code Generation virtual methods...
+ const unsigned int*
+ getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector
+ getReservedRegs(const MachineFunction &MF) const;
+ BitVector
+ getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const;
+ // hasFP is declared only when LLVM_VERSION < 2500.
+#if LLVM_VERSION < 2500
+ bool
+ hasFP(const MachineFunction &MF) const;
+#endif
+
+ void
+ eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ // eliminateFrameIndex has a version-dependent signature: for
+ // LLVM_VERSION < 2500 it returns unsigned int and takes an extra
+ // FrameIndexValue* parameter; otherwise it returns void.
+#if LLVM_VERSION < 2500
+ unsigned int
+#else
+ void
+#endif
+ eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+#if LLVM_VERSION < 2500
+ FrameIndexValue *Value = NULL,
+#endif
+ RegScavenger *RS = NULL) const;
+
+ // The out-of-line definition walks MF's frame objects and adds to
+ // nextFuncOffset.
+ void
+ processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ // Prologue/epilogue hooks are declared only when LLVM_VERSION < 2500;
+ // their definitions are currently empty stubs.
+#if LLVM_VERSION < 2500
+ void
+ emitPrologue(MachineFunction &MF) const;
+ void
+ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+#endif
+
+ // Debug information queries.
+ // The definitions return AMDIL::RA and AMDIL::FP respectively.
+ unsigned int
+ getRARegister() const;
+
+ unsigned int
+ getFrameRegister(const MachineFunction &MF) const;
+
+ // Exception handling queries.
+ // Both definitions assert(0) and then return 0; no EH registers are
+ // assigned yet.
+ unsigned int
+ getEHExceptionRegister() const;
+ unsigned int
+ getEHHandlerRegister() const;
+
+ // Returns nextFuncOffset - baseOffset.
+ int64_t
+ getStackSize() const;
+ private:
+ // Declared mutable so that const member functions (for example
+ // processFunctionBeforeFrameFinalized) can update them.
+ mutable int64_t baseOffset;
+ mutable int64_t nextFuncOffset;
+ };
+
+} // end namespace llvm
+
+#endif // AMDILREGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.td b/src/gallium/drivers/radeon/AMDILRegisterInfo.td
new file mode 100644
index 00000000000..564c60c79b5
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.td
@@ -0,0 +1,1005 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+// Base class for every AMDIL register record. `num` is stored in the 16-bit
+// Value field, `n` is forwarded to the Register base class, and the record
+// is placed in the "AMDIL" namespace.
+class AMDILReg<bits<16> num, string n> : Register<n> {
+  field bits<16> Value = num;
+  let Namespace = "AMDIL";
+}
+
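+// We will start with 8 registers for each class before expanding to more.
+// NOTE(review): the list below already defines far more than 8 registers,
+// so the sentence above looks stale - confirm and update.
+// Since the swizzle is added based on the register class, we can leave it
+// off here and just specify different registers for different register
+// classes.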
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+def R21 : AMDILReg<21, "r21">, DwarfRegNum<[21]>;
+def R22 : AMDILReg<22, "r22">, DwarfRegNum<[22]>;
+def R23 : AMDILReg<23, "r23">, DwarfRegNum<[23]>;
+def R24 : AMDILReg<24, "r24">, DwarfRegNum<[24]>;
+def R25 : AMDILReg<25, "r25">, DwarfRegNum<[25]>;
+def R26 : AMDILReg<26, "r26">, DwarfRegNum<[26]>;
+def R27 : AMDILReg<27, "r27">, DwarfRegNum<[27]>;
+def R28 : AMDILReg<28, "r28">, DwarfRegNum<[28]>;
+def R29 : AMDILReg<29, "r29">, DwarfRegNum<[29]>;
+def R30 : AMDILReg<30, "r30">, DwarfRegNum<[30]>;
+def R31 : AMDILReg<31, "r31">, DwarfRegNum<[31]>;
+def R32 : AMDILReg<32, "r32">, DwarfRegNum<[32]>;
+def R33 : AMDILReg<33, "r33">, DwarfRegNum<[33]>;
+def R34 : AMDILReg<34, "r34">, DwarfRegNum<[34]>;
+def R35 : AMDILReg<35, "r35">, DwarfRegNum<[35]>;
+def R36 : AMDILReg<36, "r36">, DwarfRegNum<[36]>;
+def R37 : AMDILReg<37, "r37">, DwarfRegNum<[37]>;
+def R38 : AMDILReg<38, "r38">, DwarfRegNum<[38]>;
+def R39 : AMDILReg<39, "r39">, DwarfRegNum<[39]>;
+def R40 : AMDILReg<40, "r40">, DwarfRegNum<[40]>;
+def R41 : AMDILReg<41, "r41">, DwarfRegNum<[41]>;
+def R42 : AMDILReg<42, "r42">, DwarfRegNum<[42]>;
+def R43 : AMDILReg<43, "r43">, DwarfRegNum<[43]>;
+def R44 : AMDILReg<44, "r44">, DwarfRegNum<[44]>;
+def R45 : AMDILReg<45, "r45">, DwarfRegNum<[45]>;
+def R46 : AMDILReg<46, "r46">, DwarfRegNum<[46]>;
+def R47 : AMDILReg<47, "r47">, DwarfRegNum<[47]>;
+def R48 : AMDILReg<48, "r48">, DwarfRegNum<[48]>;
+def R49 : AMDILReg<49, "r49">, DwarfRegNum<[49]>;
+def R50 : AMDILReg<50, "r50">, DwarfRegNum<[50]>;
+def R51 : AMDILReg<51, "r51">, DwarfRegNum<[51]>;
+def R52 : AMDILReg<52, "r52">, DwarfRegNum<[52]>;
+def R53 : AMDILReg<53, "r53">, DwarfRegNum<[53]>;
+def R54 : AMDILReg<54, "r54">, DwarfRegNum<[54]>;
+def R55 : AMDILReg<55, "r55">, DwarfRegNum<[55]>;
+def R56 : AMDILReg<56, "r56">, DwarfRegNum<[56]>;
+def R57 : AMDILReg<57, "r57">, DwarfRegNum<[57]>;
+def R58 : AMDILReg<58, "r58">, DwarfRegNum<[58]>;
+def R59 : AMDILReg<59, "r59">, DwarfRegNum<[59]>;
+def R60 : AMDILReg<60, "r60">, DwarfRegNum<[60]>;
+def R61 : AMDILReg<61, "r61">, DwarfRegNum<[61]>;
+def R62 : AMDILReg<62, "r62">, DwarfRegNum<[62]>;
+def R63 : AMDILReg<63, "r63">, DwarfRegNum<[63]>;
+def R64 : AMDILReg<64, "r64">, DwarfRegNum<[64]>;
+def R65 : AMDILReg<65, "r65">, DwarfRegNum<[65]>;
+def R66 : AMDILReg<66, "r66">, DwarfRegNum<[66]>;
+def R67 : AMDILReg<67, "r67">, DwarfRegNum<[67]>;
+def R68 : AMDILReg<68, "r68">, DwarfRegNum<[68]>;
+def R69 : AMDILReg<69, "r69">, DwarfRegNum<[69]>;
+def R70 : AMDILReg<70, "r70">, DwarfRegNum<[70]>;
+def R71 : AMDILReg<71, "r71">, DwarfRegNum<[71]>;
+def R72 : AMDILReg<72, "r72">, DwarfRegNum<[72]>;
+def R73 : AMDILReg<73, "r73">, DwarfRegNum<[73]>;
+def R74 : AMDILReg<74, "r74">, DwarfRegNum<[74]>;
+def R75 : AMDILReg<75, "r75">, DwarfRegNum<[75]>;
+def R76 : AMDILReg<76, "r76">, DwarfRegNum<[76]>;
+def R77 : AMDILReg<77, "r77">, DwarfRegNum<[77]>;
+def R78 : AMDILReg<78, "r78">, DwarfRegNum<[78]>;
+def R79 : AMDILReg<79, "r79">, DwarfRegNum<[79]>;
+def R80 : AMDILReg<80, "r80">, DwarfRegNum<[80]>;
+def R81 : AMDILReg<81, "r81">, DwarfRegNum<[81]>;
+def R82 : AMDILReg<82, "r82">, DwarfRegNum<[82]>;
+def R83 : AMDILReg<83, "r83">, DwarfRegNum<[83]>;
+def R84 : AMDILReg<84, "r84">, DwarfRegNum<[84]>;
+def R85 : AMDILReg<85, "r85">, DwarfRegNum<[85]>;
+def R86 : AMDILReg<86, "r86">, DwarfRegNum<[86]>;
+def R87 : AMDILReg<87, "r87">, DwarfRegNum<[87]>;
+def R88 : AMDILReg<88, "r88">, DwarfRegNum<[88]>;
+def R89 : AMDILReg<89, "r89">, DwarfRegNum<[89]>;
+def R90 : AMDILReg<90, "r90">, DwarfRegNum<[90]>;
+def R91 : AMDILReg<91, "r91">, DwarfRegNum<[91]>;
+def R92 : AMDILReg<92, "r92">, DwarfRegNum<[92]>;
+def R93 : AMDILReg<93, "r93">, DwarfRegNum<[93]>;
+def R94 : AMDILReg<94, "r94">, DwarfRegNum<[94]>;
+def R95 : AMDILReg<95, "r95">, DwarfRegNum<[95]>;
+def R96 : AMDILReg<96, "r96">, DwarfRegNum<[96]>;
+def R97 : AMDILReg<97, "r97">, DwarfRegNum<[97]>;
+def R98 : AMDILReg<98, "r98">, DwarfRegNum<[98]>;
+def R99 : AMDILReg<99, "r99">, DwarfRegNum<[99]>;
+def R100 : AMDILReg<100, "r100">, DwarfRegNum<[100]>;
+def R101 : AMDILReg<101, "r101">, DwarfRegNum<[101]>;
+def R102 : AMDILReg<102, "r102">, DwarfRegNum<[102]>;
+def R103 : AMDILReg<103, "r103">, DwarfRegNum<[103]>;
+def R104 : AMDILReg<104, "r104">, DwarfRegNum<[104]>;
+def R105 : AMDILReg<105, "r105">, DwarfRegNum<[105]>;
+def R106 : AMDILReg<106, "r106">, DwarfRegNum<[106]>;
+def R107 : AMDILReg<107, "r107">, DwarfRegNum<[107]>;
+def R108 : AMDILReg<108, "r108">, DwarfRegNum<[108]>;
+def R109 : AMDILReg<109, "r109">, DwarfRegNum<[109]>;
+def R110 : AMDILReg<110, "r110">, DwarfRegNum<[110]>;
+def R111 : AMDILReg<111, "r111">, DwarfRegNum<[111]>;
+def R112 : AMDILReg<112, "r112">, DwarfRegNum<[112]>;
+def R113 : AMDILReg<113, "r113">, DwarfRegNum<[113]>;
+def R114 : AMDILReg<114, "r114">, DwarfRegNum<[114]>;
+def R115 : AMDILReg<115, "r115">, DwarfRegNum<[115]>;
+def R116 : AMDILReg<116, "r116">, DwarfRegNum<[116]>;
+def R117 : AMDILReg<117, "r117">, DwarfRegNum<[117]>;
+def R118 : AMDILReg<118, "r118">, DwarfRegNum<[118]>;
+def R119 : AMDILReg<119, "r119">, DwarfRegNum<[119]>;
+def R120 : AMDILReg<120, "r120">, DwarfRegNum<[120]>;
+def R121 : AMDILReg<121, "r121">, DwarfRegNum<[121]>;
+def R122 : AMDILReg<122, "r122">, DwarfRegNum<[122]>;
+def R123 : AMDILReg<123, "r123">, DwarfRegNum<[123]>;
+def R124 : AMDILReg<124, "r124">, DwarfRegNum<[124]>;
+def R125 : AMDILReg<125, "r125">, DwarfRegNum<[125]>;
+def R126 : AMDILReg<126, "r126">, DwarfRegNum<[126]>;
+def R127 : AMDILReg<127, "r127">, DwarfRegNum<[127]>;
+def R128 : AMDILReg<128, "r128">, DwarfRegNum<[128]>;
+def R129 : AMDILReg<129, "r129">, DwarfRegNum<[129]>;
+def R130 : AMDILReg<130, "r130">, DwarfRegNum<[130]>;
+def R131 : AMDILReg<131, "r131">, DwarfRegNum<[131]>;
+def R132 : AMDILReg<132, "r132">, DwarfRegNum<[132]>;
+def R133 : AMDILReg<133, "r133">, DwarfRegNum<[133]>;
+def R134 : AMDILReg<134, "r134">, DwarfRegNum<[134]>;
+def R135 : AMDILReg<135, "r135">, DwarfRegNum<[135]>;
+def R136 : AMDILReg<136, "r136">, DwarfRegNum<[136]>;
+def R137 : AMDILReg<137, "r137">, DwarfRegNum<[137]>;
+def R138 : AMDILReg<138, "r138">, DwarfRegNum<[138]>;
+def R139 : AMDILReg<139, "r139">, DwarfRegNum<[139]>;
+def R140 : AMDILReg<140, "r140">, DwarfRegNum<[140]>;
+def R141 : AMDILReg<141, "r141">, DwarfRegNum<[141]>;
+def R142 : AMDILReg<142, "r142">, DwarfRegNum<[142]>;
+def R143 : AMDILReg<143, "r143">, DwarfRegNum<[143]>;
+def R144 : AMDILReg<144, "r144">, DwarfRegNum<[144]>;
+def R145 : AMDILReg<145, "r145">, DwarfRegNum<[145]>;
+def R146 : AMDILReg<146, "r146">, DwarfRegNum<[146]>;
+def R147 : AMDILReg<147, "r147">, DwarfRegNum<[147]>;
+def R148 : AMDILReg<148, "r148">, DwarfRegNum<[148]>;
+def R149 : AMDILReg<149, "r149">, DwarfRegNum<[149]>;
+def R150 : AMDILReg<150, "r150">, DwarfRegNum<[150]>;
+def R151 : AMDILReg<151, "r151">, DwarfRegNum<[151]>;
+def R152 : AMDILReg<152, "r152">, DwarfRegNum<[152]>;
+def R153 : AMDILReg<153, "r153">, DwarfRegNum<[153]>;
+def R154 : AMDILReg<154, "r154">, DwarfRegNum<[154]>;
+def R155 : AMDILReg<155, "r155">, DwarfRegNum<[155]>;
+def R156 : AMDILReg<156, "r156">, DwarfRegNum<[156]>;
+def R157 : AMDILReg<157, "r157">, DwarfRegNum<[157]>;
+def R158 : AMDILReg<158, "r158">, DwarfRegNum<[158]>;
+def R159 : AMDILReg<159, "r159">, DwarfRegNum<[159]>;
+def R160 : AMDILReg<160, "r160">, DwarfRegNum<[160]>;
+def R161 : AMDILReg<161, "r161">, DwarfRegNum<[161]>;
+def R162 : AMDILReg<162, "r162">, DwarfRegNum<[162]>;
+def R163 : AMDILReg<163, "r163">, DwarfRegNum<[163]>;
+def R164 : AMDILReg<164, "r164">, DwarfRegNum<[164]>;
+def R165 : AMDILReg<165, "r165">, DwarfRegNum<[165]>;
+def R166 : AMDILReg<166, "r166">, DwarfRegNum<[166]>;
+def R167 : AMDILReg<167, "r167">, DwarfRegNum<[167]>;
+def R168 : AMDILReg<168, "r168">, DwarfRegNum<[168]>;
+def R169 : AMDILReg<169, "r169">, DwarfRegNum<[169]>;
+def R170 : AMDILReg<170, "r170">, DwarfRegNum<[170]>;
+def R171 : AMDILReg<171, "r171">, DwarfRegNum<[171]>;
+def R172 : AMDILReg<172, "r172">, DwarfRegNum<[172]>;
+def R173 : AMDILReg<173, "r173">, DwarfRegNum<[173]>;
+def R174 : AMDILReg<174, "r174">, DwarfRegNum<[174]>;
+def R175 : AMDILReg<175, "r175">, DwarfRegNum<[175]>;
+def R176 : AMDILReg<176, "r176">, DwarfRegNum<[176]>;
+def R177 : AMDILReg<177, "r177">, DwarfRegNum<[177]>;
+def R178 : AMDILReg<178, "r178">, DwarfRegNum<[178]>;
+def R179 : AMDILReg<179, "r179">, DwarfRegNum<[179]>;
+def R180 : AMDILReg<180, "r180">, DwarfRegNum<[180]>;
+def R181 : AMDILReg<181, "r181">, DwarfRegNum<[181]>;
+def R182 : AMDILReg<182, "r182">, DwarfRegNum<[182]>;
+def R183 : AMDILReg<183, "r183">, DwarfRegNum<[183]>;
+def R184 : AMDILReg<184, "r184">, DwarfRegNum<[184]>;
+def R185 : AMDILReg<185, "r185">, DwarfRegNum<[185]>;
+def R186 : AMDILReg<186, "r186">, DwarfRegNum<[186]>;
+def R187 : AMDILReg<187, "r187">, DwarfRegNum<[187]>;
+def R188 : AMDILReg<188, "r188">, DwarfRegNum<[188]>;
+def R189 : AMDILReg<189, "r189">, DwarfRegNum<[189]>;
+def R190 : AMDILReg<190, "r190">, DwarfRegNum<[190]>;
+def R191 : AMDILReg<191, "r191">, DwarfRegNum<[191]>;
+def R192 : AMDILReg<192, "r192">, DwarfRegNum<[192]>;
+def R193 : AMDILReg<193, "r193">, DwarfRegNum<[193]>;
+def R194 : AMDILReg<194, "r194">, DwarfRegNum<[194]>;
+def R195 : AMDILReg<195, "r195">, DwarfRegNum<[195]>;
+def R196 : AMDILReg<196, "r196">, DwarfRegNum<[196]>;
+def R197 : AMDILReg<197, "r197">, DwarfRegNum<[197]>;
+def R198 : AMDILReg<198, "r198">, DwarfRegNum<[198]>;
+def R199 : AMDILReg<199, "r199">, DwarfRegNum<[199]>;
+def R200 : AMDILReg<200, "r200">, DwarfRegNum<[200]>;
+def R201 : AMDILReg<201, "r201">, DwarfRegNum<[201]>;
+def R202 : AMDILReg<202, "r202">, DwarfRegNum<[202]>;
+def R203 : AMDILReg<203, "r203">, DwarfRegNum<[203]>;
+def R204 : AMDILReg<204, "r204">, DwarfRegNum<[204]>;
+def R205 : AMDILReg<205, "r205">, DwarfRegNum<[205]>;
+def R206 : AMDILReg<206, "r206">, DwarfRegNum<[206]>;
+def R207 : AMDILReg<207, "r207">, DwarfRegNum<[207]>;
+def R208 : AMDILReg<208, "r208">, DwarfRegNum<[208]>;
+def R209 : AMDILReg<209, "r209">, DwarfRegNum<[209]>;
+def R210 : AMDILReg<210, "r210">, DwarfRegNum<[210]>;
+def R211 : AMDILReg<211, "r211">, DwarfRegNum<[211]>;
+def R212 : AMDILReg<212, "r212">, DwarfRegNum<[212]>;
+def R213 : AMDILReg<213, "r213">, DwarfRegNum<[213]>;
+def R214 : AMDILReg<214, "r214">, DwarfRegNum<[214]>;
+def R215 : AMDILReg<215, "r215">, DwarfRegNum<[215]>;
+def R216 : AMDILReg<216, "r216">, DwarfRegNum<[216]>;
+def R217 : AMDILReg<217, "r217">, DwarfRegNum<[217]>;
+def R218 : AMDILReg<218, "r218">, DwarfRegNum<[218]>;
+def R219 : AMDILReg<219, "r219">, DwarfRegNum<[219]>;
+def R220 : AMDILReg<220, "r220">, DwarfRegNum<[220]>;
+def R221 : AMDILReg<221, "r221">, DwarfRegNum<[221]>;
+def R222 : AMDILReg<222, "r222">, DwarfRegNum<[222]>;
+def R223 : AMDILReg<223, "r223">, DwarfRegNum<[223]>;
+def R224 : AMDILReg<224, "r224">, DwarfRegNum<[224]>;
+def R225 : AMDILReg<225, "r225">, DwarfRegNum<[225]>;
+def R226 : AMDILReg<226, "r226">, DwarfRegNum<[226]>;
+def R227 : AMDILReg<227, "r227">, DwarfRegNum<[227]>;
+def R228 : AMDILReg<228, "r228">, DwarfRegNum<[228]>;
+def R229 : AMDILReg<229, "r229">, DwarfRegNum<[229]>;
+def R230 : AMDILReg<230, "r230">, DwarfRegNum<[230]>;
+def R231 : AMDILReg<231, "r231">, DwarfRegNum<[231]>;
+def R232 : AMDILReg<232, "r232">, DwarfRegNum<[232]>;
+def R233 : AMDILReg<233, "r233">, DwarfRegNum<[233]>;
+def R234 : AMDILReg<234, "r234">, DwarfRegNum<[234]>;
+def R235 : AMDILReg<235, "r235">, DwarfRegNum<[235]>;
+def R236 : AMDILReg<236, "r236">, DwarfRegNum<[236]>;
+def R237 : AMDILReg<237, "r237">, DwarfRegNum<[237]>;
+def R238 : AMDILReg<238, "r238">, DwarfRegNum<[238]>;
+def R239 : AMDILReg<239, "r239">, DwarfRegNum<[239]>;
+def R240 : AMDILReg<240, "r240">, DwarfRegNum<[240]>;
+def R241 : AMDILReg<241, "r241">, DwarfRegNum<[241]>;
+def R242 : AMDILReg<242, "r242">, DwarfRegNum<[242]>;
+def R243 : AMDILReg<243, "r243">, DwarfRegNum<[243]>;
+def R244 : AMDILReg<244, "r244">, DwarfRegNum<[244]>;
+def R245 : AMDILReg<245, "r245">, DwarfRegNum<[245]>;
+def R246 : AMDILReg<246, "r246">, DwarfRegNum<[246]>;
+def R247 : AMDILReg<247, "r247">, DwarfRegNum<[247]>;
+def R248 : AMDILReg<248, "r248">, DwarfRegNum<[248]>;
+def R249 : AMDILReg<249, "r249">, DwarfRegNum<[249]>;
+def R250 : AMDILReg<250, "r250">, DwarfRegNum<[250]>;
+def R251 : AMDILReg<251, "r251">, DwarfRegNum<[251]>;
+def R252 : AMDILReg<252, "r252">, DwarfRegNum<[252]>;
+def R253 : AMDILReg<253, "r253">, DwarfRegNum<[253]>;
+def R254 : AMDILReg<254, "r254">, DwarfRegNum<[254]>;
+def R255 : AMDILReg<255, "r255">, DwarfRegNum<[255]>;
+def R256 : AMDILReg<256, "r256">, DwarfRegNum<[256]>;
+def R257 : AMDILReg<257, "r257">, DwarfRegNum<[257]>;
+def R258 : AMDILReg<258, "r258">, DwarfRegNum<[258]>;
+def R259 : AMDILReg<259, "r259">, DwarfRegNum<[259]>;
+def R260 : AMDILReg<260, "r260">, DwarfRegNum<[260]>;
+def R261 : AMDILReg<261, "r261">, DwarfRegNum<[261]>;
+def R262 : AMDILReg<262, "r262">, DwarfRegNum<[262]>;
+def R263 : AMDILReg<263, "r263">, DwarfRegNum<[263]>;
+def R264 : AMDILReg<264, "r264">, DwarfRegNum<[264]>;
+def R265 : AMDILReg<265, "r265">, DwarfRegNum<[265]>;
+def R266 : AMDILReg<266, "r266">, DwarfRegNum<[266]>;
+def R267 : AMDILReg<267, "r267">, DwarfRegNum<[267]>;
+def R268 : AMDILReg<268, "r268">, DwarfRegNum<[268]>;
+def R269 : AMDILReg<269, "r269">, DwarfRegNum<[269]>;
+def R270 : AMDILReg<270, "r270">, DwarfRegNum<[270]>;
+def R271 : AMDILReg<271, "r271">, DwarfRegNum<[271]>;
+def R272 : AMDILReg<272, "r272">, DwarfRegNum<[272]>;
+def R273 : AMDILReg<273, "r273">, DwarfRegNum<[273]>;
+def R274 : AMDILReg<274, "r274">, DwarfRegNum<[274]>;
+def R275 : AMDILReg<275, "r275">, DwarfRegNum<[275]>;
+def R276 : AMDILReg<276, "r276">, DwarfRegNum<[276]>;
+def R277 : AMDILReg<277, "r277">, DwarfRegNum<[277]>;
+def R278 : AMDILReg<278, "r278">, DwarfRegNum<[278]>;
+def R279 : AMDILReg<279, "r279">, DwarfRegNum<[279]>;
+def R280 : AMDILReg<280, "r280">, DwarfRegNum<[280]>;
+def R281 : AMDILReg<281, "r281">, DwarfRegNum<[281]>;
+def R282 : AMDILReg<282, "r282">, DwarfRegNum<[282]>;
+def R283 : AMDILReg<283, "r283">, DwarfRegNum<[283]>;
+def R284 : AMDILReg<284, "r284">, DwarfRegNum<[284]>;
+def R285 : AMDILReg<285, "r285">, DwarfRegNum<[285]>;
+def R286 : AMDILReg<286, "r286">, DwarfRegNum<[286]>;
+def R287 : AMDILReg<287, "r287">, DwarfRegNum<[287]>;
+def R288 : AMDILReg<288, "r288">, DwarfRegNum<[288]>;
+def R289 : AMDILReg<289, "r289">, DwarfRegNum<[289]>;
+def R290 : AMDILReg<290, "r290">, DwarfRegNum<[290]>;
+def R291 : AMDILReg<291, "r291">, DwarfRegNum<[291]>;
+def R292 : AMDILReg<292, "r292">, DwarfRegNum<[292]>;
+def R293 : AMDILReg<293, "r293">, DwarfRegNum<[293]>;
+def R294 : AMDILReg<294, "r294">, DwarfRegNum<[294]>;
+def R295 : AMDILReg<295, "r295">, DwarfRegNum<[295]>;
+def R296 : AMDILReg<296, "r296">, DwarfRegNum<[296]>;
+def R297 : AMDILReg<297, "r297">, DwarfRegNum<[297]>;
+def R298 : AMDILReg<298, "r298">, DwarfRegNum<[298]>;
+def R299 : AMDILReg<299, "r299">, DwarfRegNum<[299]>;
+def R300 : AMDILReg<300, "r300">, DwarfRegNum<[300]>;
+def R301 : AMDILReg<301, "r301">, DwarfRegNum<[301]>;
+def R302 : AMDILReg<302, "r302">, DwarfRegNum<[302]>;
+def R303 : AMDILReg<303, "r303">, DwarfRegNum<[303]>;
+def R304 : AMDILReg<304, "r304">, DwarfRegNum<[304]>;
+def R305 : AMDILReg<305, "r305">, DwarfRegNum<[305]>;
+def R306 : AMDILReg<306, "r306">, DwarfRegNum<[306]>;
+def R307 : AMDILReg<307, "r307">, DwarfRegNum<[307]>;
+def R308 : AMDILReg<308, "r308">, DwarfRegNum<[308]>;
+def R309 : AMDILReg<309, "r309">, DwarfRegNum<[309]>;
+def R310 : AMDILReg<310, "r310">, DwarfRegNum<[310]>;
+def R311 : AMDILReg<311, "r311">, DwarfRegNum<[311]>;
+def R312 : AMDILReg<312, "r312">, DwarfRegNum<[312]>;
+def R313 : AMDILReg<313, "r313">, DwarfRegNum<[313]>;
+def R314 : AMDILReg<314, "r314">, DwarfRegNum<[314]>;
+def R315 : AMDILReg<315, "r315">, DwarfRegNum<[315]>;
+def R316 : AMDILReg<316, "r316">, DwarfRegNum<[316]>;
+def R317 : AMDILReg<317, "r317">, DwarfRegNum<[317]>;
+def R318 : AMDILReg<318, "r318">, DwarfRegNum<[318]>;
+def R319 : AMDILReg<319, "r319">, DwarfRegNum<[319]>;
+def R320 : AMDILReg<320, "r320">, DwarfRegNum<[320]>;
+def R321 : AMDILReg<321, "r321">, DwarfRegNum<[321]>;
+def R322 : AMDILReg<322, "r322">, DwarfRegNum<[322]>;
+def R323 : AMDILReg<323, "r323">, DwarfRegNum<[323]>;
+def R324 : AMDILReg<324, "r324">, DwarfRegNum<[324]>;
+def R325 : AMDILReg<325, "r325">, DwarfRegNum<[325]>;
+def R326 : AMDILReg<326, "r326">, DwarfRegNum<[326]>;
+def R327 : AMDILReg<327, "r327">, DwarfRegNum<[327]>;
+def R328 : AMDILReg<328, "r328">, DwarfRegNum<[328]>;
+def R329 : AMDILReg<329, "r329">, DwarfRegNum<[329]>;
+def R330 : AMDILReg<330, "r330">, DwarfRegNum<[330]>;
+def R331 : AMDILReg<331, "r331">, DwarfRegNum<[331]>;
+def R332 : AMDILReg<332, "r332">, DwarfRegNum<[332]>;
+def R333 : AMDILReg<333, "r333">, DwarfRegNum<[333]>;
+def R334 : AMDILReg<334, "r334">, DwarfRegNum<[334]>;
+def R335 : AMDILReg<335, "r335">, DwarfRegNum<[335]>;
+def R336 : AMDILReg<336, "r336">, DwarfRegNum<[336]>;
+def R337 : AMDILReg<337, "r337">, DwarfRegNum<[337]>;
+def R338 : AMDILReg<338, "r338">, DwarfRegNum<[338]>;
+def R339 : AMDILReg<339, "r339">, DwarfRegNum<[339]>;
+def R340 : AMDILReg<340, "r340">, DwarfRegNum<[340]>;
+def R341 : AMDILReg<341, "r341">, DwarfRegNum<[341]>;
+def R342 : AMDILReg<342, "r342">, DwarfRegNum<[342]>;
+def R343 : AMDILReg<343, "r343">, DwarfRegNum<[343]>;
+def R344 : AMDILReg<344, "r344">, DwarfRegNum<[344]>;
+def R345 : AMDILReg<345, "r345">, DwarfRegNum<[345]>;
+def R346 : AMDILReg<346, "r346">, DwarfRegNum<[346]>;
+def R347 : AMDILReg<347, "r347">, DwarfRegNum<[347]>;
+def R348 : AMDILReg<348, "r348">, DwarfRegNum<[348]>;
+def R349 : AMDILReg<349, "r349">, DwarfRegNum<[349]>;
+def R350 : AMDILReg<350, "r350">, DwarfRegNum<[350]>;
+def R351 : AMDILReg<351, "r351">, DwarfRegNum<[351]>;
+def R352 : AMDILReg<352, "r352">, DwarfRegNum<[352]>;
+def R353 : AMDILReg<353, "r353">, DwarfRegNum<[353]>;
+def R354 : AMDILReg<354, "r354">, DwarfRegNum<[354]>;
+def R355 : AMDILReg<355, "r355">, DwarfRegNum<[355]>;
+def R356 : AMDILReg<356, "r356">, DwarfRegNum<[356]>;
+def R357 : AMDILReg<357, "r357">, DwarfRegNum<[357]>;
+def R358 : AMDILReg<358, "r358">, DwarfRegNum<[358]>;
+def R359 : AMDILReg<359, "r359">, DwarfRegNum<[359]>;
+def R360 : AMDILReg<360, "r360">, DwarfRegNum<[360]>;
+def R361 : AMDILReg<361, "r361">, DwarfRegNum<[361]>;
+def R362 : AMDILReg<362, "r362">, DwarfRegNum<[362]>;
+def R363 : AMDILReg<363, "r363">, DwarfRegNum<[363]>;
+def R364 : AMDILReg<364, "r364">, DwarfRegNum<[364]>;
+def R365 : AMDILReg<365, "r365">, DwarfRegNum<[365]>;
+def R366 : AMDILReg<366, "r366">, DwarfRegNum<[366]>;
+def R367 : AMDILReg<367, "r367">, DwarfRegNum<[367]>;
+def R368 : AMDILReg<368, "r368">, DwarfRegNum<[368]>;
+def R369 : AMDILReg<369, "r369">, DwarfRegNum<[369]>;
+def R370 : AMDILReg<370, "r370">, DwarfRegNum<[370]>;
+def R371 : AMDILReg<371, "r371">, DwarfRegNum<[371]>;
+def R372 : AMDILReg<372, "r372">, DwarfRegNum<[372]>;
+def R373 : AMDILReg<373, "r373">, DwarfRegNum<[373]>;
+def R374 : AMDILReg<374, "r374">, DwarfRegNum<[374]>;
+def R375 : AMDILReg<375, "r375">, DwarfRegNum<[375]>;
+def R376 : AMDILReg<376, "r376">, DwarfRegNum<[376]>;
+def R377 : AMDILReg<377, "r377">, DwarfRegNum<[377]>;
+def R378 : AMDILReg<378, "r378">, DwarfRegNum<[378]>;
+def R379 : AMDILReg<379, "r379">, DwarfRegNum<[379]>;
+def R380 : AMDILReg<380, "r380">, DwarfRegNum<[380]>;
+def R381 : AMDILReg<381, "r381">, DwarfRegNum<[381]>;
+def R382 : AMDILReg<382, "r382">, DwarfRegNum<[382]>;
+def R383 : AMDILReg<383, "r383">, DwarfRegNum<[383]>;
+def R384 : AMDILReg<384, "r384">, DwarfRegNum<[384]>;
+def R385 : AMDILReg<385, "r385">, DwarfRegNum<[385]>;
+def R386 : AMDILReg<386, "r386">, DwarfRegNum<[386]>;
+def R387 : AMDILReg<387, "r387">, DwarfRegNum<[387]>;
+def R388 : AMDILReg<388, "r388">, DwarfRegNum<[388]>;
+def R389 : AMDILReg<389, "r389">, DwarfRegNum<[389]>;
+def R390 : AMDILReg<390, "r390">, DwarfRegNum<[390]>;
+def R391 : AMDILReg<391, "r391">, DwarfRegNum<[391]>;
+def R392 : AMDILReg<392, "r392">, DwarfRegNum<[392]>;
+def R393 : AMDILReg<393, "r393">, DwarfRegNum<[393]>;
+def R394 : AMDILReg<394, "r394">, DwarfRegNum<[394]>;
+def R395 : AMDILReg<395, "r395">, DwarfRegNum<[395]>;
+def R396 : AMDILReg<396, "r396">, DwarfRegNum<[396]>;
+def R397 : AMDILReg<397, "r397">, DwarfRegNum<[397]>;
+def R398 : AMDILReg<398, "r398">, DwarfRegNum<[398]>;
+def R399 : AMDILReg<399, "r399">, DwarfRegNum<[399]>;
+def R400 : AMDILReg<400, "r400">, DwarfRegNum<[400]>;
+def R401 : AMDILReg<401, "r401">, DwarfRegNum<[401]>;
+def R402 : AMDILReg<402, "r402">, DwarfRegNum<[402]>;
+def R403 : AMDILReg<403, "r403">, DwarfRegNum<[403]>;
+def R404 : AMDILReg<404, "r404">, DwarfRegNum<[404]>;
+def R405 : AMDILReg<405, "r405">, DwarfRegNum<[405]>;
+def R406 : AMDILReg<406, "r406">, DwarfRegNum<[406]>;
+def R407 : AMDILReg<407, "r407">, DwarfRegNum<[407]>;
+def R408 : AMDILReg<408, "r408">, DwarfRegNum<[408]>;
+def R409 : AMDILReg<409, "r409">, DwarfRegNum<[409]>;
+def R410 : AMDILReg<410, "r410">, DwarfRegNum<[410]>;
+def R411 : AMDILReg<411, "r411">, DwarfRegNum<[411]>;
+def R412 : AMDILReg<412, "r412">, DwarfRegNum<[412]>;
+def R413 : AMDILReg<413, "r413">, DwarfRegNum<[413]>;
+def R414 : AMDILReg<414, "r414">, DwarfRegNum<[414]>;
+def R415 : AMDILReg<415, "r415">, DwarfRegNum<[415]>;
+def R416 : AMDILReg<416, "r416">, DwarfRegNum<[416]>;
+def R417 : AMDILReg<417, "r417">, DwarfRegNum<[417]>;
+def R418 : AMDILReg<418, "r418">, DwarfRegNum<[418]>;
+def R419 : AMDILReg<419, "r419">, DwarfRegNum<[419]>;
+def R420 : AMDILReg<420, "r420">, DwarfRegNum<[420]>;
+def R421 : AMDILReg<421, "r421">, DwarfRegNum<[421]>;
+def R422 : AMDILReg<422, "r422">, DwarfRegNum<[422]>;
+def R423 : AMDILReg<423, "r423">, DwarfRegNum<[423]>;
+def R424 : AMDILReg<424, "r424">, DwarfRegNum<[424]>;
+def R425 : AMDILReg<425, "r425">, DwarfRegNum<[425]>;
+def R426 : AMDILReg<426, "r426">, DwarfRegNum<[426]>;
+def R427 : AMDILReg<427, "r427">, DwarfRegNum<[427]>;
+def R428 : AMDILReg<428, "r428">, DwarfRegNum<[428]>;
+def R429 : AMDILReg<429, "r429">, DwarfRegNum<[429]>;
+def R430 : AMDILReg<430, "r430">, DwarfRegNum<[430]>;
+def R431 : AMDILReg<431, "r431">, DwarfRegNum<[431]>;
+def R432 : AMDILReg<432, "r432">, DwarfRegNum<[432]>;
+def R433 : AMDILReg<433, "r433">, DwarfRegNum<[433]>;
+def R434 : AMDILReg<434, "r434">, DwarfRegNum<[434]>;
+def R435 : AMDILReg<435, "r435">, DwarfRegNum<[435]>;
+def R436 : AMDILReg<436, "r436">, DwarfRegNum<[436]>;
+def R437 : AMDILReg<437, "r437">, DwarfRegNum<[437]>;
+def R438 : AMDILReg<438, "r438">, DwarfRegNum<[438]>;
+def R439 : AMDILReg<439, "r439">, DwarfRegNum<[439]>;
+def R440 : AMDILReg<440, "r440">, DwarfRegNum<[440]>;
+def R441 : AMDILReg<441, "r441">, DwarfRegNum<[441]>;
+def R442 : AMDILReg<442, "r442">, DwarfRegNum<[442]>;
+def R443 : AMDILReg<443, "r443">, DwarfRegNum<[443]>;
+def R444 : AMDILReg<444, "r444">, DwarfRegNum<[444]>;
+def R445 : AMDILReg<445, "r445">, DwarfRegNum<[445]>;
+def R446 : AMDILReg<446, "r446">, DwarfRegNum<[446]>;
+def R447 : AMDILReg<447, "r447">, DwarfRegNum<[447]>;
+def R448 : AMDILReg<448, "r448">, DwarfRegNum<[448]>;
+def R449 : AMDILReg<449, "r449">, DwarfRegNum<[449]>;
+def R450 : AMDILReg<450, "r450">, DwarfRegNum<[450]>;
+def R451 : AMDILReg<451, "r451">, DwarfRegNum<[451]>;
+def R452 : AMDILReg<452, "r452">, DwarfRegNum<[452]>;
+def R453 : AMDILReg<453, "r453">, DwarfRegNum<[453]>;
+def R454 : AMDILReg<454, "r454">, DwarfRegNum<[454]>;
+def R455 : AMDILReg<455, "r455">, DwarfRegNum<[455]>;
+def R456 : AMDILReg<456, "r456">, DwarfRegNum<[456]>;
+def R457 : AMDILReg<457, "r457">, DwarfRegNum<[457]>;
+def R458 : AMDILReg<458, "r458">, DwarfRegNum<[458]>;
+def R459 : AMDILReg<459, "r459">, DwarfRegNum<[459]>;
+def R460 : AMDILReg<460, "r460">, DwarfRegNum<[460]>;
+def R461 : AMDILReg<461, "r461">, DwarfRegNum<[461]>;
+def R462 : AMDILReg<462, "r462">, DwarfRegNum<[462]>;
+def R463 : AMDILReg<463, "r463">, DwarfRegNum<[463]>;
+def R464 : AMDILReg<464, "r464">, DwarfRegNum<[464]>;
+def R465 : AMDILReg<465, "r465">, DwarfRegNum<[465]>;
+def R466 : AMDILReg<466, "r466">, DwarfRegNum<[466]>;
+def R467 : AMDILReg<467, "r467">, DwarfRegNum<[467]>;
+def R468 : AMDILReg<468, "r468">, DwarfRegNum<[468]>;
+def R469 : AMDILReg<469, "r469">, DwarfRegNum<[469]>;
+def R470 : AMDILReg<470, "r470">, DwarfRegNum<[470]>;
+def R471 : AMDILReg<471, "r471">, DwarfRegNum<[471]>;
+def R472 : AMDILReg<472, "r472">, DwarfRegNum<[472]>;
+def R473 : AMDILReg<473, "r473">, DwarfRegNum<[473]>;
+def R474 : AMDILReg<474, "r474">, DwarfRegNum<[474]>;
+def R475 : AMDILReg<475, "r475">, DwarfRegNum<[475]>;
+def R476 : AMDILReg<476, "r476">, DwarfRegNum<[476]>;
+def R477 : AMDILReg<477, "r477">, DwarfRegNum<[477]>;
+def R478 : AMDILReg<478, "r478">, DwarfRegNum<[478]>;
+def R479 : AMDILReg<479, "r479">, DwarfRegNum<[479]>;
+def R480 : AMDILReg<480, "r480">, DwarfRegNum<[480]>;
+def R481 : AMDILReg<481, "r481">, DwarfRegNum<[481]>;
+def R482 : AMDILReg<482, "r482">, DwarfRegNum<[482]>;
+def R483 : AMDILReg<483, "r483">, DwarfRegNum<[483]>;
+def R484 : AMDILReg<484, "r484">, DwarfRegNum<[484]>;
+def R485 : AMDILReg<485, "r485">, DwarfRegNum<[485]>;
+def R486 : AMDILReg<486, "r486">, DwarfRegNum<[486]>;
+def R487 : AMDILReg<487, "r487">, DwarfRegNum<[487]>;
+def R488 : AMDILReg<488, "r488">, DwarfRegNum<[488]>;
+def R489 : AMDILReg<489, "r489">, DwarfRegNum<[489]>;
+def R490 : AMDILReg<490, "r490">, DwarfRegNum<[490]>;
+def R491 : AMDILReg<491, "r491">, DwarfRegNum<[491]>;
+def R492 : AMDILReg<492, "r492">, DwarfRegNum<[492]>;
+def R493 : AMDILReg<493, "r493">, DwarfRegNum<[493]>;
+def R494 : AMDILReg<494, "r494">, DwarfRegNum<[494]>;
+def R495 : AMDILReg<495, "r495">, DwarfRegNum<[495]>;
+def R496 : AMDILReg<496, "r496">, DwarfRegNum<[496]>;
+def R497 : AMDILReg<497, "r497">, DwarfRegNum<[497]>;
+def R498 : AMDILReg<498, "r498">, DwarfRegNum<[498]>;
+def R499 : AMDILReg<499, "r499">, DwarfRegNum<[499]>;
+def R500 : AMDILReg<500, "r500">, DwarfRegNum<[500]>;
+def R501 : AMDILReg<501, "r501">, DwarfRegNum<[501]>;
+def R502 : AMDILReg<502, "r502">, DwarfRegNum<[502]>;
+def R503 : AMDILReg<503, "r503">, DwarfRegNum<[503]>;
+def R504 : AMDILReg<504, "r504">, DwarfRegNum<[504]>;
+def R505 : AMDILReg<505, "r505">, DwarfRegNum<[505]>;
+def R506 : AMDILReg<506, "r506">, DwarfRegNum<[506]>;
+def R507 : AMDILReg<507, "r507">, DwarfRegNum<[507]>;
+def R508 : AMDILReg<508, "r508">, DwarfRegNum<[508]>;
+def R509 : AMDILReg<509, "r509">, DwarfRegNum<[509]>;
+def R510 : AMDILReg<510, "r510">, DwarfRegNum<[510]>;
+def R511 : AMDILReg<511, "r511">, DwarfRegNum<[511]>;
+def R512 : AMDILReg<512, "r512">, DwarfRegNum<[512]>;
+def R513 : AMDILReg<513, "r513">, DwarfRegNum<[513]>;
+def R514 : AMDILReg<514, "r514">, DwarfRegNum<[514]>;
+def R515 : AMDILReg<515, "r515">, DwarfRegNum<[515]>;
+def R516 : AMDILReg<516, "r516">, DwarfRegNum<[516]>;
+def R517 : AMDILReg<517, "r517">, DwarfRegNum<[517]>;
+def R518 : AMDILReg<518, "r518">, DwarfRegNum<[518]>;
+def R519 : AMDILReg<519, "r519">, DwarfRegNum<[519]>;
+def R520 : AMDILReg<520, "r520">, DwarfRegNum<[520]>;
+def R521 : AMDILReg<521, "r521">, DwarfRegNum<[521]>;
+def R522 : AMDILReg<522, "r522">, DwarfRegNum<[522]>;
+def R523 : AMDILReg<523, "r523">, DwarfRegNum<[523]>;
+def R524 : AMDILReg<524, "r524">, DwarfRegNum<[524]>;
+def R525 : AMDILReg<525, "r525">, DwarfRegNum<[525]>;
+def R526 : AMDILReg<526, "r526">, DwarfRegNum<[526]>;
+def R527 : AMDILReg<527, "r527">, DwarfRegNum<[527]>;
+def R528 : AMDILReg<528, "r528">, DwarfRegNum<[528]>;
+def R529 : AMDILReg<529, "r529">, DwarfRegNum<[529]>;
+def R530 : AMDILReg<530, "r530">, DwarfRegNum<[530]>;
+def R531 : AMDILReg<531, "r531">, DwarfRegNum<[531]>;
+def R532 : AMDILReg<532, "r532">, DwarfRegNum<[532]>;
+def R533 : AMDILReg<533, "r533">, DwarfRegNum<[533]>;
+def R534 : AMDILReg<534, "r534">, DwarfRegNum<[534]>;
+def R535 : AMDILReg<535, "r535">, DwarfRegNum<[535]>;
+def R536 : AMDILReg<536, "r536">, DwarfRegNum<[536]>;
+def R537 : AMDILReg<537, "r537">, DwarfRegNum<[537]>;
+def R538 : AMDILReg<538, "r538">, DwarfRegNum<[538]>;
+def R539 : AMDILReg<539, "r539">, DwarfRegNum<[539]>;
+def R540 : AMDILReg<540, "r540">, DwarfRegNum<[540]>;
+def R541 : AMDILReg<541, "r541">, DwarfRegNum<[541]>;
+def R542 : AMDILReg<542, "r542">, DwarfRegNum<[542]>;
+def R543 : AMDILReg<543, "r543">, DwarfRegNum<[543]>;
+def R544 : AMDILReg<544, "r544">, DwarfRegNum<[544]>;
+def R545 : AMDILReg<545, "r545">, DwarfRegNum<[545]>;
+def R546 : AMDILReg<546, "r546">, DwarfRegNum<[546]>;
+def R547 : AMDILReg<547, "r547">, DwarfRegNum<[547]>;
+def R548 : AMDILReg<548, "r548">, DwarfRegNum<[548]>;
+def R549 : AMDILReg<549, "r549">, DwarfRegNum<[549]>;
+def R550 : AMDILReg<550, "r550">, DwarfRegNum<[550]>;
+def R551 : AMDILReg<551, "r551">, DwarfRegNum<[551]>;
+def R552 : AMDILReg<552, "r552">, DwarfRegNum<[552]>;
+def R553 : AMDILReg<553, "r553">, DwarfRegNum<[553]>;
+def R554 : AMDILReg<554, "r554">, DwarfRegNum<[554]>;
+def R555 : AMDILReg<555, "r555">, DwarfRegNum<[555]>;
+def R556 : AMDILReg<556, "r556">, DwarfRegNum<[556]>;
+def R557 : AMDILReg<557, "r557">, DwarfRegNum<[557]>;
+def R558 : AMDILReg<558, "r558">, DwarfRegNum<[558]>;
+def R559 : AMDILReg<559, "r559">, DwarfRegNum<[559]>;
+def R560 : AMDILReg<560, "r560">, DwarfRegNum<[560]>;
+def R561 : AMDILReg<561, "r561">, DwarfRegNum<[561]>;
+def R562 : AMDILReg<562, "r562">, DwarfRegNum<[562]>;
+def R563 : AMDILReg<563, "r563">, DwarfRegNum<[563]>;
+def R564 : AMDILReg<564, "r564">, DwarfRegNum<[564]>;
+def R565 : AMDILReg<565, "r565">, DwarfRegNum<[565]>;
+def R566 : AMDILReg<566, "r566">, DwarfRegNum<[566]>;
+def R567 : AMDILReg<567, "r567">, DwarfRegNum<[567]>;
+def R568 : AMDILReg<568, "r568">, DwarfRegNum<[568]>;
+def R569 : AMDILReg<569, "r569">, DwarfRegNum<[569]>;
+def R570 : AMDILReg<570, "r570">, DwarfRegNum<[570]>;
+def R571 : AMDILReg<571, "r571">, DwarfRegNum<[571]>;
+def R572 : AMDILReg<572, "r572">, DwarfRegNum<[572]>;
+def R573 : AMDILReg<573, "r573">, DwarfRegNum<[573]>;
+def R574 : AMDILReg<574, "r574">, DwarfRegNum<[574]>;
+def R575 : AMDILReg<575, "r575">, DwarfRegNum<[575]>;
+def R576 : AMDILReg<576, "r576">, DwarfRegNum<[576]>;
+def R577 : AMDILReg<577, "r577">, DwarfRegNum<[577]>;
+def R578 : AMDILReg<578, "r578">, DwarfRegNum<[578]>;
+def R579 : AMDILReg<579, "r579">, DwarfRegNum<[579]>;
+def R580 : AMDILReg<580, "r580">, DwarfRegNum<[580]>;
+def R581 : AMDILReg<581, "r581">, DwarfRegNum<[581]>;
+def R582 : AMDILReg<582, "r582">, DwarfRegNum<[582]>;
+def R583 : AMDILReg<583, "r583">, DwarfRegNum<[583]>;
+def R584 : AMDILReg<584, "r584">, DwarfRegNum<[584]>;
+def R585 : AMDILReg<585, "r585">, DwarfRegNum<[585]>;
+def R586 : AMDILReg<586, "r586">, DwarfRegNum<[586]>;
+def R587 : AMDILReg<587, "r587">, DwarfRegNum<[587]>;
+def R588 : AMDILReg<588, "r588">, DwarfRegNum<[588]>;
+def R589 : AMDILReg<589, "r589">, DwarfRegNum<[589]>;
+def R590 : AMDILReg<590, "r590">, DwarfRegNum<[590]>;
+def R591 : AMDILReg<591, "r591">, DwarfRegNum<[591]>;
+def R592 : AMDILReg<592, "r592">, DwarfRegNum<[592]>;
+def R593 : AMDILReg<593, "r593">, DwarfRegNum<[593]>;
+def R594 : AMDILReg<594, "r594">, DwarfRegNum<[594]>;
+def R595 : AMDILReg<595, "r595">, DwarfRegNum<[595]>;
+def R596 : AMDILReg<596, "r596">, DwarfRegNum<[596]>;
+def R597 : AMDILReg<597, "r597">, DwarfRegNum<[597]>;
+def R598 : AMDILReg<598, "r598">, DwarfRegNum<[598]>;
+def R599 : AMDILReg<599, "r599">, DwarfRegNum<[599]>;
+def R600 : AMDILReg<600, "r600">, DwarfRegNum<[600]>;
+def R601 : AMDILReg<601, "r601">, DwarfRegNum<[601]>;
+def R602 : AMDILReg<602, "r602">, DwarfRegNum<[602]>;
+def R603 : AMDILReg<603, "r603">, DwarfRegNum<[603]>;
+def R604 : AMDILReg<604, "r604">, DwarfRegNum<[604]>;
+def R605 : AMDILReg<605, "r605">, DwarfRegNum<[605]>;
+def R606 : AMDILReg<606, "r606">, DwarfRegNum<[606]>;
+def R607 : AMDILReg<607, "r607">, DwarfRegNum<[607]>;
+def R608 : AMDILReg<608, "r608">, DwarfRegNum<[608]>;
+def R609 : AMDILReg<609, "r609">, DwarfRegNum<[609]>;
+def R610 : AMDILReg<610, "r610">, DwarfRegNum<[610]>;
+def R611 : AMDILReg<611, "r611">, DwarfRegNum<[611]>;
+def R612 : AMDILReg<612, "r612">, DwarfRegNum<[612]>;
+def R613 : AMDILReg<613, "r613">, DwarfRegNum<[613]>;
+def R614 : AMDILReg<614, "r614">, DwarfRegNum<[614]>;
+def R615 : AMDILReg<615, "r615">, DwarfRegNum<[615]>;
+def R616 : AMDILReg<616, "r616">, DwarfRegNum<[616]>;
+def R617 : AMDILReg<617, "r617">, DwarfRegNum<[617]>;
+def R618 : AMDILReg<618, "r618">, DwarfRegNum<[618]>;
+def R619 : AMDILReg<619, "r619">, DwarfRegNum<[619]>;
+def R620 : AMDILReg<620, "r620">, DwarfRegNum<[620]>;
+def R621 : AMDILReg<621, "r621">, DwarfRegNum<[621]>;
+def R622 : AMDILReg<622, "r622">, DwarfRegNum<[622]>;
+def R623 : AMDILReg<623, "r623">, DwarfRegNum<[623]>;
+def R624 : AMDILReg<624, "r624">, DwarfRegNum<[624]>;
+def R625 : AMDILReg<625, "r625">, DwarfRegNum<[625]>;
+def R626 : AMDILReg<626, "r626">, DwarfRegNum<[626]>;
+def R627 : AMDILReg<627, "r627">, DwarfRegNum<[627]>;
+def R628 : AMDILReg<628, "r628">, DwarfRegNum<[628]>;
+def R629 : AMDILReg<629, "r629">, DwarfRegNum<[629]>;
+def R630 : AMDILReg<630, "r630">, DwarfRegNum<[630]>;
+def R631 : AMDILReg<631, "r631">, DwarfRegNum<[631]>;
+def R632 : AMDILReg<632, "r632">, DwarfRegNum<[632]>;
+def R633 : AMDILReg<633, "r633">, DwarfRegNum<[633]>;
+def R634 : AMDILReg<634, "r634">, DwarfRegNum<[634]>;
+def R635 : AMDILReg<635, "r635">, DwarfRegNum<[635]>;
+def R636 : AMDILReg<636, "r636">, DwarfRegNum<[636]>;
+def R637 : AMDILReg<637, "r637">, DwarfRegNum<[637]>;
+def R638 : AMDILReg<638, "r638">, DwarfRegNum<[638]>;
+def R639 : AMDILReg<639, "r639">, DwarfRegNum<[639]>;
+def R640 : AMDILReg<640, "r640">, DwarfRegNum<[640]>;
+def R641 : AMDILReg<641, "r641">, DwarfRegNum<[641]>;
+def R642 : AMDILReg<642, "r642">, DwarfRegNum<[642]>;
+def R643 : AMDILReg<643, "r643">, DwarfRegNum<[643]>;
+def R644 : AMDILReg<644, "r644">, DwarfRegNum<[644]>;
+def R645 : AMDILReg<645, "r645">, DwarfRegNum<[645]>;
+def R646 : AMDILReg<646, "r646">, DwarfRegNum<[646]>;
+def R647 : AMDILReg<647, "r647">, DwarfRegNum<[647]>;
+def R648 : AMDILReg<648, "r648">, DwarfRegNum<[648]>;
+def R649 : AMDILReg<649, "r649">, DwarfRegNum<[649]>;
+def R650 : AMDILReg<650, "r650">, DwarfRegNum<[650]>;
+def R651 : AMDILReg<651, "r651">, DwarfRegNum<[651]>;
+def R652 : AMDILReg<652, "r652">, DwarfRegNum<[652]>;
+def R653 : AMDILReg<653, "r653">, DwarfRegNum<[653]>;
+def R654 : AMDILReg<654, "r654">, DwarfRegNum<[654]>;
+def R655 : AMDILReg<655, "r655">, DwarfRegNum<[655]>;
+def R656 : AMDILReg<656, "r656">, DwarfRegNum<[656]>;
+def R657 : AMDILReg<657, "r657">, DwarfRegNum<[657]>;
+def R658 : AMDILReg<658, "r658">, DwarfRegNum<[658]>;
+def R659 : AMDILReg<659, "r659">, DwarfRegNum<[659]>;
+def R660 : AMDILReg<660, "r660">, DwarfRegNum<[660]>;
+def R661 : AMDILReg<661, "r661">, DwarfRegNum<[661]>;
+def R662 : AMDILReg<662, "r662">, DwarfRegNum<[662]>;
+def R663 : AMDILReg<663, "r663">, DwarfRegNum<[663]>;
+def R664 : AMDILReg<664, "r664">, DwarfRegNum<[664]>;
+def R665 : AMDILReg<665, "r665">, DwarfRegNum<[665]>;
+def R666 : AMDILReg<666, "r666">, DwarfRegNum<[666]>;
+def R667 : AMDILReg<667, "r667">, DwarfRegNum<[667]>;
+def R668 : AMDILReg<668, "r668">, DwarfRegNum<[668]>;
+def R669 : AMDILReg<669, "r669">, DwarfRegNum<[669]>;
+def R670 : AMDILReg<670, "r670">, DwarfRegNum<[670]>;
+def R671 : AMDILReg<671, "r671">, DwarfRegNum<[671]>;
+def R672 : AMDILReg<672, "r672">, DwarfRegNum<[672]>;
+def R673 : AMDILReg<673, "r673">, DwarfRegNum<[673]>;
+def R674 : AMDILReg<674, "r674">, DwarfRegNum<[674]>;
+def R675 : AMDILReg<675, "r675">, DwarfRegNum<[675]>;
+def R676 : AMDILReg<676, "r676">, DwarfRegNum<[676]>;
+def R677 : AMDILReg<677, "r677">, DwarfRegNum<[677]>;
+def R678 : AMDILReg<678, "r678">, DwarfRegNum<[678]>;
+def R679 : AMDILReg<679, "r679">, DwarfRegNum<[679]>;
+def R680 : AMDILReg<680, "r680">, DwarfRegNum<[680]>;
+def R681 : AMDILReg<681, "r681">, DwarfRegNum<[681]>;
+def R682 : AMDILReg<682, "r682">, DwarfRegNum<[682]>;
+def R683 : AMDILReg<683, "r683">, DwarfRegNum<[683]>;
+def R684 : AMDILReg<684, "r684">, DwarfRegNum<[684]>;
+def R685 : AMDILReg<685, "r685">, DwarfRegNum<[685]>;
+def R686 : AMDILReg<686, "r686">, DwarfRegNum<[686]>;
+def R687 : AMDILReg<687, "r687">, DwarfRegNum<[687]>;
+def R688 : AMDILReg<688, "r688">, DwarfRegNum<[688]>;
+def R689 : AMDILReg<689, "r689">, DwarfRegNum<[689]>;
+def R690 : AMDILReg<690, "r690">, DwarfRegNum<[690]>;
+def R691 : AMDILReg<691, "r691">, DwarfRegNum<[691]>;
+def R692 : AMDILReg<692, "r692">, DwarfRegNum<[692]>;
+def R693 : AMDILReg<693, "r693">, DwarfRegNum<[693]>;
+def R694 : AMDILReg<694, "r694">, DwarfRegNum<[694]>;
+def R695 : AMDILReg<695, "r695">, DwarfRegNum<[695]>;
+def R696 : AMDILReg<696, "r696">, DwarfRegNum<[696]>;
+def R697 : AMDILReg<697, "r697">, DwarfRegNum<[697]>;
+def R698 : AMDILReg<698, "r698">, DwarfRegNum<[698]>;
+def R699 : AMDILReg<699, "r699">, DwarfRegNum<[699]>;
+def R700 : AMDILReg<700, "r700">, DwarfRegNum<[700]>;
+def R701 : AMDILReg<701, "r701">, DwarfRegNum<[701]>;
+def R702 : AMDILReg<702, "r702">, DwarfRegNum<[702]>;
+def R703 : AMDILReg<703, "r703">, DwarfRegNum<[703]>;
+def R704 : AMDILReg<704, "r704">, DwarfRegNum<[704]>;
+def R705 : AMDILReg<705, "r705">, DwarfRegNum<[705]>;
+def R706 : AMDILReg<706, "r706">, DwarfRegNum<[706]>;
+def R707 : AMDILReg<707, "r707">, DwarfRegNum<[707]>;
+def R708 : AMDILReg<708, "r708">, DwarfRegNum<[708]>;
+def R709 : AMDILReg<709, "r709">, DwarfRegNum<[709]>;
+def R710 : AMDILReg<710, "r710">, DwarfRegNum<[710]>;
+def R711 : AMDILReg<711, "r711">, DwarfRegNum<[711]>;
+def R712 : AMDILReg<712, "r712">, DwarfRegNum<[712]>;
+def R713 : AMDILReg<713, "r713">, DwarfRegNum<[713]>;
+def R714 : AMDILReg<714, "r714">, DwarfRegNum<[714]>;
+def R715 : AMDILReg<715, "r715">, DwarfRegNum<[715]>;
+def R716 : AMDILReg<716, "r716">, DwarfRegNum<[716]>;
+def R717 : AMDILReg<717, "r717">, DwarfRegNum<[717]>;
+def R718 : AMDILReg<718, "r718">, DwarfRegNum<[718]>;
+def R719 : AMDILReg<719, "r719">, DwarfRegNum<[719]>;
+def R720 : AMDILReg<720, "r720">, DwarfRegNum<[720]>;
+def R721 : AMDILReg<721, "r721">, DwarfRegNum<[721]>;
+def R722 : AMDILReg<722, "r722">, DwarfRegNum<[722]>;
+def R723 : AMDILReg<723, "r723">, DwarfRegNum<[723]>;
+def R724 : AMDILReg<724, "r724">, DwarfRegNum<[724]>;
+def R725 : AMDILReg<725, "r725">, DwarfRegNum<[725]>;
+def R726 : AMDILReg<726, "r726">, DwarfRegNum<[726]>;
+def R727 : AMDILReg<727, "r727">, DwarfRegNum<[727]>;
+def R728 : AMDILReg<728, "r728">, DwarfRegNum<[728]>;
+def R729 : AMDILReg<729, "r729">, DwarfRegNum<[729]>;
+def R730 : AMDILReg<730, "r730">, DwarfRegNum<[730]>;
+def R731 : AMDILReg<731, "r731">, DwarfRegNum<[731]>;
+def R732 : AMDILReg<732, "r732">, DwarfRegNum<[732]>;
+def R733 : AMDILReg<733, "r733">, DwarfRegNum<[733]>;
+def R734 : AMDILReg<734, "r734">, DwarfRegNum<[734]>;
+def R735 : AMDILReg<735, "r735">, DwarfRegNum<[735]>;
+def R736 : AMDILReg<736, "r736">, DwarfRegNum<[736]>;
+def R737 : AMDILReg<737, "r737">, DwarfRegNum<[737]>;
+def R738 : AMDILReg<738, "r738">, DwarfRegNum<[738]>;
+def R739 : AMDILReg<739, "r739">, DwarfRegNum<[739]>;
+def R740 : AMDILReg<740, "r740">, DwarfRegNum<[740]>;
+def R741 : AMDILReg<741, "r741">, DwarfRegNum<[741]>;
+def R742 : AMDILReg<742, "r742">, DwarfRegNum<[742]>;
+def R743 : AMDILReg<743, "r743">, DwarfRegNum<[743]>;
+def R744 : AMDILReg<744, "r744">, DwarfRegNum<[744]>;
+def R745 : AMDILReg<745, "r745">, DwarfRegNum<[745]>;
+def R746 : AMDILReg<746, "r746">, DwarfRegNum<[746]>;
+def R747 : AMDILReg<747, "r747">, DwarfRegNum<[747]>;
+def R748 : AMDILReg<748, "r748">, DwarfRegNum<[748]>;
+def R749 : AMDILReg<749, "r749">, DwarfRegNum<[749]>;
+def R750 : AMDILReg<750, "r750">, DwarfRegNum<[750]>;
+def R751 : AMDILReg<751, "r751">, DwarfRegNum<[751]>;
+def R752 : AMDILReg<752, "r752">, DwarfRegNum<[752]>;
+def R753 : AMDILReg<753, "r753">, DwarfRegNum<[753]>;
+def R754 : AMDILReg<754, "r754">, DwarfRegNum<[754]>;
+def R755 : AMDILReg<755, "r755">, DwarfRegNum<[755]>;
+def R756 : AMDILReg<756, "r756">, DwarfRegNum<[756]>;
+def R757 : AMDILReg<757, "r757">, DwarfRegNum<[757]>;
+def R758 : AMDILReg<758, "r758">, DwarfRegNum<[758]>;
+def R759 : AMDILReg<759, "r759">, DwarfRegNum<[759]>;
+def R760 : AMDILReg<760, "r760">, DwarfRegNum<[760]>;
+def R761 : AMDILReg<761, "r761">, DwarfRegNum<[761]>;
+def R762 : AMDILReg<762, "r762">, DwarfRegNum<[762]>;
+def R763 : AMDILReg<763, "r763">, DwarfRegNum<[763]>;
+def R764 : AMDILReg<764, "r764">, DwarfRegNum<[764]>;
+def R765 : AMDILReg<765, "r765">, DwarfRegNum<[765]>;
+def R766 : AMDILReg<766, "r766">, DwarfRegNum<[766]>;
+def R767 : AMDILReg<767, "r767">, DwarfRegNum<[767]>;
+
+// All registers between 1000 and 1025, plus r998 and r999, are reserved and
+// cannot be used except as documented in this section.
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1010-r1012 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+// Named records for the reserved registers. Each one passes a register number
+// and an assembly name string to AMDILReg and attaches the same number as its
+// DwarfRegNum. The roles noted on each line come from the reservation notes
+// that precede these definitions.
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; // dynamic stack allocation pointer
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; // temporary for handwritten code
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; // temporary for handwritten code
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; // temporary for load/store code
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; // temporary for data segment offset
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; // temporary for store code
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; // section data pointer
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; // temporary I/O register
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; // temporary I/O register
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; // temporary I/O register
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; // frame pointer register
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; // temporary I/O register
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; // temporary I/O register
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; // temporary I/O register
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; // temporary I/O register
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; // temporary I/O register
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; // temporary I/O register
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; // temporary I/O register
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; // temporary I/O register
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; // mem register; name string is "mem", not "r999"
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; // return address register
+// NOTE(review): r997 has no entry in the reservation notes and FP is not a
+// member of any register class defined below; its role is not documented here
+// (the notes call r1009/DFP the frame pointer) -- TODO confirm.
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+// GPRI8: register class for the i8 value type (third RegisterClass argument: 8,
+// presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRI8 : RegisterClass<"AMDIL", [i8], 8,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2I8: register class for the v2i8 value type (third RegisterClass
+// argument: 16, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2I8 : RegisterClass<"AMDIL", [v2i8], 16,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV4I8: register class for the v4i8 value type (third RegisterClass
+// argument: 32, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV4I8 : RegisterClass<"AMDIL", [v4i8], 32,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRI16: register class for the i16 value type (third RegisterClass
+// argument: 16, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRI16 : RegisterClass<"AMDIL", [i16], 16,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2I16: register class for the v2i16 value type (third RegisterClass
+// argument: 32, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2I16 : RegisterClass<"AMDIL", [v2i16], 32,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV4I16: register class for the v4i16 value type (third RegisterClass
+// argument: 64, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV4I16 : RegisterClass<"AMDIL", [v4i16], 64,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRI32: register class for the i32 value type (third RegisterClass
+// argument: 32, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRI32 : RegisterClass<"AMDIL", [i32], 32,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRF32: register class for the f32 value type (third RegisterClass
+// argument: 32, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRF32 : RegisterClass<"AMDIL", [f32], 32,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// For 64-bit integer emulation, the lower 32 bits are in x
+// and the upper 32 bits are in y.
+// GPRI64: register class for the i64 value type (third RegisterClass
+// argument: 64, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRI64 : RegisterClass<"AMDIL", [i64], 64,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRF64: register class for the f64 value type (third RegisterClass
+// argument: 64, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRF64 : RegisterClass<"AMDIL", [f64], 64,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV4F32: register class for the v4f32 value type (third RegisterClass
+// argument: 128, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV4F32 : RegisterClass<"AMDIL", [v4f32], 128,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV4I32: register class for the v4i32 value type (third RegisterClass
+// argument: 128, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV4I32 : RegisterClass<"AMDIL", [v4i32], 128,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2I32: register class for the v2i32 value type (third RegisterClass
+// argument: 64, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2I32 : RegisterClass<"AMDIL", [v2i32], 64,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2F32: register class for the v2f32 value type (third RegisterClass
+// argument: 64, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2F32 : RegisterClass<"AMDIL", [v2f32], 64,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2I64: register class for the v2i64 value type (third RegisterClass
+// argument: 128, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2I64 : RegisterClass<"AMDIL", [v2i64], 128,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+// GPRV2F64: register class for the v2f64 value type (third RegisterClass
+// argument: 128, presumably the alignment in bits -- TODO confirm).
+// Members: R1-R767 plus RA, SP, T1-T5, SDP, R1001-R1008, R1010-R1012 and MEM.
+// AltOrders holds only R1-R767 and AltOrderSelect always returns 1.
+// NOTE(review): in LLVM's Target.td a result of 1 picks the first AltOrders
+// entry, which would keep the special registers out of allocation -- confirm.
+def GPRV2F64 : RegisterClass<"AMDIL", [v2f64], 128,
+ (add (sequence "R%u", 1, 767), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
+{
+ let AltOrders = [(add (sequence "R%u", 1, 767))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+
diff --git a/src/gallium/drivers/radeon/AMDILRegisterUsesScalar.td b/src/gallium/drivers/radeon/AMDILRegisterUsesScalar.td
new file mode 100644
index 00000000000..3e586bdd0e1
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterUsesScalar.td
@@ -0,0 +1 @@
+Rx1, Ry1, Rz1, Rw1, Rx2, Ry2, Rz2, Rw2, Rx3, Ry3, Rz3, Rw3, Rx4, Ry4, Rz4, Rw4, Rx5, Ry5, Rz5, Rw5, Rx6, Ry6, Rz6, Rw6, Rx7, Ry7, Rz7, Rw7, Rx8, Ry8, Rz8, Rw8, Rx9, Ry9, Rz9, Rw9, Rx10, Ry10, Rz10, Rw10, Rx11, Ry11, Rz11, Rw11, Rx12, Ry12, Rz12, Rw12, Rx13, Ry13, Rz13, Rw13, Rx14, Ry14, Rz14, Rw14, Rx15, Ry15, Rz15, Rw15, Rx16, Ry16, Rz16, Rw16, Rx17, Ry17, Rz17, Rw17, Rx18, Ry18, Rz18, Rw18, Rx19, Ry19, Rz19, Rw19, Rx20, Ry20, Rz20, Rw20, Rx21, Ry21, Rz21, Rw21, Rx22, Ry22, Rz22, Rw22, Rx23, Ry23, Rz23, Rw23, Rx24, Ry24, Rz24, Rw24, Rx25, Ry25, Rz25, Rw25, Rx26, Ry26, Rz26, Rw26, Rx27, Ry27, Rz27, Rw27, Rx28, Ry28, Rz28, Rw28, Rx29, Ry29, Rz29, Rw29, Rx30, Ry30, Rz30, Rw30, Rx31, Ry31, Rz31, Rw31, Rx32, Ry32, Rz32, Rw32, Rx33, Ry33, Rz33, Rw33, Rx34, Ry34, Rz34, Rw34, Rx35, Ry35, Rz35, Rw35, Rx36, Ry36, Rz36, Rw36, Rx37, Ry37, Rz37, Rw37, Rx38, Ry38, Rz38, Rw38, Rx39, Ry39, Rz39, Rw39, Rx40, Ry40, Rz40, Rw40, Rx41, Ry41, Rz41, Rw41, Rx42, Ry42, Rz42, Rw42, Rx43, Ry43, Rz43, Rw43, Rx44, Ry44, Rz44, Rw44, Rx45, Ry45, Rz45, Rw45, Rx46, Ry46, Rz46, Rw46, Rx47, Ry47, Rz47, Rw47, Rx48, Ry48, Rz48, Rw48, Rx49, Ry49, Rz49, Rw49, Rx50, Ry50, Rz50, Rw50, Rx51, Ry51, Rz51, Rw51, Rx52, Ry52, Rz52, Rw52, Rx53, Ry53, Rz53, Rw53, Rx54, Ry54, Rz54, Rw54, Rx55, Ry55, Rz55, Rw55, Rx56, Ry56, Rz56, Rw56, Rx57, Ry57, Rz57, Rw57, Rx58, Ry58, Rz58, Rw58, Rx59, Ry59, Rz59, Rw59, Rx60, Ry60, Rz60, Rw60, Rx61, Ry61, Rz61, Rw61, Rx62, Ry62, Rz62, Rw62, Rx63, Ry63, Rz63, Rw63, Rx64, Ry64, Rz64, Rw64, Rx65, Ry65, Rz65, Rw65, Rx66, Ry66, Rz66, Rw66, Rx67, Ry67, Rz67, Rw67, Rx68, Ry68, Rz68, Rw68, Rx69, Ry69, Rz69, Rw69, Rx70, Ry70, Rz70, Rw70, Rx71, Ry71, Rz71, Rw71, Rx72, Ry72, Rz72, Rw72, Rx73, Ry73, Rz73, Rw73, Rx74, Ry74, Rz74, Rw74, Rx75, Ry75, Rz75, Rw75, Rx76, Ry76, Rz76, Rw76, Rx77, Ry77, Rz77, Rw77, Rx78, Ry78, Rz78, Rw78, Rx79, Ry79, Rz79, Rw79, Rx80, Ry80, Rz80, Rw80, Rx81, Ry81, Rz81, Rw81, Rx82, Ry82, Rz82, Rw82, Rx83, Ry83, Rz83, Rw83, Rx84, Ry84, Rz84, Rw84, Rx85, Ry85, Rz85, 
Rw85, Rx86, Ry86, Rz86, Rw86, Rx87, Ry87, Rz87, Rw87, Rx88, Ry88, Rz88, Rw88, Rx89, Ry89, Rz89, Rw89, Rx90, Ry90, Rz90, Rw90, Rx91, Ry91, Rz91, Rw91, Rx92, Ry92, Rz92, Rw92, Rx93, Ry93, Rz93, Rw93, Rx94, Ry94, Rz94, Rw94, Rx95, Ry95, Rz95, Rw95, Rx96, Ry96, Rz96, Rw96, Rx97, Ry97, Rz97, Rw97, Rx98, Ry98, Rz98, Rw98, Rx99, Ry99, Rz99, Rw99, Rx100, Ry100, Rz100, Rw100, Rx101, Ry101, Rz101, Rw101, Rx102, Ry102, Rz102, Rw102, Rx103, Ry103, Rz103, Rw103, Rx104, Ry104, Rz104, Rw104, Rx105, Ry105, Rz105, Rw105, Rx106, Ry106, Rz106, Rw106, Rx107, Ry107, Rz107, Rw107, Rx108, Ry108, Rz108, Rw108, Rx109, Ry109, Rz109, Rw109, Rx110, Ry110, Rz110, Rw110, Rx111, Ry111, Rz111, Rw111, Rx112, Ry112, Rz112, Rw112, Rx113, Ry113, Rz113, Rw113, Rx114, Ry114, Rz114, Rw114, Rx115, Ry115, Rz115, Rw115, Rx116, Ry116, Rz116, Rw116, Rx117, Ry117, Rz117, Rw117, Rx118, Ry118, Rz118, Rw118, Rx119, Ry119, Rz119, Rw119, Rx120, Ry120, Rz120, Rw120, Rx121, Ry121, Rz121, Rw121, Rx122, Ry122, Rz122, Rw122, Rx123, Ry123, Rz123, Rw123, Rx124, Ry124, Rz124, Rw124, Rx125, Ry125, Rz125, Rw125, Rx126, Ry126, Rz126, Rw126, Rx127, Ry127, Rz127, Rw127, Rx128, Ry128, Rz128, Rw128, Rx129, Ry129, Rz129, Rw129, Rx130, Ry130, Rz130, Rw130, Rx131, Ry131, Rz131, Rw131, Rx132, Ry132, Rz132, Rw132, Rx133, Ry133, Rz133, Rw133, Rx134, Ry134, Rz134, Rw134, Rx135, Ry135, Rz135, Rw135, Rx136, Ry136, Rz136, Rw136, Rx137, Ry137, Rz137, Rw137, Rx138, Ry138, Rz138, Rw138, Rx139, Ry139, Rz139, Rw139, Rx140, Ry140, Rz140, Rw140, Rx141, Ry141, Rz141, Rw141, Rx142, Ry142, Rz142, Rw142, Rx143, Ry143, Rz143, Rw143, Rx144, Ry144, Rz144, Rw144, Rx145, Ry145, Rz145, Rw145, Rx146, Ry146, Rz146, Rw146, Rx147, Ry147, Rz147, Rw147, Rx148, Ry148, Rz148, Rw148, Rx149, Ry149, Rz149, Rw149, Rx150, Ry150, Rz150, Rw150, Rx151, Ry151, Rz151, Rw151, Rx152, Ry152, Rz152, Rw152, Rx153, Ry153, Rz153, Rw153, Rx154, Ry154, Rz154, Rw154, Rx155, Ry155, Rz155, Rw155, Rx156, Ry156, Rz156, Rw156, Rx157, Ry157, Rz157, Rw157, Rx158, Ry158, Rz158, Rw158, 
Rx159, Ry159, Rz159, Rw159, Rx160, Ry160, Rz160, Rw160, Rx161, Ry161, Rz161, Rw161, Rx162, Ry162, Rz162, Rw162, Rx163, Ry163, Rz163, Rw163, Rx164, Ry164, Rz164, Rw164, Rx165, Ry165, Rz165, Rw165, Rx166, Ry166, Rz166, Rw166, Rx167, Ry167, Rz167, Rw167, Rx168, Ry168, Rz168, Rw168, Rx169, Ry169, Rz169, Rw169, Rx170, Ry170, Rz170, Rw170, Rx171, Ry171, Rz171, Rw171, Rx172, Ry172, Rz172, Rw172, Rx173, Ry173, Rz173, Rw173, Rx174, Ry174, Rz174, Rw174, Rx175, Ry175, Rz175, Rw175, Rx176, Ry176, Rz176, Rw176, Rx177, Ry177, Rz177, Rw177, Rx178, Ry178, Rz178, Rw178, Rx179, Ry179, Rz179, Rw179, Rx180, Ry180, Rz180, Rw180, Rx181, Ry181, Rz181, Rw181, Rx182, Ry182, Rz182, Rw182, Rx183, Ry183, Rz183, Rw183, Rx184, Ry184, Rz184, Rw184, Rx185, Ry185, Rz185, Rw185, Rx186, Ry186, Rz186, Rw186, Rx187, Ry187, Rz187, Rw187, Rx188, Ry188, Rz188, Rw188, Rx189, Ry189, Rz189, Rw189, Rx190, Ry190, Rz190, Rw190, Rx191, Ry191, Rz191, Rw191, Rx1000, Ry1000, Rz1000, Rw1000, Rx1001, Ry1001, Rz1001, Rw1001, Rx1002, Ry1002, Rz1002, Rw1002, Rx1003, Ry1003, Rz1003, Rw1003, Rx1004, Ry1004, Rz1004, Rw1004, Rx1005, Ry1005, Rz1005, Rw1005, Rx1006, Ry1006, Rz1006, Rw1006, Rx1007, Ry1007, Rz1007, Rw1007, Rx1008, Ry1008, Rz1008, Rw1008, Rx1009, Ry1009, Rz1009, Rw1009, Rx1010, Ry1010, Rz1010, Rw1010, Rx1011, Ry1011, Rz1011, Rw1011, Rx1012, Ry1012, Rz1012, Rw1012 \ No newline at end of file
diff --git a/src/gallium/drivers/radeon/AMDILRegisterUsesV2.td b/src/gallium/drivers/radeon/AMDILRegisterUsesV2.td
new file mode 100644
index 00000000000..a3b7a5ea551
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterUsesV2.td
@@ -0,0 +1 @@
+Rxy1, Rzw1, Rxy2, Rzw2, Rxy3, Rzw3, Rxy4, Rzw4, Rxy5, Rzw5, Rxy6, Rzw6, Rxy7, Rzw7, Rxy8, Rzw8, Rxy9, Rzw9, Rxy10, Rzw10, Rxy11, Rzw11, Rxy12, Rzw12, Rxy13, Rzw13, Rxy14, Rzw14, Rxy15, Rzw15, Rxy16, Rzw16, Rxy17, Rzw17, Rxy18, Rzw18, Rxy19, Rzw19, Rxy20, Rzw20, Rxy21, Rzw21, Rxy22, Rzw22, Rxy23, Rzw23, Rxy24, Rzw24, Rxy25, Rzw25, Rxy26, Rzw26, Rxy27, Rzw27, Rxy28, Rzw28, Rxy29, Rzw29, Rxy30, Rzw30, Rxy31, Rzw31, Rxy32, Rzw32, Rxy33, Rzw33, Rxy34, Rzw34, Rxy35, Rzw35, Rxy36, Rzw36, Rxy37, Rzw37, Rxy38, Rzw38, Rxy39, Rzw39, Rxy40, Rzw40, Rxy41, Rzw41, Rxy42, Rzw42, Rxy43, Rzw43, Rxy44, Rzw44, Rxy45, Rzw45, Rxy46, Rzw46, Rxy47, Rzw47, Rxy48, Rzw48, Rxy49, Rzw49, Rxy50, Rzw50, Rxy51, Rzw51, Rxy52, Rzw52, Rxy53, Rzw53, Rxy54, Rzw54, Rxy55, Rzw55, Rxy56, Rzw56, Rxy57, Rzw57, Rxy58, Rzw58, Rxy59, Rzw59, Rxy60, Rzw60, Rxy61, Rzw61, Rxy62, Rzw62, Rxy63, Rzw63, Rxy64, Rzw64, Rxy65, Rzw65, Rxy66, Rzw66, Rxy67, Rzw67, Rxy68, Rzw68, Rxy69, Rzw69, Rxy70, Rzw70, Rxy71, Rzw71, Rxy72, Rzw72, Rxy73, Rzw73, Rxy74, Rzw74, Rxy75, Rzw75, Rxy76, Rzw76, Rxy77, Rzw77, Rxy78, Rzw78, Rxy79, Rzw79, Rxy80, Rzw80, Rxy81, Rzw81, Rxy82, Rzw82, Rxy83, Rzw83, Rxy84, Rzw84, Rxy85, Rzw85, Rxy86, Rzw86, Rxy87, Rzw87, Rxy88, Rzw88, Rxy89, Rzw89, Rxy90, Rzw90, Rxy91, Rzw91, Rxy92, Rzw92, Rxy93, Rzw93, Rxy94, Rzw94, Rxy95, Rzw95, Rxy96, Rzw96, Rxy97, Rzw97, Rxy98, Rzw98, Rxy99, Rzw99, Rxy100, Rzw100, Rxy101, Rzw101, Rxy102, Rzw102, Rxy103, Rzw103, Rxy104, Rzw104, Rxy105, Rzw105, Rxy106, Rzw106, Rxy107, Rzw107, Rxy108, Rzw108, Rxy109, Rzw109, Rxy110, Rzw110, Rxy111, Rzw111, Rxy112, Rzw112, Rxy113, Rzw113, Rxy114, Rzw114, Rxy115, Rzw115, Rxy116, Rzw116, Rxy117, Rzw117, Rxy118, Rzw118, Rxy119, Rzw119, Rxy120, Rzw120, Rxy121, Rzw121, Rxy122, Rzw122, Rxy123, Rzw123, Rxy124, Rzw124, Rxy125, Rzw125, Rxy126, Rzw126, Rxy127, Rzw127, Rxy128, Rzw128, Rxy129, Rzw129, Rxy130, Rzw130, Rxy131, Rzw131, Rxy132, Rzw132, Rxy133, Rzw133, Rxy134, Rzw134, Rxy135, Rzw135, Rxy136, Rzw136, Rxy137, Rzw137, Rxy138, Rzw138, 
Rxy139, Rzw139, Rxy140, Rzw140, Rxy141, Rzw141, Rxy142, Rzw142, Rxy143, Rzw143, Rxy144, Rzw144, Rxy145, Rzw145, Rxy146, Rzw146, Rxy147, Rzw147, Rxy148, Rzw148, Rxy149, Rzw149, Rxy150, Rzw150, Rxy151, Rzw151, Rxy152, Rzw152, Rxy153, Rzw153, Rxy154, Rzw154, Rxy155, Rzw155, Rxy156, Rzw156, Rxy157, Rzw157, Rxy158, Rzw158, Rxy159, Rzw159, Rxy160, Rzw160, Rxy161, Rzw161, Rxy162, Rzw162, Rxy163, Rzw163, Rxy164, Rzw164, Rxy165, Rzw165, Rxy166, Rzw166, Rxy167, Rzw167, Rxy168, Rzw168, Rxy169, Rzw169, Rxy170, Rzw170, Rxy171, Rzw171, Rxy172, Rzw172, Rxy173, Rzw173, Rxy174, Rzw174, Rxy175, Rzw175, Rxy176, Rzw176, Rxy177, Rzw177, Rxy178, Rzw178, Rxy179, Rzw179, Rxy180, Rzw180, Rxy181, Rzw181, Rxy182, Rzw182, Rxy183, Rzw183, Rxy184, Rzw184, Rxy185, Rzw185, Rxy186, Rzw186, Rxy187, Rzw187, Rxy188, Rzw188, Rxy189, Rzw189, Rxy190, Rzw190, Rxy191, Rzw191, Rxy1000, Rzw1000, Rxy1001, Rzw1001, Rxy1002, Rzw1002, Rxy1003, Rzw1003, Rxy1004, Rzw1004, Rxy1005, Rzw1005, Rxy1006, Rzw1006, Rxy1007, Rzw1007, Rxy1008, Rzw1008, Rxy1009, Rzw1009, Rxy1010, Rzw1010, Rxy1011, Rzw1011, Rxy1012, Rzw1012 \ No newline at end of file
diff --git a/src/gallium/drivers/radeon/AMDILRegisterUsesV4.td b/src/gallium/drivers/radeon/AMDILRegisterUsesV4.td
new file mode 100644
index 00000000000..97941f8705e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILRegisterUsesV4.td
@@ -0,0 +1 @@
+R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183, R184, R185, R186, R187, R188, R189, R190, R191, R1000, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, R1009, R1010, R1011, R1012 \ No newline at end of file
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
new file mode 100644
index 00000000000..54b6dabe423
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
@@ -0,0 +1,222 @@
+//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the AMD IL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSubtarget.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDILGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_MC_DESC
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "AMDILGenSubtarget.inc"
+
+/// Parse \p Str as a base-10 unsigned integer.
+///
+/// Returns the parsed value, or 0 when \p Str is empty, is not a number or
+/// does not fit in an unsigned. Unlike atoi() on StringRef::data(), this
+/// never reads past the end of the (not NUL-terminated) StringRef.
+static unsigned parseFeatureNumber(llvm::StringRef Str)
+{
+  unsigned Value = 0;
+  // StringRef::getAsInteger() returns true on *failure*.
+  if (Str.getAsInteger(10, Value)) {
+    return 0;
+  }
+  return Value;
+}
+
+/// Build the subtarget from the target triple, CPU name and feature string.
+///
+/// A few entries of the comma-separated feature string \p FS are consumed
+/// here instead of being passed on to the tblgen-generated parser:
+///   - "+mwgs-<ndims>-<x>[-<y>[-<z>]]" overrides the default work-group
+///     size (at most three dimensions are honoured);
+///   - "+cal=<version>" sets the CAL version reported by calVersion();
+///   - "test" (debug builds only) parses features as "kauai" and then names
+///     the device "test".
+/// Every other feature is forwarded to ParseSubtargetFeatures().
+///
+/// \param TT  target triple, forwarded to the generated base class.
+/// \param CPU device name; used both for feature parsing and device lookup.
+/// \param FS  comma-separated feature string.
+AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS)
+  : AMDILGenSubtargetInfo(TT, CPU, FS), mGM(NULL), mKM(NULL), mDevice(NULL)
+{
+  memset(CapsOverride, 0, sizeof(CapsOverride));
+  // The device name always comes from the CPU string.
+  std::string GPU = CPU.str();
+  mIs64bit = false;
+  mIs32on64bit = false;
+  mVersion = 0;
+  mDefaultSize[0] = 64;
+  mDefaultSize[1] = 1;
+  mDefaultSize[2] = 1;
+  // Not read by the code in this file; set so the object never carries
+  // indeterminate values.
+  for (int d = 0; d < 3; ++d) {
+    mMinimumSize[d] = 1;
+  }
+  SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
+  SplitString(FS, Features, ",");
+  std::string newFeatures = "";
+#if defined(_DEBUG) || defined(DEBUG)
+  bool useTest = false;
+#endif
+  for (size_t x = 0; x < Features.size(); ++x) {
+    if (Features[x].startswith("+mwgs")) {
+      SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
+      SplitString(Features[x], sizes, "-");
+      // sizes[0] is "+mwgs", sizes[1] the dimension count, sizes[2..] the
+      // per-dimension sizes. Any of them may be missing in a malformed
+      // string, so every index is bounds-checked.
+      size_t mDim = sizes.size() > 1 ? parseFeatureNumber(sizes[1]) : 0;
+      if (mDim > 3) {
+        mDim = 3;
+      }
+      for (size_t y = 0; y < mDim && y + 2 < sizes.size(); ++y) {
+        mDefaultSize[y] = parseFeatureNumber(sizes[y + 2]);
+      }
+#if defined(_DEBUG) || defined(DEBUG)
+    } else if (!Features[x].compare("test")) {
+      useTest = true;
+#endif
+    } else if (Features[x].startswith("+cal")) {
+      SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
+      SplitString(Features[x], version, "=");
+      // "+cal" without "=<n>" leaves the version untouched.
+      if (version.size() > 1) {
+        mVersion = parseFeatureNumber(version[1]);
+      }
+    } else {
+      // Separate on what has already been collected, not on the loop
+      // index, so a locally-consumed first feature cannot produce a
+      // leading comma.
+      if (!newFeatures.empty()) {
+        newFeatures += ',';
+      }
+      newFeatures += Features[x].str();
+    }
+  }
+  // If we don't have a version then set it to
+  // -1 which enables everything. This is for
+  // offline devices.
+  if (!mVersion) {
+    mVersion = (uint32_t)-1;
+  }
+  // A zero work-group dimension is replaced by 1.
+  for (int x = 0; x < 3; ++x) {
+    if (!mDefaultSize[x]) {
+      mDefaultSize[x] = 1;
+    }
+  }
+#if defined(_DEBUG) || defined(DEBUG)
+  if (useTest) {
+    GPU = "kauai";
+  }
+#endif
+  ParseSubtargetFeatures(GPU, newFeatures);
+#if defined(_DEBUG) || defined(DEBUG)
+  if (useTest) {
+    GPU = "test";
+  }
+#endif
+  mDevName = GPU;
+  mDevice = getDeviceFromName(mDevName, this, mIs64bit);
+}
+/// Destroy the subtarget, deleting the device object stored in mDevice.
+AMDILSubtarget::~AMDILSubtarget() { delete mDevice; }
+/// Look up the override flag stored for capability \p caps.
+///
+/// Asserts that \p caps is below AMDILDeviceInfo::MaxNumberCapabilities.
+bool AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const
+{
+  assert(caps < AMDILDeviceInfo::MaxNumberCapabilities &&
+      "Caps index is out of bounds!");
+  return CapsOverride[caps];
+}
+/// Return the stored mIs64bit flag.
+bool AMDILSubtarget::is64bit() const { return mIs64bit; }
+/// Always returns false.
+bool AMDILSubtarget::isTargetELF() const { return false; }
+/// Return the default work-group size for dimension \p dim.
+///
+/// \param dim dimension index; valid values are 0, 1 and 2.
+/// \returns mDefaultSize[dim] for a valid index, otherwise 1.
+size_t
+AMDILSubtarget::getDefaultSize(uint32_t dim) const
+{
+  // mDefaultSize has exactly three entries, so index 3 is already out of
+  // range and must take the fallback path.
+  if (dim >= 3) {
+    return 1;
+  }
+  return mDefaultSize[dim];
+}
+/// Return the CAL version stored in mVersion.
+uint32_t AMDILSubtarget::calVersion() const { return mVersion; }
+
+/// Return the global manager pointer last stored by setGlobalManager().
+const AMDILGlobalManager *AMDILSubtarget::getGlobalManager() const
+{
+  return mGM;
+}
+/// Store \p gm as the global manager pointer. The method is const; it
+/// assigns to the member mGM.
+void AMDILSubtarget::setGlobalManager(const AMDILGlobalManager *gm) const
+{
+  mGM = gm;
+}
+
+/// Return the kernel manager pointer last stored by setKernelManager().
+const AMDILKernelManager *AMDILSubtarget::getKernelManager() const
+{
+  return mKM;
+}
+/// Store \p km as the kernel manager pointer. The method is const; it
+/// assigns to the member mKM.
+void AMDILSubtarget::setKernelManager(const AMDILKernelManager *km) const
+{
+  mKM = km;
+}
+/// Return the data layout string for this subtarget.
+///
+/// When a device object is present its getDataLayout() result is returned;
+/// otherwise the hard-coded layout string below is used.
+std::string AMDILSubtarget::getDataLayout() const
+{
+  if (mDevice) {
+    return mDevice->getDataLayout();
+  }
+  // No device object: fall back to the built-in layout.
+  return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+      "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+}
+
+/// Return a copy of the device name stored in mDevName.
+std::string AMDILSubtarget::getDeviceName() const { return mDevName; }
+/// Return the device object pointer held in mDevice; the subtarget deletes
+/// that object in its destructor.
+const AMDILDevice *AMDILSubtarget::device() const { return mDevice; }
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.h b/src/gallium/drivers/radeon/AMDILSubtarget.h
new file mode 100644
index 00000000000..2a7a687382f
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.h
@@ -0,0 +1,115 @@
+//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILSUBTARGET_H_
+#define _AMDILSUBTARGET_H_
+
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDILGenSubtarget.inc"
+
+#include <string>
+#include <cstdlib>
+#define MAX_CB_SIZE (1 << 16)
+namespace llvm {
+  class Module;
+  class AMDILKernelManager;
+  class AMDILGlobalManager;
+  class AMDILDevice;
+  /// AMDIL subclass of the tblgen-generated subtarget info. It stores the
+  /// selected device, the default work-group size, the CAL version and the
+  /// global/kernel manager pointers.
+  class AMDILSubtarget : public AMDILGenSubtargetInfo {
+  private:
+    // Per-capability override flags; zeroed by the constructor and read
+    // through isOverride().
+    bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
+    // Manager pointers. They are mutable so the const setters below can
+    // assign them.
+    mutable const AMDILGlobalManager *mGM;
+    mutable const AMDILKernelManager *mKM;
+    // Device object obtained from getDeviceFromName(); deleted by the
+    // destructor.
+    const AMDILDevice *mDevice;
+    // Default work-group size per dimension: {64, 1, 1} unless a "+mwgs"
+    // feature overrides it.
+    size_t mDefaultSize[3];
+    // NOTE(review): not referenced by AMDILSubtarget.cpp -- confirm it is
+    // still needed.
+    size_t mMinimumSize[3];
+    // Name of the device the subtarget was created for.
+    std::string mDevName;
+    // CAL version from a "+cal=<n>" feature, or (uint32_t)-1 when none
+    // (or zero) was given.
+    uint32_t mVersion;
+    // Set to false by the constructor and passed to getDeviceFromName().
+    bool mIs64bit;
+    // NOTE(review): not referenced by AMDILSubtarget.cpp -- confirm it is
+    // still needed.
+    bool mIs32on64bit;
+  public:
+    /// \param TT  target triple.
+    /// \param CPU device name.
+    /// \param FS  comma-separated feature string.
+    AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
+    virtual ~AMDILSubtarget();
+    /// Return the override flag stored for the given capability.
+    bool isOverride(AMDILDeviceInfo::Caps) const;
+    /// Return the stored 64-bit flag.
+    bool is64bit() const;
+
+    // Helper functions to simplify if statements
+    bool isTargetELF() const;
+    const AMDILGlobalManager* getGlobalManager() const;
+    void setGlobalManager(const AMDILGlobalManager *gm) const;
+    const AMDILKernelManager* getKernelManager() const;
+    void setKernelManager(const AMDILKernelManager *km) const;
+    /// Return the device object pointer; the subtarget keeps ownership.
+    const AMDILDevice* device() const;
+    /// Return the device's data layout string, or a built-in default when
+    /// there is no device object.
+    std::string getDataLayout() const;
+    /// Return the stored device name.
+    std::string getDeviceName() const;
+    /// Return the default work-group size for dimension \p dim (0..2).
+    virtual size_t getDefaultSize(uint32_t dim) const;
+    // Return the version of CAL that the backend should target.
+    uint32_t calVersion() const;
+    // ParseSubtargetFeatures - Parses the feature string and sets the
+    // specified subtarget options. The definition of this function is
+    // auto-generated by tblgen.
+    void
+      ParseSubtargetFeatures(
+          llvm::StringRef CPU,
+          llvm::StringRef FS);
+
+  };
+
+} // end namespace llvm
+
+#endif // _AMDILSUBTARGET_H_
diff --git a/src/gallium/drivers/radeon/AMDILSwizzleEncoder.cpp b/src/gallium/drivers/radeon/AMDILSwizzleEncoder.cpp
new file mode 100644
index 00000000000..884fb2e65ed
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILSwizzleEncoder.cpp
@@ -0,0 +1,1225 @@
+//===-------- AMDILSwizzleEncoder.cpp - Encode the swizzle information ----===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// The implementation of the AMDIL Swizzle Encoder. The swizzle encoder goes
+// through all instructions in a machine function and all operands and
+// encodes swizzle information in the operands. The AsmParser can then
+// use the swizzle information to print out the swizzles correctly.
+//===----------------------------------------------------------------------===//
+// Debug-type name used by this file when checking isCurrentDebugType().
+#define DEBUG_TYPE "SwizzleEncoder"
+// DEBUGME: in builds without NDEBUG it is true when DebugFlag is set and the
+// current debug type matches DEBUG_TYPE; with NDEBUG it is the constant false.
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "AMDILSwizzleEncoder.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+/// Encode all of the swizzles for the instructions in the machine function.
+static void encodeSwizzles(MachineFunction &MF, bool mDebug,
+ const AMDILTargetMachine *ATM);
+#if 0
+static void allocateSwizzles(MachineFunction &MF, bool mDebug);
+static void allocateDstOperands(MachineFunction &MF,
+ std::map<unsigned, unsigned> &scalars,
+ std::map<unsigned, unsigned> &doubles,
+ bool mDebug);
+static void allocateSrcOperands(MachineFunction &MF,
+ std::map<unsigned, unsigned> &scalars,
+ std::map<unsigned, unsigned> &doubles,
+ bool mDebug);
+#endif
+/// Get the swizzle id for the src swizzle that corresponds to the
+/// current operand.
+static OpSwizzle getSrcSwizzleID(MachineInstr *MI, unsigned opNum,
+ const AMDILTargetMachine *ATM);
+
+/// Get the swizzle id for the dst swizzle that corresponds to the
+/// current instruction.
+static OpSwizzle getDstSwizzleID(MachineInstr *MI,
+ const AMDILTargetMachine *ATM);
+
+/// Determine if the custom source swizzle or the
+/// default swizzle for the specified operand should be used.
+static bool isCustomSrcInst(MachineInstr *MI, unsigned opNum);
+
+/// Get the custom source swizzle that corresponds to the specified
+/// operand for the instruction.
+static OpSwizzle getCustomSrcSwizzle(MachineInstr *MI, unsigned opNum);
+
+/// Determine if the custom destination swizzle or the
+/// default swizzle should be used for the instruction.
+static bool isCustomDstInst(MachineInstr *MI);
+
+/// Get the custom destination swizzle that corresponds to the
+/// instruction.
+static OpSwizzle getCustomDstSwizzle(MachineInstr *MI);
+
+/// Determine if the instruction is a custom vector instruction
+/// that needs a unique swizzle type.
+static bool isCustomVectorInst(MachineInstr *MI);
+
+/// Encode the new swizzle for the vector instruction.
+static void encodeVectorInst(MachineInstr *MI,
+ const AMDILTargetMachine *STM, bool mDebug);
+#if 0
+static void dumpSwizzle(OpSwizzle);
+#endif
+/// Helper function to dump the operand for the machine instruction
+/// and the relevant target flags.
+static void dumpOperand(MachineInstr *MI, unsigned opNum);
+#if 0
+static void propogateSrcSwizzles(MachineOperand *MO, unsigned idx, bool mDebug);
+#endif
+namespace llvm {
+  /// Allocate a new AMDILSwizzleEncoder pass for \p TM at \p OptLevel and
+  /// return it as a FunctionPass pointer.
+  FunctionPass *createAMDILSwizzleEncoder(TargetMachine &TM,
+                                          CodeGenOpt::Level OptLevel)
+  {
+    FunctionPass *encoderPass = new AMDILSwizzleEncoder(TM, OptLevel);
+    return encoderPass;
+  }
+}
+
+/// Construct the pass: remember the optimisation level, sample the DEBUGME
+/// flag once, and keep the target machine as an AMDILTargetMachine pointer.
+/// The base-class constructor argument depends on LLVM_VERSION.
+AMDILSwizzleEncoder::AMDILSwizzleEncoder(TargetMachine &tm,
+                                         CodeGenOpt::Level OptLevel)
+#if LLVM_VERSION >= 2500
+  : MachineFunctionPass(ID)
+#else
+  : MachineFunctionPass((intptr_t)&ID)
+#endif
+{
+  opt = OptLevel;
+  mDebug = DEBUGME;
+  ATM = reinterpret_cast<const AMDILTargetMachine*>(&tm);
+}
+
+/// Return the fixed display name of this pass.
+const char *AMDILSwizzleEncoder::getPassName() const
+{
+  static const char *const passName = "AMD IL Swizzle Encoder Pass";
+  return passName;
+}
+
+/// Run the pass on \p MF: encode swizzle information into the operands of
+/// its instructions via encodeSwizzles(). Always returns true.
+bool AMDILSwizzleEncoder::runOnMachineFunction(MachineFunction &MF)
+{
+  encodeSwizzles(MF, mDebug, ATM);
+#if 0
+  // Compiled out: optional packing stage, enabled at run time by setting
+  // GPU_PACK_SWIZZLES=1 and skipped at CodeGenOpt::None.
+  const char *packSwizz = getenv("GPU_PACK_SWIZZLES");
+  if (packSwizz && packSwizz[0] == '1') {
+    if (opt != CodeGenOpt::None) {
+      if (mDebug) {
+        dbgs() << "//--------------------------- Packing Stage "
+          << "---------------------------//\n";
+      }
+      allocateSwizzles(MF, mDebug);
+    }
+  }
+#endif
+  return true;
+}
+#if 0
+// Compiled out. Prints to dbgs() whether swizID is a destination or source
+// swizzle, its numeric swizzle id, and the value returned for that id by
+// getDstSwizzle() or getSrcSwizzle().
+void dumpSwizzle(OpSwizzle swizID) {
+ dbgs() << "\t" << (swizID.bits.dst ? "Dst" : "Src")
+ << " Operand SwizID: "
+ << (unsigned)swizID.bits.swizzle
+ << " Swizzle: " << (swizID.bits.dst
+ ? getDstSwizzle(swizID.bits.swizzle)
+ : getSrcSwizzle(swizID.bits.swizzle)) << "\n";
+
+}
+#endif
+/// Print the swizzle information of operand \p opNum of \p MI to dbgs().
+///
+/// The operand's target flags are read as an OpSwizzle. The output line
+/// shows whether it is a destination or source swizzle, the operand index,
+/// the swizzle id, and the value returned for that id by getDstSwizzle()
+/// or getSrcSwizzle().
+void dumpOperand(MachineInstr *MI, unsigned opNum)
+{
+  OpSwizzle swizID;
+  swizID.u8all = MI->getOperand(opNum).getTargetFlags();
+
+  dbgs() << "\t";
+  if (swizID.bits.dst) {
+    dbgs() << "Dst";
+  } else {
+    dbgs() << "Src";
+  }
+  dbgs() << " Operand: " << opNum;
+  dbgs() << " SwizID: " << (unsigned)swizID.bits.swizzle;
+  dbgs() << " Swizzle: ";
+  if (swizID.bits.dst) {
+    dbgs() << getDstSwizzle(swizID.bits.swizzle);
+  } else {
+    dbgs() << getSrcSwizzle(swizID.bits.swizzle);
+  }
+  dbgs() << "\n";
+}
+// Returns true when source operand opNum of MI does not follow the
+// normal swizzle pattern and must be encoded by getCustomSrcSwizzle.
+// The decision is made per opcode (and, for a few opcodes, per operand).
+bool isCustomSrcInst(MachineInstr *MI, unsigned opNum) {
+ unsigned opcode = MI->getOpcode();
+ switch (opcode) {
+ default: // opcode is not one of the explicit cases listed below
+ if ((opcode >= AMDIL::VEXTRACT_v2f32 &&
+ opcode <= AMDIL::VINSERT_v4i8)) {
+ return true; // opcodes in the VEXTRACT..VINSERT range are always custom
+ }
+ break; // leave the switch; the MACRO name check below decides
+ case AMDIL::LDSLOAD:
+ case AMDIL::LDSLOAD_i8:
+ case AMDIL::LDSLOAD_u8:
+ case AMDIL::LDSLOAD_i16:
+ case AMDIL::LDSLOAD_u16:
+ case AMDIL::LDSSTORE:
+ case AMDIL::LDSSTORE_i8:
+ case AMDIL::LDSSTORE_i16:
+ case AMDIL::LDSLOAD_Y:
+ case AMDIL::LDSSTORE_Y:
+ case AMDIL::LDSLOAD_Z:
+ case AMDIL::LDSSTORE_Z:
+ case AMDIL::LDSLOAD_W:
+ case AMDIL::LDSSTORE_W:
+ case AMDIL::GDSLOAD:
+ case AMDIL::GDSSTORE:
+ case AMDIL::GDSLOAD_Y:
+ case AMDIL::GDSSTORE_Y:
+ case AMDIL::GDSLOAD_Z:
+ case AMDIL::GDSSTORE_Z:
+ case AMDIL::GDSLOAD_W:
+ case AMDIL::GDSSTORE_W:
+ case AMDIL::SCRATCHLOAD:
+ case AMDIL::CBLOAD:
+ case AMDIL::SCRATCHSTORE:
+ case AMDIL::SCRATCHSTORE_X:
+ case AMDIL::SCRATCHSTORE_Y:
+ case AMDIL::SCRATCHSTORE_Z:
+ case AMDIL::SCRATCHSTORE_W:
+ case AMDIL::SCRATCHSTORE_XY:
+ case AMDIL::SCRATCHSTORE_ZW:
+ case AMDIL::UAVARENALOAD_i8:
+ case AMDIL::UAVARENALOAD_i16:
+ case AMDIL::UAVARENALOAD_i32:
+ case AMDIL::UAVARENASTORE_i8:
+ case AMDIL::UAVARENASTORE_i16:
+ case AMDIL::UAVARENASTORE_i32:
+ case AMDIL::UAVARENALOAD_Y_i32:
+ case AMDIL::UAVARENALOAD_Z_i32:
+ case AMDIL::UAVARENALOAD_W_i32:
+ case AMDIL::UAVARENASTORE_Y_i32:
+ case AMDIL::UAVARENASTORE_Z_i32:
+ case AMDIL::UAVARENASTORE_W_i32:
+ return true; // every operand of the opcodes above is custom
+ case AMDIL::CMOVLOG_Y_i32:
+ case AMDIL::CMOVLOG_Z_i32:
+ case AMDIL::CMOVLOG_W_i32:
+ case AMDIL::CMOVLOG_f64:
+ case AMDIL::CMOVLOG_i64:
+ return (opNum == 1) ? true : false; // only operand 1 is custom
+ case AMDIL::SUB_f64:
+ case AMDIL::SUB_v2f64:
+ return (opNum == 2) ? true : false; // only operand 2 is custom
+ case AMDIL::APPEND_CONSUME:
+ case AMDIL::APPEND_CONSUME_NORET:
+ case AMDIL::APPEND_ALLOC:
+ case AMDIL::APPEND_ALLOC_NORET:
+ case AMDIL::LLO:
+ case AMDIL::LLO_v2i64:
+ case AMDIL::LHI:
+ case AMDIL::LHI_v2i64:
+ case AMDIL::LCREATE:
+ case AMDIL::LCREATE_v2i64:
+ case AMDIL::LNEGATE:
+ case AMDIL::LNEGATE_v2i64:
+ case AMDIL::CALL:
+ case AMDIL::RETURN:
+ case AMDIL::RETDYN:
+ case AMDIL::NEG_f32:
+ case AMDIL::NEG_v2f32:
+ case AMDIL::NEG_v4f32:
+ case AMDIL::NEG_f64:
+ case AMDIL::NEG_v2f64:
+ case AMDIL::DHI:
+ case AMDIL::DLO:
+ case AMDIL::DCREATE:
+ case AMDIL::DHI_v2f64:
+ case AMDIL::DLO_v2f64:
+ case AMDIL::DCREATE_v2f64:
+ case AMDIL::ADDri:
+ case AMDIL::ADDir:
+ case AMDIL::HILO_BITOR_v2i32:
+ case AMDIL::HILO_BITOR_v4i16:
+ case AMDIL::HILO_BITOR_v2i64:
+ case AMDIL::CONTINUE_LOGICALNZ_f64:
+ case AMDIL::BREAK_LOGICALNZ_f64:
+ case AMDIL::IF_LOGICALNZ_f64:
+ case AMDIL::CONTINUE_LOGICALZ_f64:
+ case AMDIL::BREAK_LOGICALZ_f64:
+ case AMDIL::IF_LOGICALZ_f64:
+ case AMDIL::CONTINUE_LOGICALNZ_i64:
+ case AMDIL::BREAK_LOGICALNZ_i64:
+ case AMDIL::IF_LOGICALNZ_i64:
+ case AMDIL::CONTINUE_LOGICALZ_i64:
+ case AMDIL::BREAK_LOGICALZ_i64:
+ case AMDIL::IF_LOGICALZ_i64:
+ case AMDIL::SWITCH:
+ return true; // every operand of the opcodes above is custom
+ case AMDIL::UBIT_INSERT_i32:
+ case AMDIL::UBIT_INSERT_v2i32:
+ case AMDIL::UBIT_INSERT_v4i32:
+ return (opNum == 1 || opNum == 2); // only operands 1 and 2 are custom
+ };
+ return !strncmp(MI->getDesc().getName(), "MACRO", 5); // otherwise custom only if the instruction name starts with "MACRO"
+}
+
+// Returns the OpSwizzle carrying the custom swizzle for source operand
+// opNum of MI. The result starts as all-zero (dst bit clear); instructions
+// whose name starts with "MACRO" and opcodes that match no case keep that
+// zero encoding. Callers are expected to have checked isCustomSrcInst()
+// first; several cases assert on the operand number they support.
+OpSwizzle getCustomSrcSwizzle(MachineInstr *MI, unsigned opNum) {
+  OpSwizzle opSwiz;
+  opSwiz.u8all = 0;
+  if (!strncmp(MI->getDesc().getName(), "MACRO", 5)) {
+    return opSwiz;
+  }
+  unsigned opcode = MI->getOpcode();
+  switch (opcode) {
+  default:
+    if ((opcode >= AMDIL::VEXTRACT_v2f32 &&
+         opcode <= AMDIL::VINSERT_v4i8)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_DEFAULT;
+    }
+    break;
+  case AMDIL::SCRATCHSTORE:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  // For the memory opcodes below only operand 1 gets a component
+  // swizzle; every other operand uses the default swizzle.
+  case AMDIL::SCRATCHLOAD:
+  case AMDIL::CBLOAD:
+  case AMDIL::SCRATCHSTORE_X:
+  case AMDIL::SCRATCHSTORE_Y:
+  case AMDIL::SCRATCHSTORE_Z:
+  case AMDIL::SCRATCHSTORE_W:
+  case AMDIL::UAVARENALOAD_i8:
+  case AMDIL::UAVARENALOAD_i16:
+  case AMDIL::UAVARENALOAD_i32:
+  case AMDIL::UAVARENASTORE_i8:
+  case AMDIL::UAVARENASTORE_i16:
+  case AMDIL::UAVARENASTORE_i32:
+  case AMDIL::LDSLOAD:
+  case AMDIL::LDSLOAD_i8:
+  case AMDIL::LDSLOAD_u8:
+  case AMDIL::LDSLOAD_i16:
+  case AMDIL::LDSLOAD_u16:
+  case AMDIL::LDSSTORE:
+  case AMDIL::LDSSTORE_i8:
+  case AMDIL::LDSSTORE_i16:
+  case AMDIL::GDSLOAD:
+  case AMDIL::GDSSTORE:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_X : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::LDSLOAD_Y:
+  case AMDIL::LDSSTORE_Y:
+  case AMDIL::GDSLOAD_Y:
+  case AMDIL::GDSSTORE_Y:
+  case AMDIL::UAVARENALOAD_Y_i32:
+  case AMDIL::UAVARENASTORE_Y_i32:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_Y : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::LDSLOAD_Z:
+  case AMDIL::LDSSTORE_Z:
+  case AMDIL::GDSLOAD_Z:
+  case AMDIL::GDSSTORE_Z:
+  case AMDIL::UAVARENALOAD_Z_i32:
+  case AMDIL::UAVARENASTORE_Z_i32:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_Z : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::LDSLOAD_W:
+  case AMDIL::LDSSTORE_W:
+  case AMDIL::GDSLOAD_W:
+  case AMDIL::GDSSTORE_W:
+  case AMDIL::UAVARENALOAD_W_i32:
+  case AMDIL::UAVARENASTORE_W_i32:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_W : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::SCRATCHSTORE_XY:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_XY00 : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::SCRATCHSTORE_ZW:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_00XY : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::APPEND_CONSUME:
+  case AMDIL::APPEND_CONSUME_NORET:
+  case AMDIL::APPEND_ALLOC:
+  case AMDIL::APPEND_ALLOC_NORET:
+  case AMDIL::ADDri:
+  case AMDIL::ADDir:
+  case AMDIL::CALL:
+  case AMDIL::RETURN:
+  case AMDIL::RETDYN:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::CMOVLOG_Y_i32:
+    assert(opNum == 1 && "Only operand number 1 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_YYYY;
+    break;
+  case AMDIL::CMOVLOG_Z_i32:
+    assert(opNum == 1 && "Only operand number 1 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_ZZZZ;
+    break;
+  case AMDIL::CMOVLOG_W_i32:
+    assert(opNum == 1 && "Only operand number 1 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_WWWW;
+    break;
+  case AMDIL::CMOVLOG_f64:
+  case AMDIL::CMOVLOG_i64:
+    assert(opNum == 1 && "Only operand number 1 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XXXX;
+    break;
+  case AMDIL::DHI:
+  case AMDIL::LLO:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_X000;
+    break;
+  case AMDIL::DHI_v2f64:
+  case AMDIL::LLO_v2i64:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XZXZ;
+    break;
+  case AMDIL::DLO:
+  case AMDIL::LHI:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_Y000;
+    break;
+  case AMDIL::DLO_v2f64:
+  case AMDIL::LHI_v2i64:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_YWYW;
+    break;
+  case AMDIL::DCREATE:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_0X00 : AMDIL_SRC_SWIZZLE_X000;
+    break;
+  case AMDIL::DCREATE_v2f64:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_0X0Y : AMDIL_SRC_SWIZZLE_X0Y0;
+    break;
+  case AMDIL::LCREATE:
+    // The swizzle ID is the operand number itself.
+    opSwiz.bits.swizzle = opNum;
+    break;
+  case AMDIL::LCREATE_v2i64:
+    // The swizzle ID is the operand number offset by 32.
+    opSwiz.bits.swizzle = opNum + 32;
+    break;
+  case AMDIL::SUB_f64:
+    assert(opNum == 2 && "Only operand number 2 is custom!");
+    // fallthrough: SUB_f64 shares the swizzle used by NEG_f64/LNEGATE
+  case AMDIL::NEG_f64:
+  case AMDIL::LNEGATE:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XY_NEGY;
+    break;
+  case AMDIL::SUB_v2f64:
+    assert(opNum == 2 && "Only operand number 2 is custom!");
+    // fallthrough: SUB_v2f64 shares the swizzle used by the v2 negates
+  case AMDIL::NEG_v2f64:
+  case AMDIL::LNEGATE_v2i64:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_NEGYW;
+    break;
+  case AMDIL::NEG_f32:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_NEGX;
+    break;
+  case AMDIL::NEG_v2f32:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XY_NEGXY;
+    break;
+  case AMDIL::NEG_v4f32:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_NEG_XYZW;
+    break;
+  case AMDIL::SWITCH:
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_X;
+    break;
+  case AMDIL::CONTINUE_LOGICALNZ_f64:
+  case AMDIL::BREAK_LOGICALNZ_f64:
+  case AMDIL::IF_LOGICALNZ_f64:
+  case AMDIL::CONTINUE_LOGICALZ_f64:
+  case AMDIL::BREAK_LOGICALZ_f64:
+  case AMDIL::IF_LOGICALZ_f64:
+  case AMDIL::CONTINUE_LOGICALNZ_i64:
+  case AMDIL::BREAK_LOGICALNZ_i64:
+  case AMDIL::IF_LOGICALNZ_i64:
+  case AMDIL::CONTINUE_LOGICALZ_i64:
+  case AMDIL::BREAK_LOGICALZ_i64:
+  case AMDIL::IF_LOGICALZ_i64:
+    assert(opNum == 0
+           && "Only operand numbers 0 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_X;
+    break;
+  // The operand test is parenthesized so the message string attaches to the
+  // whole condition rather than only to the `opNum == 2` half.
+  case AMDIL::UBIT_INSERT_i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_X;
+    break;
+  case AMDIL::UBIT_INSERT_v2i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XY;
+    break;
+  case AMDIL::UBIT_INSERT_v4i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_XYZW;
+    break;
+  case AMDIL::HILO_BITOR_v4i16:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_XZXZ : AMDIL_SRC_SWIZZLE_YWYW;
+    break;
+  case AMDIL::HILO_BITOR_v2i32:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_X000 : AMDIL_SRC_SWIZZLE_Y000;
+    break;
+  case AMDIL::HILO_BITOR_v2i64:
+    opSwiz.bits.swizzle = (opNum == 1)
+      ? AMDIL_SRC_SWIZZLE_XY00 : AMDIL_SRC_SWIZZLE_ZW00;
+    break;
+  }
+  return opSwiz;
+}
+
+// Returns true when the destination operand of MI does not follow the
+// normal swizzle pattern and must be encoded by getCustomDstSwizzle.
+// The decision is made from the opcode, then from the instruction name.
+bool isCustomDstInst(MachineInstr *MI) {
+ unsigned opcode = MI->getOpcode();
+ if ((opcode >= AMDIL::VEXTRACT_v2f32 &&
+ opcode <= AMDIL::VINSERT_v4i8)) {
+ return true; // opcodes in the VEXTRACT..VINSERT range
+ }
+ if ((opcode >= AMDIL::VCREATE_v2f32 &&
+ opcode <= AMDIL::VCREATE_v4i8) ||
+ (opcode >= AMDIL::LOADCONST_f32 &&
+ opcode <= AMDIL::LOADCONST_i8)) {
+ return true; // opcodes in the VCREATE or LOADCONST ranges
+ }
+ switch (opcode) {
+ default:
+ break; // not listed: the MACRO name check below decides
+ case AMDIL::UAVARENASTORE_i8:
+ case AMDIL::UAVARENASTORE_i16:
+ case AMDIL::UAVARENASTORE_i32:
+ case AMDIL::UAVARENASTORE_Y_i32:
+ case AMDIL::UAVARENASTORE_Z_i32:
+ case AMDIL::UAVARENASTORE_W_i32:
+ case AMDIL::UAVARENALOAD_i8:
+ case AMDIL::UAVARENALOAD_i16:
+ case AMDIL::UAVARENALOAD_i32:
+ case AMDIL::UAVARENALOAD_Y_i32:
+ case AMDIL::UAVARENALOAD_Z_i32:
+ case AMDIL::UAVARENALOAD_W_i32:
+ case AMDIL::LDSLOAD:
+ case AMDIL::LDSLOAD_i8:
+ case AMDIL::LDSLOAD_u8:
+ case AMDIL::LDSLOAD_i16:
+ case AMDIL::LDSLOAD_u16:
+ case AMDIL::LDSSTORE:
+ case AMDIL::LDSSTORE_i8:
+ case AMDIL::LDSSTORE_i16:
+ case AMDIL::LDSLOAD_Y:
+ case AMDIL::LDSSTORE_Y:
+ case AMDIL::LDSLOAD_Z:
+ case AMDIL::LDSSTORE_Z:
+ case AMDIL::LDSLOAD_W:
+ case AMDIL::LDSSTORE_W:
+ case AMDIL::GDSLOAD:
+ case AMDIL::GDSSTORE:
+ case AMDIL::GDSLOAD_Y:
+ case AMDIL::GDSSTORE_Y:
+ case AMDIL::GDSLOAD_Z:
+ case AMDIL::GDSSTORE_Z:
+ case AMDIL::GDSLOAD_W:
+ case AMDIL::GDSSTORE_W:
+ case AMDIL::SCRATCHSTORE:
+ case AMDIL::SCRATCHSTORE_X:
+ case AMDIL::SCRATCHSTORE_Y:
+ case AMDIL::SCRATCHSTORE_Z:
+ case AMDIL::SCRATCHSTORE_W:
+ case AMDIL::SCRATCHSTORE_XY:
+ case AMDIL::SCRATCHSTORE_ZW:
+ case AMDIL::APPEND_CONSUME:
+ case AMDIL::APPEND_CONSUME_NORET:
+ case AMDIL::APPEND_ALLOC:
+ case AMDIL::APPEND_ALLOC_NORET:
+ case AMDIL::ADDri:
+ case AMDIL::ADDir:
+ case AMDIL::HILO_BITOR_v2i32:
+ case AMDIL::HILO_BITOR_v4i16:
+ case AMDIL::HILO_BITOR_v2i64:
+ return true; // every opcode listed above has a custom destination
+ }
+
+ return !strncmp(MI->getDesc().getName(), "MACRO", 5); // otherwise custom only if the instruction name starts with "MACRO"
+}
+// This function returns the OpSwizzle with the custom swizzle set
+// correctly for the first (destination-position) operand of MI.
+OpSwizzle getCustomDstSwizzle(MachineInstr *MI) {
+ OpSwizzle opSwiz;
+ opSwiz.u8all = 0;
+ unsigned opcode = MI->getOpcode();
+ opSwiz.bits.dst = 1; // start as a destination encoding with swizzle ID 0
+ if (!strncmp(MI->getDesc().getName(), "MACRO", 5)) {
+ return opSwiz; // MACRO instructions keep dst = 1, swizzle = 0
+ }
+ if ((opcode >= AMDIL::VEXTRACT_v2f32 &&
+ opcode <= AMDIL::VINSERT_v4i8)) {
+ opSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_DEFAULT;
+ } else if ((opcode >= AMDIL::VCREATE_v2f32 &&
+ opcode <= AMDIL::VCREATE_v4i8) ||
+ (opcode >= AMDIL::LOADCONST_f32 &&
+ opcode <= AMDIL::LOADCONST_i8) ||
+ opcode == AMDIL::ADDri ||
+ opcode == AMDIL::ADDir) {
+ opSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_DEFAULT;
+ } else {
+ switch (opcode) {
+ case AMDIL::LDSLOAD_Y:
+ case AMDIL::LDSSTORE_Y:
+ case AMDIL::GDSLOAD_Y:
+ case AMDIL::GDSSTORE_Y:
+ case AMDIL::UAVARENASTORE_Y_i32:
+ case AMDIL::UAVARENALOAD_Y_i32:
+ opSwiz.bits.dst = 0; // encoded as a source swizzle: dst bit cleared
+ opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_Y;
+ break;
+ case AMDIL::LDSLOAD_Z:
+ case AMDIL::LDSSTORE_Z:
+ case AMDIL::GDSLOAD_Z:
+ case AMDIL::GDSSTORE_Z:
+ case AMDIL::UAVARENASTORE_Z_i32:
+ case AMDIL::UAVARENALOAD_Z_i32:
+ opSwiz.bits.dst = 0; // encoded as a source swizzle: dst bit cleared
+ opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_Z;
+ break;
+ case AMDIL::LDSLOAD_W:
+ case AMDIL::LDSSTORE_W:
+ case AMDIL::GDSLOAD_W:
+ case AMDIL::GDSSTORE_W:
+ case AMDIL::UAVARENASTORE_W_i32:
+ case AMDIL::UAVARENALOAD_W_i32:
+ opSwiz.bits.dst = 0; // encoded as a source swizzle: dst bit cleared
+ opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_W;
+ break;
+ case AMDIL::LDSLOAD:
+ case AMDIL::LDSLOAD_i8:
+ case AMDIL::LDSLOAD_u8:
+ case AMDIL::LDSLOAD_i16:
+ case AMDIL::LDSLOAD_u16:
+ case AMDIL::LDSSTORE:
+ case AMDIL::LDSSTORE_i8:
+ case AMDIL::LDSSTORE_i16:
+ case AMDIL::UAVARENALOAD_i8:
+ case AMDIL::UAVARENALOAD_i16:
+ case AMDIL::UAVARENALOAD_i32:
+ case AMDIL::UAVARENASTORE_i8:
+ case AMDIL::UAVARENASTORE_i16:
+ case AMDIL::UAVARENASTORE_i32:
+ case AMDIL::GDSLOAD:
+ case AMDIL::GDSSTORE:
+ case AMDIL::SCRATCHSTORE:
+ case AMDIL::SCRATCHSTORE_X:
+ case AMDIL::SCRATCHSTORE_Y:
+ case AMDIL::SCRATCHSTORE_Z:
+ case AMDIL::SCRATCHSTORE_W:
+ case AMDIL::SCRATCHSTORE_XY:
+ case AMDIL::SCRATCHSTORE_ZW:
+ case AMDIL::APPEND_CONSUME:
+ case AMDIL::APPEND_CONSUME_NORET:
+ case AMDIL::APPEND_ALLOC:
+ case AMDIL::APPEND_ALLOC_NORET:
+ opSwiz.bits.dst = 0; // encoded as a source swizzle: dst bit cleared
+ opSwiz.bits.swizzle = AMDIL_SRC_SWIZZLE_X;
+ break;
+ case AMDIL::HILO_BITOR_v2i32:
+ opSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_X___;
+ break;
+ case AMDIL::HILO_BITOR_v4i16:
+ case AMDIL::HILO_BITOR_v2i64:
+ opSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_XY__;
+ break;
+ default: // opcode that isCustomDstInst does not list: programming error
+ assert(0
+ && "getCustomDstSwizzle hit an opcode it doesnt' understand!");
+ opSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_X___; // value used when asserts are compiled out
+ };
+ }
+ return opSwiz;
+}
+
+/// Compute the swizzle encoding for source operand opNum of MI.
+///
+/// Custom instructions are delegated to getCustomSrcSwizzle(). Loads and
+/// stores pick the swizzle from the register class of operand 0 (the size
+/// of the access); all other non-call instructions pick it from the
+/// register class of the operand itself (the number of components), using
+/// operand 0 when the operand is not a register. Calls that are not custom
+/// get the all-zero encoding.
+OpSwizzle getSrcSwizzleID(MachineInstr *MI, unsigned opNum,
+                          const AMDILTargetMachine *ATM)
+{
+  assert(opNum < MI->getNumOperands() &&
+         "Must pass in a valid operand number.");
+  OpSwizzle result;
+  result.u8all = 0;
+  result.bits.dst = 0; // We need to reset the dst bit.
+
+  if (isCustomSrcInst(MI, opNum)) {
+    result = getCustomSrcSwizzle(MI, opNum);
+    return result;
+  }
+
+  const bool isMemOp = isLoadInst(MI) || isStoreInst(MI);
+  if (!isMemOp && MI->getDesc().isCall()) {
+    return result;
+  }
+
+  // If we aren't a register, then we base the operand swizzle on the
+  // first operand.
+  if (!MI->getOperand(opNum).isReg()) {
+    opNum = 0;
+  }
+
+  const TargetInstrInfo *TII = ATM->getInstrInfo();
+  const AMDILRegisterInfo *TRI = ATM->getRegisterInfo();
+  // Loads/stores always look at operand 0; everything else looks at the
+  // (possibly redirected) operand.
+  const unsigned classOperand = isMemOp ? 0 : opNum;
+  const TargetRegisterClass *TRC =
+    TII->getRegClass(MI->getDesc(), classOperand, TRI);
+  const unsigned trcID = TRC ? TRC->getID() : AMDIL::GPRV4I32RegClassID;
+
+  switch (trcID) {
+  case AMDIL::GPRV2F32RegClassID:
+  case AMDIL::GPRV2I32RegClassID:
+  case AMDIL::GPRI64RegClassID:
+  case AMDIL::GPRF64RegClassID:
+    result.bits.swizzle = AMDIL_SRC_SWIZZLE_XYXY;
+    break;
+  case AMDIL::GPRV4F32RegClassID:
+  case AMDIL::GPRV4I32RegClassID:
+  case AMDIL::GPRV2I64RegClassID:
+  case AMDIL::GPRV2F64RegClassID:
+    result.bits.swizzle = AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    // Two-component swizzle for a load/store, full swizzle otherwise.
+    result.bits.swizzle = isMemOp ? AMDIL_SRC_SWIZZLE_XYXY
+                                  : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+  case AMDIL::GPRV2I8RegClassID:
+    // Scalar swizzle for a load/store, two-component swizzle otherwise.
+    result.bits.swizzle = isMemOp ? AMDIL_SRC_SWIZZLE_XXXX
+                                  : AMDIL_SRC_SWIZZLE_XYXY;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    // Scalar swizzle for a load/store, full swizzle otherwise.
+    result.bits.swizzle = isMemOp ? AMDIL_SRC_SWIZZLE_XXXX
+                                  : AMDIL_SRC_SWIZZLE_DEFAULT;
+    break;
+  default:
+    result.bits.swizzle = AMDIL_SRC_SWIZZLE_XXXX;
+    break;
+  }
+  return result;
+}
+
+/// Compute the swizzle encoding for the destination operand of MI.
+///
+/// The result starts as a destination encoding with the default swizzle.
+/// Custom instructions are delegated to getCustomDstSwizzle(). Loads and
+/// stores, and all other non-call instructions, pick the swizzle from the
+/// register class of operand 0; the two groups use different class tables.
+/// Calls that are not custom keep the default destination swizzle.
+OpSwizzle getDstSwizzleID(MachineInstr *MI,
+                          const AMDILTargetMachine *ATM)
+{
+  OpSwizzle curSwiz;
+  // Zero the whole byte first, as getSrcSwizzleID and getCustomDstSwizzle
+  // do, so u8all never carries indeterminate bits when it is read back.
+  // NOTE(review): OpSwizzle's layout is declared elsewhere; this is
+  // defensive if the two bitfields already cover the full byte.
+  curSwiz.u8all = 0;
+  curSwiz.bits.dst = 1;
+  curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_DEFAULT;
+  if (isCustomDstInst(MI)) {
+    curSwiz = getCustomDstSwizzle(MI);
+  } else if (isLoadInst(MI) || isStoreInst(MI)) {
+    // Load/store swizzles need to be based on
+    // the size of the load/store, not on the
+    // number of components.
+    const TargetInstrInfo *TII = ATM->getInstrInfo();
+    const AMDILRegisterInfo *TRI = ATM->getRegisterInfo();
+    const TargetRegisterClass *TRC = TII->getRegClass(MI->getDesc(), 0, TRI);
+    // A missing register class is treated as a four-component class.
+    unsigned trcID = (TRC) ? TRC->getID() : AMDIL::GPRV4I32RegClassID;
+    switch (trcID) {
+    default:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_X___;
+      break;
+    case AMDIL::GPRV2F32RegClassID:
+    case AMDIL::GPRV2I32RegClassID:
+    case AMDIL::GPRV4I16RegClassID:
+    case AMDIL::GPRI64RegClassID:
+    case AMDIL::GPRF64RegClassID:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_XY__;
+      break;
+    case AMDIL::GPRV4F32RegClassID:
+    case AMDIL::GPRV4I32RegClassID:
+    case AMDIL::GPRV2I64RegClassID:
+    case AMDIL::GPRV2F64RegClassID:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_DEFAULT;
+      break;
+    }
+  } else if (!MI->getDesc().isCall()) {
+    // All other non-special case instructions need to be
+    // based on the number of components
+    const TargetInstrInfo *TII = ATM->getInstrInfo();
+    const AMDILRegisterInfo *TRI = ATM->getRegisterInfo();
+    const TargetRegisterClass *TRC = TII->getRegClass(MI->getDesc(), 0, TRI);
+    // A missing register class is treated as a four-component class.
+    unsigned trcID = (TRC) ? TRC->getID() : AMDIL::GPRV4I32RegClassID;
+    switch (trcID) {
+    default:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_X___;
+      break;
+    case AMDIL::GPRV2F32RegClassID:
+    case AMDIL::GPRV2I32RegClassID:
+    case AMDIL::GPRV2I16RegClassID:
+    case AMDIL::GPRV2I8RegClassID:
+    case AMDIL::GPRI64RegClassID:
+    case AMDIL::GPRF64RegClassID:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_XY__;
+      break;
+    case AMDIL::GPRV4F32RegClassID:
+    case AMDIL::GPRV4I32RegClassID:
+    case AMDIL::GPRV4I16RegClassID:
+    case AMDIL::GPRV4I8RegClassID:
+    case AMDIL::GPRV2I64RegClassID:
+    case AMDIL::GPRV2F64RegClassID:
+      curSwiz.bits.swizzle = AMDIL_DST_SWIZZLE_DEFAULT;
+      break;
+    }
+  }
+
+  return curSwiz;
+}
+
+/// Returns true when MI's opcode lies in the VCONCAT_v2f32..VCONCAT_v4i8
+/// range or in the VEXTRACT_v2f32..VINSERT_v4i8 range. These vector
+/// instructions (everything except VCREATE_*) are encoded by the custom
+/// swizzle packing code in encodeVectorInst().
+bool isCustomVectorInst(MachineInstr *MI)
+{
+  const unsigned opc = MI->getOpcode();
+  if (opc >= AMDIL::VCONCAT_v2f32 && opc <= AMDIL::VCONCAT_v4i8) {
+    return true;
+  }
+  return opc >= AMDIL::VEXTRACT_v2f32 && opc <= AMDIL::VINSERT_v4i8;
+}
+
+/// Encode the operand swizzles of a custom vector instruction (one for
+/// which isCustomVectorInst() is true) into the operands' target flags.
+///
+/// Operand 0 always receives the destination swizzle from
+/// getDstSwizzleID(). The source operands are encoded per opcode family:
+///  - VCONCAT: operand 1 goes to the low lanes, operand 2 to the high lanes.
+///  - VEXTRACT: operand 2 is an immediate element index, treated as 1-based
+///    by the formulas below, which selects the swizzle for operand 1.
+///  - VINSERT: operand 4 is an immediate whose bytes select the lane that
+///    operand 2 replaces in operand 1.
+/// When mDebug is set, every operand's encoding is dumped afterwards.
+void encodeVectorInst(MachineInstr *MI,
+                      const AMDILTargetMachine *ATM,
+                      bool mDebug)
+{
+  assert(isCustomVectorInst(MI) && "Only a vector instruction can be"
+         " used to generate a new vector instruction!");
+  unsigned opcode = MI->getOpcode();
+  // For all of the opcodes, the destination swizzle is the same.
+  OpSwizzle swizID = getDstSwizzleID(MI, ATM);
+  OpSwizzle srcID;
+  srcID.u8all = 0;
+  MI->getOperand(0).setTargetFlags(swizID.u8all);
+  switch (opcode) {
+  case AMDIL::VCONCAT_v2f32:
+  case AMDIL::VCONCAT_v2i16:
+  case AMDIL::VCONCAT_v2i32:
+  case AMDIL::VCONCAT_v2i8:
+    srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_X000;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_0X00;
+    MI->getOperand(2).setTargetFlags(srcID.u8all);
+    break;
+  case AMDIL::VCONCAT_v2f64:
+  case AMDIL::VCONCAT_v2i64:
+  case AMDIL::VCONCAT_v4f32:
+  case AMDIL::VCONCAT_v4i16:
+  case AMDIL::VCONCAT_v4i32:
+  case AMDIL::VCONCAT_v4i8:
+    srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_XY00;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_00XY;
+    MI->getOperand(2).setTargetFlags(srcID.u8all);
+    break;
+  case AMDIL::VEXTRACT_v2f32:
+  case AMDIL::VEXTRACT_v2i16:
+  case AMDIL::VEXTRACT_v2i32:
+  case AMDIL::VEXTRACT_v2i8:
+    // The index is 1-based: 0 would make the formula below negative.
+    assert(MI->getOperand(2).getImm() >= 1
+           && MI->getOperand(2).getImm() <= 2
+           && "Invalid immediate value encountered for this formula!");
+    // fallthrough: two-element extracts share the four-element encoding
+  case AMDIL::VEXTRACT_v4f32:
+  case AMDIL::VEXTRACT_v4i16:
+  case AMDIL::VEXTRACT_v4i32:
+  case AMDIL::VEXTRACT_v4i8:
+    assert(MI->getOperand(2).getImm() >= 1
+           && MI->getOperand(2).getImm() <= 4
+           && "Invalid immediate value encountered for this formula!");
+    // Index 1..4 maps to swizzle IDs 1, 5, 9, 13.
+    srcID.bits.swizzle = ((MI->getOperand(2).getImm() - 1) * 4) + 1;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(0);
+    break;
+  case AMDIL::VEXTRACT_v2i64:
+  case AMDIL::VEXTRACT_v2f64:
+    assert(MI->getOperand(2).getImm() >= 1
+           && MI->getOperand(2).getImm() <= 2
+           && "Invalid immediate value encountered for this formula!");
+    // Index 1..2 maps to swizzle IDs 17, 19.
+    srcID.bits.swizzle = 15 + (MI->getOperand(2).getImm() * 2);
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(0);
+    break;
+  case AMDIL::VINSERT_v2f32:
+  case AMDIL::VINSERT_v2i32:
+  case AMDIL::VINSERT_v2i16:
+  case AMDIL::VINSERT_v2i8:
+  case AMDIL::VINSERT_v4f32:
+  case AMDIL::VINSERT_v4i16:
+  case AMDIL::VINSERT_v4i32:
+  case AMDIL::VINSERT_v4i8:
+    {
+      unsigned swizVal = (unsigned)MI->getOperand(4).getImm();
+      OpSwizzle src2ID;
+      src2ID.u8all = 0;
+      // The first byte above the lowest that equals 1 selects the lane
+      // being replaced; if none matches, lane X is replaced.
+      if (((swizVal >> 8) & 0xFF) == 1) {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_X0ZW;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_0X00;
+      } else if (((swizVal >> 16) & 0xFF) == 1) {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_XY0W;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_00X0;
+      } else if (((swizVal >> 24) & 0xFF) == 1) {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_XYZ0;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_000X;
+      } else {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_0YZW;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_X000;
+      }
+      MI->getOperand(1).setTargetFlags(srcID.u8all);
+      MI->getOperand(2).setTargetFlags(src2ID.u8all);
+      MI->getOperand(3).setTargetFlags(0);
+      MI->getOperand(4).setTargetFlags(0);
+    }
+    break;
+  case AMDIL::VINSERT_v2f64:
+  case AMDIL::VINSERT_v2i64:
+    {
+      unsigned swizVal = (unsigned)MI->getOperand(4).getImm();
+      OpSwizzle src2ID;
+      src2ID.u8all = 0;
+      if (((swizVal >> 8) & 0xFF) == 1) {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_XY00;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_00XY;
+      } else {
+        srcID.bits.swizzle = AMDIL_SRC_SWIZZLE_00ZW;
+        src2ID.bits.swizzle = AMDIL_SRC_SWIZZLE_XY00;
+      }
+      MI->getOperand(1).setTargetFlags(srcID.u8all);
+      MI->getOperand(2).setTargetFlags(src2ID.u8all);
+      MI->getOperand(3).setTargetFlags(0);
+      MI->getOperand(4).setTargetFlags(0);
+    }
+    break;
+  }
+  if (mDebug) {
+    for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+      dumpOperand(MI, i);
+    }
+    dbgs() << "\n";
+  }
+}
+
+// Walks every instruction of every basic block in MF and stores a swizzle
+// encoding in the target flags of each operand. RETDYN, RETURN and
+// DBG_VALUE instructions are skipped; custom vector instructions are
+// handed to encodeVectorInst(). With mDebug set, each instruction and the
+// resulting operand encodings are printed to dbgs().
+void encodeSwizzles(MachineFunction &MF, bool mDebug,
+                    const AMDILTargetMachine *ATM)
+{
+  for (MachineFunction::iterator blockIt = MF.begin(), blockEnd = MF.end();
+       blockIt != blockEnd; ++blockIt) {
+    MachineBasicBlock *MBB = blockIt;
+    for (MachineBasicBlock::iterator instIt = MBB->begin(),
+         instEnd = MBB->end(); instIt != instEnd; ++instIt) {
+      MachineInstr *MI = instIt;
+      const unsigned opc = MI->getOpcode();
+      const bool skipped = opc == AMDIL::RETDYN
+                        || opc == AMDIL::RETURN
+                        || opc == AMDIL::DBG_VALUE;
+      if (skipped) {
+        continue;
+      }
+      if (mDebug) {
+        dbgs() << "Encoding instruction: ";
+        MI->print(dbgs());
+      }
+      if (isCustomVectorInst(MI)) {
+        encodeVectorInst(MI, ATM, mDebug);
+        continue;
+      }
+      const unsigned numOps = MI->getNumOperands();
+      for (unsigned idx = 0; idx < numOps; ++idx) {
+        // Register definitions get a destination swizzle; every other
+        // operand gets a source swizzle.
+        const bool isDef = MI->getOperand(idx).isReg()
+                        && MI->getOperand(idx).isDef();
+        OpSwizzle swizID;
+        if (isDef) {
+          swizID = getDstSwizzleID(MI, ATM);
+        } else {
+          swizID = getSrcSwizzleID(MI, idx, ATM);
+        }
+        MI->getOperand(idx).setTargetFlags(swizID.u8all);
+        if (mDebug) {
+          dumpOperand(MI, idx);
+        }
+      }
+      if (mDebug) {
+        dbgs() << "\n";
+      }
+    }
+  }
+}
+#if 0
+/// Runs the two swizzle packing stages over MF: the destination stage
+/// fills two register-keyed maps, and the source stage then reads them.
+void allocateSwizzles(MachineFunction &MF, bool mDebug)
+{
+  // Keyed by register number; values are indices chosen by the dst stage.
+  std::map<unsigned, unsigned> scalarLanes;
+  std::map<unsigned, unsigned> doubleLanes;
+
+  allocateDstOperands(MF, scalarLanes, doubleLanes, mDebug);
+  allocateSrcOperands(MF, scalarLanes, doubleLanes, mDebug);
+}
+
+/// Destination stage of swizzle packing. For every instruction in MF whose
+/// operand 0 is a register definition, a scalar (X___) or two-component
+/// (XY__) destination swizzle is replaced by one taken from a rotating
+/// table, the chosen table index is recorded per register in
+/// scalarSwizMap / doubleSwizMap, and the matching source swizzle is
+/// pushed onto the following operands of that register through
+/// propogateSrcSwizzles(). Fully packed destinations are left alone.
+/// RETDYN, RETURN and operand-less instructions are skipped.
+void allocateDstOperands(MachineFunction &MF,
+                         std::map<unsigned, unsigned> &scalarSwizMap,
+                         std::map<unsigned, unsigned> &doubleSwizMap,
+                         bool mDebug)
+{
+  // Each row is {destination swizzle ID, source swizzle ID}.
+  unsigned scalarIdx = 0;
+  unsigned scalarPairs[4][2] = { {1, 1}, {5, 5}, {8, 9}, {10, 13} };
+  unsigned doubleIdx = 0;
+  unsigned doublePairs[2][2] = { {2, 17}, {9, 19} };
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+       MFI != MFE; ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+         MBI != MBE; ++MBI) {
+      MachineInstr *MI = MBI;
+      if (MI->getOpcode() == AMDIL::RETDYN
+          || MI->getOpcode() == AMDIL::RETURN
+          || !MI->getNumOperands()) {
+        continue;
+      }
+      if (mDebug) {
+        dbgs() << "Packing instruction(Dst Stage): ";
+        MI->print(dbgs());
+      }
+      MachineOperand &mOp = MI->getOperand(0);
+      if (!mOp.isReg() || !mOp.isDef()) {
+        if (mDebug) {
+          dbgs() << "\tSkipping instruction, no def found!\n\n";
+        }
+        continue;
+      }
+      OpSwizzle flags;
+      flags.u8all = mOp.getTargetFlags();
+      if (flags.bits.swizzle == AMDIL_DST_SWIZZLE_X___) {
+        if (mDebug) {
+          dbgs() << "\tOld Encoding: ";
+          dumpSwizzle(flags);
+        }
+        scalarIdx = (scalarIdx + 1) & 0x3;
+        std::map<unsigned, unsigned>::iterator regIter =
+          scalarSwizMap.find(mOp.getReg());
+        if (regIter == scalarSwizMap.end()) {
+          flags.bits.swizzle = scalarPairs[scalarIdx][0];
+          scalarSwizMap[mOp.getReg()] = scalarIdx;
+          propogateSrcSwizzles(&mOp, scalarPairs[scalarIdx][1], mDebug);
+        } else {
+          // Register already has a lane: reuse it for both the destination
+          // and the propagated source swizzle, as the two-component branch
+          // does, so the pair stays consistent.
+          flags.bits.swizzle = scalarPairs[regIter->second][0];
+          propogateSrcSwizzles(&mOp, scalarPairs[regIter->second][1], mDebug);
+        }
+        if (mDebug) {
+          dbgs() << "\tNew Encoding: ";
+          dumpSwizzle(flags);
+        }
+        mOp.setTargetFlags(flags.u8all);
+      } else if (flags.bits.swizzle == AMDIL_DST_SWIZZLE_XY__) {
+        doubleIdx = (doubleIdx + 1) & 0x1;
+        if (mDebug) {
+          dbgs() << "\tOld Encoding: ";
+          dumpSwizzle(flags);
+        }
+        std::map<unsigned, unsigned>::iterator regIter =
+          doubleSwizMap.find(mOp.getReg());
+        // Compare against the end of the map that was searched; an
+        // iterator from doubleSwizMap must not be compared with
+        // scalarSwizMap.end().
+        if (regIter == doubleSwizMap.end()) {
+          flags.bits.swizzle = doublePairs[doubleIdx][0];
+          doubleSwizMap[mOp.getReg()] = doubleIdx;
+          propogateSrcSwizzles(&mOp, doublePairs[doubleIdx][1], mDebug);
+        } else {
+          flags.bits.swizzle = doublePairs[regIter->second][0];
+          propogateSrcSwizzles(&mOp, doublePairs[regIter->second][1], mDebug);
+        }
+        if (mDebug) {
+          dbgs() << "\tNew Encoding: ";
+          dumpSwizzle(flags);
+        }
+        mOp.setTargetFlags(flags.u8all);
+      } else if (flags.bits.swizzle == AMDIL_DST_SWIZZLE_DEFAULT) {
+        if (mDebug) {
+          dbgs() << "\tSkipping instruction, fully packed!\n";
+        }
+      } else {
+        assert(0 && "Found an instruction that didn't have a swizzle!");
+      }
+      if (mDebug) {
+        dbgs() << "\n";
+      }
+    }
+  }
+}
+
+void allocateSrcOperands(MachineFunction &MF, // source stage of swizzle packing: rewrites source-operand swizzles in MF
+ std::map<unsigned, unsigned> &scalarSwizMap, // register -> row index into scalarPairs
+ std::map<unsigned, unsigned> &doubleSwizMap, // register -> row index into doublePairs
+ bool mDebug) // when true, progress is printed to dbgs()
+{
+ unsigned scalarPairs[4][2] = { // rows of {destination swizzle, source swizzle} for single components
+ {AMDIL_DST_SWIZZLE_X___, AMDIL_SRC_SWIZZLE_X000},
+ {AMDIL_DST_SWIZZLE__Y__, AMDIL_SRC_SWIZZLE_Y000},
+ {AMDIL_DST_SWIZZLE___Z_, AMDIL_SRC_SWIZZLE_Z000},
+ {AMDIL_DST_SWIZZLE____W, AMDIL_SRC_SWIZZLE_W000} };
+ unsigned doublePairs[2][2] = { // rows of {destination swizzle, source swizzle} for component pairs
+ {AMDIL_DST_SWIZZLE_XY__, AMDIL_SRC_SWIZZLE_XY00},
+ {AMDIL_DST_SWIZZLE___ZW, AMDIL_SRC_SWIZZLE_ZW00} };
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr *MI = MBI;
+ if (MI->getOpcode() == AMDIL::RETDYN
+ || MI->getOpcode() == AMDIL::RETURN
+ || !MI->getNumOperands()) {
+ continue; // returns and operand-less instructions are not packed
+ }
+ if (mDebug) {
+ dbgs() << "Packing instruction(Src Stage): ";
+ MI->print(dbgs());
+ }
+ unsigned dstIdx = 0; // destination swizzle of the most recent def operand seen in this instruction
+ for (unsigned a = 0, z = MI->getNumOperands(); a < z; ++a) {
+ MachineOperand &mOp = MI->getOperand(a);
+ if (mOp.isReg() && mOp.isDef()) {
+ if (mDebug) {
+ dbgs() << "\tEncoding: ";
+ dumpOperand(MI, a);
+ dbgs() << "\tSkipping operand: " << a << ", def found!\n\n";
+ }
+ OpSwizzle dstSwiz;
+ dstSwiz.u8all = mOp.getTargetFlags();
+ dstIdx = dstSwiz.bits.swizzle; // remember it for the source operands that follow
+ continue;
+ }
+ if (!mOp.isReg()) {
+ if (mDebug) {
+ dbgs() << "\tEncoding: ";
+ dumpOperand(MI, a);
+ dbgs() << "\tSkipping operand: " << a << ", no reg found!\n\n";
+ }
+ continue; // only register operands are rewritten
+ }
+ if (mDebug) {
+ dbgs() << "\tOld Encoding: ";
+ dumpOperand(MI, a);
+ }
+ OpSwizzle srcSwiz;
+ srcSwiz.u8all = mOp.getTargetFlags();
+ unsigned reg = mOp.getReg();
+ unsigned srcOffset = 0; // added to the table's source swizzle ID
+ std::map<unsigned, unsigned>::iterator regIter;
+ switch (dstIdx) {
+ default:
+ break; // any other destination swizzle: source operand left unchanged
+ case AMDIL_DST_SWIZZLE____W:
+ case AMDIL_DST_SWIZZLE___Z_:
+ case AMDIL_DST_SWIZZLE__Y__:
+ case AMDIL_DST_SWIZZLE_X___:
+ srcOffset = dstIdx / 3; // NOTE(review): relies on the numeric values of the scalar DST swizzle IDs, which are declared elsewhere - confirm
+ regIter = scalarSwizMap.find(reg);
+ if (regIter != scalarSwizMap.end()) {
+ srcSwiz.bits.swizzle = scalarPairs[regIter->second][1]
+ + srcOffset;
+ mOp.setTargetFlags(srcSwiz.u8all);
+ } else {
+ if (mDebug) {
+ MI->dump(); // register has no recorded scalar lane; operand left unchanged
+ }
+ }
+ break;
+ case AMDIL_DST_SWIZZLE___ZW:
+ srcOffset = 1; // no break: continues into the XY__ case with an offset of 1
+ case AMDIL_DST_SWIZZLE_XY__:
+ regIter = doubleSwizMap.find(reg);
+ if (regIter != doubleSwizMap.end()) {
+ srcSwiz.bits.swizzle = doublePairs[regIter->second][1]
+ + srcOffset;
+ mOp.setTargetFlags(srcSwiz.u8all);
+ } else {
+ if (mDebug) {
+ MI->dump(); // register has no recorded pair lane; operand left unchanged
+ }
+ }
+ break;
+ }
+ if (mDebug) {
+ dbgs() << "\tNew Encoding: ";
+ dumpOperand(MI, a);
+ }
+ if (mDebug) {
+ dbgs() << "\n";
+ }
+ }
+ }
+ }
+}
+
+/// Starting from the operand after MO in its register's operand chain,
+/// overwrite the swizzle ID of each operand with idx until the chain ends
+/// or the next definition is reached. With mDebug set, the old and new
+/// encodings are printed to dbgs().
+void propogateSrcSwizzles(MachineOperand *MO, unsigned idx, bool mDebug)
+{
+  for (MachineOperand *use = MO->getNextOperandForReg();
+       use && !use->isDef();
+       use = use->getNextOperandForReg()) {
+    OpSwizzle encoding;
+    encoding.u8all = use->getTargetFlags();
+    if (mDebug) {
+      dbgs() << "\t\tOld Swizzle: ";
+      dumpSwizzle(encoding);
+    }
+    encoding.bits.swizzle = idx;
+    if (mDebug) {
+      dbgs() << "\t\tNew Swizzle: ";
+      dumpSwizzle(encoding);
+    }
+    use->setTargetFlags(encoding.u8all);
+  }
+}
+#endif
diff --git a/src/gallium/drivers/radeon/AMDILSwizzleEncoder.h b/src/gallium/drivers/radeon/AMDILSwizzleEncoder.h
new file mode 100644
index 00000000000..d9920270788
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILSwizzleEncoder.h
@@ -0,0 +1,81 @@
+//===---------- AMDILSwizzleEncoder.h - Encode the swizzle information ----===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// The AMDIL Swizzle Encoder is a class that encodes swizzle information in the
+// machine operand target specific flag. This encoding can then be used to
+// optimize the swizzles of a specific instruction to better pack the
+// registers which will help allocation with SC.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL_SWIZZLE_ENCODER_H_
+#define _AMDIL_SWIZZLE_ENCODER_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ // Machine function pass that, per the file header above, encodes swizzle
+ // information in the machine operand target specific flag.
+ class AMDILSwizzleEncoder : public MachineFunctionPass
+ {
+ public:
+ // Construct the pass for target machine `tm` at optimization level `OptLevel`.
+ AMDILSwizzleEncoder(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ // Name of this pass.
+ const char* getPassName() const;
+ // Entry point invoked for each machine function `MF`.
+ bool runOnMachineFunction(MachineFunction &MF);
+ // Static ID member; its definition follows the class below.
+ static char ID;
+ private:
+ const AMDILTargetMachine *ATM; ///< Used to get register information.
+ bool mDebug; ///< Flag to specify whether to dump debug information.
+ CodeGenOpt::Level opt; // Optimization level; presumably the ctor's OptLevel -- TODO confirm
+ }; // class AMDILSwizzleEncoder
+ // NOTE(review): this is a non-inline definition inside a header. If more
+ // than one translation unit includes this file the symbol is defined more
+ // than once -- confirm the header is only included from a single .cpp, or
+ // move the definition there.
+ char AMDILSwizzleEncoder::ID = 0;
+} // end llvm namespace
+#endif // _AMDIL_SWIZZLE_ENCODER_H_
diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
new file mode 100644
index 00000000000..d3b1e4f6149
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
@@ -0,0 +1,259 @@
+//===-- AMDILTargetMachine.cpp - Define TargetMachine for AMDIL -----------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDILTargetMachine.h"
+#include "AMDILDevices.h"
+#if LLVM_VERSION >= 2500
+#include "AMDILFrameLowering.h"
+#else
+#include "AMDILFrameInfo.h"
+#endif
+#include "AMDILMCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// Build the MCAsmInfo object for the triple string \p TT.
+///
+/// Every operating system, Triple::UnknownOS included, maps to the same
+/// AMDILMCAsmInfo, so no per-OS dispatch is required. The caller owns the
+/// returned heap object. \p T is unused.
+static MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT)
+{
+  Triple TheTriple(TT);
+  MCAsmInfo *asmInfo = new AMDILMCAsmInfo(TheTriple);
+  return asmInfo;
+}
+
+// C-linkage initialization hook for the AMDIL target.
+// Currently a no-op: every registration call in the body (target machine,
+// asm info, code emitter) is commented out.
+extern "C" void LLVMInitializeAMDILTarget() {
+ // Register the target
+// RegisterTargetMachine<TheAMDILTargetMachine> X(TheAMDILTarget);
+
+ // Register the target asm info
+// RegisterMCAsmInfoFn A(TheAMDILTarget, createMCAsmInfo);
+
+ // Register the code emitter
+ //TargetRegistry::RegisterCodeEmitter(TheAMDILTarget,
+ //createAMDILMCCodeEmitter);
+}
+
+/// TheAMDILTargetMachine ctor - forwards every argument unchanged to the
+/// AMDILTargetMachine base constructor and does nothing else.
+TheAMDILTargetMachine::TheAMDILTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+: AMDILTargetMachine(T, TT, CPU, FS, RM, CM)
+{
+}
+
+/// AMDILTargetMachine ctor - builds the subtarget from (TT, CPU, FS) and then
+/// the per-target helper objects, in the order the members are declared in
+/// AMDILTargetMachine.h.
+///
+/// \param T   target description forwarded to LLVMTargetMachine
+/// \param TT  target triple string
+/// \param CPU CPU name used to configure the subtarget
+/// \param FS  feature string used to configure the subtarget
+/// \param RM  relocation model forwarded to LLVMTargetMachine
+/// \param CM  code model forwarded to LLVMTargetMachine
+AMDILTargetMachine::AMDILTargetMachine(const Target &T,
+    StringRef TT, StringRef CPU, StringRef FS,
+    Reloc::Model RM, CodeModel::Model CM)
+:
+  LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+  Subtarget(TT, CPU, FS),
+  DataLayout(Subtarget.getDataLayout()),
+  // The member is named FrameLowering in AMDILTargetMachine.h for both LLVM
+  // version ranges; only the enum that supplies StackGrowsUp differs.
+#if LLVM_VERSION >= 2500
+  FrameLowering(TargetFrameLowering::StackGrowsUp,
+#else
+  FrameLowering(TargetFrameInfo::StackGrowsUp,
+#endif
+      Subtarget.device()->getStackAlignment(), 0),
+  InstrInfo(*this), //JITInfo(*this),
+  TLInfo(*this),
+  IntrinsicInfo(this),
+  ELFWriterInfo(false, true),
+  // getDebug()/dump() read mDebugMode and getOptLevel() reads mOptLevel, so
+  // both need a defined value before setDebug()/addInstSelector() run.
+  mDebugMode(false),
+  mOptLevel(CodeGenOpt::Default)
+{
+  setAsmVerbosityDefault(true);
+  setMCUseLoc(false);
+}
+
+/// Return a mutable pointer to the target lowering object owned by this
+/// target machine; constness of the member is cast away.
+AMDILTargetLowering*
+AMDILTargetMachine::getTargetLowering() const
+{
+  const AMDILTargetLowering *lowering = &TLInfo;
+  return const_cast<AMDILTargetLowering*>(lowering);
+}
+
+/// Return the instruction info object owned by this target machine.
+const AMDILInstrInfo*
+AMDILTargetMachine::getInstrInfo() const
+{
+  const AMDILInstrInfo *info = &InstrInfo;
+  return info;
+}
+#if LLVM_VERSION >= 2500
+/// Return the frame lowering object owned by this target machine.
+const AMDILFrameLowering*
+AMDILTargetMachine::getFrameLowering() const
+{
+  return &FrameLowering;
+}
+#else
+/// Return the frame info object owned by this target machine.
+const AMDILFrameInfo*
+AMDILTargetMachine::getFrameInfo() const
+{
+  // AMDILTargetMachine.h declares this member as `FrameLowering` for both
+  // LLVM version ranges; there is no member named `FrameInfo`.
+  return &FrameLowering;
+}
+#endif
+
+/// Return the subtarget object owned by this target machine.
+const AMDILSubtarget*
+AMDILTargetMachine::getSubtargetImpl() const
+{
+  const AMDILSubtarget *subtarget = &Subtarget;
+  return subtarget;
+}
+
+/// Return the register info, which is obtained from the instruction info
+/// object rather than stored directly in the target machine.
+const AMDILRegisterInfo*
+AMDILTargetMachine::getRegisterInfo() const
+{
+  const AMDILRegisterInfo &regInfo = InstrInfo.getRegisterInfo();
+  return &regInfo;
+}
+
+/// Return the TargetData (DataLayout member) owned by this target machine.
+const TargetData*
+AMDILTargetMachine::getTargetData() const
+{
+  const TargetData *layout = &DataLayout;
+  return layout;
+}
+
+/// Return the ELF writer info when the subtarget reports an ELF target,
+/// otherwise a null pointer.
+const AMDILELFWriterInfo*
+AMDILTargetMachine::getELFWriterInfo() const
+{
+  if (!Subtarget.isTargetELF()) {
+    return 0;
+  }
+  return &ELFWriterInfo;
+}
+
+/// Return the intrinsic info object owned by this target machine.
+const AMDILIntrinsicInfo*
+AMDILTargetMachine::getIntrinsicInfo() const
+{
+  const AMDILIntrinsicInfo *intrinsics = &IntrinsicInfo;
+  return intrinsics;
+}
+/// addPreISel - Pre-instruction-selection hook. No passes are added to PM
+/// and neither argument is used; the function always returns true.
+// NOTE(review): the other pass hooks in this file that only add passes
+// return false; confirm that returning true here is intended.
+bool
+AMDILTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ return true;
+}
+/// addInstSelector - Record the optimization level and add the AMDIL passes
+/// that run up to and including instruction selection. Always returns false.
+ bool
+AMDILTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ // Remember the level so getOptLevel() can report it later.
+ mOptLevel = OptLevel;
+ // Passes are added in this order: barrier detect, printf convert, inline,
+ // peephole opt, and finally the ISel DAG pass.
+ PM.add(createAMDILBarrierDetect(*this, OptLevel));
+ PM.add(createAMDILPrintfConvert(*this, OptLevel));
+ PM.add(createAMDILInlinePass(*this, OptLevel));
+ PM.add(createAMDILPeepholeOpt(*this, OptLevel));
+ PM.add(createAMDILISelDag(*this, OptLevel));
+ return false;
+}
+/// addPreRegAlloc - Add the AMDIL passes that run before register
+/// allocation: the machine peephole pass followed by the pointer manager.
+/// Always returns false.
+ bool
+AMDILTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+
+{
+ // If debugging, reduce code motion. Use less aggressive pre-RA scheduler
+ // (this changes the RegisterScheduler default, not just this machine's).
+ if (OptLevel == CodeGenOpt::None) {
+ llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler);
+ }
+
+ PM.add(createAMDILMachinePeephole(*this, OptLevel));
+ PM.add(createAMDILPointerManager(*this, OptLevel));
+ return false;
+}
+
+/// addPostRegAlloc - Post-register-allocation hook. No passes are added and
+/// neither argument is used; the function always returns false.
+bool
+AMDILTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // NOTE(review): the comment on the return says the machine instructions
+ // should be printed after this hook, while the addPreEmitPass comment says
+ // true is the value that requests printing -- confirm which is intended.
+ return false; // -print-machineinstr should print after this.
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+///
+/// The AMDIL passes are added in this order: CFG preparation, CFG
+/// structurizer, literal manager, IO expansion, swizzle encoder.
+/// Always returns true.
+ bool
+AMDILTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ PM.add(createAMDILCFGPreparationPass(*this, OptLevel));
+ PM.add(createAMDILCFGStructurizerPass(*this, OptLevel));
+ PM.add(createAMDILLiteralManager(*this, OptLevel));
+ PM.add(createAMDILIOExpansion(*this, OptLevel));
+ PM.add(createAMDILSwizzleEncoder(*this, OptLevel));
+ return true;
+}
+
+/// Write the target machine state banner to \p O.
+/// Nothing is written unless debug mode has been enabled.
+void
+AMDILTargetMachine::dump(OSTREAM_TYPE &O)
+{
+  if (mDebugMode) {
+    O << ";AMDIL Target Machine State Dump: \n";
+  }
+}
+
+/// Enable or disable debug mode, which gates the output of dump().
+void
+AMDILTargetMachine::setDebug(bool debugMode)
+{
+  this->mDebugMode = debugMode;
+}
+
+/// Report whether debug mode is currently enabled.
+bool
+AMDILTargetMachine::getDebug() const
+{
+  return this->mDebugMode;
+}
diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.h b/src/gallium/drivers/radeon/AMDILTargetMachine.h
new file mode 100644
index 00000000000..9b9e1453a1d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILTargetMachine.h
@@ -0,0 +1,155 @@
+//===-- AMDILTargetMachine.h - Define TargetMachine for AMDIL ---*- C++ -*-===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILTARGETMACHINE_H_
+#define AMDILTARGETMACHINE_H_
+
+
+#include "AMDIL.h"
+#include "AMDILELFWriterInfo.h"
+#if LLVM_VERSION < 2500
+#include "AMDILFrameInfo.h"
+#else
+#include "AMDILFrameLowering.h"
+#endif
+#include "AMDILInstrInfo.h"
+#include "AMDILISelLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILSubtarget.h"
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm
+{
+ class raw_ostream;
+
+ // AMDIL specific subclass of LLVMTargetMachine. It owns the subtarget and
+ // the per-target helper objects and configures the code generation pass
+ // pipeline through the add* hooks below.
+ class AMDILTargetMachine : public LLVMTargetMachine
+ {
+ private:
+ // Members are initialized in declaration order; the constructor uses
+ // Subtarget to build DataLayout and FrameLowering, so it must stay first.
+ AMDILSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ // The member is named FrameLowering for both LLVM version ranges; only
+ // its type differs.
+#if LLVM_VERSION < 2500
+ AMDILFrameInfo FrameLowering;
+#else
+ AMDILFrameLowering FrameLowering;
+#endif
+ AMDILInstrInfo InstrInfo;
+ AMDILTargetLowering TLInfo;
+ AMDILIntrinsicInfo IntrinsicInfo;
+ AMDILELFWriterInfo ELFWriterInfo;
+ bool mDebugMode; // Accessed through setDebug()/getDebug(); gates dump().
+ CodeGenOpt::Level mOptLevel; // Returned by getOptLevel().
+
+ protected:
+
+ public:
+ AMDILTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+
+ // Get Target/Subtarget specific information
+ virtual AMDILTargetLowering* getTargetLowering() const;
+ virtual const AMDILInstrInfo* getInstrInfo() const;
+#if LLVM_VERSION < 2500
+ virtual const AMDILFrameInfo* getFrameInfo() const;
+#else
+ virtual const AMDILFrameLowering* getFrameLowering() const;
+#endif
+ virtual const AMDILSubtarget* getSubtargetImpl() const;
+ virtual const AMDILRegisterInfo* getRegisterInfo() const;
+ virtual const TargetData* getTargetData() const;
+ virtual const AMDILIntrinsicInfo *getIntrinsicInfo() const;
+ virtual const AMDILELFWriterInfo *getELFWriterInfo() const;
+
+ // Pass Pipeline Configuration
+ // Each hook receives the pass manager to add passes to and the
+ // optimization level the passes should be created with.
+ virtual bool
+ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool
+ addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool
+ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool
+ addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool
+ addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ // The two hooks below are declared only for LLVM versions before 2500.
+#if LLVM_VERSION < 2500
+ virtual bool
+ addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &,
+ CodeGenFileType, CodeGenOpt::Level,
+ bool = true);
+ virtual bool
+ addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext);
+#endif
+ // Debug helpers: dump() writes to O, setDebug()/getDebug() access the
+ // debug flag.
+ void dump(OSTREAM_TYPE &O);
+ void setDebug(bool debugMode);
+ bool getDebug() const;
+ // Returns the stored optimization level member.
+ CodeGenOpt::Level getOptLevel() const { return mOptLevel; }
+
+
+ }; // AMDILTargetMachine
+
+ // Subclass of AMDILTargetMachine that declares only a constructor with the
+ // same parameter list as its base class.
+ class TheAMDILTargetMachine : public AMDILTargetMachine {
+ public:
+ TheAMDILTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
+ }; // TheAMDILTargetMachine
+
+} // end namespace llvm
+
+#endif // AMDILTARGETMACHINE_H_
diff --git a/src/gallium/drivers/radeon/AMDILTokenDesc.td b/src/gallium/drivers/radeon/AMDILTokenDesc.td
new file mode 100644
index 00000000000..aebf549336b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILTokenDesc.td
@@ -0,0 +1,166 @@
+//===--------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// ILTokenDesc.td - The IL Token Descriptions - Micah Villmow - 9-3-2008
+//===--------------------------------------------------------------------===//
+include "AMDILEnumeratedTypes.td"
+//===--------------------------------------------------------------------===//
+//
+//
+//
+//===--------------------------------------------------------------------===//
+// Each token is 32 bits as specified in section 2.1 of the IL spec
+// Base class of every IL token: `_bits` holds the raw 32-bit token value and
+// is initialized from the template argument `n`.
+class ILToken <bits<32> n> {
+ field bits<32> _bits = n;
+}
+
+// Section 2.2.1 - IL Language Token
+// Starts from an all-zero token (ILToken<0>) and stores `client_type` in
+// _bits{0-7}; the remaining bits stay zero.
+// NOTE(review): this file writes ranges low-to-high ({0-7}); confirm the bit
+// order TableGen applies to that form matches the intended encoding.
+class ILLang<bits<8> client_type> : ILToken<0> {
+ let _bits{0-7} = client_type;
+}
+
+// Section 2.2.2 - IL Version Token
+// Layout: _bits{0-7} minor version, _bits{8-15} major version,
+// _bits{16-23} shader type value. Bits 24-31 are not assigned and keep the
+// zero from ILToken<0>.
+class ILVersion<bits<8> minor_version, bits<8> major_version, ILShader shader_type> : ILToken<0> {
+ let _bits{0-7} = minor_version;
+ let _bits{8-15} = major_version;
+ let _bits{16-23} = shader_type.Value;
+}
+
+// Section 2.2.3 - IL Opcode Token
+// Layout: _bits{0-15} opcode value, _bits{16-29} control field,
+// _bit 30 sec_mod_pre, bit 31 pri_mod_pre. All 32 bits are assigned.
+class ILOpcode<ILOpCode opcode, bits<14> control, bit sec_mod_pre, bit pri_mod_pre> : ILToken<0> {
+ let _bits{0-15} = opcode.Value;
+ let _bits{16-29} = control;
+ let _bits{30} = sec_mod_pre;
+ let _bits{31} = pri_mod_pre;
+}
+
+// Section 2.2.4 - IL Destination Token
+// Layout: _bits{0-15} register number, _bits{16-21} register type,
+// bit 22 mod_pre, _bits{23-24} relative_address, bit 25 dimension,
+// bit 26 immediate_pre, bit 31 extended. Bits 27-30 are not assigned and
+// keep the zero from ILToken<0>.
+class ILDst<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
+ let _bits{0-15} = register_num.Value;
+ let _bits{16-21} = register_type.Value;
+ let _bits{22} = mod_pre;
+ let _bits{23-24} = relative_address;
+ let _bits{25} = dimension;
+ let _bits{26} = immediate_pre;
+ let _bits{31} = extended;
+}
+
+// Section 2.2.5 - IL Destination Modifier Token
+// Layout: two bits per destination component (x in {0-1}, y in {2-3},
+// z in {4-5}, w in {6-7}) followed by the clamp bit at bit 8.
+// `shift_scale` is accepted as a template argument but is NOT encoded: its
+// assignment to _bits{9-12} is commented out, so bits 9-31 stay zero.
+class ILDstMod<ILModDstComp x, ILModDstComp y, ILModDstComp z, ILModDstComp w, bit clamp, ILShiftScale shift_scale> : ILToken<0> {
+ let _bits{0-1} = x.Value;
+ let _bits{2-3} = y.Value;
+ let _bits{4-5} = z.Value;
+ let _bits{6-7} = w.Value;
+ let _bits{8} = clamp;
+ //let _bits{9-12} = shift_scale;
+}
+
+// Section 2.2.6 - IL Source Token
+// Same template arguments and bit layout as ILDst: _bits{0-15} register
+// number, _bits{16-21} register type, bit 22 mod_pre, _bits{23-24}
+// relative_address, bit 25 dimension, bit 26 immediate_pre, bit 31 extended.
+// Bits 27-30 are not assigned and keep the zero from ILToken<0>.
+class ILSrc<AMDILReg register_num, ILRegType register_type, bit mod_pre, bits<2> relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> {
+ let _bits{0-15} = register_num.Value;
+ let _bits{16-21} = register_type.Value;
+ let _bits{22} = mod_pre;
+ let _bits{23-24} = relative_address;
+ let _bits{25} = dimension;
+ let _bits{26} = immediate_pre;
+ let _bits{31} = extended;
+}
+
+// Section 2.2.7 - IL Source Modifier Token
+// Layout: for each of x, y, z, w a 3-bit component select followed by a
+// 1-bit negate flag (bits 0-15), then the single-bit flags invert, bias, x2,
+// sign and abs (bits 16-20), the divComp value in _bits{21-23} and the clamp
+// field in _bits{24-31}. All 32 bits are assigned.
+class ILSrcMod<ILComponentSelect swizzle_x, bit negate_x, ILComponentSelect swizzle_y, bit negate_y,
+ ILComponentSelect swizzle_z, bit negate_z, ILComponentSelect swizzle_w, bit negate_w,
+ bit invert, bit bias, bit x2, bit sign, bit abs, ILDivComp divComp,
+ bits<8> clamp> : ILToken<0> {
+ let _bits{0-2} = swizzle_x.Value;
+ let _bits{3} = negate_x;
+ let _bits{4-6} = swizzle_y.Value;
+ let _bits{7} = negate_y;
+ let _bits{8-10} = swizzle_z.Value;
+ let _bits{11} = negate_z;
+ let _bits{12-14} = swizzle_w.Value;
+ let _bits{15} = negate_w;
+ let _bits{16} = invert;
+ let _bits{17} = bias;
+ let _bits{18} = x2;
+ let _bits{19} = sign;
+ let _bits{20} = abs;
+ let _bits{21-23} = divComp.Value;
+ let _bits{24-31} = clamp;
+}
+
+// Section 2.2.8 - IL Relative Address Token
+// Layout: _bits{0-15} address register value, bit 16 loop_relative,
+// _bits{17-19} component value. Bits 20-31 are not assigned and keep the
+// zero from ILToken<0>.
+class ILRelAddr<AMDILReg address_register, bit loop_relative, ILAddressing component> : ILToken<0> {
+ let _bits{0-15} = address_register.Value;
+ let _bits{16} = loop_relative;
+ let _bits{17-19} = component.Value;
+}
+
+// IL Literal Token
+// The whole 32-bit token is the literal value `val`.
+class ILLiteral<bits<32> val> : ILToken<0> {
+ let _bits = val;
+}
+
+// All tokens required for a destination register
+// Bundles five tokens into one record: the destination token, its modifier,
+// a relative-address token, and a source token plus source modifier
+// (Reg_Rel / Reg_Rel_Mod). Each template argument is stored in the field of
+// the matching name; no bits are computed here.
+class ILDstReg<ILDst Reg, ILDstMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
+ ILDst reg = Reg;
+ ILDstMod mod = Mod;
+ ILRelAddr rel = Rel;
+ ILSrc reg_rel = Reg_Rel;
+ ILSrcMod reg_rel_mod = Reg_Rel_Mod;
+}
+
+// All tokens required for a source register
+// Source-side counterpart of ILDstReg: bundles the source token, its
+// modifier, a relative-address token, and a second source token plus source
+// modifier (Reg_Rel / Reg_Rel_Mod). Each template argument is stored in the
+// field of the matching name; no bits are computed here.
+class ILSrcReg<ILSrc Reg, ILSrcMod Mod, ILRelAddr Rel, ILSrc Reg_Rel, ILSrcMod Reg_Rel_Mod> {
+ ILSrc reg = Reg;
+ ILSrcMod mod = Mod;
+ ILRelAddr rel = Rel;
+ ILSrc reg_rel = Reg_Rel;
+ ILSrcMod reg_rel_mod = Reg_Rel_Mod;
+}
+
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp
new file mode 100644
index 00000000000..5c70944c67b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp
@@ -0,0 +1,727 @@
+//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file provides the implementations of functions that are declared in the
+// AMDILUtilityFunctions.h file.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILUtilityFunctions.h"
+#include "AMDILISelLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Instruction.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include <cstdio>
+#include <queue>
+#include <list>
+using namespace llvm;
+// Returns the value of A->getScalarSizeInBits() widened to int64_t.
+// A must not be NULL.
+int64_t GET_SCALAR_SIZE(llvm::Type *A) {
+  const int64_t scalarBits = A->getScalarSizeInBits();
+  return scalarBits;
+}
+
+const TargetRegisterClass * getRegClassFromID(unsigned int ID) {
+ switch (ID) {
+ default:
+ assert(0 && "Passed in ID does not match any register classes.");
+ return NULL;
+ case AMDIL::GPRI8RegClassID:
+ return &AMDIL::GPRI8RegClass;
+ case AMDIL::GPRI16RegClassID:
+ return &AMDIL::GPRI16RegClass;
+ case AMDIL::GPRI32RegClassID:
+ return &AMDIL::GPRI32RegClass;
+ case AMDIL::GPRF32RegClassID:
+ return &AMDIL::GPRF32RegClass;
+ case AMDIL::GPRI64RegClassID:
+ return &AMDIL::GPRI64RegClass;
+ case AMDIL::GPRF64RegClassID:
+ return &AMDIL::GPRF64RegClass;
+ case AMDIL::GPRV4F32RegClassID:
+ return &AMDIL::GPRV4F32RegClass;
+ case AMDIL::GPRV4I8RegClassID:
+ return &AMDIL::GPRV4I8RegClass;
+ case AMDIL::GPRV4I16RegClassID:
+ return &AMDIL::GPRV4I16RegClass;
+ case AMDIL::GPRV4I32RegClassID:
+ return &AMDIL::GPRV4I32RegClass;
+ case AMDIL::GPRV2F32RegClassID:
+ return &AMDIL::GPRV2F32RegClass;
+ case AMDIL::GPRV2I8RegClassID:
+ return &AMDIL::GPRV2I8RegClass;
+ case AMDIL::GPRV2I16RegClassID:
+ return &AMDIL::GPRV2I16RegClass;
+ case AMDIL::GPRV2I32RegClassID:
+ return &AMDIL::GPRV2I32RegClass;
+ case AMDIL::GPRV2F64RegClassID:
+ return &AMDIL::GPRV2F64RegClass;
+ case AMDIL::GPRV2I64RegClassID:
+ return &AMDIL::GPRV2I64RegClass;
+ };
+}
+
+// Returns the AMDIL MOVE opcode that corresponds to a register-class ID.
+//
+// An unknown ID trips the assert.  When asserts are compiled out the function
+// returns (unsigned)-1 instead of silently falling through to the i8 case and
+// handing back MOVE_i8, which is what the previous layout did.
+unsigned int getMoveInstFromID(unsigned int ID) {
+  switch (ID) {
+  case AMDIL::GPRI8RegClassID:
+    return AMDIL::MOVE_i8;
+  case AMDIL::GPRI16RegClassID:
+    return AMDIL::MOVE_i16;
+  case AMDIL::GPRI32RegClassID:
+    return AMDIL::MOVE_i32;
+  case AMDIL::GPRF32RegClassID:
+    return AMDIL::MOVE_f32;
+  case AMDIL::GPRI64RegClassID:
+    return AMDIL::MOVE_i64;
+  case AMDIL::GPRF64RegClassID:
+    return AMDIL::MOVE_f64;
+  case AMDIL::GPRV4F32RegClassID:
+    return AMDIL::MOVE_v4f32;
+  case AMDIL::GPRV4I8RegClassID:
+    return AMDIL::MOVE_v4i8;
+  case AMDIL::GPRV4I16RegClassID:
+    return AMDIL::MOVE_v4i16;
+  case AMDIL::GPRV4I32RegClassID:
+    return AMDIL::MOVE_v4i32;
+  case AMDIL::GPRV2F32RegClassID:
+    return AMDIL::MOVE_v2f32;
+  case AMDIL::GPRV2I8RegClassID:
+    return AMDIL::MOVE_v2i8;
+  case AMDIL::GPRV2I16RegClassID:
+    return AMDIL::MOVE_v2i16;
+  case AMDIL::GPRV2I32RegClassID:
+    return AMDIL::MOVE_v2i32;
+  case AMDIL::GPRV2F64RegClassID:
+    return AMDIL::MOVE_v2f64;
+  case AMDIL::GPRV2I64RegClassID:
+    return AMDIL::MOVE_v2i64;
+  default:
+    break;
+  }
+  assert(0 && "Passed in ID does not match any move instructions.");
+  return -1;
+}
+
+// Returns the AMDIL PHIMOVE opcode that corresponds to a register-class ID.
+//
+// An unknown ID trips the assert.  When asserts are compiled out the function
+// returns (unsigned)-1 instead of silently falling through to the i8 case and
+// handing back PHIMOVE_i8, which is what the previous layout did.
+unsigned int getPHIMoveInstFromID(unsigned int ID) {
+  switch (ID) {
+  case AMDIL::GPRI8RegClassID:
+    return AMDIL::PHIMOVE_i8;
+  case AMDIL::GPRI16RegClassID:
+    return AMDIL::PHIMOVE_i16;
+  case AMDIL::GPRI32RegClassID:
+    return AMDIL::PHIMOVE_i32;
+  case AMDIL::GPRF32RegClassID:
+    return AMDIL::PHIMOVE_f32;
+  case AMDIL::GPRI64RegClassID:
+    return AMDIL::PHIMOVE_i64;
+  case AMDIL::GPRF64RegClassID:
+    return AMDIL::PHIMOVE_f64;
+  case AMDIL::GPRV4F32RegClassID:
+    return AMDIL::PHIMOVE_v4f32;
+  case AMDIL::GPRV4I8RegClassID:
+    return AMDIL::PHIMOVE_v4i8;
+  case AMDIL::GPRV4I16RegClassID:
+    return AMDIL::PHIMOVE_v4i16;
+  case AMDIL::GPRV4I32RegClassID:
+    return AMDIL::PHIMOVE_v4i32;
+  case AMDIL::GPRV2F32RegClassID:
+    return AMDIL::PHIMOVE_v2f32;
+  case AMDIL::GPRV2I8RegClassID:
+    return AMDIL::PHIMOVE_v2i8;
+  case AMDIL::GPRV2I16RegClassID:
+    return AMDIL::PHIMOVE_v2i16;
+  case AMDIL::GPRV2I32RegClassID:
+    return AMDIL::PHIMOVE_v2i32;
+  case AMDIL::GPRV2F64RegClassID:
+    return AMDIL::PHIMOVE_v2f64;
+  case AMDIL::GPRV2I64RegClassID:
+    return AMDIL::PHIMOVE_v2i64;
+  default:
+    break;
+  }
+  assert(0 && "Passed in ID does not match any move instructions.");
+  return -1;
+}
+
+// Maps an MVT simple-type value onto the address of the matching AMDIL
+// register-class object.
+//
+// An unknown type trips the assert.  When asserts are compiled out the
+// function returns NULL, matching getRegClassFromID(), instead of silently
+// falling through to the i8 case and handing back the GPRI8 class.
+const TargetRegisterClass* getRegClassFromType(unsigned int type) {
+  switch (type) {
+  case MVT::i8:
+    return &AMDIL::GPRI8RegClass;
+  case MVT::i16:
+    return &AMDIL::GPRI16RegClass;
+  case MVT::i32:
+    return &AMDIL::GPRI32RegClass;
+  case MVT::f32:
+    return &AMDIL::GPRF32RegClass;
+  case MVT::i64:
+    return &AMDIL::GPRI64RegClass;
+  case MVT::f64:
+    return &AMDIL::GPRF64RegClass;
+  case MVT::v4f32:
+    return &AMDIL::GPRV4F32RegClass;
+  case MVT::v4i8:
+    return &AMDIL::GPRV4I8RegClass;
+  case MVT::v4i16:
+    return &AMDIL::GPRV4I16RegClass;
+  case MVT::v4i32:
+    return &AMDIL::GPRV4I32RegClass;
+  case MVT::v2f32:
+    return &AMDIL::GPRV2F32RegClass;
+  case MVT::v2i8:
+    return &AMDIL::GPRV2I8RegClass;
+  case MVT::v2i16:
+    return &AMDIL::GPRV2I16RegClass;
+  case MVT::v2i32:
+    return &AMDIL::GPRV2I32RegClass;
+  case MVT::v2f64:
+    return &AMDIL::GPRV2F64RegClass;
+  case MVT::v2i64:
+    return &AMDIL::GPRV2I64RegClass;
+  default:
+    break;
+  }
+  assert(0 && "Passed in type does not match any register classes.");
+  return NULL;
+}
+
+// Debug helper: dumps the opcode, use information and node id of N to stdout,
+// then one summary line per operand followed by printSDValue() on that
+// operand at level 0 (no recursion into its children).
+void printSDNode(const SDNode *N) {
+  printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n",
+         N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode());
+  printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n",
+         N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId());
+  unsigned int opIdx = 0;
+  while (opIdx < N->getNumOperands()) {
+    // The value count and the type of result 0 are repeated on every line.
+    printf("OperandNum: %d ValueCount: %d ValueType: %d\n",
+           opIdx, N->getNumValues(),
+           N->getValueType(0).getSimpleVT().SimpleTy);
+    printSDValue(N->getOperand(opIdx), 0);
+    ++opIdx;
+  }
+}
+
+// Debug helper: dumps Op to stdout.  `level` is the remaining recursion
+// depth: 0 prints only Op itself, any other value also prints every operand
+// of Op with level - 1.
+void printSDValue(const SDValue &Op, int level) {
+  printf("\nOp: %p OpCode: %d NumOperands: %d ", &Op, Op.getOpcode(),
+         Op.getNumOperands());
+  printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(),
+         Op.isMachineOpcode());
+  if (!Op.isMachineOpcode()) {
+    printf("\n");
+  } else {
+    printf("MachineOpcode: %d\n", Op.getMachineOpcode());
+  }
+  EVT valueType = Op.getValueType();
+  printf("ValueType: %d \n", valueType.getSimpleVT().SimpleTy);
+  printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse());
+  if (level == 0) {
+    return;
+  }
+  printf("Children for %d:\n", level);
+  for (unsigned int child = 0; child < Op.getNumOperands(); ++child) {
+    printf("Child %d->%d:", level, child);
+    printSDValue(Op.getOperand(child), level - 1);
+  }
+}
+
+// True when opcode is one of the AMDIL::PHIMOVE variants enumerated by
+// ExpandCaseToAllTypes.
+bool isPHIMove(unsigned int opcode) {
+  bool matched = false;
+  switch (opcode) {
+    ExpandCaseToAllTypes(AMDIL::PHIMOVE);
+    matched = true;
+    break;
+  default:
+    break;
+  }
+  return matched;
+}
+
+// True when opcode is one of the AMDIL::MOVE variants enumerated by
+// ExpandCaseToAllTypes.
+bool isMove(unsigned int opcode) {
+  bool matched = false;
+  switch (opcode) {
+    ExpandCaseToAllTypes(AMDIL::MOVE);
+    matched = true;
+    break;
+  default:
+    break;
+  }
+  return matched;
+}
+
+// True for the IL_AS* opcodes, the INTTOANY opcodes and DLO/LLO listed below;
+// every other opcode is classified by isMove() || isPHIMove().
+bool isMoveOrEquivalent(unsigned int opcode) {
+  switch (opcode) {
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT);
+    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT);
+  case AMDIL::INTTOANY_i8:
+  case AMDIL::INTTOANY_i16:
+  case AMDIL::INTTOANY_i32:
+  case AMDIL::INTTOANY_f32:
+  case AMDIL::DLO:
+  case AMDIL::LLO:
+  case AMDIL::LLO_v2i64:
+    return true;
+  default:
+    break;
+  }
+  // Not in the explicit list: defer to the plain move predicates.
+  return isMove(opcode) || isPHIMove(opcode);
+}
+
+// Returns true when ptr is a pointer-typed value whose address space equals
+// addrspace.  Returns false for a NULL ptr and for values whose type is not a
+// pointer type (previously the latter dereferenced the NULL result of
+// dyn_cast<PointerType>).
+bool check_type(const Value *ptr, unsigned int addrspace) {
+  if (!ptr) {
+    return false;
+  }
+  const PointerType *ptrType = dyn_cast<PointerType>(ptr->getType());
+  return ptrType && ptrType->getAddressSpace() == addrspace;
+}
+
+// Size of T in bytes.  Pointer, integer, struct, array, function and vector
+// types are forwarded to their dedicated overloads; every other type uses its
+// primitive size in bits divided by eight.  A NULL type has size 0.
+size_t getTypeSize(Type * const T, bool dereferencePtr) {
+  if (!T) {
+    return 0;
+  }
+  switch (T->getTypeID()) {
+  case Type::PointerTyID:
+    return getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+  case Type::IntegerTyID:
+    return getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+  case Type::StructTyID:
+    return getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+  case Type::ArrayTyID:
+    return getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+  case Type::FunctionTyID:
+    return getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+  case Type::VectorTyID:
+    return getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+    // With asserts compiled out these share the primitive-size path below.
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  default:
+    break;
+  }
+  return T->getPrimitiveSizeInBits() >> 3;
+}
+
+// Size of a struct: the sum of getTypeSize() over its element types
+// (no padding is added here).  A NULL struct has size 0.
+size_t getTypeSize(StructType * const ST, bool dereferencePtr) {
+  size_t total = 0;
+  if (ST) {
+    for (StructType::element_iterator elem = ST->element_begin(),
+         elemEnd = ST->element_end(); elem != elemEnd; ++elem) {
+      Type *elemType = *elem;
+      total += getTypeSize(elemType, dereferencePtr);
+    }
+  }
+  return total;
+}
+
+// Size of an integer type: its bit width divided by eight, rounded down
+// (so widths below 8 bits yield 0).  A NULL type has size 0.
+size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) {
+  if (!IT) {
+    return 0;
+  }
+  return IT->getBitWidth() >> 3;
+}
+
+// Function types have no size: asserts, and yields 0 when asserts are
+// compiled out.  Neither argument is inspected.
+size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) {
+  const size_t noSize = 0;
+  assert(0 && "Should not be able to calculate the size of an function type");
+  return noSize;
+}
+
+// Size of an array: element size times element count.  A NULL type has
+// size 0.
+size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) {
+  if (!AT) {
+    return 0;
+  }
+  return (size_t)(getTypeSize(AT->getElementType(), dereferencePtr) *
+                  AT->getNumElements());
+}
+
+// Size of a vector type: its total bit width divided by eight, rounded down.
+// A NULL type has size 0.
+size_t getTypeSize(VectorType * const VT, bool dereferencePtr) {
+  if (!VT) {
+    return 0;
+  }
+  return VT->getBitWidth() >> 3;
+}
+
+// Size associated with a pointer type:
+//  - pointer to a struct in the private address space: the size of that
+//    struct, computed with the default dereferencePtr argument;
+//  - otherwise, when dereferencePtr is set: the summed size of the pointer's
+//    contained types;
+//  - otherwise: 4.
+// A NULL type has size 0.
+size_t getTypeSize(PointerType * const PT, bool dereferencePtr) {
+  if (!PT) {
+    return 0;
+  }
+  Type *pointee = PT->getElementType();
+  const bool isPrivateStruct =
+      pointee->getTypeID() == Type::StructTyID &&
+      PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS;
+  if (isPrivateStruct) {
+    return getTypeSize(dyn_cast<StructType>(pointee));
+  }
+  if (!dereferencePtr) {
+    return 4;
+  }
+  size_t total = 0;
+  const size_t numContained = PT->getNumContainedTypes();
+  for (size_t idx = 0; idx < numContained; ++idx) {
+    total += getTypeSize(PT->getContainedType(idx), dereferencePtr);
+  }
+  return total;
+}
+
+// Opaque types are reported as 4 bytes; neither argument is inspected.
+size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) {
+  //assert(0 && "Should not be able to calculate the size of an opaque type");
+  const size_t opaqueSize = 4;
+  return opaqueSize;
+}
+
+// Number of scalar elements in T.  Pointer, integer, struct, array, function
+// and vector types are forwarded to their dedicated overloads; every other
+// type counts as one element.  A NULL type has 0 elements.
+size_t getNumElements(Type * const T) {
+  if (!T) {
+    return 0;
+  }
+  switch (T->getTypeID()) {
+  case Type::PointerTyID:
+    return getNumElements(dyn_cast<PointerType>(T));
+  case Type::IntegerTyID:
+    return getNumElements(dyn_cast<IntegerType>(T));
+  case Type::StructTyID:
+    return getNumElements(dyn_cast<StructType>(T));
+  case Type::ArrayTyID:
+    return getNumElements(dyn_cast<ArrayType>(T));
+  case Type::FunctionTyID:
+    return getNumElements(dyn_cast<FunctionType>(T));
+  case Type::VectorTyID:
+    return getNumElements(dyn_cast<VectorType>(T));
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+    // With asserts compiled out these count as a single element, like below.
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  default:
+    break;
+  }
+  return 1;
+}
+
+// Element count of a struct: the sum of getNumElements() over its element
+// types.  A NULL struct has 0 elements.
+size_t getNumElements(StructType * const ST) {
+  size_t total = 0;
+  if (ST) {
+    for (StructType::element_iterator elem = ST->element_begin(),
+         elemEnd = ST->element_end(); elem != elemEnd; ++elem) {
+      Type *elemType = *elem;
+      total += getNumElements(elemType);
+    }
+  }
+  return total;
+}
+
+// An integer type is a single element; a NULL type has 0 elements.
+size_t getNumElements(IntegerType * const IT) {
+  return IT ? 1 : 0;
+}
+
+// Function types have no element count: asserts, and yields 0 when asserts
+// are compiled out.  The argument is not inspected.
+size_t getNumElements(FunctionType * const FT) {
+  const size_t noElements = 0;
+  assert(0 && "Should not be able to calculate the number of "
+         "elements of a function type");
+  return noElements;
+}
+
+// Element count of an array: elements per entry times the array length.
+// A NULL type has 0 elements.
+size_t getNumElements(ArrayType * const AT) {
+  if (!AT) {
+    return 0;
+  }
+  return (size_t)(getNumElements(AT->getElementType()) *
+                  AT->getNumElements());
+}
+
+// Element count of a vector: lane count times the element count of the lane
+// type.  A NULL type has 0 elements.
+size_t getNumElements(VectorType * const VT) {
+  if (!VT) {
+    return 0;
+  }
+  return VT->getNumElements() * getNumElements(VT->getElementType());
+}
+
+// Element count of a pointer type: the summed element count of its contained
+// types.  A NULL type has 0 elements.
+size_t getNumElements(PointerType * const PT) {
+  if (!PT) {
+    return 0;
+  }
+  size_t total = 0;
+  const size_t numContained = PT->getNumContainedTypes();
+  for (size_t idx = 0; idx < numContained; ++idx) {
+    total += getNumElements(PT->getContainedType(idx));
+  }
+  return total;
+}
+
+// Breadth-first walk from V through instruction operands (and operand 0 of
+// constant expressions) looking for the value the pointer is based on.
+// Returns the first pointer-typed Argument, GlobalVariable or AllocaInst
+// reached, or NULL when V is NULL or the walk finds none.
+const llvm::Value *getBasePointerValue(const llvm::Value *V)
+{
+  if (!V) {
+    return NULL;
+  }
+  ValueMap<const Value *, bool> visited;
+  std::queue<const Value *, std::list<const Value *> > worklist;
+  worklist.push(V);
+  while (!worklist.empty()) {
+    const Value *cur = worklist.front();
+    worklist.pop();
+    // Each value is examined once, which also guards against cycles.
+    if (visited.find(cur) != visited.end()) {
+      continue;
+    }
+    visited[cur] = true;
+    if (isa<Argument>(cur) && isa<PointerType>(cur->getType())) {
+      return cur;
+    }
+    if (isa<GlobalVariable>(cur)) {
+      return cur;
+    }
+    if (isa<Constant>(cur)) {
+      // Only constant expressions are followed, and only via operand 0.
+      if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(cur)) {
+        worklist.push(CE->getOperand(0));
+      }
+      continue;
+    }
+    if (isa<AllocaInst>(cur)) {
+      return cur;
+    }
+    if (const Instruction *I = dyn_cast<Instruction>(cur)) {
+      const uint32_t numOps = I->getNumOperands();
+      for (uint32_t op = 0; op < numOps; ++op) {
+        worklist.push(I->getOperand(op));
+      }
+    }
+    // Any other kind of value is ignored.
+    // assert(0 && "Found a Value that we didn't know how to handle!");
+  }
+  return NULL;
+}
+
+// Base pointer of a machine instruction: takes the IR value of MI's first
+// memory operand and resolves it with the Value overload.  Returns NULL when
+// MI has no memory operands or the first operand carries no value.
+const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) {
+  if (MI->memoperands_empty()) {
+    return NULL;
+  }
+  const MachineMemOperand *firstMemOp = (*MI->memoperands_begin());
+  const Value *memValue = firstMemOp ? firstMemOp->getValue() : NULL;
+  return getBasePointerValue(memValue);
+}
+
+// Writes ":" followed by i to O and always returns false.  Note that, despite
+// the name, the separator emitted is a colon.
+bool commaPrint(int i, OSTREAM_TYPE &O) {
+  O << ":";
+  O << i;
+  return false;
+}
+
+// True when the instruction name contains "LOAD" but not "LOADCONST".
+bool isLoadInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return !strstr(opName, "LOADCONST") && strstr(opName, "LOAD");
+}
+
+// True when MI's opcode is one of the LOCALLOAD, GLOBALLOAD, REGIONLOAD,
+// PRIVATELOAD, CPOOLLOAD or CONSTANTLOAD variants that
+// ExpandCaseToByteShortTypes enumerates.
+bool isSWSExtLoadInst(MachineInstr *MI)
+{
+  bool isByteShortLoad = false;
+  switch (MI->getOpcode()) {
+    ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
+    ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
+    ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
+    isByteShortLoad = true;
+    break;
+  default:
+    break;
+  }
+  return isByteShortLoad;
+}
+
+// True when the instruction name contains "EXTLOAD" (which also matches the
+// SEXTLOAD, ZEXTLOAD and AEXTLOAD spellings).
+bool isExtLoadInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "EXTLOAD") != NULL;
+}
+
+// True when the instruction name contains "SEXTLOAD".
+bool isSExtLoadInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "SEXTLOAD") != NULL;
+}
+
+// True when the instruction name contains "AEXTLOAD".
+bool isAExtLoadInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "AEXTLOAD") != NULL;
+}
+
+// True when the instruction name contains "ZEXTLOAD".
+bool isZExtLoadInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "ZEXTLOAD") != NULL;
+}
+
+// True when the instruction name contains "STORE" (which also matches the
+// TRUNCSTORE spellings).
+bool isStoreInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "STORE") != NULL;
+}
+
+// True when the instruction name contains "TRUNCSTORE".
+bool isTruncStoreInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "TRUNCSTORE") != NULL;
+}
+
+// True when the instruction name contains "ATOM".
+bool isAtomicInst(MachineInstr *MI) {
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "ATOM") != NULL;
+}
+
+// True when at least one of MI's memory operands is volatile; false when
+// there are no memory operands or none of them is volatile.
+bool isVolatileInst(MachineInstr *MI) {
+  MachineInstr::mmo_iterator memOp = MI->memoperands_begin();
+  const MachineInstr::mmo_iterator memOpEnd = MI->memoperands_end();
+  for (; memOp != memOpEnd; ++memOp) {
+    if ((*memOp)->isVolatile()) {
+      return true;
+    }
+  }
+  return false;
+}
+// True when the instruction name contains "GLOBAL".
+bool isGlobalInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "GLOBAL") != NULL;
+}
+// True when the instruction name contains "PRIVATE".
+bool isPrivateInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "PRIVATE") != NULL;
+}
+// True when the instruction name contains "CONSTANT" or "CPOOL".
+bool isConstantInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  if (strstr(opName, "CONSTANT")) {
+    return true;
+  }
+  return strstr(opName, "CPOOL") != NULL;
+}
+// True when the instruction name contains "REGION".
+bool isRegionInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "REGION") != NULL;
+}
+// True when the instruction name contains "LOCAL".
+bool isLocalInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "LOCAL") != NULL;
+}
+// True when the instruction name contains "IMAGE".
+bool isImageInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "IMAGE") != NULL;
+}
+// True when the instruction name contains "APPEND".
+bool isAppendInst(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "APPEND") != NULL;
+}
+// True when the instruction name contains "ATOM_R".
+bool isRegionAtomic(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "ATOM_R") != NULL;
+}
+// True when the instruction name contains "ATOM_L".
+bool isLocalAtomic(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "ATOM_L") != NULL;
+}
+// True when the instruction name contains "ATOM_G", or when the instruction
+// is an arena atomic (see isArenaAtomic).
+bool isGlobalAtomic(llvm::MachineInstr *MI)
+{
+  if (strstr(MI->getDesc().getName(), "ATOM_G")) {
+    return true;
+  }
+  return isArenaAtomic(MI);
+}
+// True when the instruction name contains "ATOM_A".
+bool isArenaAtomic(llvm::MachineInstr *MI)
+{
+  const char *opName = MI->getDesc().getName();
+  return strstr(opName, "ATOM_A") != NULL;
+}
+
+// Returns the source-swizzle suffix string for swizzle ID idx.  The table
+// holds one entry per ID, in ID order; index 0 is the empty string.
+// idx must be a valid table index: this is asserted, and is not checked when
+// asserts are compiled out.
+const char* getSrcSwizzle(unsigned idx) {
+  // static const: the table is built once instead of on every call.
+  static const char *const srcSwizzles[] = {
+    "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y",
+    ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w",
+    ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000",
+    ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw",
+    ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)",
+    "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy",
+    ".zw"
+  };
+  assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0])
+         && "Idx passed in is invalid!");
+  return srcSwizzles[idx];
+}
+// Returns the destination write-mask suffix string for swizzle ID idx.  The
+// table holds one entry per ID, in ID order; index 0 is the empty string.
+// idx must be a valid table index: this is asserted, and is not checked when
+// asserts are compiled out.
+const char* getDstSwizzle(unsigned idx) {
+  // static const: the table is built once instead of on every call.
+  static const char *const dstSwizzles[] = {
+    "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_",
+    ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w",
+  };
+  assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0])
+         && "Idx passed in is invalid!");
+  return dstSwizzles[idx];
+}
+/// Reads the flag bits currently stored on MI into curRes.u16all.
+/// For LLVM_VERSION >= 2500 the value is assembled from two fields:
+/// getFlags() shifted into the upper byte and getAsmPrinterFlags() as the
+/// lower part.
+void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
+{
+#if LLVM_VERSION >= 2500
+  // We need 16 bits of information, but LLVMr127097 cut the field in half.
+  // So we have to use two different fields to store all of our information.
+  const uint16_t highByte = MI->getFlags() << 8;
+  const uint16_t lowByte = MI->getAsmPrinterFlags();
+  curRes.u16all = highByte | lowByte;
+#else
+  curRes.u16all = MI->getAsmPrinterFlags();
+#endif
+}
+/// Helper function to clear the currently set flags and add the new flags taken from curRes.u16all (the counterpart of getAsmPrinterFlags).
+void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
+{
+#if LLVM_VERSION < 2500
+ MI->clearAsmPrinterFlags();
+ MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)curRes.u16all); // all 16 bits go into the asm-printer flag field
+#else
+ // We need 16 bits of information, but LLVMr127097 cut the field in half.
+ // So we have to use two different fields to store all of our information.
+ MI->clearAsmPrinterFlags();
+ MI->setFlags(0); // NOTE(review): looks redundant with the setFlags(upper) below if setFlags() assigns rather than ORs - confirm
+ uint8_t lower = curRes.u16all & 0xFF; // low byte, stored via setAsmPrinterFlag
+ uint8_t upper = (curRes.u16all >> 8) & 0xFF; // high byte, stored via setFlags
+ MI->setFlags(upper);
+ MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower);
+#endif
+}
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
new file mode 100644
index 00000000000..fd06fcca00d
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
@@ -0,0 +1,403 @@
+//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file provides declarations for functions that are used across different
+// classes and provide various conversions or utility to shorten the code
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILUTILITYFUNCTIONS_H_
+#define AMDILUTILITYFUNCTIONS_H_
+
+#include "AMDIL.h"
+#include "AMDILLLVMVersion.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
+// Utility functions from ID
+//
+namespace llvm { // Forward declarations of the LLVM classes named in the prototypes below.
+class TargetRegisterClass;
+class SDValue;
+class SDNode;
+class Value;
+class Type;
+class StructType;
+class IntegerType;
+class FunctionType;
+class VectorType;
+class ArrayType;
+class PointerType;
+class OpaqueType;
+class MachineInstr;
+
+}
+enum SrcSwizzles { // Source swizzle IDs; the order matches the string table in getSrcSwizzle() entry for entry.
+ AMDIL_SRC_SWIZZLE_DEFAULT = 0, // maps to the empty suffix string
+ AMDIL_SRC_SWIZZLE_X000,
+ AMDIL_SRC_SWIZZLE_0X00,
+ AMDIL_SRC_SWIZZLE_00X0,
+ AMDIL_SRC_SWIZZLE_000X,
+ AMDIL_SRC_SWIZZLE_Y000,
+ AMDIL_SRC_SWIZZLE_0Y00,
+ AMDIL_SRC_SWIZZLE_00Y0,
+ AMDIL_SRC_SWIZZLE_000Y,
+ AMDIL_SRC_SWIZZLE_Z000,
+ AMDIL_SRC_SWIZZLE_0Z00,
+ AMDIL_SRC_SWIZZLE_00Z0,
+ AMDIL_SRC_SWIZZLE_000Z,
+ AMDIL_SRC_SWIZZLE_W000,
+ AMDIL_SRC_SWIZZLE_0W00,
+ AMDIL_SRC_SWIZZLE_00W0,
+ AMDIL_SRC_SWIZZLE_000W,
+ AMDIL_SRC_SWIZZLE_XY00,
+ AMDIL_SRC_SWIZZLE_00XY,
+ AMDIL_SRC_SWIZZLE_ZW00,
+ AMDIL_SRC_SWIZZLE_00ZW,
+ AMDIL_SRC_SWIZZLE_XYZ0,
+ AMDIL_SRC_SWIZZLE_0XYZ,
+ AMDIL_SRC_SWIZZLE_XYZW,
+ AMDIL_SRC_SWIZZLE_0000,
+ AMDIL_SRC_SWIZZLE_XXXX,
+ AMDIL_SRC_SWIZZLE_YYYY,
+ AMDIL_SRC_SWIZZLE_ZZZZ,
+ AMDIL_SRC_SWIZZLE_WWWW,
+ AMDIL_SRC_SWIZZLE_XYXY,
+ AMDIL_SRC_SWIZZLE_ZWZW,
+ AMDIL_SRC_SWIZZLE_XZXZ,
+ AMDIL_SRC_SWIZZLE_YWYW,
+ AMDIL_SRC_SWIZZLE_X0Y0,
+ AMDIL_SRC_SWIZZLE_0X0Y,
+ AMDIL_SRC_SWIZZLE_XY_NEGY,
+ AMDIL_SRC_SWIZZLE_NEGYW,
+ AMDIL_SRC_SWIZZLE_NEGX,
+ AMDIL_SRC_SWIZZLE_XY_NEGXY,
+ AMDIL_SRC_SWIZZLE_NEG_XYZW,
+ AMDIL_SRC_SWIZZLE_0YZW,
+ AMDIL_SRC_SWIZZLE_X0ZW,
+ AMDIL_SRC_SWIZZLE_XY0W,
+ AMDIL_SRC_SWIZZLE_X,
+ AMDIL_SRC_SWIZZLE_Y,
+ AMDIL_SRC_SWIZZLE_Z,
+ AMDIL_SRC_SWIZZLE_W,
+ AMDIL_SRC_SWIZZLE_XY,
+ AMDIL_SRC_SWIZZLE_ZW,
+ AMDIL_SRC_SWIZZLE_LAST // equals the number of IDs above (49); not itself a swizzle
+};
+enum DstSwizzles { // Destination swizzle IDs; the order matches the string table in getDstSwizzle() entry for entry.
+ AMDIL_DST_SWIZZLE_DEFAULT = 0, // maps to the empty suffix string
+ AMDIL_DST_SWIZZLE_X___,
+ AMDIL_DST_SWIZZLE_XY__,
+ AMDIL_DST_SWIZZLE_XYZ_,
+ AMDIL_DST_SWIZZLE_XYZW,
+ AMDIL_DST_SWIZZLE__Y__,
+ AMDIL_DST_SWIZZLE__YZ_,
+ AMDIL_DST_SWIZZLE__YZW,
+ AMDIL_DST_SWIZZLE___Z_,
+ AMDIL_DST_SWIZZLE___ZW,
+ AMDIL_DST_SWIZZLE____W,
+ AMDIL_DST_SWIZZLE_X_ZW,
+ AMDIL_DST_SWIZZLE_XY_W,
+ AMDIL_DST_SWIZZLE_X_Z_,
+ AMDIL_DST_SWIZZLE_X__W,
+ AMDIL_DST_SWIZZLE__Y_W,
+ AMDIL_DST_SWIZZLE_LAST // equals the number of IDs above (16); not itself a swizzle
+};
+// Function to get the correct src swizzle string from ID
+const char *getSrcSwizzle(unsigned);
+
+// Function to get the correct dst swizzle string from ID
+const char *getDstSwizzle(unsigned);
+
+const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID);
+
+unsigned int getMoveInstFromID(unsigned int ID);
+unsigned int getPHIMoveInstFromID(unsigned int ID);
+
+// Utility functions from Type.
+const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type);
+unsigned int getTargetIndependentMoveFromType(unsigned int type);
+
+// Debug functions for SDNode and SDValue.
+void printSDValue(const llvm::SDValue &Op, int level);
+void printSDNode(const llvm::SDNode *N);
+
+// Functions to check if an opcode is a specific type.
+bool isMove(unsigned int opcode);
+bool isPHIMove(unsigned int opcode);
+bool isMoveOrEquivalent(unsigned int opcode);
+
+// Function to check address space
+bool check_type(const llvm::Value *ptr, unsigned int addrspace);
+
+// Group of functions that recursively calculate the size of a structure based
+// on its sub-types.
+size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false);
+size_t
+getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false);
+
+// Group of functions that recursively calculate the number of elements of a
+// structure based on its sub-types.
+size_t getNumElements(llvm::Type * const T);
+size_t getNumElements(llvm::StructType * const ST);
+size_t getNumElements(llvm::IntegerType * const IT);
+size_t getNumElements(llvm::FunctionType * const FT);
+size_t getNumElements(llvm::ArrayType * const AT);
+size_t getNumElements(llvm::VectorType * const VT);
+size_t getNumElements(llvm::PointerType * const PT);
+size_t getNumElements(llvm::OpaqueType * const OT);
+const llvm::Value *getBasePointerValue(const llvm::Value *V);
+const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI);
+
+
+int64_t GET_SCALAR_SIZE(llvm::Type* A);
+
+// Helper functions that check the opcode for status information
+bool isLoadInst(llvm::MachineInstr *MI);
+bool isExtLoadInst(llvm::MachineInstr *MI);
+bool isSWSExtLoadInst(llvm::MachineInstr *MI);
+bool isSExtLoadInst(llvm::MachineInstr *MI);
+bool isZExtLoadInst(llvm::MachineInstr *MI);
+bool isAExtLoadInst(llvm::MachineInstr *MI);
+bool isStoreInst(llvm::MachineInstr *MI);
+bool isTruncStoreInst(llvm::MachineInstr *MI);
+bool isAtomicInst(llvm::MachineInstr *MI);
+bool isVolatileInst(llvm::MachineInstr *MI);
+bool isGlobalInst(llvm::MachineInstr *MI);
+bool isPrivateInst(llvm::MachineInstr *MI);
+bool isConstantInst(llvm::MachineInstr *MI);
+bool isRegionInst(llvm::MachineInstr *MI);
+bool isLocalInst(llvm::MachineInstr *MI);
+bool isImageInst(llvm::MachineInstr *MI);
+bool isAppendInst(llvm::MachineInstr *MI);
+bool isRegionAtomic(llvm::MachineInstr *MI);
+bool isLocalAtomic(llvm::MachineInstr *MI);
+bool isGlobalAtomic(llvm::MachineInstr *MI);
+bool isArenaAtomic(llvm::MachineInstr *MI);
+
+
+// Macros that are used to help with switch statements for various data types
+// However, these macros do not return anything, unlike the second set below.
+#define ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_i8: \
+case Instr##_i16: \
+case Instr##_i32:
+
+#define ExpandCaseTo32bitIntTruncTypes(Instr) \
+case Instr##_i16i8: \
+case Instr##_i32i8: \
+case Instr##_i32i16:
+
+#define ExpandCaseToIntTypes(Instr) \
+ ExpandCaseTo32bitIntTypes(Instr) \
+case Instr##_i64:
+
+#define ExpandCaseToIntTruncTypes(Instr) /* 32-bit trunc labels + i64 ones */ \
+  ExpandCaseTo32bitIntTruncTypes(Instr) \
+  case Instr##_i64i8: \
+  case Instr##_i64i16: \
+  case Instr##_i64i32: /* final line: no continuation, the macro ends here */
+
+#define ExpandCaseToFloatTypes(Instr) \
+  case Instr##_f32: \
+  case Instr##_f64:
+
+#define ExpandCaseToFloatTruncTypes(Instr) \
+  case Instr##_f64f32:
+
+#define ExpandCaseTo32bitScalarTypes(Instr) /* i8, i16, i32 and f32 */ \
+  ExpandCaseTo32bitIntTypes(Instr) \
+  case Instr##_f32:
+
+#define ExpandCaseToAllScalarTypes(Instr) /* float labels, then int labels */ \
+  ExpandCaseToFloatTypes(Instr) \
+  ExpandCaseToIntTypes(Instr)
+
+#define ExpandCaseToAllScalarTruncTypes(Instr) \
+  ExpandCaseToFloatTruncTypes(Instr) \
+  ExpandCaseToIntTruncTypes(Instr)
+
+// Vector counterparts of the scalar case-label macros above.
+#define ExpandCaseToVectorIntTypes(Instr) \
+  case Instr##_v2i8: \
+  case Instr##_v4i8: \
+  case Instr##_v2i16: \
+  case Instr##_v4i16: \
+  case Instr##_v2i32: \
+  case Instr##_v4i32: \
+  case Instr##_v2i64:
+
+#define ExpandCaseToVectorIntTruncTypes(Instr) \
+  case Instr##_v2i16i8: \
+  case Instr##_v4i16i8: \
+  case Instr##_v2i32i8: \
+  case Instr##_v4i32i8: \
+  case Instr##_v2i32i16: \
+  case Instr##_v4i32i16: \
+  case Instr##_v2i64i8: \
+  case Instr##_v2i64i16: \
+  case Instr##_v2i64i32:
+
+#define ExpandCaseToVectorFloatTypes(Instr) \
+  case Instr##_v2f32: \
+  case Instr##_v4f32: \
+  case Instr##_v2f64:
+
+#define ExpandCaseToVectorFloatTruncTypes(Instr) \
+  case Instr##_v2f64f32:
+
+#define ExpandCaseToVectorByteTypes(Instr) \
+  case Instr##_v4i8: \
+  case Instr##_v2i16: \
+  case Instr##_v4i16:
+
+#define ExpandCaseToAllVectorTypes(Instr) \
+  ExpandCaseToVectorFloatTypes(Instr) \
+  ExpandCaseToVectorIntTypes(Instr)
+
+#define ExpandCaseToAllVectorTruncTypes(Instr) \
+  ExpandCaseToVectorFloatTruncTypes(Instr) \
+  ExpandCaseToVectorIntTruncTypes(Instr)
+
+#define ExpandCaseToAllTypes(Instr) \
+  ExpandCaseToAllVectorTypes(Instr) \
+  ExpandCaseToAllScalarTypes(Instr)
+
+#define ExpandCaseToAllTruncTypes(Instr) \
+  ExpandCaseToAllVectorTruncTypes(Instr) \
+  ExpandCaseToAllScalarTruncTypes(Instr)
+
+#define ExpandCaseToPackedTypes(Instr) /* v2i8, v4i8, v2i16, v4i16 */ \
+  case Instr##_v2i8: \
+  case Instr##_v4i8: \
+  case Instr##_v2i16: \
+  case Instr##_v4i16:
+
+#define ExpandCaseToByteShortTypes(Instr) /* i8, i16 + the packed labels */ \
+  case Instr##_i8: \
+  case Instr##_i16: \
+  ExpandCaseToPackedTypes(Instr)
+
+// Case-label macros with a body: `case Instr_<ty>: return Return_<ty>;` per type.
+#define ExpandCaseTo32bitIntReturn(Instr, Return) \
+  case Instr##_i8: return Return##_i8; \
+  case Instr##_i16: return Return##_i16; \
+  case Instr##_i32: return Return##_i32;
+
+#define ExpandCaseToIntReturn(Instr, Return) \
+  ExpandCaseTo32bitIntReturn(Instr, Return) \
+  case Instr##_i64: return Return##_i64;
+
+#define ExpandCaseToFloatReturn(Instr, Return) \
+  case Instr##_f32: return Return##_f32; \
+  case Instr##_f64: return Return##_f64;
+
+#define ExpandCaseToAllScalarReturn(Instr, Return) \
+  ExpandCaseToFloatReturn(Instr, Return) \
+  ExpandCaseToIntReturn(Instr, Return)
+
+// Case labels for common groupings of AMDIL register-class IDs.
+#define ExpandCaseTo1CompRegID \
+  case AMDIL::GPRI8RegClassID: \
+  case AMDIL::GPRI16RegClassID: \
+  case AMDIL::GPRI32RegClassID: \
+  case AMDIL::GPRF32RegClassID:
+
+#define ExpandCaseTo2CompRegID \
+  case AMDIL::GPRI64RegClassID: \
+  case AMDIL::GPRF64RegClassID: \
+  case AMDIL::GPRV2I8RegClassID: \
+  case AMDIL::GPRV2I16RegClassID: \
+  case AMDIL::GPRV2I32RegClassID: \
+  case AMDIL::GPRV2F32RegClassID:
+
+// Case labels grouped by the total bit width of the type suffix.
+#define ExpandCaseTo8BitType(Instr) \
+  case Instr##_i8:
+
+#define ExpandCaseTo16BitType(Instr) \
+  case Instr##_v2i8: \
+  case Instr##_i16:
+
+#define ExpandCaseTo32BitType(Instr) \
+  case Instr##_v4i8: \
+  case Instr##_v2i16: \
+  case Instr##_i32: \
+  case Instr##_f32:
+
+#define ExpandCaseTo64BitType(Instr) \
+  case Instr##_v4i16: \
+  case Instr##_v2i32: \
+  case Instr##_v2f32: \
+  case Instr##_i64: \
+  case Instr##_f64:
+
+#define ExpandCaseTo128BitType(Instr) \
+  case Instr##_v4i32: \
+  case Instr##_v4f32: \
+  case Instr##_v2i64: \
+  case Instr##_v2f64:
+
+bool commaPrint(int i, OSTREAM_TYPE &O);
+/// Helper functions to get and set the current AsmPrinter flags.
+void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
+void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
+
+#endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/src/gallium/drivers/radeon/AMDILVersion.td b/src/gallium/drivers/radeon/AMDILVersion.td
new file mode 100644
index 00000000000..eff799ac4d9
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDILVersion.td
@@ -0,0 +1,116 @@
+//===--------------------------------------------------------------------===//
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+// Intrinsic operation support
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {  // barrier and region-query intrinsics
+  def int_AMDIL_barrier :
+      GCCBuiltin<"barrier">, BinaryIntNoRetInt;
+  def int_AMDIL_barrier_global :
+      GCCBuiltin<"barrierGlobal">, BinaryIntNoRetInt;
+  def int_AMDIL_barrier_local :
+      GCCBuiltin<"barrierLocal">, BinaryIntNoRetInt;
+  def int_AMDIL_barrier_region :
+      GCCBuiltin<"barrierRegion">, BinaryIntNoRetInt;
+  def int_AMDIL_get_region_id :
+      GCCBuiltin<"__amdil_get_region_id_int">, Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_region_local_id :
+      GCCBuiltin<"__amdil_get_region_local_id_int">, Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_num_regions :
+      GCCBuiltin<"__amdil_get_num_regions_int">, Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_region_size :
+      GCCBuiltin<"__amdil_get_region_size_int">, Intrinsic<[llvm_v4i32_ty], [], []>;
+}
+
+let isCall=1, isNotDuplicable=1 in {
+  let Predicates=[hasRegionAS] in {
+    def BARRIER_EGNI : BinaryOpNoRet<IL_OP_BARRIER, (outs),
+        (ins GPRI32:$flag, GPRI32:$id),
+        "fence_threads_memory_lds_gds_gws",
+        [(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
+  }
+  let Predicates=[noRegionAS] in {
+    def BARRIER_7XX : BinaryOpNoRet<IL_OP_BARRIER, (outs),
+        (ins GPRI32:$flag, GPRI32:$id),
+        "fence_threads_memory_lds",
+        [(int_AMDIL_barrier GPRI32:$flag, GPRI32:$id)]>;
+  }
+  // int_AMDIL_barrier has two predicated forms above; the defs below carry no predicate.
+  def BARRIER_LOCAL : BinaryOpNoRet<IL_OP_BARRIER_LOCAL, (outs),
+      (ins GPRI32:$flag, GPRI32:$id),
+      "fence_threads_lds",
+      [(int_AMDIL_barrier_local GPRI32:$flag, GPRI32:$id)]>;
+
+  def BARRIER_GLOBAL : BinaryOpNoRet<IL_OP_BARRIER_GLOBAL, (outs),
+      (ins GPRI32:$flag, GPRI32:$id),
+      "fence_threads_memory",
+      [(int_AMDIL_barrier_global GPRI32:$flag, GPRI32:$id)]>;
+
+  def BARRIER_REGION : BinaryOpNoRet<IL_OP_BARRIER_REGION, (outs),
+      (ins GPRI32:$flag, GPRI32:$id),
+      "fence_threads_gds",
+      [(int_AMDIL_barrier_region GPRI32:$flag, GPRI32:$id)]>;
+  // The GET_* defs are IL_OP_MOV forms whose asm string hard-codes the source operand.
+  def GET_REGION_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1022.xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_region_id))]>;
+  // NOTE(review): reads r1022.xyz0, the same source as GET_REGION_ID -- confirm.
+  def GET_REGION_LOCAL_ID : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1022.xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_region_local_id))]>;
+
+  def GET_REGION_SIZE : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[10].xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_region_size))]>;
+
+  def GET_NUM_REGIONS : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[11].xyz0"),
+      [(set GPRV4I32:$dst, (int_AMDIL_get_num_regions))]>;
+
+}
diff --git a/src/gallium/drivers/radeon/AMDISA.h b/src/gallium/drivers/radeon/AMDISA.h
new file mode 100644
index 00000000000..12d28a6d49b
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISA.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISA_H
+#define AMDISA_H
+
+//#include "MCTargetDesc/GPUMCTargetDesc.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "AMDISATargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class AMDISATargetMachine;
+ /* Pass factories. E.g. createAMDISAConvertToISAPass() (AMDISAConvertToISA.cpp) returns a new pass object. */
+ FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+ FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm);
+ FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDISAReorderPreloadInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDISALowerShaderInstructionsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDISADelimitInstGroupsPass(TargetMachine &tm);
+
+ FunctionPass *createAMDISAConvertToISAPass(TargetMachine &tm);
+
+ FunctionPass *createAMDISAFixRegClassesPass(TargetMachine &tm);
+
+} /* End namespace llvm */
+#endif /* AMDISA_H */
diff --git a/src/gallium/drivers/radeon/AMDISA.td b/src/gallium/drivers/radeon/AMDISA.td
new file mode 100644
index 00000000000..fbb02f2c3de
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISA.td
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+include "AMDIL.td"
+include "AMDISAIntrinsics.td"
+include "AMDISARegisterInfo.td"
+include "AMDISAInstructions.td"
diff --git a/src/gallium/drivers/radeon/AMDISAConstants.pm b/src/gallium/drivers/radeon/AMDISAConstants.pm
new file mode 100644
index 00000000000..6a0954fd232
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAConstants.pm
@@ -0,0 +1,35 @@
+#
+# Copyright 2011 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors: Tom Stellard <thomas.stellard@amd.com>
+#
+
+package AMDISAConstants;
+# Register-count constants, exported by default (used by AMDISAGenShaderPatterns.pl).
+use strict;
+use warnings;
+use base 'Exporter';
+
+use constant INPUT_REG_COUNT => 64;
+use constant CONST_REG_COUNT => 1024;
+our @EXPORT = ('INPUT_REG_COUNT', 'CONST_REG_COUNT');
+1;
diff --git a/src/gallium/drivers/radeon/AMDISAConvertToISA.cpp b/src/gallium/drivers/radeon/AMDISAConvertToISA.cpp
new file mode 100644
index 00000000000..ec8eb16d0ff
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAConvertToISA.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include "AMDIL.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDISA.h"
+#include "AMDISAInstrInfo.h"
+#include "AMDISAUtil.h"
+
+#include "R600InstrInfo.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+  /* Machine-function pass that swaps each instruction for the replacement
+   * returned by AMDISAInstrInfo::convertToISA(), when there is one. */
+  class AMDISAConvertToISAPass : public MachineFunctionPass {
+
+  private:
+    static char ID;      /* handed to the MachineFunctionPass constructor */
+    TargetMachine &TM;   /* source of the target's instruction info */
+
+  public:
+    AMDISAConvertToISAPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  };
+} /* End anonymous namespace */
+
+char AMDISAConvertToISAPass::ID = 0;
+/* Returns a newly allocated AMDISAConvertToISAPass bound to tm. */
+FunctionPass *llvm::createAMDISAConvertToISAPass(TargetMachine &tm) {
+ return new AMDISAConvertToISAPass(tm);
+}
+
+/* Replaces each instruction convertToISA() converts; returns true if any was. */
+bool AMDISAConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+  const AMDISAInstrInfo * TII =
+                      static_cast<const AMDISAInstrInfo*>(TM.getInstrInfo());
+  bool changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    /* I leaves Cur before Cur may be erased and is never moved past end(). */
+    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+      MachineBasicBlock::iterator Cur = I++;
+      MachineInstr * newInstr = TII->convertToISA(*Cur, MF, MBB.findDebugLoc(Cur));
+      if (!newInstr) continue;
+      MBB.insert(Cur, newInstr);
+      Cur->eraseFromParent();
+      changed = true;
+    }
+  }
+  return changed;
+}
diff --git a/src/gallium/drivers/radeon/AMDISADelimitInstGroups.cpp b/src/gallium/drivers/radeon/AMDISADelimitInstGroups.cpp
new file mode 100644
index 00000000000..d28e360d776
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISADelimitInstGroups.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "AMDIL.h"
+#include "AMDISA.h"
+#include "AMDISARegisterInfo.h"
+#include "AMDISAUtil.h"
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <iostream>
+#include <stdio.h>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+  class AMDISADelimitInstGroupsPass : public MachineFunctionPass {
+  private:
+    static char ID;
+    TargetMachine &TM;
+    /* Element index of the last instruction added to the current group,
+     * or -1 when the group is empty. */
+    int currentLast;
+    /* Constant registers read by the current group (the map is used as a set). */
+    std::map<unsigned,bool> constantReads;
+    /* Keyed by register index; the values are MachineInstr pointers.  Tracks
+     * register uses whose definitions have already been added to the current
+     * instruction group, and which therefore cannot be emitted in that
+     * group. */
+    IndexedMap<MachineInstr*> invalidUses;
+
+    void endGroup(MachineBasicBlock &BB, MachineFunction &MF,
+                                  MachineBasicBlock::iterator lastInst);
+
+    void addConstantReads(MachineInstr &MI);
+
+  public:
+    AMDISADelimitInstGroupsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm), currentLast(-1) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+  };
+} /* End anonymous namespace */
+
+char AMDISADelimitInstGroupsPass::ID = 0;
+/* Returns a newly allocated AMDISADelimitInstGroupsPass bound to tm. */
+FunctionPass *llvm::createAMDISADelimitInstGroupsPass(TargetMachine &tm) {
+ return new AMDISADelimitInstGroupsPass(tm);
+}
+
+/* Walks every basic block and closes an instruction group (endGroup() emits
+ * a LAST marker) whenever the next instruction cannot join the current
+ * group.  Returns true when at least one block received a marker. */
+bool AMDISADelimitInstGroupsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  bool changed = false;
+  const AMDISARegisterInfo * TRI =
+                static_cast<const AMDISARegisterInfo*>(TM.getRegisterInfo());
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator lastRealInst = MBB.begin();
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+                                                      I != E; ++I) {
+      MachineInstr &MI = *I;
+      if (MI.getOpcode() == AMDIL::EXPORT_REG) {
+        endGroup(MBB, MF, lastRealInst);
+        continue;
+      }
+
+      if (isReductionOp(MI.getOpcode())) {
+        endGroup(MBB, MF, lastRealInst);
+        endGroup(MBB, MF, I);
+        continue;
+      }
+
+      /* The equivalent handling for isFCOp() opcodes (end the group, then
+       * skip the instruction) is disabled: it was commented out here. */
+      if (isPlaceHolderOpcode(MI.getOpcode()) || MI.getNumOperands() == 0) {
+        continue;
+      }
+
+      MachineOperand dstOp = MI.getOperand(0);
+      if (!dstOp.isReg()) {
+        continue;
+      }
+      unsigned dstReg = dstOp.getReg();
+
+      /* Only 4 different constant registers are allowed to be read per
+       * instruction group */
+      addConstantReads(MI);
+
+      /* XXX: This is assuming the register is of type f32 */
+      unsigned element;
+      if (MI.getOpcode() == AMDIL::SET_CHAN) {
+        element = MI.getOperand(2).getImm();
+      } else {
+        element = getRegElement(TRI, dstReg);
+      }
+      if (currentLast > -1 && element <= (unsigned)currentLast) {
+        endGroup(MBB, MF, lastRealInst);
+      } else if (constantReads.size() > 4){
+        endGroup(MBB, MF, lastRealInst);
+      } else {
+        for (unsigned i = 1, numOps = MI.getNumOperands(); i < numOps; i++) {
+          if (!MI.getOperand(i).isReg()) {
+            continue;
+          }
+          unsigned reg = MI.getOperand(i).getReg();
+          if (invalidUses.inBounds(reg) && invalidUses[reg]) {
+            endGroup(MBB, MF, lastRealInst);
+            break;
+          }
+        }
+      }
+
+      if (isTransOp(MI.getOpcode())) {
+        endGroup(MBB, MF, I);
+        continue;
+      }
+
+      /* XXX; getNextOperandForReg() appears to go in the reverse order of what I
+       * would expect. It finds the previous use, not the next one. */
+      MachineOperand * nextUse = dstOp.getNextOperandForReg();
+      if (nextUse) {
+        invalidUses.grow(dstReg);
+        invalidUses[dstReg] = nextUse->getParent();
+      }
+      /* Update the current last element */
+      currentLast = element;
+
+      /* We need to add the constant reads again in case they were cleared
+       * by the endGroup() function. */
+      addConstantReads(MI);
+
+      lastRealInst = I;
+    }
+    endGroup(MBB, MF, lastRealInst);
+    changed = true;  /* the endGroup() call above always inserts a marker */
+  }
+  return changed;
+}
+
+void AMDISADelimitInstGroupsPass::endGroup(MachineBasicBlock &BB,
+                  MachineFunction &MF, MachineBasicBlock::iterator lastInst)
+{
+  /* Emit the LAST marker after lastInst, then reset the group state.  XXX: Enum here */
+  BB.insertAfter(lastInst, BuildMI(MF, BB.findDebugLoc(lastInst),
+                                   TM.getInstrInfo()->get(AMDIL::LAST)));
+  invalidUses.clear();
+  constantReads.clear();
+  currentLast = -1;
+}
+
+/* Records in constantReads every register operand of MI that belongs to
+ * AMDIL::CRRegClass. */
+void AMDISADelimitInstGroupsPass::addConstantReads(MachineInstr &MI)
+{
+  /* Operand 0 is skipped; runOnMachineFunction() treats it as the destination. */
+  for (unsigned idx = 1, count = MI.getNumOperands(); idx < count; ++idx) {
+    const MachineOperand &operand = MI.getOperand(idx);
+    if (operand.isReg() && AMDIL::CRRegClass.contains(operand.getReg())) {
+      constantReads[operand.getReg()] = true;
+    }
+  }
+}
diff --git a/src/gallium/drivers/radeon/AMDISAFixRegClasses.cpp b/src/gallium/drivers/radeon/AMDISAFixRegClasses.cpp
new file mode 100644
index 00000000000..460c68f6d86
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAFixRegClasses.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#include "AMDIL.h"
+#include "AMDISA.h"
+#include "AMDISARegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ class AMDISAFixRegClassesPass : public MachineFunctionPass {
+ /* Pass that re-classes virtual registers of class GPRV4F32 to REPL (see runOnMachineFunction). */
+ private:
+ static char ID;
+ TargetMachine &TM; /* NOTE(review): not read by runOnMachineFunction() */
+
+ public:
+ AMDISAFixRegClassesPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ };
+
+} /* End anonymous namespace */
+
+char AMDISAFixRegClassesPass::ID = 0;
+/* Returns a newly allocated AMDISAFixRegClassesPass bound to tm. */
+FunctionPass *llvm::createAMDISAFixRegClassesPass(TargetMachine &tm)
+{
+ return new AMDISAFixRegClassesPass(tm);
+}
+
+/* Moves every virtual register of class GPRV4F32 that appears as an operand
+ * into the REPL register class; returns true if any class was changed. */
+bool AMDISAFixRegClassesPass::runOnMachineFunction(MachineFunction &MF)
+{
+  MachineRegisterInfo & MRI = MF.getRegInfo();
+  bool changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    /* No insertion or erasure here: a plain walk never steps past end(). */
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+                                                      I != E; ++I) {
+      for (unsigned i = 0, numOps = I->getNumOperands(); i < numOps; i++) {
+        MachineOperand & MO = I->getOperand(i);
+        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+          continue;
+        }
+        if (MRI.getRegClass(MO.getReg())->getID() == AMDIL::GPRV4F32RegClassID) {
+          MRI.setRegClass(MO.getReg(), &AMDIL::REPLRegClass);
+          changed = true;
+        }
+      }
+    }
+  }
+  return changed;
+}
diff --git a/src/gallium/drivers/radeon/AMDISAGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDISAGenInstrEnums.pl
new file mode 100644
index 00000000000..3865db2f9de
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAGenInstrEnums.pl
@@ -0,0 +1,130 @@
+#
+# Copyright 2011 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors: Tom Stellard <thomas.stellard@amd.com>
+#
+
+use warnings;
+use strict;
+
+my @F32_MULTICLASSES = qw {
+  UnaryIntrinsicFloat
+  UnaryIntrinsicFloatScalar
+  BinaryIntrinsicFloat
+  TernaryIntrinsicFloat
+  BinaryOpMCFloat
+};
+
+my @I32_MULTICLASSES = qw {
+  BinaryOpMCInt
+  BinaryOpMCi32
+};
+
+my @GENERATION_ENUM = qw {
+  R600_CAYMAN
+  R600
+  EG_CAYMAN
+  CAYMAN
+};
+# Output flavour requested on the command line; compared against 'td', 'h' and 'inc'.
+my $FILE_TYPE = defined $ARGV[0] ? $ARGV[0] : '';
+open(my $amdil_fh, '<', 'AMDILInstructions.td')
+  or die "Cannot open AMDILInstructions.td: $!\n";
+my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32');
+# Add one enum name per matching defm/def line: NAME_f32 or NAME_i32.
+while (my $line = <$amdil_fh>) {
+  if ($line =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z]+)</) {
+    if (grep {$_ eq $2} @F32_MULTICLASSES) {
+      push @INST_ENUMS, "$1\_f32";
+    } elsif (grep {$_ eq $2} @I32_MULTICLASSES) {
+      push @INST_ENUMS, "$1\_i32";
+    }
+  } elsif ($line =~ /def\s+([A-Z_]+)(_[fi]32)/) {
+    push @INST_ENUMS, "$1$2";
+  }
+}
+close($amdil_fh);
+
+if ($FILE_TYPE eq 'td') {
+  print_td_enum('AMDILInst', 'AMDILInstEnums', 'field bits<16>', @INST_ENUMS);
+
+  print_td_enum('AMDISAGen', 'AMDISAGenEnums', 'field bits<3>', @GENERATION_ENUM);
+
+  my %constants = (
+    'PI' => '0x40490fdb',
+    'TWO_PI' => '0x40c90fdb',
+    'TWO_PI_INV' => '0x3e22f983'
+  );
+  # Keys are sorted so the generated text is the same on every run.
+  print "class Constants {\n";
+  foreach (sort keys(%constants)) {
+    print "int $_ = $constants{$_};\n";
+  }
+  print "}\n";
+  print "def CONST : Constants;\n";
+
+} elsif ($FILE_TYPE eq 'h') {
+  print "unsigned GetRealAMDILOpcode(unsigned internalOpcode) const;\n";
+
+  print_h_enum('AMDILTblgenOpcode', @INST_ENUMS);
+
+  print_h_enum('AMDISAGen', @GENERATION_ENUM);
+
+} elsif ($FILE_TYPE eq 'inc') {
+  print "unsigned AMDISAInstrInfo::GetRealAMDILOpcode(unsigned internalOpcode) const\n{\n";
+  print " switch(internalOpcode) {\n";
+  #Start at 1 so we skip NONE
+  for (my $i = 1; $i < scalar(@INST_ENUMS); $i++) {
+    my $inst = $INST_ENUMS[$i];
+    print " case AMDISAInstrInfo::$inst: return AMDIL::$inst;\n";
+  }
+  print " default: abort();\n";
+  print " }\n}\n";
+} else {
+  die "usage: $0 <td|h|inc>\n";  # fail instead of writing an empty file
+}
+
+
+# Prints a TableGen class $class holding one "$field NAME = INDEX;" line per
+# value (INDEX is the value's position), followed by "def $instance : $class;".
+sub print_td_enum {
+  my ($instance, $class, $field, @values) = @_;
+
+  print "class $class {\n";
+  foreach my $index (0 .. $#values) {
+    print " $field $values[$index] = $index;\n";
+  }
+  print "}\n";
+  print "def $instance : $class;\n";
+}
+
+# Prints a C enum named $enum whose enumerators are numbered by position in @list.
+sub print_h_enum {
+  my ($enum, @list) = @_;
+  print "enum $enum {\n";
+  my $value = 0;
+  foreach my $name (@list) {
+    print " $name = " . $value++ . ",\n";
+  }
+  print "};\n";
+}
+
diff --git a/src/gallium/drivers/radeon/AMDISAGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDISAGenShaderPatterns.pl
new file mode 100644
index 00000000000..fde77ed87e7
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAGenShaderPatterns.pl
@@ -0,0 +1,40 @@
+#
+# Copyright 2011 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors: Tom Stellard <thomas.stellard@amd.com>
+#
+
+use strict;
+use warnings;
+
+use AMDISAConstants;
+
+# Text placed in front of each constant register index in the generated
+# patterns; taken from the first command-line argument.
+my $reg_prefix = $ARGV[0];
+
+# Print one TableGen pattern per index in [0, CONST_REG_COUNT): each maps
+# int_AMDISA_load_const <index> to a MOVE_f32 of <prefix><index>.
+# NOTE(review): CONST_REG_COUNT presumably comes from AMDISAConstants - confirm.
+for (my $i = 0; $i < CONST_REG_COUNT; $i++) {
+print <<STRING;
+def : Pat <
+ (int_AMDISA_load_const $i),
+ (MOVE_f32 $reg_prefix$i)
+>;
+STRING
+}
diff --git a/src/gallium/drivers/radeon/AMDISAISelLowering.cpp b/src/gallium/drivers/radeon/AMDISAISelLowering.cpp
new file mode 100644
index 00000000000..eeb474a1b62
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAISelLowering.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDISAISelLowering.h"
+
+using namespace llvm;
+
+/* Builds the AMDISA lowering on top of the AMDIL one.  The only addition is
+ * that EXTRACT_VECTOR_ELT on v4f32 is marked Legal for device generations
+ * newer than HD6XXX. */
+AMDISATargetLowering::AMDISATargetLowering(TargetMachine &TM)
+  : AMDILTargetLowering(TM)
+{
+  const AMDILSubtarget &subtarget = TM.getSubtarget<AMDILSubtarget>();
+
+  /* XXX: Not supported yet on R600 */
+  if (subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+    return;
+  }
+
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+}
diff --git a/src/gallium/drivers/radeon/AMDISAISelLowering.h b/src/gallium/drivers/radeon/AMDISAISelLowering.h
new file mode 100644
index 00000000000..e27564fd873
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAISelLowering.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISAISELLOWERING_H
+#define AMDISAISELLOWERING_H
+
+#include "AMDILISelLowering.h"
+
+namespace llvm {
+
+ /* Target lowering for the AMDISA backend.  Derives from
+ * AMDILTargetLowering and declares only a constructor. */
+ class AMDISATargetLowering : public AMDILTargetLowering
+ {
+ public:
+ /* @param TM The target machine this lowering object is created for. */
+ AMDISATargetLowering(TargetMachine &TM);
+
+ };
+
+} /* End namespace llvm */
+
+#endif /* AMDISAISELLOWERING_H */
diff --git a/src/gallium/drivers/radeon/AMDISAInstrEnums.h b/src/gallium/drivers/radeon/AMDISAInstrEnums.h
new file mode 100644
index 00000000000..42dac67d3b6
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAInstrEnums.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+/* Numeric identifiers for the f32 AMDIL instructions, numbered consecutively
+ * from NONE = 0.
+ * NOTE(review): the layout matches what the enum generator script prints;
+ * this looks like checked-in generated output - confirm before hand-editing. */
+namespace AMDISAInstrInfo {
+ enum AMDILInstructions {
+ NONE = 0,
+ ADD_f32 = 1,
+ ABS_f32 = 2,
+ FRAC_f32 = 3,
+ PIREDUCE_f32 = 4,
+ ACOS_f32 = 5,
+ ATAN_f32 = 6,
+ ASIN_f32 = 7,
+ TAN_f32 = 8,
+ SIN_f32 = 9,
+ COS_f32 = 10,
+ SQRT_f32 = 11,
+ EXP_f32 = 12,
+ EXPVEC_f32 = 13,
+ SQRTVEC_f32 = 14,
+ COSVEC_f32 = 15,
+ SINVEC_f32 = 16,
+ LOGVEC_f32 = 17,
+ RSQVEC_f32 = 18,
+ EXN_f32 = 19,
+ SIGN_f32 = 20,
+ LENGTH_f32 = 21,
+ POW_f32 = 22,
+ MIN_f32 = 23,
+ MAX_f32 = 24,
+ MAD_f32 = 25,
+ LN_f32 = 26,
+ LOG_f32 = 27,
+ RSQ_f32 = 28,
+ DIV_f32 = 29,
+ CLAMP_f32 = 30,
+ FMA_f32 = 31,
+ LERP_f32 = 32,
+ NEG_f32 = 33,
+ INTTOANY_f32 = 34,
+ };
+}
+/* Identifiers for the hardware-generation groups an instruction can be
+ * tagged with.
+ * NOTE(review): AMDISAInstrInfo.cpp also switches on an R600 value that is
+ * not listed here, so this enum may be out of date - confirm. */
+namespace AMDISAInstrInfo {
+ enum AMDISAGen {
+ R600_CAYMAN = 0,
+ EG_CAYMAN = 1,
+ CAYMAN = 2,
+ };
+}
diff --git a/src/gallium/drivers/radeon/AMDISAInstrInfo.cpp b/src/gallium/drivers/radeon/AMDISAInstrInfo.cpp
new file mode 100644
index 00000000000..ed40e8e64ae
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAInstrInfo.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+#include "AMDIL.h"
+#include "AMDISAInstrInfo.h"
+
+#include "AMDISARegisterInfo.h"
+#include "AMDISATargetMachine.h"
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+/* Builds the AMDIL -> ISA opcode table.  Every instruction description is
+ * inspected: the AMDIL opcode it implements is read from TSFlags bits 48-63
+ * and its generation tag from bits 40-42.  Instructions with no AMDIL opcode,
+ * or whose generation tag does not cover the current device, are skipped. */
+AMDISAInstrInfo::AMDISAInstrInfo(AMDISATargetMachine &tm)
+  : AMDILInstrInfo(tm), TM(tm)
+{
+  const unsigned deviceGen =
+      TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+
+  for (unsigned opcode = 0; opcode < AMDIL::INSTRUCTION_LIST_END; ++opcode) {
+    const MCInstrDesc &desc = get(opcode);
+    const uint32_t generationTag = (desc.TSFlags >> 40) & 0x7;
+    const uint32_t internalOp = (desc.TSFlags >> 48) & 0xffff;
+
+    /* An internal opcode of 0 means the instruction replaces no AMDIL op. */
+    if (internalOp == 0) {
+      continue;
+    }
+
+    bool availableOnDevice = false;
+    switch (generationTag) {
+    case AMDISAInstrInfo::R600_CAYMAN:
+      availableOnDevice = deviceGen <= AMDILDeviceInfo::HD6XXX;
+      break;
+    case AMDISAInstrInfo::R600:
+      availableOnDevice = deviceGen == AMDILDeviceInfo::HD4XXX;
+      break;
+    case AMDISAInstrInfo::EG_CAYMAN:
+      availableOnDevice = deviceGen >= AMDILDeviceInfo::HD5XXX
+                          && deviceGen <= AMDILDeviceInfo::HD6XXX;
+      break;
+    case AMDISAInstrInfo::CAYMAN:
+      availableOnDevice = deviceGen == AMDILDeviceInfo::HD6XXX;
+      break;
+    default:
+      /* Unknown generation tag. */
+      abort();
+      break;
+    }
+
+    if (!availableOnDevice) {
+      continue;
+    }
+
+    const unsigned amdilOpcode = GetRealAMDILOpcode(internalOp);
+    amdilToISA[amdilOpcode] = desc.Opcode;
+  }
+}
+
+/* Creates the ISA form of MI: a new instruction built in MF with the opcode
+ * returned by getISAOpcode() and a copy of every operand of MI.  For each
+ * operand that defines a virtual register, that register's class is replaced
+ * by the one getISARegClass() returns for it.
+ *
+ * @param MI The instruction to convert.
+ * @param MF The function the new instruction is created in.
+ * @param DL Debug location given to the new instruction.
+ * @return The newly built instruction.
+ */
+MachineInstr * AMDISAInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+    DebugLoc DL) const
+{
+  MachineRegisterInfo &regInfo = MF.getRegInfo();
+  const AMDISARegisterInfo &isaRegInfo = getRegisterInfo();
+  const unsigned isaOpcode = getISAOpcode(MI.getOpcode());
+
+  /* Create the new instruction */
+  MachineInstrBuilder builder =
+      BuildMI(MF, DL, TM.getInstrInfo()->get(isaOpcode));
+
+  for (unsigned idx = 0; idx < MI.getNumOperands(); ++idx) {
+    MachineOperand &operand = MI.getOperand(idx);
+
+    /* Convert dst regclass to one that is supported by the ISA */
+    const bool definesVirtualReg =
+        operand.isReg() && operand.isDef()
+        && TargetRegisterInfo::isVirtualRegister(operand.getReg());
+    if (definesVirtualReg) {
+      const TargetRegisterClass *amdilClass =
+          regInfo.getRegClass(operand.getReg());
+      const TargetRegisterClass *isaClass =
+          isaRegInfo.getISARegClass(amdilClass);
+
+      assert(isaClass);
+
+      regInfo.setRegClass(operand.getReg(), isaClass);
+    }
+
+    /* Add the operand to the new instruction */
+    builder.addOperand(operand);
+  }
+
+  return builder;
+}
+
+/* Translates an AMDIL opcode to its ISA opcode.
+ *
+ * @param opcode The AMDIL opcode to translate.
+ * @return The ISA opcode recorded for it in amdilToISA, or opcode itself
+ *         when the table has no entry for it.
+ */
+unsigned AMDISAInstrInfo::getISAOpcode(unsigned opcode) const
+{
+  /* One find() instead of count() followed by find(): same result, a single
+   * tree lookup. */
+  std::map<unsigned, unsigned>::const_iterator entry = amdilToISA.find(opcode);
+  if (entry == amdilToISA.end()) {
+    return opcode;
+  }
+  return entry->second;
+}
+
+/* @return true when the PRELOAD_REG bit is set in the TSFlags of MI's
+ *         instruction description. */
+bool AMDISAInstrInfo::isRegPreload(const MachineInstr &MI) const
+{
+  const MCInstrDesc &desc = get(MI.getOpcode());
+  return ((desc.TSFlags >> AMDISA_TFLAG_SHIFTS::PRELOAD_REG) & 0x1) != 0;
+}
+
+#include "AMDISAInstrEnums.inc"
diff --git a/src/gallium/drivers/radeon/AMDISAInstrInfo.h b/src/gallium/drivers/radeon/AMDISAInstrInfo.h
new file mode 100644
index 00000000000..73e61dba3c3
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAInstrInfo.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISAINSTRUCTIONINFO_H_
+#define AMDISAINSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "AMDISARegisterInfo.h"
+
+#include <map>
+
+
+namespace llvm {
+
+ class AMDISATargetMachine;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ /* Instruction info for the AMDISA backend.  Extends AMDILInstrInfo with a
+ * table that maps AMDIL opcodes to ISA opcodes.  The class is abstract:
+ * getRegisterInfo() is pure virtual. */
+ class AMDISAInstrInfo : public AMDILInstrInfo {
+ private:
+ AMDISATargetMachine & TM;
+ /* AMDIL opcode -> ISA opcode; filled in by the constructor. */
+ std::map<unsigned, unsigned> amdilToISA;
+
+ public:
+ explicit AMDISAInstrInfo(AMDISATargetMachine &tm);
+
+ virtual const AMDISARegisterInfo &getRegisterInfo() const = 0;
+
+ /* Returns the ISA opcode recorded for AMDILopcode, or AMDILopcode itself
+ * when there is no entry for it. */
+ unsigned getISAOpcode(unsigned AMDILopcode) const;
+
+ /* Builds a new instruction in MF with the ISA opcode for MI and a copy of
+ * MI's operands. */
+ MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+ /* True when MI's instruction description has the PRELOAD_REG flag set. */
+ bool isRegPreload(const MachineInstr &MI) const;
+
+ /* Pulls the generated member declarations into the class body.
+ * NOTE(review): presumably the output of the enum generator script in
+ * 'h' mode - confirm against the build rules. */
+ #include "AMDISAInstrEnums.h.inc"
+ };
+
+} // End llvm namespace
+
+/* AMDISA target flags are stored in bits 32-39 of an instruction's TSFlags.
+ * Each enumerator is the bit position (shift amount) of one flag. */
+namespace AMDISA_TFLAG_SHIFTS {
+ enum TFLAGS {
+ /* Bit 32: the instruction represents a preloaded register. */
+ PRELOAD_REG = 32
+ };
+}
+
+
+#endif // AMDISAINSTRUCTIONINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDISAInstructions.td b/src/gallium/drivers/radeon/AMDISAInstructions.td
new file mode 100644
index 00000000000..7c8e8e56f3c
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAInstructions.td
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+include "AMDISAInstrEnums.td"
+
+// Base class for AMDISA instructions.  Besides the usual operand lists, asm
+// string and selection pattern, it packs three target-specific fields into
+// TSFlags:
+//   bit  32     PreloadReg
+//   bits 40-42  Gen
+//   bits 48-63  AMDILOp
+class AMDISAInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ // AMDIL opcode identifier associated with this instruction; 0 by default.
+ field bits<16> AMDILOp = 0;
+ // Hardware generation tag.
+ field bits<3> Gen = 0;
+ // Set to 1 for instructions that represent a preloaded register.
+ field bit PreloadReg = 0;
+
+ let Namespace = "AMDIL";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let TSFlags{32} = PreloadReg;
+ let TSFlags{42-40} = Gen;
+ let TSFlags{63-48} = AMDILOp;
+}
+
+// AMDISAInst variant that also carries a 32-bit Inst field, preset to all
+// ones.
+// NOTE(review): 0xffffffff looks like a placeholder encoding for
+// instructions that are never emitted as-is - confirm.
+class AMDISAShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDISAInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+// Shader helper instructions.  Every def in this block has isCodeGenOnly set.
+let isCodeGenOnly = 1 in {
+
+ // No result, one GPRF32 source; selected from int_AMDISA_export_reg.
+ def EXPORT_REG : AMDISAShaderInst <
+ (outs),
+ (ins GPRF32:$src),
+ "EXPORT_REG $src",
+ [(int_AMDISA_export_reg GPRF32:$src)]
+ >;
+
+ // GPRF32 result from an immediate; no selection pattern.  Flagged as a
+ // register preload.
+ def LOAD_INPUT : AMDISAShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_INPUT $dst, $src",
+ [] >{
+ let PreloadReg = 1;
+ }
+
+ // No result, one GPRF32 source; no selection pattern.
+ def MASK_WRITE : AMDISAShaderInst <
+ (outs),
+ (ins GPRF32:$src),
+ "MASK_WRITE $src",
+ []
+ >;
+
+ // GPRF32 result from an immediate; selected from int_AMDISA_reserve_reg.
+ // Flagged as a register preload.
+ def RESERVE_REG : AMDISAShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "RESERVE_REG $dst, $src",
+ [(set GPRF32:$dst, (int_AMDISA_reserve_reg imm:$src))]> {
+ let PreloadReg = 1;
+ }
+
+ // GPRF32 result from a GPRF32 source and an immediate; selected from
+ // int_AMDISA_store_output.
+ def STORE_OUTPUT: AMDISAShaderInst <
+ (outs GPRF32:$dst),
+ (ins GPRF32:$src0, i32imm:$src1),
+ "STORE_OUTPUT $dst, $src0, $src1",
+ [(set GPRF32:$dst, (int_AMDISA_store_output GPRF32:$src0, imm:$src1))]
+ >;
+}
+
+include "R600Instructions.td"
+
+
diff --git a/src/gallium/drivers/radeon/AMDISAIntrinsics.td b/src/gallium/drivers/radeon/AMDISAIntrinsics.td
new file mode 100644
index 00000000000..6fc2b319e88
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAIntrinsics.td
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+// Intrinsics with the "AMDISA" target prefix.  Each Intrinsic<> lists its
+// result types, then its argument types, then an empty property list.
+let TargetPrefix = "AMDISA", isTarget = 1 in {
+
+ // Register, constant and output access.
+ def int_AMDISA_export_reg : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDISA_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
+ def int_AMDISA_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>;
+ def int_AMDISA_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
+ def int_AMDISA_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDISA_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>;
+
+ // Arithmetic, comparison and texture operations.
+ def int_AMDISA_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>;
+ def int_AMDISA_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>;
+ def int_AMDISA_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_kilp : Intrinsic<[], [], []>;
+ def int_AMDISA_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ // NOTE(review): the only def here that uses BinaryIntFloat (declared
+ // outside this file) instead of an explicit Intrinsic<> - confirm it has
+ // the same signature as the sibling comparison intrinsics.
+ def int_AMDISA_sge : BinaryIntFloat;
+ def int_AMDISA_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+ def int_AMDISA_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_AMDISA_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDISA_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDISA_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDISA_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_AMDISA_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+}
+
+// Intrinsics with the "TGSI" target prefix.
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ // Three float arguments, one float result.
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
+}
diff --git a/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.cpp
new file mode 100644
index 00000000000..8f79e48e6fe
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.cpp
@@ -0,0 +1,19 @@
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#include "AMDISALowerShaderInstructions.h"
+
+using namespace llvm;
+
+/* Associates virtReg with the preloaded physical register physReg.  The
+ * first request for a physical register records it as a live-in; later
+ * requests reuse the virtual register already attached to it. */
+void AMDISALowerShaderInstructionsPass::preloadRegister(
+    unsigned physReg, unsigned virtReg) const
+{
+  if (MRI->isLiveIn(physReg)) {
+    /* We can't mark the same register as preloaded twice, but we still must
+     * associate virtReg with the correct preloaded register. */
+    unsigned existingReg = MRI->getLiveInVirtReg(physReg);
+    MRI->replaceRegWith(virtReg, existingReg);
+    return;
+  }
+
+  MRI->addLiveIn(physReg, virtReg);
+}
diff --git a/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.h
new file mode 100644
index 00000000000..342c996f540
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISALowerShaderInstructions.h
@@ -0,0 +1,24 @@
+
+#ifndef AMDISA_LOWER_SHADER_INSTRUCTIONS
+#define AMDISA_LOWER_SHADER_INSTRUCTIONS
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+/* Helper base for passes that lower shader instructions; provides
+ * preloadRegister() on top of a MachineRegisterInfo pointer. */
+class AMDISALowerShaderInstructionsPass {
+
+ protected:
+ /* Register info used by preloadRegister().  Nothing in this class sets it.
+ * NOTE(review): presumably assigned by the derived pass before use - confirm. */
+ MachineRegisterInfo * MRI;
+ /**
+ * Records physReg as a live-in bound to virtReg, or, when physReg is
+ * already a live-in, replaces virtReg with the virtual register that is
+ * already bound to it.
+ *
+ * @param physReg The physical register that will be preloaded.
+ * @param virtReg The virtual register that currently holds the
+ * preloaded value.
+ */
+ void preloadRegister(unsigned physReg, unsigned virtReg) const;
+};
+
+} // end namespace llvm
+
+
+#endif // AMDISA_LOWER_SHADER_INSTRUCTIONS
diff --git a/src/gallium/drivers/radeon/AMDISARegisterInfo.cpp b/src/gallium/drivers/radeon/AMDISARegisterInfo.cpp
new file mode 100644
index 00000000000..fa8de081725
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISARegisterInfo.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "AMDISARegisterInfo.h"
+
+#include "AMDISATargetMachine.h"
+
+using namespace llvm;
+
+/* Forwards both arguments to the AMDIL base class and keeps references to
+ * the target machine and instruction info. */
+AMDISARegisterInfo::AMDISARegisterInfo(
+    AMDISATargetMachine &tm, const TargetInstrInfo &tii)
+  : AMDILRegisterInfo(tm, tii), TM(tm), TII(tii)
+{
+}
diff --git a/src/gallium/drivers/radeon/AMDISARegisterInfo.h b/src/gallium/drivers/radeon/AMDISARegisterInfo.h
new file mode 100644
index 00000000000..4f18e242105
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISARegisterInfo.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISAREGISTERINFO_H_
+#define AMDISAREGISTERINFO_H_
+
+#include "AMDILRegisterInfo.h"
+
+namespace llvm {
+
+ class AMDISATargetMachine;
+ class TargetInstrInfo;
+
+ /* Abstract register info for the AMDISA backend; concrete targets supply
+ * the three pure virtual methods below. */
+ struct AMDISARegisterInfo : public AMDILRegisterInfo
+ {
+ AMDISATargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDISARegisterInfo(AMDISATargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+
+ /* This is used to help calculate the index of a register. A return value
+ * of true means that the index of any register in this class may be
+ * calculated in this way:
+ * TargetRegisterClass * TRC;
+ * index = register - TRC->getRegister(0);
+ */
+ virtual bool isBaseRegClass(unsigned regClassID) const = 0;
+
+ /* Returns the register class the ISA uses in place of rc.
+ * convertToISA() asserts that the result is non-null. */
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const = 0;
+ };
+} // End namespace llvm
+
+#endif // AMDISAREGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDISARegisterInfo.td b/src/gallium/drivers/radeon/AMDISARegisterInfo.td
new file mode 100644
index 00000000000..e1d67e4ae28
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISARegisterInfo.td
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+include "R600RegisterInfo.td"
diff --git a/src/gallium/drivers/radeon/AMDISAReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDISAReorderPreloadInstructions.cpp
new file mode 100644
index 00000000000..7549dbe1717
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAReorderPreloadInstructions.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#include "AMDIL.h"
+#include "AMDISA.h"
+#include "AMDILInstrInfo.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+ /* Machine function pass that moves register-preload instructions to the
+ * start of the function's first basic block. */
+ class AMDISAReorderPreloadInstructionsPass : public MachineFunctionPass {
+
+ private:
+ /* Pass identifier handed to the MachineFunctionPass constructor. */
+ static char ID;
+ TargetMachine &TM;
+
+ public:
+ AMDISAReorderPreloadInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "AMDISA Reorder Preload Instructions"; }
+ };
+} /* End anonymous namespace */
+
+/* Storage for the pass identifier. */
+char AMDISAReorderPreloadInstructionsPass::ID = 0;
+
+/* Factory for the pass; the caller receives a newly allocated instance. */
+FunctionPass *llvm::createAMDISAReorderPreloadInstructionsPass(TargetMachine &tm)
+{
+  FunctionPass *pass = new AMDISAReorderPreloadInstructionsPass(tm);
+  return pass;
+}
+
+/* This pass moves instructions that represent preloaded registers to the
+ * start of the program.
+ *
+ * Every instruction for which isRegPreload() is true is unlinked from its
+ * block and re-inserted at the beginning of the function's first block.
+ *
+ * @param MF The machine function to process.
+ * @return true when at least one instruction was moved, false otherwise.
+ */
+bool AMDISAReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  const AMDISAInstrInfo * TII =
+      static_cast<const AMDISAInstrInfo*>(TM.getInstrInfo());
+  bool changed = false;
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    MachineBasicBlock::iterator I = MBB.begin();
+    while (I != MBB.end()) {
+      MachineInstr &MI = *I;
+      /* Step past MI before it may be unlinked, and never step past end():
+       * an empty block is simply skipped. */
+      ++I;
+      if (TII->isRegPreload(MI)) {
+        MF.front().insert(MF.front().begin(), MI.removeFromParent());
+        changed = true;
+      }
+    }
+  }
+
+  return changed;
+}
diff --git a/src/gallium/drivers/radeon/AMDISATargetMachine.cpp b/src/gallium/drivers/radeon/AMDISATargetMachine.cpp
new file mode 100644
index 00000000000..7fdf74b1518
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISATargetMachine.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "AMDISATargetMachine.h"
+
+#include "AMDILGlobalManager.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMCAsmInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDISA.h"
+#include "AMDISAISelLowering.h"
+#include "R600InstrInfo.h"
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_os_ostream.h"
+
+#include <iostream>
+
+using namespace llvm;
+
+/* Creates the MCAsmInfo for the triple string TT.  Every OS value maps to
+ * AMDILMCAsmInfo; the Target argument T is not used. */
+MCAsmInfo* llvm::createMCAsmInfo(const Target &T, StringRef TT)
+{
+  Triple TheTriple(TT);
+  return new AMDILMCAsmInfo(TheTriple);
+}
+
+/* Constructs the AMDISA target machine on top of AMDILTargetMachine.
+ *
+ * The initializers below are written in the order the members are declared
+ * in AMDISATargetMachine.h, because that is the order C++ runs them in
+ * regardless of how the list is written.
+ * NOTE(review): TLInfo is therefore constructed before InstrInfo -- confirm
+ * that the AMDISATargetLowering constructor does not call getInstrInfo() or
+ * getRegisterInfo() on this object. */
+AMDISATargetMachine::AMDISATargetMachine(const Target &T, StringRef TT,
+    StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM)
+:
+  AMDILTargetMachine(T, TT, CPU, FS, RM, CM),
+  Subtarget(TT, CPU, FS),
+  TLInfo(*this),
+  InstrInfo(new R600InstrInfo(*this)),
+  mGM(new AMDILGlobalManager(0 /* Debug mode */)),
+  mKM(new AMDILKernelManager(this, mGM)),
+  mDump(false)
+{
+  /* XXX: Add these two initializations to fix a segfault, not sure if this
+   * is correct.  These are normally initialized in the AsmPrinter, but AMDISA
+   * does not use the asm printer */
+  Subtarget.setGlobalManager(mGM);
+  Subtarget.setKernelManager(mKM);
+}
+
+/* Releases the managers allocated in the constructor.  They are destroyed in
+ * reverse construction order: the kernel manager was created with a pointer
+ * to the global manager, so it must go first. */
+AMDISATargetMachine::~AMDISATargetMachine()
+{
+  delete mKM;
+  delete mGM;
+}
+
+/* Adds the instruction selector by delegating to AMDILTargetMachine.
+ * No AMDISA-specific pass is added afterwards (the FixRegClasses pass is
+ * currently disabled), so the base-class result is returned unchanged. */
+bool AMDISATargetMachine::addInstSelector(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
+  // PM.add(createAMDISAFixRegClassesPass(*this)) is disabled for now.
+  return AMDILTargetMachine::addInstSelector(PM, OptLevel);
+}
+
+/* Adds the passes that run just before code emission: CFG preparation
+ * followed by the CFG structurizer.  Always returns false. */
+bool AMDISATargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ /* This is exactly the same as in AMDILTargetMachine, minus the
+ * SwizzleEncoder pass. */
+
+ PM.add(createAMDILCFGPreparationPass(*this, OptLevel));
+ PM.add(createAMDILCFGStructurizerPass(*this, OptLevel));
+// PM.add(createAMDILIOExpansion(*this, OptLevel));
+ return false;
+}
+
+/* Adds the passes that run before register allocation, in this order:
+ * literal manager, preload-instruction reordering, the two R600 lowering
+ * passes (only for device generations up to HD6XXX) and the conversion to
+ * ISA.  The AMDIL base-class hook is deliberately not called (see the
+ * disabled code below).  Always returns false. */
+bool AMDISATargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+// if (AMDILTargetMachine::addPreRegAlloc(PM, OptLevel)) {
+// return true;
+// }
+
+ PM.add(createAMDILLiteralManager(*this, OptLevel));
+ PM.add(createAMDISAReorderPreloadInstructionsPass(*this));
+ /* The R600 lowering passes are only added for HD6XXX and older. */
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600LowerShaderInstructionsPass(*this));
+ PM.add(createR600LowerInstructionsPass(*this));
+ }
+ PM.add(createAMDISAConvertToISAPass(*this));
+ return false;
+}
+
+/* Hook for passes that run right after register allocation.  No pass is
+ * added at the moment (the DelimitInstGroups pass is disabled).
+ *
+ * Returns false, like the other add* hooks of this class do when they
+ * complete normally. */
+bool AMDISATargetMachine::addPostRegAlloc(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
+// PM.add(createAMDISADelimitInstGroupsPass(*this));
+  return false;
+}
+
+/*XXX: We should use addPassesToEmitMC in llvm 3.0 */
+/* Populates PM with the complete code generation pipeline, from IR-level
+ * preparation passes through instruction selection, register allocation and
+ * the target hooks of this class, ending with the R600 code emitter that
+ * writes to Out.
+ *
+ * Returns true if the instruction selector could not be added, false once
+ * the whole pipeline has been added.  Device generations newer than HD6XXX
+ * are not supported and abort().
+ *
+ * NOTE(review): FileType is never read in this function. */
+bool AMDISATargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ switch (getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPass(getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(this));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ (MCObjectFileInfo*)&getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ // NOTE(review): OutContext is not used anywhere below.
+ MCContext &OutContext = MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(true));
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ PM.add(createMachineSinkingPass());
+
+ PM.add(createPeepholeOptimizerPass());
+ }
+
+ // Run pre-ra passes.
+ // NOTE(review): the bool results of addPreRegAlloc, addPostRegAlloc and
+ // addPreEmitPass are ignored in this function.
+ addPreRegAlloc(PM, OptLevel);
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator(OptLevel));
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ PM.add(createMachineLICMPass(false));
+
+ }
+
+ // Run post-ra passes.
+ addPostRegAlloc(PM, OptLevel);
+
+ PM.add(createExpandPostRAPseudosPass());
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+
+ // Run pre-sched2 passes.
+ addPreSched2(PM, OptLevel);
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(OptLevel));
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(false));
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ }
+
+ addPreEmitPass(PM, OptLevel);
+
+ // Only the R600 code emitter exists; it is used for generations up to
+ // HD6XXX, anything newer aborts.
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600CodeEmitterPass(Out));
+ } else {
+ abort();
+ return true;
+ }
+ PM.add(createGCInfoDeleter());
+
+ return false;
+}
diff --git a/src/gallium/drivers/radeon/AMDISATargetMachine.h b/src/gallium/drivers/radeon/AMDISATargetMachine.h
new file mode 100644
index 00000000000..6c3fdab8ac4
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISATargetMachine.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISA_TARGET_MACHINE_H
+#define AMDISA_TARGET_MACHINE_H
+
+#include "AMDILTargetMachine.h"
+
+#include "AMDISAInstrInfo.h"
+#include "AMDISAISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+//#include "llvm/Target/TargetFrameInfo.h"
+
+/*
+#include "AMDISA.h"
+#include "AMDISAFrameLowering.h"
+#include "AMDISAInstrInfo.h"
+#include "AMDISAISelLowering.h"
+#include "AMDISASelectionDAGInfo.h"
+#include "AMDISASubtarget.h"
+*/
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+/* Target machine for the AMDISA backend.  It extends AMDILTargetMachine with
+ * its own subtarget, target lowering and instruction info objects, and
+ * overrides the pass-pipeline hooks. */
+class AMDISATargetMachine : public AMDILTargetMachine {
+  /* Data members are initialized in the order they are declared here, not
+   * in the order a constructor's initializer list happens to be written. */
+  AMDILSubtarget Subtarget;
+  AMDISATargetLowering TLInfo;
+  OwningPtr<AMDISAInstrInfo> InstrInfo;
+  /* Raw pointers; the objects are released with delete in the destructor. */
+  AMDILGlobalManager *mGM;
+  AMDILKernelManager *mKM;
+  /* Set by dumpCode(), read by shouldDumpCode(). */
+  bool mDump;
+
+public:
+  /* The parameter order (TT, CPU, FS) matches the definition in
+   * AMDISATargetMachine.cpp. */
+  AMDISATargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                      StringRef FS, Reloc::Model RM, CodeModel::Model CM);
+  ~AMDISATargetMachine();
+
+  virtual const AMDISAInstrInfo *getInstrInfo() const {return InstrInfo.get();}
+  virtual const AMDILSubtarget *getSubtargetImpl() const {return &Subtarget; }
+  /* The register info is the one embedded in the instruction info. */
+  virtual const AMDISARegisterInfo *getRegisterInfo() const {
+    return &InstrInfo->getRegisterInfo();
+  }
+  /* Returns a non-const pointer to the lowering object from a const method,
+   * hence the const_cast. */
+  virtual AMDISATargetLowering * getTargetLowering() const {
+    return const_cast<AMDISATargetLowering*>(&TLInfo);
+  }
+
+  /* Pass-pipeline hooks; see the definitions for the passes each one adds. */
+  virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPassesToEmitFile(PassManagerBase &PM,
+                                   formatted_raw_ostream &Out,
+                                   CodeGenFileType FileType,
+                                   CodeGenOpt::Level OptLevel,
+                                   bool DisableVerify);
+
+  /* Sets the dump flag; it is never cleared again by this class. */
+  void dumpCode() { mDump = true; }
+  bool shouldDumpCode() const { return mDump; }
+};
+
+} /* End namespace llvm */
+
+#endif /* AMDISA_TARGET_MACHINE_H */
diff --git a/src/gallium/drivers/radeon/AMDISAUtil.cpp b/src/gallium/drivers/radeon/AMDISAUtil.cpp
new file mode 100644
index 00000000000..4580b544190
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAUtil.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+#include "AMDISAUtil.h"
+#include "AMDIL.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDISARegisterInfo.h"
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+/* Some instructions only stand in for operations that the GPU hardware
+ * performs automatically.  Returns true if opcode is one of those place
+ * holders (EXPORT_REG, LOAD_INPUT, LAST, RESERVE_REG), false otherwise. */
+bool llvm::isPlaceHolderOpcode(unsigned opcode)
+{
+  switch (opcode) {
+  case AMDIL::EXPORT_REG:
+  case AMDIL::LOAD_INPUT:
+  case AMDIL::LAST:
+  case AMDIL::RESERVE_REG:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/* For f32 registers, returns the element (X, Y, Z or W) of the v4f32 super
+ * register that regNo belongs to, computed as the hardware register number
+ * modulo 4.  Registers in the REPL or SPECIAL classes always yield 0.
+ */
+unsigned llvm::getRegElement(const AMDISARegisterInfo * TRI, unsigned regNo)
+{
+  const bool isReplOrSpecial =
+      AMDIL::REPLRegisterClass->contains(regNo)
+      || AMDIL::SPECIALRegisterClass->contains(regNo);
+
+  if (isReplOrSpecial) {
+    return 0;
+  }
+  return getHWRegNum(TRI, regNo) % 4;
+}
+
+/* Returns the offset of amdilRegNo from the first register of the first
+ * base register class (per TRI->isBaseRegClass) that contains it.
+ * Aborts if no such class exists. */
+unsigned llvm::getHWRegNum(const AMDISARegisterInfo * TRI, unsigned amdilRegNo)
+{
+  TargetRegisterInfo::regclass_iterator classIt = TRI->regclass_begin();
+  TargetRegisterInfo::regclass_iterator classEnd = TRI->regclass_end();
+
+  for (; classIt != classEnd; ++classIt) {
+    const TargetRegisterClass * regClass = *classIt;
+    if (!regClass->contains(amdilRegNo)) {
+      continue;
+    }
+    if (!TRI->isBaseRegClass(regClass->getID())) {
+      continue;
+    }
+    return amdilRegNo - regClass->getRegister(0);
+  }
+
+  /* The register is not a member of any base register class. */
+  abort();
+  return 0;
+}
+
+/* Returns the key of the 32-bit literal map entry whose mapped value equals
+ * literal_index.  Aborts if there is no such entry.
+ *
+ * XXX: This is not efficient (linear scan).  The best solution is to subclass
+ * AMDILMachineFunctionInfo and create our own map that reverses the keys and
+ * values of the literal map. */
+uint32_t llvm::getLiteral(AMDILMachineFunctionInfo * MFI, uint32_t literal_index)
+{
+  typedef std::map<uint32_t, uint32_t>::iterator LiteralIter;
+
+  LiteralIter entry = MFI->begin_32();
+  const LiteralIter last = MFI->end_32();
+  for (; entry != last; ++entry) {
+    if (entry->second != literal_index) {
+      continue;
+    }
+    return entry->first;
+  }
+
+  /* No literal is registered under this index. */
+  abort();
+  return 0;
+}
+
+/* Returns true if opcode is one of the opcodes listed below, false for
+ * everything else.
+ * NOTE(review): COS is listed with its _r600 and _eg variants but SIN only
+ * as SIN_f32 -- confirm whether further SIN variants exist and belong here. */
+bool llvm::isTransOp(unsigned opcode)
+{
+  switch(opcode) {
+  case AMDIL::COS_f32:
+  case AMDIL::COS_r600:
+  case AMDIL::COS_eg:
+  case AMDIL::RSQ_f32:
+  case AMDIL::FTOI:
+  case AMDIL::ITOF:
+  case AMDIL::MULLIT:
+  case AMDIL::MUL_LIT_r600:
+  case AMDIL::MUL_LIT_eg:
+  case AMDIL::SHR_i32:
+  case AMDIL::SIN_f32:
+  case AMDIL::EXP_f32:
+  case AMDIL::EXP_IEEE_r600:
+  case AMDIL::EXP_IEEE_eg:
+  case AMDIL::LOG_CLAMPED_r600:
+  case AMDIL::LOG_IEEE_r600:
+  case AMDIL::LOG_CLAMPED_eg:
+  case AMDIL::LOG_IEEE_eg:
+  case AMDIL::LOG_f32:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+/* Returns true if opcode is one of the TEX_SAMPLE* opcodes listed below,
+ * false for everything else. */
+bool llvm::isTexOp(unsigned opcode)
+{
+  switch(opcode) {
+  case AMDIL::TEX_SAMPLE:
+  case AMDIL::TEX_SAMPLE_C:
+  case AMDIL::TEX_SAMPLE_L:
+  case AMDIL::TEX_SAMPLE_C_L:
+  case AMDIL::TEX_SAMPLE_LB:
+  case AMDIL::TEX_SAMPLE_C_LB:
+  case AMDIL::TEX_SAMPLE_G:
+  case AMDIL::TEX_SAMPLE_C_G:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/* Returns true if opcode is DOT4_r600 or DOT4_eg, false for everything
+ * else. */
+bool llvm::isReductionOp(unsigned opcode)
+{
+  switch(opcode) {
+  case AMDIL::DOT4_r600:
+  case AMDIL::DOT4_eg:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/* Returns true if opcode is one of the if/else/loop/break/continue opcodes
+ * listed below, false for everything else. */
+bool llvm::isFCOp(unsigned opcode)
+{
+  switch(opcode) {
+  case AMDIL::BREAK_LOGICALZ_f32:
+  case AMDIL::CONTINUE_LOGICALNZ_f32:
+  case AMDIL::IF_LOGICALZ_f32:
+  case AMDIL::ELSE:
+  case AMDIL::ENDIF:
+  case AMDIL::ENDLOOP:
+  case AMDIL::IF_LOGICALNZ_f32:
+  case AMDIL::WHILELOOP:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/src/gallium/drivers/radeon/AMDISAUtil.h b/src/gallium/drivers/radeon/AMDISAUtil.h
new file mode 100644
index 00000000000..c421ee0ab0e
--- /dev/null
+++ b/src/gallium/drivers/radeon/AMDISAUtil.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef AMDISA_UTIL_H
+#define AMDISA_UTIL_H
+
+#include "llvm/Support/DataTypes.h"
+#include "AMDISARegisterInfo.h"
+
+namespace llvm {
+
+class AMDILMachineFunctionInfo;
+
+class TargetMachine;
+class TargetRegisterInfo;
+
+/* True for opcodes that only stand in for operations the hardware does
+ * automatically. */
+bool isPlaceHolderOpcode(unsigned opcode);
+
+/* Element (0-3) of the v4f32 super register that regNo belongs to. */
+unsigned getRegElement(const AMDISARegisterInfo * TRI, unsigned regNo);
+/* Offset of amdilRegNo within its base register class; aborts if the
+ * register is in no base class. */
+unsigned getHWRegNum(const AMDISARegisterInfo * TRI, unsigned amdilRegNo);
+
+/* Reverse lookup in the 32-bit literal map: returns the key stored for
+ * literal_index; aborts if there is none. */
+uint32_t getLiteral(AMDILMachineFunctionInfo * MFI, uint32_t literal_index);
+
+/* Opcode classification helpers; each returns true only for a fixed list of
+ * opcodes (see AMDISAUtil.cpp). */
+bool isTransOp(unsigned opcode);
+bool isTexOp(unsigned opcode);
+bool isReductionOp(unsigned opcode);
+bool isFCOp(unsigned opcode);
+
+/* XXX: Move these to AMDISAInstrInfo.h */
+/* Single-bit flag values.  These are preprocessor macros, so they are not
+ * scoped by the surrounding namespace.
+ * NOTE(review): presumably machine-operand modifier flags -- confirm at the
+ * use sites. */
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+
+} /* End namespace llvm */
+
+#endif /* AMDISA_UTIL_H */
diff --git a/src/gallium/drivers/radeon/LICENSE.TXT b/src/gallium/drivers/radeon/LICENSE.TXT
new file mode 100644
index 00000000000..2b34a76d4fc
--- /dev/null
+++ b/src/gallium/drivers/radeon/LICENSE.TXT
@@ -0,0 +1,48 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile
new file mode 100644
index 00000000000..1ccca6b879a
--- /dev/null
+++ b/src/gallium/drivers/radeon/Makefile
@@ -0,0 +1,68 @@
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+include Makefile.sources
+
+LIBNAME = radeon
+
+LIBRARY_INCLUDES = -I$(TOP)/include
+
+TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
+
+CXXFLAGS+= $(LLVM_CXXFLAGS)
+
+include ../../Makefile.template
+
+# Drop -DDEBUG from the C++ flags accumulated so far.
+# NOTE(review): presumably avoids a clash with a DEBUG macro used by the
+# LLVM headers -- confirm.
+CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
+
+tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
+
+# `gen` names an action, not a file: declare it phony so that a stray file
+# called "gen" can never make this target look up to date.
+.PHONY: gen
+gen: $(GENERATED_SOURCES)
+
+R600ShaderPatterns.td: AMDISAGenShaderPatterns.pl
+ $(PERL) $^ C > $@
+
+R600RegisterInfo.td: R600GenRegisterInfo.pl
+ $(PERL) $^ > $@
+
+AMDISAInstrEnums.td: AMDISAGenInstrEnums.pl
+ $(PERL) $^ td > $@
+
+AMDISAInstrEnums.h.inc: AMDISAGenInstrEnums.pl
+ $(PERL) $^ h > $@
+
+AMDISAInstrEnums.inc: AMDISAGenInstrEnums.pl
+ $(PERL) $^ inc > $@
+
+
+AMDILGenRegisterInfo.inc: *.td
+ $(call tablegen, -gen-register-info, AMDISA.td, $@)
+
+AMDILGenInstrInfo.inc: *.td
+ $(call tablegen, -gen-instr-info, AMDISA.td, $@)
+
+AMDILGenAsmWriter.inc: *.td
+ $(call tablegen, -gen-asm-writer, AMDISA.td, $@)
+
+AMDILGenDAGISel.inc: *.td
+ $(call tablegen, -gen-dag-isel, AMDISA.td, $@)
+
+AMDILGenCallingConv.inc: *.td
+ $(call tablegen, -gen-callingconv, AMDISA.td, $@)
+
+AMDILGenSubtarget.inc: *.td
+ $(call tablegen, -gen-subtarget, AMDISA.td, $@)
+
+AMDILGenEDInfo.inc: *.td
+ $(call tablegen, -gen-enhanced-disassembly-info, AMDISA.td, $@)
+
+AMDILGenIntrinsics.inc: *.td
+ $(call tablegen, -gen-tgt-intrinsic, AMDISA.td, $@)
+
+AMDISAGenCodeEmitter.inc: *.td
+ $(call tablegen, -gen-emitter, AMDISA.td, $@)
+
+# Pattern rule with an empty recipe: a .td file that has no explicit rule
+# above needs no commands to be "built".
+%.td: ;
+
+
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
new file mode 100644
index 00000000000..966a7d157e8
--- /dev/null
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -0,0 +1,78 @@
+
+GENERATED_SOURCES := \
+ R600ShaderPatterns.td \
+ R600RegisterInfo.td \
+ AMDISAInstrEnums.td \
+ AMDILGenRegisterInfo.inc \
+ AMDILGenInstrInfo.inc \
+ AMDILGenAsmWriter.inc \
+ AMDILGenDAGISel.inc \
+ AMDILGenCallingConv.inc \
+ AMDILGenSubtarget.inc \
+ AMDILGenEDInfo.inc \
+ AMDILGenIntrinsics.inc \
+ AMDISAGenCodeEmitter.inc \
+ AMDISAInstrEnums.h.inc \
+ AMDISAInstrEnums.inc
+
+CPP_SOURCES := \
+ AMDIL7XXDevice.cpp \
+ AMDIL7XXIOExpansion.cpp \
+ AMDIL789IOExpansion.cpp \
+ AMDILAsmBackend.cpp \
+ AMDILBarrierDetect.cpp \
+ AMDILCFGStructurizer.cpp \
+ AMDILDevice.cpp \
+ AMDILDeviceInfo.cpp \
+ AMDILEGIOExpansion.cpp \
+ AMDILEvergreenDevice.cpp \
+ AMDILELFWriterInfo.cpp \
+ AMDILFrameLowering.cpp \
+ AMDILGlobalManager.cpp \
+ AMDILImageExpansion.cpp \
+ AMDILInliner.cpp \
+ AMDILInstPrinter.cpp \
+ AMDILInstrInfo.cpp \
+ AMDILIntrinsicInfo.cpp \
+ AMDILIOExpansion.cpp \
+ AMDILISelDAGToDAG.cpp \
+ AMDILISelLowering.cpp \
+ AMDILKernelManager.cpp \
+ AMDILLiteralManager.cpp \
+ AMDILMachineFunctionInfo.cpp \
+ AMDILMachinePeephole.cpp \
+ AMDILMCAsmInfo.cpp \
+ AMDILMCCodeEmitter.cpp \
+ AMDILModuleInfo.cpp \
+ AMDILNIDevice.cpp \
+ AMDILPeepholeOptimizer.cpp \
+ AMDILPointerManager.cpp \
+ AMDILPrintfConvert.cpp \
+ AMDILRegisterInfo.cpp \
+ AMDILSubtarget.cpp \
+ AMDILSwizzleEncoder.cpp \
+ AMDILTargetMachine.cpp \
+ AMDILUtilityFunctions.cpp \
+ AMDILAsmPrinter.cpp \
+ AMDILEGAsmPrinter.cpp \
+ AMDIL7XXAsmPrinter.cpp \
+ macrodata.cpp \
+ AMDISATargetMachine.cpp \
+ AMDISADelimitInstGroups.cpp \
+ AMDISAFixRegClasses.cpp \
+ AMDISAISelLowering.cpp \
+ AMDISAConvertToISA.cpp \
+ AMDISALowerShaderInstructions.cpp \
+ AMDISAReorderPreloadInstructions.cpp \
+ AMDISAInstrInfo.cpp \
+ AMDISARegisterInfo.cpp \
+ AMDISAUtil.cpp \
+ R600CodeEmitter.cpp \
+ R600InstrInfo.cpp \
+ R600LowerInstructions.cpp \
+ R600LowerShaderInstructions.cpp \
+ R600RegisterInfo.cpp \
+ radeon_llvm_emit.cpp
+
+C_SOURCES := \
+ radeon_setup_tgsi_llvm.c
diff --git a/src/gallium/drivers/radeon/Processors.td b/src/gallium/drivers/radeon/Processors.td
new file mode 100644
index 00000000000..e8e9148d5ea
--- /dev/null
+++ b/src/gallium/drivers/radeon/Processors.td
@@ -0,0 +1,66 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+
+// Shorthand for declaring a processor: a Processor record with the given
+// name and feature list that always uses NoItineraries.
+class Proc<string Name, list<SubtargetFeature> Features>
+: Processor<Name, NoItineraries, Features>;
+def : Proc<"rv710", []>;
+def : Proc<"rv730", []>;
+def : Proc<"rv770", [FeatureFP64]>;
+def : Proc<"cedar", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
new file mode 100644
index 00000000000..453e4536067
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -0,0 +1,752 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "AMDILInstrInfo.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDISA.h"
+#include "AMDISAUtil.h"
+
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 10
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+  /* XXX: Temp HACK to work around tablegen name generation */
+  class AMDILCodeEmitter {
+  public:
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+  };
+
+  class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+  /* Machine function pass that serialises every instruction as a byte record on _OS. */
+  private:
+
+    static char ID;
+    formatted_raw_ostream &_OS;
+    const TargetMachine * TM;
+    const MachineRegisterInfo * MRI;
+    AMDILMachineFunctionInfo * MFI;
+    const AMDISARegisterInfo * TRI;
+    bool evergreenEncoding;
+    /* State used while a reduction op is expanded into four ALU records. */
+    bool isReduction;
+    unsigned reductionElement;
+    bool isLast;
+
+  public:
+    /* Every member is initialised here, in declaration order; the pointers are filled in by runOnMachineFunction. */
+    R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+        _OS(OS), TM(NULL), MRI(NULL), MFI(NULL), TRI(NULL), evergreenEncoding(false),
+        isReduction(false), reductionElement(0), isLast(true) { }
+
+    const char *getPassName() const { return "AMDISA Machine Code Emitter"; }
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    /* Record emitters, one per instruction kind, plus their operand helpers. */
+    void emitALUInstr(MachineInstr &MI);
+    void emitSrc(const MachineOperand & MO);
+    void emitDst(const MachineOperand & MO);
+    void emitALU(MachineInstr &MI, unsigned numSrc);
+    void emitTexInstr(MachineInstr &MI);
+    void emitFCInstr(MachineInstr &MI);
+
+    unsigned int getHWInst(const MachineInstr &MI);
+
+    void emitNullBytes(unsigned int byteCount);
+
+    void emitByte(unsigned int byte);
+
+    void emitTwoBytes(uint32_t bytes);
+
+    void emit(uint32_t value);
+
+    unsigned getHWReg(unsigned regNo);
+
+    /* Element of MI's first operand, or -1 when the instruction has none to report. */
+    int getElement(MachineInstr &MI);
+
+};
+
+} /* End anonymous namespace */
+
+#define WRITE_MASK_X 0x1
+#define WRITE_MASK_Y 0x2
+#define WRITE_MASK_Z 0x4
+#define WRITE_MASK_W 0x8
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0, /* value of the first byte of a record written by emitALUInstr */
+ INSTR_TEX, /* value of the first byte of a record written by emitTexInstr */
+ INSTR_FC /* value of the first byte of a record written by emitFCInstr */
+};
+
+enum FCInstr {
+ FC_IF = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK,
+ FC_CONTINUE
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+static bool isTrans(unsigned int opcode);
+
+static unsigned int getLastBit(unsigned int writeMask);
+
+static RegElement maskBitToElement(unsigned int maskBit);
+
+static unsigned int dstSwizzleToWriteMask(unsigned swizzle);
+
+static unsigned getRegElement(unsigned swizzle,
+ unsigned int writeMaskBit);
+
+
+
+
+char R600CodeEmitter::ID = 0;
+
+FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
+ return new R600CodeEmitter(OS); /* the pass keeps OS by reference, so OS must outlive the pass */
+}
+
+
+bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ /* Emit one byte record per machine instruction of MF to _OS; always returns false. */
+ TM = &MF.getTarget();
+ MRI = &MF.getRegInfo();
+ MFI = MF.getInfo<AMDILMachineFunctionInfo>();
+ TRI = static_cast<const AMDISARegisterInfo *>(TM->getRegisterInfo());
+ const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
+ std::string gpu = STM.getDeviceName();
+ if (!gpu.compare(0,3, "rv7")) { /* true when the device name starts with "rv7" */
+ evergreenEncoding = false;
+ } else {
+ evergreenEncoding = true;
+ }
+ const AMDISATargetMachine *amdtm =
+ static_cast<const AMDISATargetMachine *>(&MF.getTarget());
+
+ if (amdtm->shouldDumpCode()) {
+ MF.dump();
+ }
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (isTexOp(MI.getOpcode())) {
+ emitTexInstr(MI);
+ } else if (isFCOp(MI.getOpcode())){
+ emitFCInstr(MI);
+ } else if (isReductionOp(MI.getOpcode())) {
+ isReduction = true;
+ isLast = false;
+ for (reductionElement = 0; reductionElement < 4; reductionElement++) { /* one ALU record per element */
+ isLast = (reductionElement == 3); /* only the fourth record is flagged as last */
+ emitALUInstr(MI);
+ }
+ isReduction = false;
+ } else if (MI.getOpcode() == AMDIL::RETURN) {
+ continue;
+ } else {
+ emitALUInstr(MI);
+ }
+ }
+ }
+ return false;
+}
+
+void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
+{
+  /* Emits one ALU record: type byte, three source slots, destination, then the ALU word. */
+  unsigned numOperands = MI.getNumOperands();
+
+  /* Some instructions are just place holder instructions that represent
+   * operations that the GPU does automatically. They should be ignored. */
+  if (isPlaceHolderOpcode(MI.getOpcode())) {
+    return;
+  }
+
+  /* We need to handle some opcodes differently */
+  switch (MI.getOpcode()) {
+  default: break;
+
+  /* Custom swizzle instructions, ignore the last two operands */
+  case AMDIL::SET_CHAN:
+    numOperands = 2;
+    break;
+
+  case AMDIL::VEXTRACT_v4f32:
+    numOperands = 2;
+    break;
+
+  /* XXX: Temp Hack */
+  case AMDIL::STORE_OUTPUT:
+    numOperands = 2;
+    break;
+  }
+
+  /* XXX Check if instruction writes a result */
+  if (numOperands < 1) {
+    return;
+  }
+  const MachineOperand &dstOp = MI.getOperand(0);
+  unsigned int writemask = dstSwizzleToWriteMask(dstOp.getTargetFlags());
+
+  if (isTrans(MI.getOpcode())) {
+    writemask = WRITE_MASK_X;
+  }
+  /* XXX: writemask is computed above but is not yet written into the record. */
+  /* Emit instruction type */
+  emitByte(INSTR_ALU);
+
+  unsigned int opIndex;
+  for (opIndex = 1; opIndex < numOperands; opIndex++) {
+    emitSrc(MI.getOperand(opIndex));
+// fprintf(stderr, "Src %u -> bytes in buffer = %u\n", opIndex - 1, _OS.GetNumBytesInBuffer());
+  }
+
+  /* Emit zeros for unused sources */
+  for ( ; opIndex < 4; opIndex++) {
+    emitNullBytes(SRC_BYTE_COUNT);
+// fprintf(stderr, "Src %u -> bytes in buffer = %u\n", opIndex - 1, _OS.GetNumBytesInBuffer());
+  }
+
+  emitDst(dstOp);
+// fprintf(stderr, "Dst -> bytes in buffer = %u\n", _OS.GetNumBytesInBuffer());
+
+  emitALU(MI, numOperands - 1);
+// fprintf(stderr, "ALU -> bytes in buffer = %u\n", _OS.GetNumBytesInBuffer());
+}
+
+void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+{
+  /* Emits one source slot of SRC_BYTE_COUNT bytes: select (2), channel (1),
+   * negate (1), absolute (1), relative addressing (1) and literal (4).
+   * The literal stays zero unless the operand is an immediate. */
+  uint32_t value = 0;
+  /* Emit the source select (2 bytes). For GPRs, this is the register index.
+   * For other potential instruction operands, (e.g. constant registers) the
+   * value of the source select is defined in the r600isa docs. */
+  if (MO.isReg()) {
+    emitTwoBytes(getHWReg(MO.getReg()));
+  } else if (MO.isImm()) {
+    /* 253 is the ALU_SRC_LITERAL source select in the R600 ISA. */
+    emitTwoBytes(253);
+    value = getLiteral(MFI, MO.getImm());
+  } else {
+    /* XXX: Handle other operand types. */
+    emitTwoBytes(0);
+  }
+
+  /* Emit the source channel (1 byte) */
+  if (isReduction) {
+    emitByte(reductionElement);
+  } else if (MO.isReg()) {
+    const MachineInstr * parent = MO.getParent();
+    /* The source channel for EXTRACT is stored in operand 2. */
+    if (parent->getOpcode() == AMDIL::VEXTRACT_v4f32) {
+      emitByte(parent->getOperand(2).getImm());
+    } else {
+      emitByte(getRegElement(TRI, MO.getReg()));
+    }
+  } else {
+    emitByte(0);
+  }
+
+  /* XXX: Emit isNegated (1 byte) */
+  if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
+      && (MO.getTargetFlags() & MO_FLAG_NEG ||
+          (MO.isReg() &&
+           (MO.getReg() == AMDIL::NEG_ONE || MO.getReg() == AMDIL::NEG_HALF)))){
+    emitByte(1);
+  } else {
+    emitByte(0);
+  }
+
+  /* Emit isAbsolute (1 byte) */
+  if (MO.getTargetFlags() & MO_FLAG_ABS) {
+    emitByte(1);
+  } else {
+    emitByte(0);
+  }
+
+  /* XXX: Emit relative addressing mode (1 byte) */
+  emitByte(0);
+
+  /* Emit the literal value, if applicable (4 bytes). */
+  emit(value);
+
+}
+
+void R600CodeEmitter::emitDst(const MachineOperand & MO)
+{
+ if (MO.isReg()) { /* register destinations get five one-byte fields; anything else is zero-filled below */
+ /* Emit the destination register index (1 byte) */
+ emitByte(getHWReg(MO.getReg()));
+
+ /* Emit the element of the destination register (1 byte)*/
+ const MachineInstr * parent = MO.getParent();
+ if (isReduction) {
+ emitByte(reductionElement);
+
+ /* The destination element for SET_CHAN is stored in the 3rd operand. */
+ } else if (parent->getOpcode() == AMDIL::SET_CHAN) {
+ emitByte(parent->getOperand(2).getImm());
+ } else if (parent->getOpcode() == AMDIL::VCREATE_v4f32) {
+ emitByte(ELEMENT_X);
+ } else {
+ emitByte(getRegElement(TRI, MO.getReg()));
+ }
+
+ /* Emit isClamped (1 byte) */
+ if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* Emit writemask (1 byte): 0 when MO_FLAG_MASK is set or when this reduction slot is not the destination's element, 1 otherwise. */
+ if ((isReduction && reductionElement != getRegElement(TRI, MO.getReg()))
+ || MO.getTargetFlags() & MO_FLAG_MASK) {
+ emitByte(0);
+ } else {
+ emitByte(1);
+ }
+
+ /* XXX: Emit relative addressing mode */
+ emitByte(0);
+ } else {
+ /* XXX: Handle other operand types. Are there any for destination regs? */
+ emitNullBytes(DST_BYTE_COUNT);
+ }
+}
+
+void R600CodeEmitter::emitALU(MachineInstr &MI, unsigned numSrc)
+{ /* Writes 8 bytes: opcode (2), isLast, isOp3, predicate, bank swizzle, bank_swizzle_force, OMOD. */
+ /* Emit the instruction (2 bytes) */
+ emitTwoBytes(getHWInst(MI));
+
+ /* Emit isLast (for this instruction group) (1 byte) */
+ if (isLast) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+ /* Emit isOp3 (1 byte) */
+ if (numSrc == 3) {
+ emitByte(1);
+ } else {
+ emitByte(0);
+ }
+
+ /* XXX: Emit predicate (1 byte) */
+ emitByte(0);
+
+ /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
+ * r600_asm.c sets it. */
+ emitByte(0);
+
+ /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */
+ emitByte(0);
+
+ /* XXX: Emit OMOD (1 byte) Not implemented. */
+ emitByte(0);
+}
+
+void R600CodeEmitter::emitTexInstr(MachineInstr &MI)
+{
+  /* Emits a 24-byte texture record; operand 2 is the sampler, operand 3 the texture type. */
+  int64_t sampler = MI.getOperand(2).getImm();
+  int64_t textureType = MI.getOperand(3).getImm();
+  unsigned opcode = MI.getOpcode();
+  unsigned srcSelect[4] = {0, 1, 2, 3};
+
+  /* Emit instruction type */
+  emitByte(INSTR_TEX);
+
+  /* Emit instruction */
+  emitByte(getHWInst(MI));
+
+  /* XXX: Emit resource id r600_shader.c uses sampler + 1. Why? */
+  emitByte(sampler + 1);
+
+  /* Emit source register */
+  emitByte(getHWReg(MI.getOperand(1).getReg()));
+
+  /* XXX: Emit src isRelativeAddress */
+  emitByte(0);
+
+  /* Emit destination register */
+  emitByte(getHWReg(MI.getOperand(0).getReg()));
+
+  /* XXX: Emit dst isRelativeAddress */
+  emitByte(0);
+
+  /* XXX: Emit dst select */
+  emitByte(0); /* X */
+  emitByte(1); /* Y */
+  emitByte(2); /* Z */
+  emitByte(3); /* W */
+
+  /* XXX: Emit lod bias */
+  emitByte(0);
+
+  /* XXX: Emit coord types */
+  unsigned coordType[4] = {1, 1, 1, 1};
+
+  if (textureType == TEXTURE_RECT
+      || textureType == TEXTURE_SHADOWRECT) {
+    coordType[ELEMENT_X] = 0;
+    coordType[ELEMENT_Y] = 0;
+  }
+
+  if (textureType == TEXTURE_1D_ARRAY
+      || textureType == TEXTURE_SHADOW1D_ARRAY) {
+    if (opcode == AMDIL::TEX_SAMPLE_C_L || opcode == AMDIL::TEX_SAMPLE_C_LB) {
+      coordType[ELEMENT_Y] = 0;
+    } else {
+      coordType[ELEMENT_Z] = 0;
+      srcSelect[ELEMENT_Z] = ELEMENT_Y;
+    }
+  } else if (textureType == TEXTURE_2D_ARRAY
+             || textureType == TEXTURE_SHADOW2D_ARRAY) {
+    coordType[ELEMENT_Z] = 0;
+  }
+
+  for (unsigned i = 0; i < 4; i++) {
+    emitByte(coordType[i]);
+  }
+
+  /* XXX: Emit offsets */
+  emitByte(0); /* X */
+  emitByte(0); /* Y */
+  emitByte(0); /* Z */
+  /* There is no OFFSET_W */
+
+  /* Emit sampler id */
+  emitByte(sampler);
+
+  /* XXX:Emit source select */
+  if ((textureType == TEXTURE_SHADOW1D
+       || textureType == TEXTURE_SHADOW2D
+       || textureType == TEXTURE_SHADOWRECT
+       || textureType == TEXTURE_SHADOW1D_ARRAY)
+      && opcode != AMDIL::TEX_SAMPLE_C_L
+      && opcode != AMDIL::TEX_SAMPLE_C_LB) {
+    srcSelect[ELEMENT_W] = ELEMENT_Z;
+  }
+
+  for (unsigned i = 0; i < 4; i++) {
+    emitByte(srcSelect[i]);
+  }
+}
+
+void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
+{ /* Emits a flow-control record: type byte, one source slot, then the FCInstr value. */
+ /* Emit instruction type */
+ emitByte(INSTR_FC);
+
+ /* Emit SRC */
+ unsigned numOperands = MI.getNumOperands();
+ if (numOperands > 0) {
+ assert(numOperands == 1);
+ emitSrc(MI.getOperand(0));
+ } else {
+ emitNullBytes(SRC_BYTE_COUNT); /* keeps the record size fixed when there is no operand */
+ }
+
+ /* Emit FC Instruction */
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDIL::BREAK_LOGICALZ_f32:
+ instr = FC_BREAK;
+ break;
+ case AMDIL::CONTINUE_LOGICALNZ_f32:
+ instr = FC_CONTINUE;
+ break;
+ /* XXX: This assumes that all IFs will be if (x != 0). If we add
+ * optimizations this might not be the case */
+ case AMDIL::IF_LOGICALNZ_f32:
+ instr = FC_IF;
+ break;
+ case AMDIL::IF_LOGICALZ_f32:
+ abort(); /* the (x == 0) form of IF has no encoding here; see the comment above */
+ break;
+ case AMDIL::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDIL::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDIL::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDIL::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort(); /* any other opcode classified as flow control is unsupported */
+ break;
+ }
+ emitByte(instr);
+}
+
+#define INSTR_FLOAT2_V(inst, hw) \
+ case AMDIL:: inst##_v4f32: \
+ case AMDIL:: inst##_v2f32: return HW_INST2(hw);
+
+#define INSTR_FLOAT2_S(inst, hw) \
+ case AMDIL:: inst##_f32: return HW_INST2(hw);
+
+#define INSTR_FLOAT2(inst, hw) \
+ INSTR_FLOAT2_V(inst, hw) \
+ INSTR_FLOAT2_S(inst, hw)
+
+unsigned int R600CodeEmitter::getHWInst(const MachineInstr &MI)
+{
+ unsigned hwInst = getBinaryCodeForInstr(MI);
+ if (hwInst != 0xffffffff) { /* presumably 0xffffffff marks "no tablegen encoding" — confirm */
+ return hwInst;
+ }
+
+ /* XXX: Lower these to MOV before the code emitter. */
+ switch (MI.getOpcode()) {
+ case AMDIL::STORE_OUTPUT:
+ case AMDIL::VCREATE_v4i32:
+ case AMDIL::VCREATE_v4f32:
+ case AMDIL::VEXTRACT_v4f32:
+ case AMDIL::VINSERT_v4f32:
+ case AMDIL::LOADCONST_i32:
+ case AMDIL::LOADCONST_f32:
+ case AMDIL::MOVE_v4i32:
+ case AMDIL::SET_CHAN:
+ /* Instructions to reinterpret bits as ... */
+ case AMDIL::IL_ASINT_f32:
+ case AMDIL::IL_ASINT_i32:
+ case AMDIL::IL_ASFLOAT_f32:
+ case AMDIL::IL_ASFLOAT_i32:
+ return 0x19; /* the MOV opcode these pseudo instructions are emitted as (see XXX above) */
+
+ default:
+ fprintf(stderr, "Unhandled opcode: %s\n", MI.getDesc().getName());
+ abort();
+ return 0; /* not reached; follows abort() */
+ }
+}
+
+void R600CodeEmitter::emitNullBytes(unsigned int byteCount)
+{
+  for (unsigned int left = byteCount; left != 0; --left) {
+    emitByte(0); /* zero padding */
+  }
+}
+
+//void R600CodeEmitter::emitByte(unsigned int byte) { }
+
+void R600CodeEmitter::emitByte(unsigned int byte)
+{
+  _OS.write((uint8_t) (byte & 0xff)); /* only the low eight bits are written */
+}
+void R600CodeEmitter::emitTwoBytes(uint32_t bytes)
+{
+  _OS.write((uint8_t) (bytes & 0xff));        /* bits 0..7 first */
+  _OS.write((uint8_t) ((bytes >> 8) & 0xff)); /* then bits 8..15; higher bits are dropped */
+}
+
+void R600CodeEmitter::emit(uint32_t value)
+{
+  for (unsigned shift = 0; shift < 32; shift += 8) { /* least significant byte first */
+    _OS.write((uint8_t) ((value >> shift) & 0xff));
+  }
+}
+
+unsigned R600CodeEmitter::getHWReg(unsigned regNo)
+{
+ unsigned hwReg;
+
+ if (AMDIL::SPECIALRegClass.contains(regNo)) { /* inline constants map to fixed source selects */
+ switch(regNo) {
+ case AMDIL::ZERO: return 248; /* ALU_SRC_0 in the R600 ISA */
+ case AMDIL::ONE:
+ case AMDIL::NEG_ONE: return 249; /* ALU_SRC_1; emitSrc sets the negate byte for NEG_ONE */
+ case AMDIL::HALF:
+ case AMDIL::NEG_HALF: return 252; /* ALU_SRC_0_5; emitSrc sets the negate byte for NEG_HALF */
+ default:
+ abort(); /* any other SPECIAL register has no mapping here */
+ return 0;
+ }
+ }
+
+ hwReg = getHWRegNum(TRI, regNo);
+ /* XXX: Clean this up */
+ if (AMDIL::REPLRegClass.contains(regNo)) {
+ return hwReg; /* REPL registers are returned without the division below */
+ }
+ hwReg = hwReg / 4; /* presumably four scalar registers per hardware register — confirm */
+ if (AMDIL::CRRegClass.contains(regNo)) {
+ hwReg += 512; /* CR (constant) registers are biased by 512 */
+ }
+ return hwReg;
+}
+
+int R600CodeEmitter::getElement(MachineInstr &MI)
+{
+  /* No operands, or a non-register first operand: nothing to report. */
+  if (MI.getNumOperands() == 0 || !MI.getOperand(0).isReg()) {
+    return -1;
+  }
+
+  /* EXPORT_REG and SWIZZLE also report -1. */
+  const unsigned opcode = MI.getOpcode();
+  if (opcode == AMDIL::EXPORT_REG || opcode == AMDIL::SWIZZLE) {
+    return -1;
+  }
+  return getRegElement(TRI, MI.getOperand(0).getReg());
+}
+
+bool isTrans(unsigned int opcode)
+{
+  /* RSQVEC_v4f32 is the only opcode reported as "trans" here;
+   * emitALUInstr reduces the write mask of such opcodes to X. */
+  const bool transOnly = (opcode == AMDIL::RSQVEC_v4f32);
+  return transOnly;
+}
+
+
+unsigned int getLastBit(unsigned int writeMask)
+{
+  /* Scan bits 3..0 downwards and return the highest one set in writeMask; 0 if none is set. */
+  for (int i = 3; i >= 0; i--) {
+    unsigned int bit = 1u << i;
+    if (bit & writeMask) {
+      return bit;
+    }
+  }
+  return 0;
+}
+
+RegElement maskBitToElement(unsigned int maskBit)
+{
+  switch (maskBit) {
+  case WRITE_MASK_X: return ELEMENT_X;
+  case WRITE_MASK_Y: return ELEMENT_Y;
+  case WRITE_MASK_Z: return ELEMENT_Z;
+  case WRITE_MASK_W: return ELEMENT_W;
+  default: /* maskBit must be exactly one WRITE_MASK_* bit */
+    assert(!"Invalid maskBit"); /* negated: a bare string literal is always true and never fires */
+    return ELEMENT_X; /* fallback when assertions are compiled out */
+  }
+}
+
+unsigned int dstSwizzleToWriteMask(unsigned swizzle)
+{ /* Maps an AMDIL_DST_SWIZZLE_* value to the matching OR of WRITE_MASK_* bits. */
+ switch(swizzle) {
+ default: /* unlisted swizzle values fall through and enable all four channels */
+ case AMDIL_DST_SWIZZLE_DEFAULT:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X___:
+ return WRITE_MASK_X;
+ case AMDIL_DST_SWIZZLE_XY__:
+ return WRITE_MASK_X | WRITE_MASK_Y;
+ case AMDIL_DST_SWIZZLE_XYZ_:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE_XYZW:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE__Y__:
+ return WRITE_MASK_Y;
+ case AMDIL_DST_SWIZZLE__YZ_:
+ return WRITE_MASK_Y | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE__YZW:
+ return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE___Z_:
+ return WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE___ZW:
+ return WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE____W:
+ return WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X_ZW:
+ return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_XY_W:
+ return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE_X_Z_:
+ return WRITE_MASK_X | WRITE_MASK_Z;
+ case AMDIL_DST_SWIZZLE_X__W:
+ return WRITE_MASK_X | WRITE_MASK_W;
+ case AMDIL_DST_SWIZZLE__Y_W:
+ return WRITE_MASK_Y | WRITE_MASK_W;
+ }
+}
+
+unsigned getRegElement(unsigned swizzle, unsigned int writeMaskBit)
+{
+  /* swizzle is read as one 2-bit field per channel; extract the field for this mask bit. */
+  const unsigned shift = 2 * maskBitToElement(writeMaskBit);
+  return (swizzle >> shift) & 0x3;
+}
+
+#include "AMDISAGenCodeEmitter.inc"
+
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
new file mode 100644
index 00000000000..853a90c99c2
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
@@ -0,0 +1,132 @@
+#
+# Copyright 2011 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors: Tom Stellard <thomas.stellard@amd.com>
+#
+
+use strict;
+use warnings;
+
+use AMDISAConstants;
+
+my $REPL_REG_COUNT = 100;
+
+print <<STRING;
+
+class AMDISAReg <bits<16> value, string name> : Register<name> {
+ field bits<16> Value;
+ let Value = value;
+ let Namespace = "AMDIL";
+}
+
+class AMDISAInputReg <bits<16> value, string name, Register gprAlias> :
+ AMDISAReg<value, name> {
+
+ let Aliases = [gprAlias];
+}
+
+STRING
+
+my $i;
+
+### INPUT REGS ###
+
+my @ireg_list;
+for ($i = 0; $i < INPUT_REG_COUNT; $i++) {
+ print input_reg($i);
+ $ireg_list[$i] = "I$i";
+}
+
+print "\n";
+print 'def IR : RegisterClass <"AMDIL", [f32], 32, (sequence "I%u", 0, ', INPUT_REG_COUNT - 1, ")>;\n\n";
+
+### CONSTANT REGS ###
+
+my @creg_list;
+for ($i = 0; $i < CONST_REG_COUNT; $i++) {
+ print const_reg($i);
+ $creg_list[$i] = "C$i";
+}
+print 'def CR : RegisterClass <"AMDIL", [f32], 32, (sequence "C%u", 0, ', CONST_REG_COUNT - 1, ")>;\n";
+
+sub input_reg {
+ my ($index) = @_; # zero-based input register number
+ return sprintf(qq{def I%d : AMDISAInputReg <%d, "I%d", R%d>;\n}, $index, $index, $index, $index + 1); # I<n> is declared as an alias of R<n+1>
+}
+
+sub const_reg {
+ my ($index) = @_;
+ return sprintf(qq{def C%d : AMDISAReg <%d, "C%d">;\n}, $index, $index, $index);
+}
+
+print <<STRING;
+
+let Namespace = "AMDIL" in {
+def sel_x : SubRegIndex;
+def sel_y : SubRegIndex;
+def sel_z : SubRegIndex;
+def sel_w : SubRegIndex;
+}
+
+class AMDISARegWithSubReg<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDIL";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+}
+
+STRING
+
+### REPL REGS ###
+my @repl_reg_list;
+
+for (my $i = 0; $i < $REPL_REG_COUNT; $i++) {
+ print repl_reg($i);
+ $repl_reg_list[$i] = "REPL$i";
+}
+
+print 'def REPL : RegisterClass<"AMDIL", [v4f32], 128, (sequence "REPL%u", 0, ', $REPL_REG_COUNT - 1, ')';
+print ">;\n\n";
+
+sub repl_reg {
+ my ($index) = @_;
+ # REPL<n> is named "R<n>.xyzw" and lists R<4n+1>..R<4n+4> as its four sub-registers (sel_x..sel_w order).
+ return sprintf(qq{def REPL%d : AMDISARegWithSubReg<"R%d.xyzw", [R%d, R%d, R%d, R%d]>;\n},
+ $index, $index, ($index * 4) + 1, ($index * 4) + 2, ($index * 4) + 3, ($index * 4) + 4);
+}
+
+print <<STRING;
+
+def ADDR0 : AMDILReg<870, "addr0">;
+
+def RELADDR : RegisterClass<"AMDIL", [i32], 32,
+ (add ADDR0)
+>;
+
+def ZERO : AMDILReg<871, "0.0">;
+def HALF : AMDILReg<872, "0.5">;
+def ONE : AMDILReg<873, "1.0">;
+def NEG_HALF : AMDILReg<874, "-0.5">;
+def NEG_ONE : AMDILReg<875, "-1.0">;
+def PV_X : AMDILReg<876, "pv.x">;
+
+def SPECIAL : RegisterClass<"AMDIL", [f32], 32, (add ZERO, HALF, ONE, NEG_HALF, NEG_ONE, PV_X)>;
+
+STRING
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp
new file mode 100644
index 00000000000..b31f16ae189
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "R600InstrInfo.h"
+
+#include "R600RegisterInfo.h"
+#include "AMDISATargetMachine.h"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDISATargetMachine &tm)
+ : AMDISAInstrInfo(tm),
+ RI(tm, *this),
+ TM(tm)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
+{
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const
+{
+  return (get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG) != 0; /* true when the TRIG flag bit is set */
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{ /* Builds a MOV_f32 from SrcReg to DestReg at MI; the same opcode is used whatever the registers' class. */
+ BuildMI(MBB, MI, DL, get(AMDIL::MOV_f32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h
new file mode 100644
index 00000000000..54dbcc528ae
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "R600RegisterInfo.h"
+
+#include <map>
+
+
+namespace llvm {
+
+ struct InstrGroup {
+ unsigned amdil;
+ unsigned r600;
+ unsigned eg;
+ unsigned cayman;
+ };
+
+ class AMDISATargetMachine;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDISAInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+ AMDISATargetMachine &TM;
+
+ public:
+ explicit R600InstrInfo(AMDISATargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+
+ };
+
+} // End llvm namespace
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4), /* tested by R600InstrInfo::isTrig(); set through TSFlags{4} in R600Instructions.td */
+ OP3 = (1 << 5) /* set through TSFlags{5} in R600Instructions.td */
+ };
+}
+
+#endif // R600INSTRUCTIONINFO_H_
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
new file mode 100644
index 00000000000..761dc25af7f
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -0,0 +1,546 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+include "R600Intrinsics.td"
+
+/* Base class of every R600 instruction definition in this file.
+ *   inst    - 32-bit value stored in the Inst field
+ *   outs    - output operand list
+ *   ins     - input operand list
+ *   asm     - assembly string
+ *   pattern - selection pattern(s)
+ * The Trig and Op3 bits default to 0 and are exported through TSFlags. */
+class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDISAInst <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+
+ let Inst = inst;
+ let Namespace = "AMDIL";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+
+ /* Bit positions match R600_InstFlag::TRIG (1 << 4) and
+  * R600_InstFlag::OP3 (1 << 5) in R600InstrInfo.h. */
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+}
+
+class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
+ RegisterClass rcDst, RegisterClass rcSrc> :
+ InstR600 <inst,
+ (outs rcDst:$dst),
+ (ins rcSrc:$src),
+ !strconcat(opName, " $dst, $src"),
+ pattern
+ >;
+
+class R600_1OP_Float <bits<32> inst, string opName, list<dag> pattern> :
+ R600_1OP <inst, opName, pattern, GPRF32, GPRF32>;
+
+class R600_1OP_Int <bits<32> inst, string opName, list<dag> pattern> :
+ R600_1OP <inst, opName, pattern, GPRI32, GPRI32>;
+
+class R600_2OP <bits<32> inst, string opName, list<dag> pattern> :
+ InstR600 <inst,
+ (outs GPRF32:$dst),
+ (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(opName, " $dst, $src0, $src1"),
+ pattern
+ >;
+
+/* Three-source instruction on GPRF32 registers.  Sets the Op3 bit that
+ * InstR600 exports through TSFlags.
+ *   inst    - opcode value
+ *   opName  - mnemonic used to build the assembly string
+ *   pattern - selection pattern(s)
+ */
+class R600_3OP <bits<32> inst, string opName, list<dag> pattern> :
+  InstR600 <inst,
+            (outs GPRF32:$dst),
+            (ins GPRF32:$src0, GPRF32:$src1, GPRF32:$src2),
+            /* The leading space keeps the mnemonic apart from the first
+             * operand, matching the format of R600_1OP and R600_2OP. */
+            !strconcat(opName, " $dst, $src0, $src1, $src2"),
+            pattern>{
+
+  let Op3 = 1;
+}
+
+class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern> :
+ InstR600 <inst,
+ (outs GPRF32:$dst),
+ ins,
+ asm,
+ pattern
+ >;
+
+/* Texture instruction: reads a REPL register plus two i32 immediates and
+ * writes a REPL register.
+ *   inst    - opcode value
+ *   opName  - mnemonic used to build the assembly string
+ *   pattern - selection pattern(s)
+ */
+class R600_TEX <bits<32> inst, string opName, list<dag> pattern> :
+  InstR600 <inst,
+            (outs REPL:$dst),
+            (ins REPL:$src0, i32imm:$src1, i32imm:$src2),
+            /* The leading space keeps the mnemonic apart from the first
+             * operand, matching the format of R600_1OP and R600_2OP. */
+            !strconcat(opName, " $dst, $src0, $src1, $src2"),
+            pattern
+  >;
+
+/* Matches an immediate whose zero-extended value is 6, 7, 8, 11 or 12.
+ * The TEX_SAMPLE_C* definitions use it on $src2 to pick the compare
+ * variants of the texture instructions.
+ * NOTE(review): the numbers are presumably the shadow texture target IDs
+ * passed by the front end -- confirm against the code that emits them. */
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
+ }]
+>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDILDeviceInfo::HD4XXX">;
+def isEG : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDILDeviceInfo::HD5XXX">;
+def isCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDILDeviceInfo::HD6XXX">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDILDeviceInfo::HD5XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() >= AMDILDeviceInfo::HD4XXX"
+ " && Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX">;
+
+
+let Predicates = [isR600toCayman] in {
+
+/* ------------------------------------------- */
+/* Common Instructions R600, R700, Evergreen, Cayman */
+/* ------------------------------------------- */
+let Gen = AMDISAGen.R600_CAYMAN in {
+
+def ADD : R600_2OP <
+ 0x0, "ADD",
+ [(set GPRF32:$dst, (fadd GPRF32:$src0, GPRF32:$src1))] > {
+ let AMDILOp = AMDILInst.ADD_f32;
+}
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP <
+ 0x1, "MUL NON-IEEE",
+ [(set GPRF32:$dst, (int_AMDISA_mul GPRF32:$src0, GPRF32:$src1))]
+>;
+
+def MUL_IEEE : R600_2OP <
+ 0x2, "MUL_IEEE",
+ [(set GPRF32:$dst, (fmul GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.MUL_IEEE_f32;
+}
+
+def MAX : R600_2OP <
+ 0x3, "MAX",
+ [(set GPRF32:$dst, (int_AMDIL_max GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.MAX_f32;
+}
+
+def MIN : R600_2OP <
+ 0x4, "MIN",
+ [(set GPRF32:$dst, (int_AMDIL_min GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.MIN_f32;
+}
+
+/* For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+ * so some of the instruction names don't match the asm string.
+ * XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+ */
+
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set GPRF32:$dst, (int_AMDISA_seq GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.FEQ;
+}
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set GPRF32:$dst, (int_AMDISA_sgt GPRF32:$src0, GPRF32:$src1))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set GPRF32:$dst, (int_AMDISA_sge GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.FGE;
+}
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set GPRF32:$dst, (int_AMDISA_sne GPRF32:$src0, GPRF32:$src1))]> {
+ let AMDILOp = AMDILInst.FNE;
+}
+
+def FRACT : R600_1OP_Float <
+ 0x10, "FRACT",
+ []> {
+ let AMDILOp = AMDILInst.FRAC_f32;
+}
+
+def TRUNC : R600_1OP_Float <
+ 0x11, "TRUNC",
+ [(set GPRF32:$dst, (int_AMDISA_trunc GPRF32:$src))]
+>;
+
+def FLOOR : R600_1OP_Float <
+ 0x14, "FLOOR",
+ [(set GPRF32:$dst, (int_AMDISA_floor GPRF32:$src))]
+>;
+
+multiclass MOV_Multi {
+ def _f32 : R600_1OP_Float <0x19, "MOV", []> {
+ let AMDILOp = AMDILInst.MOVE_f32;
+ }
+ def _i32 : R600_1OP_Int <0x19, "MOV", []> {
+ let AMDILOp = AMDILInst.MOVE_i32;
+ }
+ def _f32_i32 : R600_1OP <0x19, "MOV", [], GPRF32, GPRI32>;
+
+}
+
+defm MOV : MOV_Multi;
+
+def KILLGT : R600_2OP <
+ 0x2D, "KILLGT",
+ []
+>;
+
+def AND_INT : R600_2OP <
+ 0x30, "AND_INT",
+ []> {
+ let AMDILOp = AMDILInst.AND_i32;
+}
+
+/* Texture instructions */
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set REPL:$dst, (int_AMDISA_tex REPL:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set REPL:$dst, (int_AMDISA_tex REPL:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set REPL:$dst, (int_AMDISA_txl REPL:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set REPL:$dst, (int_AMDISA_txl REPL:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set REPL:$dst, (int_AMDISA_txb REPL:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set REPL:$dst, (int_AMDISA_txb REPL:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ [(set REPL:$dst, (int_AMDISA_txd REPL:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ [(set REPL:$dst, (int_AMDISA_txd REPL:$src0, imm:$src1, TEX_SHADOW:$src2))]
+>;
+
+} // End Gen R600_CAYMAN
+
+def KILP : Pat <
+ (int_AMDISA_kilp),
+ (MASK_WRITE (KILLGT ONE, ZERO))
+>;
+
+/* Helper classes for common instructions */
+
+class MUL_LIT_Common <bits<32> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<32> inst> : R600_3OP <
+ inst, "MULADD",
+ []> {
+ let AMDILOp = AMDILInst.MAD_f32;
+}
+
+class CNDE_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDE",
+ []> {
+ let AMDILOp = AMDILInst.CMOVLOG_f32;
+}
+
+class CNDGT_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDGT",
+ []
+>;
+
+class CNDGE_Common <bits<32> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set GPRF32:$dst, (int_AMDISA_cndlt GPRF32:$src0, GPRF32:$src2, GPRF32:$src1))]
+>;
+
+class DOT4_Common <bits<32> inst> : R600_REDUCTION <
+ inst,
+ (ins REPL:$src0, REPL:$src1),
+ "DOT4 $dst $src0, $src1",
+ [(set GPRF32:$dst, (int_AMDISA_dp4 REPL:$src0, REPL:$src1))]
+>;
+
+class EXP_IEEE_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "EXP_IEEE",
+ []> {
+ let AMDILOp = AMDILInst.EXP_f32;
+}
+
+class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
+ inst, "FLT_TO_INT", [], GPRI32, GPRF32> {
+ let AMDILOp = AMDILInst.FTOI;
+}
+
+class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
+ inst, "INT_TO_FLT", [], GPRF32, GPRI32> {
+ let AMDILOp = AMDILInst.ITOF;
+}
+
+class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "LOG_CLAMPED",
+ []
+>;
+
+class LOG_IEEE_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "LOG_IEEE",
+ []> {
+ let AMDILOp = AMDILInst.LOG_f32;
+}
+
+class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "RECIP_CLAMPED",
+ []
+>;
+
+/* RECIP_IEEE: selected for the int_AMDISA_rcp intrinsic.
+ * NOTE(review): AMDILOp is AMDILInst.RSQ_f32 although the mnemonic and the
+ * matched intrinsic are both a plain reciprocal, not a reciprocal square
+ * root -- confirm that this mapping is intended. */
+class RECIP_IEEE_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "RECIP_IEEE",
+ [(set GPRF32:$dst, (int_AMDISA_rcp GPRF32:$src))]> {
+ let AMDILOp = AMDILInst.RSQ_f32;
+}
+
+class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "RECIPSQRT_CLAMPED",
+ [(set GPRF32:$dst, (int_AMDISA_rsq GPRF32:$src))]
+>;
+
+class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "RECIPSQRT_IEEE",
+ []
+>;
+
+class SIN_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "SIN",
+ []>{
+ let AMDILOp = AMDILInst.SIN_f32;
+ let Trig = 1;
+}
+
+class COS_Common <bits<32> inst> : R600_1OP_Float <
+ inst, "COS",
+ []> {
+ let AMDILOp = AMDILInst.COS_f32;
+ let Trig = 1;
+}
+
+/* Helper patterns for complex intrinsics */
+/* -------------------------------------- */
+
+class DIV_Common <InstR600 recip_ieee> : Pat<
+ (int_AMDISA_div GPRF32:$src0, GPRF32:$src1),
+ (MUL GPRF32:$src0, (recip_ieee GPRF32:$src1))
+>;
+
+class LRP_Common <InstR600 muladd> : Pat <
+ (int_AMDISA_lrp GPRF32:$src0, GPRF32:$src1, GPRF32:$src2),
+ (muladd GPRF32:$src0, GPRF32:$src1, (MUL (SUB_f32 ONE, GPRF32:$src0), GPRF32:$src2))
+>;
+
+class POW_Common <InstR600 log_ieee, InstR600 exp_ieee> : Pat <
+ (int_AMDISA_pow GPRF32:$src0, GPRF32:$src1),
+ (exp_ieee (MUL GPRF32:$src1, (log_ieee GPRF32:$src0)))
+>;
+
+class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
+ (int_AMDISA_ssg GPRF32:$src),
+ (cndgt GPRF32:$src, ONE, (cndge GPRF32:$src, ZERO, NEG_ONE))
+>;
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z GPRF32:$src_x, GPRF32:$src_y, GPRF32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX GPRF32:$src_y, ZERO)), GPRF32:$src_w, GPRF32:$src_x))
+>;
+
+/* ---------------------- */
+/* R600 / R700 Only Instructions */
+/* ---------------------- */
+
+let Predicates = [isR600] in {
+
+let Gen = AMDISAGen.R600 in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ def DOT4_r600 : DOT4_Common<0x50>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+
+} // End AMDISAGen.R600
+
+ def DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def LRP_r600 : LRP_Common<MULADD_r600>;
+ def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600>;
+ def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+}
+
+/* ----------------- */
+/* R700+ Trig helper */
+/* ----------------- */
+
+/*
+class TRIG_HELPER_r700 <InstR600 trig_inst>: Pat <
+ (trig_inst GPRF32:$src),
+ (trig_inst (fmul GPRF32:$src, (PI))))
+>;
+*/
+
+/* ------------------------------- */
+/* Evergreen / Cayman Instructions */
+/* ------------------------------- */
+
+let Predicates = [isEGorCayman] in {
+
+class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
+ (intr GPRF32:$src),
+ (trig (MUL (MOV_f32_i32 (LOADCONST_i32 CONST.TWO_PI_INV)), GPRF32:$src))
+>;
+
+let Gen = AMDISAGen.EG_CAYMAN in {
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50>;
+ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+ def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+ def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+ def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+ def SIN_eg : SIN_Common<0x8D>;
+ def COS_eg : COS_Common<0x8E>;
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+ def DOT4_eg : DOT4_Common<0xBE>;
+
+} // End AMDISAGen.EG_CAYMAN
+
+ def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+ def LRP_eg : LRP_Common<MULADD_eg>;
+ def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg>;
+ def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>;
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def : TRIG_eg <SIN_eg, int_AMDISA_sin>;
+ def : TRIG_eg <COS_eg, int_AMDISA_cos>;
+
+}
+/* Other Instructions */
+
+let isCodeGenOnly = 1 in {
+
+ def ARL : AMDISAShaderInst <
+ (outs RELADDR:$dst),
+ (ins GPRF32:$src),
+ "ARL $dst, $src",
+ [(set RELADDR:$dst, (int_AMDISA_arl GPRF32:$src))]
+ >;
+
+ def SWIZZLE : AMDISAShaderInst <
+ (outs GPRV4F32:$dst),
+ (ins GPRV4F32:$src0, i32imm:$src1),
+ "SWIZZLE $dst, $src0, $src1",
+ [(set GPRV4F32:$dst, (int_AMDISA_swizzle GPRV4F32:$src0, imm:$src1))]
+ >;
+
+
+ def LAST : AMDISAShaderInst <
+ (outs),
+ (ins),
+ "LAST",
+ []
+ >;
+
+ def GET_CHAN : AMDISAShaderInst <
+ (outs GPRF32:$dst),
+ (ins REPL:$src0, i32imm:$src1),
+ "GET_CHAN $dst, $src0, $src1",
+ []
+ >;
+
+ def SET_CHAN : AMDISAShaderInst <
+ (outs REPL:$dst),
+ (ins GPRF32:$src0, i32imm:$src1),
+ "SET_CHAN $dst, $src0, $src1",
+ []
+ >;
+
+  /* Code-gen-only instruction selected for int_AMDISA_mullit: three GPRF32
+   * sources, one REPL result.  The assembly string lists all three source
+   * operands. */
+  def MULLIT : AMDISAShaderInst <
+    (outs REPL:$dst),
+    (ins GPRF32:$src0, GPRF32:$src1, GPRF32:$src2),
+    "MULLIT $dst, $src0, $src1, $src2",
+    [(set REPL:$dst, (int_AMDISA_mullit GPRF32:$src0, GPRF32:$src1, GPRF32:$src2))]
+  >;
+
+}
+
+include "R600ShaderPatterns.td"
+
+// We need this pattern to avoid having real registers in PHI nodes.
+// For some reason this pattern only works when it comes after the other
+// instruction defs.
+def : Pat <
+ (int_R600_load_input imm:$src),
+ (LOAD_INPUT imm:$src)
+>;
+
+} // End isR600toCayman Predicate
diff --git a/src/gallium/drivers/radeon/R600Intrinsics.td b/src/gallium/drivers/radeon/R600Intrinsics.td
new file mode 100644
index 00000000000..39d06e5c06d
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600Intrinsics.td
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+/* R600-specific intrinsics.
+ * int_R600_load_input takes one i32 argument and returns a float;
+ * R600Instructions.td selects it to LOAD_INPUT.
+ * NOTE(review): it is tagged IntrReadWriteArgMem although it has no pointer
+ * argument -- presumably to stop it being treated as a pure value; confirm. */
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
new file mode 100644
index 00000000000..8d8ad221e70
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include "AMDIL.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDISA.h"
+#include "AMDISAInstrInfo.h"
+#include "AMDISAUtil.h"
+
+#include "R600InstrInfo.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+  /* Machine function pass that rewrites AMDIL instructions into forms the
+   * R600 backend handles; all of the work is done in runOnMachineFunction().
+   */
+  class R600LowerInstructionsPass : public MachineFunctionPass {
+
+  private:
+    static char ID;
+    TargetMachine &TM;
+    /* Set at the start of every runOnMachineFunction() call; NULL until
+     * then. */
+    AMDILMachineFunctionInfo * MFI;
+
+  public:
+    R600LowerInstructionsPass(TargetMachine &tm) :
+      MachineFunctionPass(ID), TM(tm), MFI(NULL) { }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  };
+} /* End anonymous namespace */
+
+/* Storage for the pass ID that is passed to the MachineFunctionPass
+ * constructor. */
+char R600LowerInstructionsPass::ID = 0;
+
+/* Factory: returns a newly allocated R600LowerInstructionsPass bound to tm. */
+FunctionPass *llvm::createR600LowerInstructionsPass(TargetMachine &tm) {
+ return new R600LowerInstructionsPass(tm);
+}
+
+/* Lower the AMDIL instructions handled by the switch below.
+ *
+ * Every instruction of MF is visited once.  A case that ends in `break`
+ * builds its replacement in front of the instruction and/or tags operands
+ * with MO_FLAG_* target flags, and the instruction is then erased at the
+ * bottom of the loop.  A case that ends in `continue` leaves the instruction
+ * in place.  CLAMP_f32 with bounds other than [0.0, 1.0] and VINSERT_v4f32
+ * with an unexpected swizzle call abort().
+ *
+ * Always returns false.
+ */
+bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  MachineRegisterInfo & MRI = MF.getRegInfo();
+  MFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  const R600InstrInfo * TII =
+                        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = I;
+                                     I != MBB.end(); I = Next) {
+      /* Fetch the successor only while I refers to a real instruction.  The
+       * current instruction may be erased below, and advancing an iterator
+       * that already equals MBB.end() (empty block, or after the last
+       * instruction) is not valid. */
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+
+      switch(MI.getOpcode()) {
+      /* a < b is emitted as FGE with the two sources swapped. */
+      case AMDIL::FLT:
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FGE))
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(2))
+                .addOperand(MI.getOperand(1));
+        break;
+
+      /* XXX: We could propagate the ABS flag to all of the uses of Operand0 and
+       * remove the ABS instruction.*/
+      case AMDIL::FABS_f32:
+      case AMDIL::ABS_f32:
+        MI.getOperand(1).addTargetFlag(MO_FLAG_ABS);
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::MOVE_f32))
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(1));
+        break;
+
+      /* Convert both sources to integer, OR them, convert the result back. */
+      case AMDIL::BINARY_OR_f32:
+        {
+          unsigned tmp0 = MRI.createVirtualRegister(&AMDIL::GPRI32RegClass);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp0)
+                  .addOperand(MI.getOperand(1));
+          unsigned tmp1 = MRI.createVirtualRegister(&AMDIL::GPRI32RegClass);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::FTOI), tmp1)
+                  .addOperand(MI.getOperand(2));
+          unsigned tmp2 = MRI.createVirtualRegister(&AMDIL::GPRI32RegClass);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::BINARY_OR_i32), tmp2)
+                  .addReg(tmp0)
+                  .addReg(tmp1);
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::ITOF), MI.getOperand(0).getReg())
+                  .addReg(tmp2);
+          break;
+        }
+
+      /* Re-emit the conditional move with operands 2 and 3 swapped. */
+      case AMDIL::CMOVLOG_f32:
+      case AMDIL::CMOVLOG_i32:
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(MI.getOpcode()))
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(1))
+                .addOperand(MI.getOperand(3))
+                .addOperand(MI.getOperand(2));
+        break;
+
+      case AMDIL::CLAMP_f32:
+        {
+          uint32_t zero = (uint32_t)APFloat(0.0f).bitcastToAPInt().getZExtValue();
+          uint32_t one = (uint32_t)APFloat(1.0f).bitcastToAPInt().getZExtValue();
+          uint32_t low = getLiteral(MFI, MI.getOperand(2).getImm());
+          uint32_t high = getLiteral(MFI, MI.getOperand(3).getImm());
+          if (low == zero && high == one) {
+            MachineInstr *def = NULL;
+            /* Even though we are in SSA, it is possible for a register to have
+             * more than one def.  This occurs when an instruction writes to an
+             * output register that has also been used as an input register.
+             * This is only an issue when dealing with graphics shaders.
+             * Take the first def that is not a placeholder, or the last def
+             * seen if all of them are placeholders. */
+            for (MachineRegisterInfo::def_iterator DI =
+                 MRI.def_begin(MI.getOperand(1).getReg()), DE = MRI.def_end();
+                 DI != DE; ++DI) {
+              def = &*DI;
+              if (!isPlaceHolderOpcode(def->getOpcode())) {
+                break;
+              }
+            }
+            assert(def);
+            MI.getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
+            BuildMI(MBB, I, MBB.findDebugLoc(I),
+                    TII->get(TII->getISAOpcode(AMDIL::MOVE_f32)))
+                    .addOperand(MI.getOperand(0))
+                    .addReg(def->getOperand(0).getReg());
+          } else {
+            /* XXX: Handle other cases */
+            abort();
+          }
+          break;
+        }
+      /* XXX: Figure out the semantics of DIV_INF_f32 and make sure this is OK */
+/*    case AMDIL::DIV_INF_f32:
+        {
+          unsigned tmp0 = MRI.createVirtualRegister(&AMDIL::GPRF32RegClass);
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                          TM.getInstrInfo()->get(AMDIL::RECIP_CLAMPED), tmp0)
+                  .addOperand(MI.getOperand(2));
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                          TM.getInstrInfo()->get(AMDIL::MUL_IEEE_f32))
+                  .addOperand(MI.getOperand(0))
+                  .addReg(tmp0)
+                  .addOperand(MI.getOperand(1));
+          break;
+        }
+*/    /* XXX: This is an optimization */
+      case AMDIL::LOADCONST_f32:
+      case AMDIL::LOADCONST_i32:
+        {
+          bool canDelete = true;
+          MachineOperand * use = MI.getOperand(0).getNextOperandForReg();
+          while (use) {
+            MachineOperand * next = use->getNextOperandForReg();
+            /* XXX: assert(next->isUse()) */
+            /* XXX: Having immediates in MOV instructions (maybe others) causes
+             * the register allocator to eliminate them when there are IF
+             * statements.  I'm not sure why this is happening, so for now we
+             * only propagate immediates when they are needed by CLAMP
+             * instructions.
+             */
+            if (use->getParent()->getOpcode() != AMDIL::CLAMP_f32) {
+              canDelete = false;
+            } else {
+              use->ChangeToImmediate(MI.getOperand(1).getImm());
+            }
+            use = next;
+          }
+          /* Keep the LOADCONST while any non-CLAMP user still reads it. */
+          if (!canDelete) {
+            continue;
+          }
+          break;
+        }
+
+      /* Flag the def of the masked register and drop the MASK_WRITE. */
+      case AMDIL::MASK_WRITE:
+        {
+          unsigned maskedRegister = MI.getOperand(0).getReg();
+          assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+          MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+          MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
+          def->addTargetFlag(MO_FLAG_MASK);
+          break;
+        }
+
+      /* Only the immediate is adjusted (decremented by one); the instruction
+       * itself stays. */
+      case AMDIL::VEXTRACT_v4f32:
+        MI.getOperand(2).setImm(MI.getOperand(2).getImm() - 1);
+        continue;
+
+      case AMDIL::NEG_f32:
+      case AMDIL::NEGATE_i32:
+        {
+          MI.getOperand(1).addTargetFlag(MO_FLAG_NEG);
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                  TII->get(TII->getISAOpcode(AMDIL::MOV_f32)))
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+          break;
+        }
+
+      /* a - b is emitted as an ADD whose second source carries the NEG flag. */
+      case AMDIL::SUB_f32:
+        {
+          MI.getOperand(2).addTargetFlag(MO_FLAG_NEG);
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                  TII->get(TII->getISAOpcode(AMDIL::ADD_f32)))
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1))
+                  .addOperand(MI.getOperand(2));
+          break;
+        }
+
+      case AMDIL::VINSERT_v4f32:
+        {
+
+          int64_t swz = MI.getOperand(4).getImm();
+          int64_t chan;
+          switch (swz) {
+          case (1 << 0):
+            chan = 0;
+            break;
+          case (1 << 8):
+            chan = 1;
+            break;
+          case (1 << 16):
+            chan = 2;
+            break;
+          case (1 << 24):
+            chan = 3;
+            break;
+          default:
+            chan = 0;
+            /* swz is 64 bits wide, so it must not be printed with %d. */
+            fprintf(stderr, "swizzle: %lld\n", (long long)swz);
+            abort();
+            break;
+          }
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                          TM.getInstrInfo()->get(AMDIL::SET_CHAN))
+                  .addOperand(MI.getOperand(1))
+                  .addOperand(MI.getOperand(2))
+                  .addImm(chan);
+
+          BuildMI(MBB, I, MBB.findDebugLoc(I),
+                          TM.getInstrInfo()->get(AMDIL::COPY))
+                  .addOperand(MI.getOperand(0))
+                  .addOperand(MI.getOperand(1));
+          break;
+        }
+
+      default:
+        continue;
+      }
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
diff --git a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp
new file mode 100644
index 00000000000..e7157128836
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#include "AMDIL.h"
+#include "AMDISA.h"
+#include "AMDISALowerShaderInstructions.h"
+#include "AMDILInstrInfo.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+ /* Machine function pass that lowers the shader pseudo instructions
+  * (RESERVE_REG, EXPORT_REG, LOAD_INPUT, STORE_OUTPUT, SWIZZLE) for R600.
+  * NOTE(review): the MRI member and preloadRegister() used by the method
+  * definitions are not declared here; presumably they are inherited from
+  * AMDISALowerShaderInstructionsPass -- confirm. */
+ class R600LowerShaderInstructionsPass : public MachineFunctionPass,
+ public AMDISALowerShaderInstructionsPass {
+
+ private:
+ static char ID;
+ TargetMachine &TM;
+
+ /* Declared only: this file contains no definition of
+  * lowerEXPORT_REG_FAKE and never calls it. */
+ void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+ void lowerLOAD_INPUT(MachineInstr & MI);
+ /* Returns whether the caller should erase MI afterwards. */
+ bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+ void lowerSWIZZLE(MachineInstr &MI);
+
+ public:
+ R600LowerShaderInstructionsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Lower Shader Instructions"; }
+ };
+} /* End anonymous namespace */
+
+/* Storage for the pass ID that is passed to the MachineFunctionPass
+ * constructor. */
+char R600LowerShaderInstructionsPass::ID = 0;
+
+/* Factory: returns a newly allocated R600LowerShaderInstructionsPass bound
+ * to tm. */
+FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) {
+ return new R600LowerShaderInstructionsPass(tm);
+}
+
+/* Helpers that expand to `case` labels for the _v4f32 and/or _f32 variant of
+ * an AMDIL opcode.  No macro ends in a line continuation, so the text that
+ * follows a definition can never be spliced into it. */
+#define INSTR_CASE_FLOAT_V(inst) \
+  case AMDIL:: inst##_v4f32:
+
+#define INSTR_CASE_FLOAT_S(inst) \
+  case AMDIL:: inst##_f32:
+
+#define INSTR_CASE_FLOAT(inst) \
+  INSTR_CASE_FLOAT_V(inst) \
+  INSTR_CASE_FLOAT_S(inst)
+/* Lower the shader pseudo instructions of MF.
+ *
+ * RESERVE_REG and EXPORT_REG are simply removed.  LOAD_INPUT and SWIZZLE are
+ * handed to their lower*() helper and then removed.  STORE_OUTPUT is removed
+ * when lowerSTORE_OUTPUT() says so.  Once all blocks have been processed the
+ * live-in copies are emitted.  Always returns false.
+ */
+bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
+{
+  MRI = &MF.getRegInfo();
+
+  for (MachineFunction::iterator block = MF.begin(), blockEnd = MF.end();
+       block != blockEnd; ++block) {
+    MachineBasicBlock &MBB = *block;
+    MachineBasicBlock::iterator I = MBB.begin();
+
+    while (I != MBB.end()) {
+      MachineInstr &MI = *I;
+      bool erase;
+
+      switch (MI.getOpcode()) {
+      case AMDIL::RESERVE_REG:
+      case AMDIL::EXPORT_REG:
+        erase = true;
+        break;
+
+      case AMDIL::LOAD_INPUT:
+        lowerLOAD_INPUT(MI);
+        erase = true;
+        break;
+
+      case AMDIL::STORE_OUTPUT:
+        erase = lowerSTORE_OUTPUT(MI, MBB, I);
+        break;
+
+      case AMDIL::SWIZZLE:
+        lowerSWIZZLE(MI);
+        erase = true;
+        break;
+
+      default:
+        erase = false;
+        break;
+      }
+
+      /* Step past MI before it is possibly erased. */
+      ++I;
+
+      if (erase) {
+        MI.eraseFromParent();
+      }
+    }
+  }
+
+  MRI->EmitLiveInCopies(MF.begin(), *TM.getRegisterInfo(), *TM.getInstrInfo());
+
+  return false;
+}
+
+/* Pair the virtual destination register of a LOAD_INPUT instruction with
+ * the physical GPRF32 register selected by the instruction's immediate input
+ * index, and hand that pair to preloadRegister().
+ *
+ * XXX: I don't think this is the right way to assign physical registers,
+ * but I'm not sure of another way to do this.
+ */
+void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI)
+{
+  const TargetRegisterClass * gprClass =
+      TM.getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
+  int64_t index = MI.getOperand(1).getImm();
+  unsigned physReg = gprClass->getRegister(index);
+  unsigned virtReg = MI.getOperand(0).getReg();
+
+  preloadRegister(physReg, virtReg);
+}
+
+/* Replace a STORE_OUTPUT with a COPY of the stored value (operand 1) into
+ * the physical GPRF32 register selected by the immediate output index
+ * (operand 2).  The COPY is inserted before I and the physical register is
+ * added to the live-out list if it is not on it yet.
+ *
+ * Always returns true, i.e. the caller should erase MI.
+ */
+bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
+{
+  unsigned srcReg = MI.getOperand(1).getReg();
+  int64_t index = MI.getOperand(2).getImm();
+  const TargetRegisterClass * gprClass =
+      TM.getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
+  unsigned outReg = gprClass->getRegister(index);
+
+  BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY),
+          outReg)
+          .addReg(srcReg);
+
+  if (MRI->isLiveOut(outReg)) {
+    return true;
+  }
+  MRI->addLiveOut(outReg);
+  return true;
+}
+
+/* Fold a SWIZZLE instruction into its users: the swizzle immediate
+ * (operand 2) is stored as the target flags of every use of the result
+ * register (operand 0), and the result register is then replaced everywhere
+ * by the source register (operand 1).
+ */
+void R600LowerShaderInstructionsPass::lowerSWIZZLE(MachineInstr &MI)
+{
+  unsigned resultReg = MI.getOperand(0).getReg();
+  unsigned sourceReg = MI.getOperand(1).getReg();
+  int64_t swizzle = MI.getOperand(2).getImm();
+
+  /* Record the swizzle on every operand that reads the result. */
+  MachineRegisterInfo::use_iterator UI = MRI->use_begin(resultReg);
+  MachineRegisterInfo::use_iterator UE = MRI->use_end();
+  while (UI != UE) {
+    UI.getOperand().setTargetFlags(swizzle);
+    ++UI;
+  }
+
+  /* Propagate the source register past the swizzle instruction. */
+  MRI->replaceRegWith(resultReg, sourceReg);
+}
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
new file mode 100644
index 00000000000..dfd6c44c6d5
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
new file mode 100644
index 00000000000..dfd6c44c6d5
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
new file mode 100644
index 00000000000..ffacb09ff6c
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#include "R600RegisterInfo.h"
+
+#include "AMDISATargetMachine.h"
+
+using namespace llvm;
+
+/* Forwards both arguments to the AMDISARegisterInfo base class and keeps
+ * references to them in TM and TII. */
+R600RegisterInfo::R600RegisterInfo(AMDISATargetMachine &tm,
+                                   const TargetInstrInfo &tii)
+  : AMDISARegisterInfo(tm, tii), TM(tm), TII(tii)
+{
+}
+
+/* Builds the set of reserved registers for MF: a fixed list, the whole
+ * C0..C1023 range, and every non-virtual register named as operand 0 of a
+ * RESERVE_REG instruction found in the function. */
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+  BitVector reservedSet(getNumRegs());
+
+  /* Registers that are reserved in every function. */
+  static const unsigned fixedRegs[] = {
+    AMDIL::ZERO, AMDIL::HALF, AMDIL::ONE,
+    AMDIL::NEG_HALF, AMDIL::NEG_ONE, AMDIL::PV_X
+  };
+  for (unsigned k = 0; k < sizeof(fixedRegs) / sizeof(fixedRegs[0]); ++k) {
+    reservedSet.set(fixedRegs[k]);
+  }
+
+  for (unsigned reg = AMDIL::C0; reg <= AMDIL::C1023; ++reg) {
+    reservedSet.set(reg);
+  }
+
+  /* Honor explicit reservation requests made inside the function body. */
+  for (MachineFunction::const_iterator blk = MF.begin(), blkEnd = MF.end();
+       blk != blkEnd; ++blk) {
+    for (MachineBasicBlock::const_iterator inst = blk->begin(),
+         instEnd = blk->end(); inst != instEnd; ++inst) {
+      if (inst->getOpcode() != AMDIL::RESERVE_REG) {
+        continue;
+      }
+      const unsigned reg = inst->getOperand(0).getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(reg)) {
+        reservedSet.set(reg);
+      }
+    }
+  }
+
+  return reservedSet;
+}
+
+/* Returns true when regClassID is the CR, GPRF32 or REPL register class,
+ * false for every other class ID. */
+bool R600RegisterInfo::isBaseRegClass(unsigned regClassID) const
+{
+  return regClassID == AMDIL::CRRegClassID
+      || regClassID == AMDIL::GPRF32RegClassID
+      || regClassID == AMDIL::REPLRegClassID;
+}
+
+/* Maps the GPRV4F32 and GPRV4I32 register classes to the REPL class; any
+ * other class is returned unchanged. */
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
+{
+  const unsigned classID = rc->getID();
+  if (classID == AMDIL::GPRV4F32RegClassID ||
+      classID == AMDIL::GPRV4I32RegClassID) {
+    return &AMDIL::REPLRegClass;
+  }
+  return rc;
+}
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h
new file mode 100644
index 00000000000..0566c3bfd27
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDILRegisterInfo.h"
+#include "AMDISATargetMachine.h"
+
+namespace llvm {
+
+ class R600TargetMachine;
+ class TargetInstrInfo;
+
+ // Register information for the R600 target, layered on AMDISARegisterInfo.
+ struct R600RegisterInfo : public AMDISARegisterInfo
+ {
+ // References to the objects passed to the constructor; the constructor
+ // binds them and does not copy them.
+ AMDISATargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ // Passes tm and tii on to the AMDISARegisterInfo base and binds TM/TII.
+ R600RegisterInfo(AMDISATargetMachine &tm, const TargetInstrInfo &tii);
+
+ // Returns the reserved registers for MF: a fixed set of special registers,
+ // the C0..C1023 range, and any non-virtual register named by a RESERVE_REG
+ // instruction in MF.
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ // True when regClassID is the CR, GPRF32 or REPL register class.
+ virtual bool isBaseRegClass(unsigned regClassID) const;
+
+ // Maps the GPRV4F32/GPRV4I32 classes to the REPL class and returns every
+ // other class unchanged.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const;
+ };
+} // End namespace llvm
+
+#endif // R600REGISTERINFO_H_
diff --git a/src/gallium/drivers/radeon/TargetInfo/AMDILTargetInfo.cpp b/src/gallium/drivers/radeon/TargetInfo/AMDILTargetInfo.cpp
new file mode 100644
index 00000000000..53135760fea
--- /dev/null
+++ b/src/gallium/drivers/radeon/TargetInfo/AMDILTargetInfo.cpp
@@ -0,0 +1,61 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+/// The target for the AMDIL backend
+Target llvm::TheAMDILTarget;
+
+/// C-linkage entry point that registers TheAMDILTarget for the
+/// Triple::amdil architecture under the name "amdil".
+extern "C" void LLVMInitializeAMDILTargetInfo() {
+  // Constructing the RegisterTarget helper is what registers the target.
+  RegisterTarget<Triple::amdil, false> registration(
+      TheAMDILTarget, "amdil", "ATI graphics cards");
+}
diff --git a/src/gallium/drivers/radeon/TargetInfo/CMakeLists.txt b/src/gallium/drivers/radeon/TargetInfo/CMakeLists.txt
new file mode 100644
index 00000000000..72a8982b494
--- /dev/null
+++ b/src/gallium/drivers/radeon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Add the parent directory to the include path, from both the build tree
+# and the source tree.
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+# Build the LLVMAMDILInfo library from the target-info source file.
+add_llvm_library(LLVMAMDILInfo
+ AMDILTargetInfo.cpp
+ )
+
+# Order the build: AMDILCodeGenTable_gen must be built before LLVMAMDILInfo.
+add_dependencies(LLVMAMDILInfo AMDILCodeGenTable_gen)
diff --git a/src/gallium/drivers/radeon/generateRegisters.pl b/src/gallium/drivers/radeon/generateRegisters.pl
new file mode 100644
index 00000000000..3cab11a0b09
--- /dev/null
+++ b/src/gallium/drivers/radeon/generateRegisters.pl
@@ -0,0 +1,135 @@
+#!/usr/bin/perl
+# Copyright (c) 2011, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# If you use the software (in whole or in part), you shall adhere to all
+# applicable U.S., European, and other export laws, including but not limited
+# to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+# 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+# 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+# that, except pursuant to a license granted by the United States Department
+# of Commerce Bureau of Industry and Security or as otherwise permitted
+# pursuant to a License Exception under the U.S. Export Administration
+# Regulations ("EAR"), you will not (1) export, re-export or release to a
+# national of a country in Country Groups D:1, E:1 or E:2 any restricted
+# technology, software, or source code you receive hereunder, or (2) export to
+# Country Groups D:1, E:1 or E:2 the direct product of such technology or
+# software, if such foreign produced direct product is subject to national
+# security controls as identified on the Commerce Control List (currently
+# found in Supplement 1 to Part 774 of EAR). For the most current Country
+# Group listings, or for additional information about the EAR or your
+# obligations under those regulations, please refer to the U.S. Bureau of
+# Industry and Security’s website at http://www.bis.doc.gov/.
+#
+# Generates the six AMDILRegister{Defs,Uses}{Scalar,V2,V4}.td files in the
+# current directory.
+#
+# Register numbers in [1, $numRegs) and [$highRegs, $numTotalRegs) are
+# emitted; the numbers in between are skipped.
+my $numRegs = 192;
+my $numTotalRegs = 1013;
+my $highRegs = 1000;
+
+my @regNums = grep { $_ < $numRegs || $_ >= $highRegs } 1 .. $numTotalRegs - 1;
+
+# Opens $target (which carries its own ">" mode prefix) and writes $text.
+sub writeFile {
+    my ($target, $text) = @_;
+    open(my $out, $target) or die $!;
+    print $out $text;
+    close($out);
+}
+
+# The four scalar component definitions (x, y, z, w) of register $n.  The
+# encodings are consecutive, starting at $n.
+sub scalarDefs {
+    my ($n) = @_;
+    my $id = $n;
+    my $text = '';
+    foreach my $comp ('x', 'y', 'z', 'w') {
+        $text .= "def R$comp$n : AMDILReg<$id, \"r$n\">, DwarfRegNum<[$id]>;\n";
+        $id++;
+    }
+    return $text;
+}
+
+# The two 2-component definitions (xy, zw) of register $n.
+sub v2Defs {
+    my ($n) = @_;
+    return "def Rxy$n : AMDILRegWithSubReg<$n, \"r$n\", [Rx$n, Ry$n], [sub_x_comp, sub_y_comp]>, DwarfRegNum<[$n]>;\n"
+         . "def Rzw$n : AMDILRegWithSubReg<$n, \"r$n\", [Rz$n, Rw$n], [sub_z_comp, sub_w_comp]>, DwarfRegNum<[$n]>;\n";
+}
+
+# The 4-component definition of register $n.
+sub v4Defs {
+    my ($n) = @_;
+    return "def R$n : AMDILRegWithSubReg<$n, \"r$n\", [Rxy$n, Rzw$n], [sub_xy_comp, sub_zw_comp]>, DwarfRegNum<[$n]>;\n";
+}
+
+# Definition files: one or more "def" lines per emitted register.
+writeFile(">AMDILRegisterDefsScalar.td", join('', map(scalarDefs($_), @regNums)));
+writeFile(">AMDILRegisterDefsV2.td", join('', map(v2Defs($_), @regNums)));
+writeFile(">AMDILRegisterDefsV4.td", join('', map(v4Defs($_), @regNums)));
+
+# Use files: a single comma-separated list with no trailing separator or
+# newline.
+writeFile(">AMDILRegisterUsesScalar.td", join(', ', map("Rx$_, Ry$_, Rz$_, Rw$_", @regNums)));
+writeFile(">AMDILRegisterUsesV2.td", join(', ', map("Rxy$_, Rzw$_", @regNums)));
+writeFile(">AMDILRegisterUsesV4.td", join(', ', map("R$_", @regNums)));
+
diff --git a/src/gallium/drivers/radeon/macrodata.cpp b/src/gallium/drivers/radeon/macrodata.cpp
new file mode 100644
index 00000000000..269bb918828
--- /dev/null
+++ b/src/gallium/drivers/radeon/macrodata.cpp
@@ -0,0 +1,338 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+/*! \file macrodata.cpp
+ * \brief Macrodata compile-time/run-time implementation.
+ *
+ * \author Alexander Lyashevsky (Alexander.Lyashevsky@amd.com)
+ * \date March 2009
+ */
+#include <stdio.h>
+#include <string.h>
+#include "macrodata.h"
+#include "macrodb.h"
+
+
+
+namespace amd {
+
+static const char *csMacroCallPattern = "mcall(";
+
+static CMacroData sMacroDataDBObject;
+
+// Starts from an empty state, then counts the macros in the database and
+// builds the reference tables.
+CMacroData :: CMacroData()
+{
+  // No tables allocated yet.
+  mMacroRef = 0;
+  mRefIndex = 0;
+  mRefNbr = 0;
+
+  // Database not counted yet.
+  mMacroDBCounter = 0;
+  mInit = 0;
+
+  InitMacroDB();
+  ResolveReferences();
+}
+
+// Releases the three tables built by ResolveReferences().
+CMacroData :: ~CMacroData()
+{
+  // delete[] on a null pointer does nothing, so no guards are needed.
+  delete [] mRefNbr;
+  mRefNbr = 0;
+
+  delete [] mRefIndex;
+  mRefIndex = 0;
+
+  delete [] mMacroRef;
+  mMacroRef = 0;
+}
+
+// Finds the first occurrence of the first _PatLen bytes of _Pattern in the
+// NUL-terminated string _SearchBuf.
+//
+// Returns the offset of the match from the start of _SearchBuf, or -1 when
+// there is none.
+int CMacroData ::SearchForPattern(char *_SearchBuf, const char *_Pattern, int _PatLen)
+{
+  const int bufLen = (int)strlen(_SearchBuf);
+
+  // The last valid start offset is bufLen - _PatLen, inclusive; stopping one
+  // short of it would miss a pattern that ends exactly at the end of the
+  // buffer.
+  for (int i = 0; i + _PatLen <= bufLen; i++)
+  {
+    if (!memcmp(&_SearchBuf[i], _Pattern, _PatLen))
+    {
+      return i;
+    }
+  }
+  return -1;
+}
+
+// Copies the text found between the first _Delim0 in _pBuf and the first
+// _Delim1 that follows it into _Name, NUL-terminated.
+//
+// On success returns 1, with *_Pos0 set to the offset of _Delim0 and *_Pos1
+// to the offset of _Delim1, both relative to _pBuf.  On failure returns 0,
+// leaves _Name untouched and sets *_Pos1 to -1; *_Pos0 is -1 only when
+// _Delim0 itself was not found.
+//
+// _Name must be large enough for the extracted text plus the terminator;
+// this function has no way to check that.
+int CMacroData ::ExtractString(int *_Pos0, int *_Pos1,char * _Name, char *_pBuf, const char*_Delim0, const char*_Delim1)
+{
+  const int len0 = (int)strlen(_Delim0);
+  const int len1 = (int)strlen(_Delim1);
+
+  *_Pos1 = -1;
+  *_Pos0 = SearchForPattern(_pBuf, _Delim0, len0);
+  if ( *_Pos0 == -1 )
+  {
+    // Without an opening delimiter there is no position to search from.
+    return 0;
+  }
+
+  const int nameStart = (*_Pos0) + len0;
+  // Searching from nameStart makes the result the length of the name.
+  const int nameLen = SearchForPattern(&_pBuf[nameStart], _Delim1, len1);
+  if ( nameLen == -1 )
+  {
+    return 0;
+  }
+
+  memcpy(_Name, &_pBuf[nameStart], nameLen);
+  _Name[nameLen] = 0;
+  *_Pos1 = nameStart + nameLen;
+  return 1;
+}
+
+// Counts the entries of amd::sMacroDB and marks the object as initialized.
+// Always returns 1.
+int CMacroData :: InitMacroDB( void )
+{
+  // The table is terminated by an entry whose Name starts with a NUL byte.
+  int count = 0;
+  while (amd::sMacroDB[count].Name[0] != 0)
+  {
+    ++count;
+  }
+  mMacroDBCounter = count;
+
+  mInit = 1;
+  return 1;
+}
+
+// Returns how many times the macro-call pattern "mcall(" occurs in the body
+// of macro number Ord.
+int CMacroData :: NumberOfReferences( int Ord )
+{
+  const int patLen = (int)strlen(csMacroCallPattern);
+  char *pMacro = (char*)sMacroDB[Ord].Body;
+  int count = 0;
+  // Absolute offset in pMacro at which the next search starts.
+  int pos = 0;
+
+  for (;;)
+  {
+    // SearchForPattern reports offsets relative to the pointer it is given,
+    // so the result has to be added to pos rather than replace it.
+    const int found = SearchForPattern(&pMacro[pos], csMacroCallPattern, patLen);
+    if ( found == -1 )
+    {
+      break;
+    }
+    count++;
+    pos += found + patLen;
+  }
+
+  return(count);
+}
+
+// Fills the slice of mMacroRef that belongs to macro number Ord, starting
+// at index StartPos.
+//
+// The slice has mRefNbr[Ord] slots.  All but the last receive, in order of
+// appearance, the body of each macro named by an "mcall(<number>" in Ord's
+// body; the last slot receives Ord's own body.  A call whose number cannot
+// be parsed or is not a valid macro ordinal gets a null slot instead of an
+// out-of-range table access.
+//
+// Returns the number of macro calls processed.
+int CMacroData :: InsertReferences( int Ord, int StartPos )
+{
+  const int patLen = (int)strlen(csMacroCallPattern);
+  char *pMacro = (char*)sMacroDB[Ord].Body;
+  // Index, within the slice, of the slot reserved for the macro itself.
+  const int selfSlot = mRefNbr[Ord] - 1;
+  int r = 0;
+  // Absolute offset in pMacro at which the next search starts.
+  int pos = 0;
+
+  // Never write past the slots that were counted for this macro.
+  while( r < selfSlot )
+  {
+    // SearchForPattern reports offsets relative to the pointer it is given.
+    const int found = SearchForPattern(&pMacro[pos], csMacroCallPattern, patLen);
+    if ( found == -1 )
+    {
+      break;
+    }
+    // Step over "mcall(" so that pos addresses the ordinal.
+    pos += found + patLen;
+
+    // Parse the ordinal straight from the body; no temporary buffer that a
+    // long argument could overflow.
+    int newOrd = -1;
+    char *refBody = 0;
+    if ( sscanf(&pMacro[pos], "%d", &newOrd) == 1 &&
+         newOrd >= 0 && newOrd < mMacroDBCounter )
+    {
+      refBody = (char*)sMacroDB[newOrd].Body;
+    }
+    mMacroRef[StartPos + r] = refBody;
+    r++;
+  }
+
+  // Slots that were counted but not filled must not stay uninitialized.
+  for ( int i = r; i < selfSlot; i++ )
+  {
+    mMacroRef[StartPos + i] = 0;
+  }
+
+// last is itself
+  mMacroRef[StartPos + selfSlot] = (char*)sMacroDB[Ord].Body;
+  return(r);
+}
+
+// Rebuilds the three reference tables from the macro database:
+//   mRefNbr[i]   - number of slots macro i owns (its calls plus itself),
+//   mRefIndex[i] - index of macro i's first slot in mMacroRef,
+//   mMacroRef    - all slots, macro after macro.
+// Always returns 1.
+int CMacroData :: ResolveReferences( void )
+{
+  // Per-macro slot counts and their sum.  delete[] accepts a null pointer.
+  delete [] mRefNbr;
+  mRefNbr = new int [mMacroDBCounter];
+  int totalRef = 0;
+  for (int i = 0; i < mMacroDBCounter; i++)
+  {
+    // plus itself
+    mRefNbr[i] = NumberOfReferences(i) + 1;
+    totalRef += mRefNbr[i];
+  }
+
+  delete [] mRefIndex;
+  mRefIndex = new int [mMacroDBCounter];
+
+  delete [] mMacroRef;
+  mMacroRef = new char*[totalRef];
+
+  // Hand each macro its slice and fill it.
+  int startPos = 0;
+  for (int i = 0; i < mMacroDBCounter; i++)
+  {
+    mRefIndex[i] = startPos;
+    InsertReferences( i, startPos );
+    startPos += mRefNbr[i];
+  }
+
+  return 1;
+}
+
+
+// Returns the index of the first macro whose name equals _pcMacroNm, or -1
+// when there is none or the database has not been initialized.
+int CMacroData :: MacroDBFindMacro( const char * _pcMacroNm )
+{
+  if ( !mInit )
+  {
+    return -1;
+  }
+
+  for ( int i = 0; i < mMacroDBCounter; i++ )
+  {
+    if ( strcmp(_pcMacroNm, sMacroDB[i].Name) == 0 )
+    {
+      return i;
+    }
+  }
+  return -1;
+}
+
+// Returns the body of macro _iMacroId, or null when the database is not
+// initialized or the id is out of range.
+const char *CMacroData :: MacroDBGetMacro( int _iMacroId )
+{
+  if ( !mInit || _iMacroId < 0 || _iMacroId >= mMacroDBCounter )
+  {
+    return 0;
+  }
+  return sMacroDB[_iMacroId].Body;
+}
+
+// Returns the first slot of macro _iMacroId in mMacroRef and stores the
+// number of slots in *_MacroListCounter.  Returns null, leaving the counter
+// untouched, when the database is not initialized, _MacroListCounter is
+// null, or the id is out of range.
+const char ** CMacroData :: MacroDBGetMacroList( int *_MacroListCounter, int _iMacroId )
+{
+  if ( !mInit || !_MacroListCounter ||
+       _iMacroId < 0 || _iMacroId >= mMacroDBCounter )
+  {
+    return 0;
+  }
+
+  *_MacroListCounter = mRefNbr[_iMacroId];
+  return (const char **)&mMacroRef[mRefIndex[_iMacroId]];
+}
+
+// Returns the Inputs field of macro _iMacroId, or 0 when the database is
+// not initialized or the id is out of range.
+int CMacroData :: MacroDBFindNumInputs( int _iMacroId )
+{
+  if ( !mInit || _iMacroId < 0 || _iMacroId >= mMacroDBCounter )
+  {
+    return 0;
+  }
+  return sMacroDB[_iMacroId].Inputs;
+}
+
+// Returns the Outputs field of macro _iMacroId, or 0 when the database is
+// not initialized or the id is out of range.
+int CMacroData :: MacroDBFindNumOutputs( int _iMacroId )
+{
+  if ( !mInit || _iMacroId < 0 || _iMacroId >= mMacroDBCounter )
+  {
+    return 0;
+  }
+  return sMacroDB[_iMacroId].Outputs;
+}
+
+
+// public:
+
+// Looks the name up in the file-local macro database object.
+int MacroDBFindMacro( const char * _pcMacroNm )
+{
+  const int ordinal = sMacroDataDBObject.MacroDBFindMacro(_pcMacroNm);
+  return ordinal;
+}
+
+// Fetches the macro list of _iMacroId from the file-local macro database
+// object.
+const char ** MacroDBGetMacro( int *_MacroListCounter, int _iMacroId )
+{
+  const char **macroList =
+      sMacroDataDBObject.MacroDBGetMacroList(_MacroListCounter, _iMacroId);
+  return macroList;
+}
+
+// Asks the file-local macro database object for the input count.
+int MacroDBNumInputs(int _iMacroId)
+{
+  const int numInputs = sMacroDataDBObject.MacroDBFindNumInputs(_iMacroId);
+  return numInputs;
+}
+
+// Asks the file-local macro database object for the output count.
+int MacroDBNumOutputs(int _iMacroId)
+{
+  const int numOutputs = sMacroDataDBObject.MacroDBFindNumOutputs(_iMacroId);
+  return numOutputs;
+}
+} // namespace amd
diff --git a/src/gallium/drivers/radeon/macrodata.h b/src/gallium/drivers/radeon/macrodata.h
new file mode 100644
index 00000000000..585384966ee
--- /dev/null
+++ b/src/gallium/drivers/radeon/macrodata.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+/*! \file macrodata.h
+ * \brief Macrodata compile-time/run-time interface.
+ *
+ * \author Alexander Lyashevsky (Alexander.Lyashevsky@amd.com)
+ * \date March 2009
+ */
+
+#ifndef MACRODATA_HPP_
+#define MACRODATA_HPP_
+
+namespace amd {
+/*! \brief Returns macro ordinal.
+*
+* \details
+* The name is compared exactly (strcmp) against the names in the macro
+* database. Returns the index of the first match, or -1 when no macro has
+* that name.
+*/
+int MacroDBFindMacro( const char * _pcMacroNm );
+/*! \brief Returns list of macro and number of the elements in the list.
+*
+* \details
+* The list holds the bodies of the macros called by the requested macro,
+* followed by the body of the requested macro itself as the last element.
+* Returns a null pointer, without writing to \a _MacroListCounter, when
+* \a _MacroListCounter is null or \a _iMacroId is not a valid ordinal.
+*
+* Examples of invocation:
+int ordinal = amd::MacroDBFindMacro( "asinpi_float" );
+const char **MacroPtrs;
+const char *MacroPtr;
+int MacrosCnt;
+ MacroPtrs = amd::MacroDBGetMacro(&MacrosCnt,ordinal);
+ for( int i = 0; i < MacrosCnt; i++)
+ {
+ MacroPtr = MacroPtrs[i];
+ }
+*/
+const char ** MacroDBGetMacro( int *_MacroListCounter, int _iMacroId );
+
+/*! \brief returns the number of inputs for the specific macro
+*
+* \details
+* Returns 0 when \a macronum is not a valid macro ordinal.
+*/
+int MacroDBNumInputs(int macronum);
+
+/*! \brief returns the number of outputs for the specific macro
+*
+* \details
+* Returns 0 when \a macronum is not a valid macro ordinal.
+*/
+int MacroDBNumOutputs(int macronum);
+} // namespace amd
+
+
+#endif /*MACRODATA_HPP_*/
diff --git a/src/gallium/drivers/radeon/macrodb.h b/src/gallium/drivers/radeon/macrodb.h
new file mode 100644
index 00000000000..8484b8614fa
--- /dev/null
+++ b/src/gallium/drivers/radeon/macrodb.h
@@ -0,0 +1,108 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+/*! \file macrodb.h
+ * \brief Declarations of the internal CMacroData class.
+ * Also the place where files generated by the macrotool utility are included (currently macrodb_gen.h).
+ *
+ * \author Alexander Lyashevsky (Alexander.Lyashevsky@amd.com)
+ * \date March 2009
+ */
+#ifndef MACRODB_HPP_
+#define MACRODB_HPP_
+
+namespace amd {
+ // Layout of one record of the generated sMacroDB table (see macrodb_gen.h).
+ namespace macrodata {
+ struct SMacroEntry{
+ const char* Name; // macro name, e.g. "barrier"
+ const char* Body; // macro text; generated entries start with "mdef(N)_out(o)_in(i)" and end with "mend"
+ int Inputs; // input count; matches the "_in(i)" value in the generated entries
+ int Outputs; // output count; matches the "_out(o)" value in the generated entries
+ };
+ }
+ // Internal macro database class. NOTE(review): presumably backs the amd::MacroDB* free functions - confirm.
+ class CMacroData{
+ public:
+ CMacroData();
+ ~CMacroData();
+ public:
+ int MacroDBFindMacro( const char * _pcMacroNm );
+ const char *MacroDBGetMacro( int _iMacroId );
+ const char ** MacroDBGetMacroList( int *_MacroListCounter, int _iMacroId );
+ int MacroDBFindNumInputs ( int _iMacroId );
+ int MacroDBFindNumOutputs ( int _iMacroId );
+ protected:
+ int mInit;
+ int mMacroDBCounter;
+ int *mRefNbr;
+ int *mRefIndex;
+ char **mMacroRef;
+
+ int InitMacroDB( void );
+ int ResolveReferences( void );
+ int NumberOfReferences( int Ord );
+ int InsertReferences( int Ord, int StartPos );
+ int SearchForPattern(char *_SearchBuf, const char *_Pattern, int _PatLen);
+ int ExtractString(int *_Pos0, int *_Pos1,char * _Name, char *_pBuf, const char*_Delim0, const char*_Delim1);
+
+
+
+ };
+
+
+// Generated macro table (sMacroDB); included inside namespace amd so its unqualified macrodata::SMacroEntry resolves.
+#include "macrodb_gen.h"
+
+}
+
+#endif /*MACRODB_HPP_*/
diff --git a/src/gallium/drivers/radeon/macrodb_gen.h b/src/gallium/drivers/radeon/macrodb_gen.h
new file mode 100644
index 00000000000..1996d06dd5a
--- /dev/null
+++ b/src/gallium/drivers/radeon/macrodb_gen.h
@@ -0,0 +1,32186 @@
+// Copyright (c) 2011, Advanced Micro Devices, Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+// If you use the software (in whole or in part), you shall adhere to all
+// applicable U.S., European, and other export laws, including but not limited
+// to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. Sections
+// 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June
+// 2000. Further, pursuant to Section 740.6 of the EAR, you hereby certify
+// that, except pursuant to a license granted by the United States Department
+// of Commerce Bureau of Industry and Security or as otherwise permitted
+// pursuant to a License Exception under the U.S. Export Administration
+// Regulations ("EAR"), you will not (1) export, re-export or release to a
+// national of a country in Country Groups D:1, E:1 or E:2 any restricted
+// technology, software, or source code you receive hereunder, or (2) export to
+// Country Groups D:1, E:1 or E:2 the direct product of such technology or
+// software, if such foreign produced direct product is subject to national
+// security controls as identified on the Commerce Control List (currently
+// found in Supplement 1 to Part 774 of EAR). For the most current Country
+// Group listings, or for additional information about the EAR or your
+// obligations under those regulations, please refer to the U.S. Bureau of
+// Industry and Security’s website at http://www.bis.doc.gov/.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _macrodb_gen_HPP_
+#define _macrodb_gen_HPP_
+/*
+The time is Mon Jul 18 18:22:11 2011
+*/
+const macrodata::SMacroEntry sMacroDB[] =
+{
+{ "barrier",
+"mdef(0)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"fence_threads_memory_lds\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "barrierGlobal",
+"mdef(1)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"fence_threads_memory\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "barrierLocal",
+"mdef(2)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"fence_threads_lds\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "read_mem_fence",
+"mdef(3)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"fence_lds_mem_read_only\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "write_mem_fence",
+"mdef(4)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"fence_lds_mem_write_only\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__acosh_f32",
+"mdef(5)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r0._y__, l0\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x007FFFFF\n"
+"and r0.__zw, r1.x, l1\n"
+"dcl_literal l2, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r1._yz_, r0.z, l2\n"
+"mul_ieee r1.__z_, r1.z, r1.y\n"
+"sqrt_vec r1.__z_, r1.z\n"
+"add r0.x___, r1.y, r1.z\n"
+"dcl_literal l3, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1._y__, r0.x, l3\n"
+"dcl_literal l4, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.y, l4\n"
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1._y__, r1.y, l5\n"
+"dcl_literal l6, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.___w, l6, r1.z\n"
+"dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.___w, r1.w, l7\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r1.w, l8\n"
+"dcl_literal l9, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.___w, r1.w, l9\n"
+"dcl_literal l10, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r1._y__, r1.y, l10\n"
+"iadd r1._y__, r1.y, r2.x\n"
+"itof r2._y__, r1.y\n"
+"ior r1._y__, r1.z, r1.w\n"
+"dcl_literal l11, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r2.x___, r1.y, l11\n"
+"dcl_literal l12, 0x3ECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD\n"
+"ge r1._y__, r0.x, l12\n"
+"cmov_logical r0.xy__, r1.y, r2.xyxx, r0.xyxx\n"
+"dcl_literal l13, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r1._y__, r0.x, l13\n"
+"div_zeroop(infinity) r1._y__, r0.x, r1.y\n"
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"mul_ieee r1.___w, r1.z, r1.z\n"
+"dcl_literal l14, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"dcl_literal l15, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.w, l14, l15\n"
+"dcl_literal l16, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.w, r2.xyxx, l16\n"
+"mul_ieee r2.x___, r1.w, r2.x\n"
+"dcl_literal l17, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1.___w, r1.w, r2.y, l17\n"
+"mad_ieee r1.__z_, r1.z, r1.w, r2.x\n"
+"mul_ieee r1.___w, r0.x, r0.x\n"
+"dcl_literal l18, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.__z_, r1.w, l18, r1.z\n"
+"dcl_literal l19, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.x___, r0.y, l19\n"
+"mad_ieee r1._y__, r1.y, r1.z, r2.x\n"
+"dcl_literal l20, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1._y__, r1.w, l20, r1.y_neg(xyzw)\n"
+"add r0.x___, r0.x_neg(xyzw), r1.y\n"
+"dcl_literal l21, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0.x___, r0.y, l21, r0.x_neg(xyzw)\n"
+"dcl_literal l22, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r0._y__, l22, r0.w\n"
+"dcl_literal l23, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r0._y__, r0.y, l23\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1._y__, r0.y, l24\n"
+"dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0._y__, r0.y, l25\n"
+"ior r0.___w, r0.w, r1.y\n"
+"dcl_literal l26, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r1._yz_, r0.w, l26\n"
+"div_zeroop(infinity) r0.___w, r1.y, r1.z\n"
+"mul_ieee r1.__z_, r0.w, r0.w\n"
+"mul_ieee r1.___w, r1.z, r1.z\n"
+"dcl_literal l27, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"dcl_literal l28, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.w, l27, l28\n"
+"dcl_literal l29, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.w, r2.xyxx, l29\n"
+"mul_ieee r2.x___, r1.w, r2.x\n"
+"dcl_literal l30, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1.___w, r1.w, r2.y, l30\n"
+"mad_ieee r1.__z_, r1.z, r1.w, r2.x\n"
+"mul_ieee r1.___w, r1.y, r1.y\n"
+"dcl_literal l31, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.__z_, r1.w, l31, r1.z\n"
+"dcl_literal l32, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r0.z, l32\n"
+"dcl_literal l33, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l33\n"
+"iadd r0._y__, r2.x, r0.y\n"
+"itof r0._y__, r0.y\n"
+"dcl_literal l34, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.x___, r0.y, l34\n"
+"mad_ieee r0.___w, r0.w, r1.z, r2.x\n"
+"dcl_literal l35, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r1.w, l35, r0.w_neg(xyzw)\n"
+"add r0.___w, r1.y_neg(xyzw), r0.w\n"
+"dcl_literal l36, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0._y__, r0.y, l36, r0.w_neg(xyzw)\n"
+"dcl_literal l37, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"add r0._y__, r0.y, l37\n"
+"dcl_literal l38, 0x46000000, 0x46000000, 0x46000000, 0x46000000\n"
+"ult r0.___w, l38, r0.z\n"
+"cmov_logical r0.x___, r0.w, r0.y, r0.x\n"
+"dcl_literal l39, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, l39, r1.x\n"
+"dcl_literal l40, 0x00000000, 0x3F800000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, r1.x, l40\n"
+"dcl_literal l41, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.z, l41\n"
+"dcl_literal l42, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l42, r0.z\n"
+"and r0._y__, r0.y, r0.w\n"
+"and r0.___w, r1.x, r0.w\n"
+"dcl_literal l43, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.y, l43, r0.x\n"
+"ior r0._y__, r0.z, r0.w\n"
+"dcl_literal l44, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l44, r0.x\n"
+"dcl_literal l45, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r1.y, l45, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__acospi_f32",
+"mdef(6)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n"
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r1.x, l1\n"
+"cmov_logical r0.x___, r0.w, r1.x_neg(xyzw), r1.x\n"
+"inot r0.___w, r0.w\n"
+"\n"
+"dcl_literal l2, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1._y__, r0.x_neg(xyzw), l2\n"
+"\n"
+"dcl_literal l3, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r1.___w, r1.y, l3\n"
+"sqrt_vec r1._yz_, r1.w\n"
+"\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.x___, r1.x, l4\n"
+"\n"
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r2.x___, r2.x, l5\n"
+"\n"
+"dcl_literal l6, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l6\n"
+"\n"
+"dcl_literal l7, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"ige r2._y__, r2.x, l7\n"
+"\n"
+"dcl_literal l8, 0xFFFFFFE6, 0xFFFFFFE6, 0xFFFFFFE6, 0xFFFFFFE6\n"
+"ilt r2.x___, r2.x, l8\n"
+"mul_ieee r0.__z_, r0.x, r0.x\n"
+"cmov_logical r0.xyz_, r2.y, r1.yzwy, r0.xyzx\n"
+"\n"
+"dcl_literal l9, 0x00000000, 0x3B81CE6B, 0x3F561F0D, 0x00000000\n"
+"\n"
+"dcl_literal l10, 0x00000000, 0xBC5B3FE1, 0x3F8D6FA5, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z_neg(xyzw), l9, l10\n"
+"\n"
+"dcl_literal l11, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD\n"
+"mad_ieee r1._y__, r1.y, r0.z, l11\n"
+"\n"
+"dcl_literal l12, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC\n"
+"mad_ieee r1._y__, r1.y, r0.z, l12\n"
+"mul_ieee r1._y__, r0.z, r1.y\n"
+"div_zeroop(infinity) r1._y__, r1.y, r1.z\n"
+"mul_ieee r1.__z_, r0.x, r1.y\n"
+"add r1.__z_, r1.z, r1.z\n"
+"\n"
+"dcl_literal l13, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"and r1.___w, r0.y, l13\n"
+"mad_ieee r0.__z_, r1.w_neg(xyzw), r1.w, r0.z\n"
+"add r2.__z_, r0.y, r1.w\n"
+"mad_ieee r0.x___, r0.x, r1.y, r0.y\n"
+"\n"
+"dcl_literal l14, 0x333BB645, 0x333BB645, 0x333BB645, 0x333BB645\n"
+"mad_ieee r0._y__, r1.x_neg(xyzw), r1.y, l14\n"
+"div_zeroop(infinity) r0.__z_, r0.z, r2.z\n"
+"\n"
+"dcl_literal l15, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r0.z, l15, r1.z\n"
+"\n"
+"dcl_literal l16, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r1.w, l16, r0.z\n"
+"\n"
+"dcl_literal l17, 0xB33BB645, 0xB33BB645, 0xB33BB645, 0xB33BB645\n"
+"add r0.x___, r0.x, l17\n"
+"\n"
+"dcl_literal l18, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"\n"
+"dcl_literal l19, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mad_ieee r0.x___, r0.x_neg(xyzw), l18, l19\n"
+"and r0.___w, r0.w, r2.y\n"
+"cmov_logical r0.x___, r0.w, r0.z, r0.x\n"
+"add r0._y__, r1.x, r0.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l20, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"add r0._y__, r0.y_neg(xyzw), l20\n"
+"cmov_logical r0.x___, r2.y, r0.x, r0.y\n"
+"mov r0._y__, r1.x_abs\n"
+"\n"
+"dcl_literal l21, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0._y__, l21, r0.y\n"
+"\n"
+"dcl_literal l22, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.__z_, r1.x, l22\n"
+"\n"
+"dcl_literal l23, 0x3F800000, 0xBF800000, 0x00000000, 0x00000000\n"
+"eq r1.xy__, r1.x, l23\n"
+"cmov_logical r0.x___, r0.y, r0.z, r0.x\n"
+"\n"
+"dcl_literal l24, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"cmov_logical r0.x___, r2.x, l24, r0.x\n"
+"\n"
+"dcl_literal l25, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r1.x, l25, r0.x\n"
+"\n"
+"dcl_literal l26, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"cmov_logical r0.x___, r1.y, l26, r0.x\n"
+"\n"
+"dcl_literal l27, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n"
+"mul_ieee r0.x___, r0.x, l27\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__acos_f32",
+"mdef(7)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r0._y__, l0\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r1.x, l1\n"
+"cmov_logical r0.x___, r0.w, r1.x_neg(xyzw), r1.x\n"
+"inot r0.___w, r0.w\n"
+"dcl_literal l2, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1._y__, r0.x_neg(xyzw), l2\n"
+"dcl_literal l3, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r1.___w, r1.y, l3\n"
+"sqrt_vec r1._yz_, r1.w\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.x___, r1.x, l4\n"
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r2.x___, r2.x, l5\n"
+"dcl_literal l6, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l6\n"
+"dcl_literal l7, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"ige r2._y__, r2.x, l7\n"
+"dcl_literal l8, 0xFFFFFFE6, 0xFFFFFFE6, 0xFFFFFFE6, 0xFFFFFFE6\n"
+"ilt r2.x___, r2.x, l8\n"
+"mul_ieee r0.__z_, r0.x, r0.x\n"
+"cmov_logical r0.xyz_, r2.y, r1.yzwy, r0.xyzx\n"
+"dcl_literal l9, 0x00000000, 0x3B81CE6B, 0x3F561F0D, 0x00000000\n"
+"dcl_literal l10, 0x00000000, 0xBC5B3FE1, 0x3F8D6FA5, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z_neg(xyzw), l9, l10\n"
+"dcl_literal l11, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD\n"
+"mad_ieee r1._y__, r1.y, r0.z, l11\n"
+"dcl_literal l12, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC\n"
+"mad_ieee r1._y__, r1.y, r0.z, l12\n"
+"mul_ieee r1._y__, r0.z, r1.y\n"
+"div_zeroop(infinity) r1._y__, r1.y, r1.z\n"
+"mul_ieee r1.__z_, r0.x, r1.y\n"
+"add r1.__z_, r1.z, r1.z\n"
+"dcl_literal l13, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"and r1.___w, r0.y, l13\n"
+"mad_ieee r0.__z_, r1.w_neg(xyzw), r1.w, r0.z\n"
+"add r2.__z_, r0.y, r1.w\n"
+"mad_ieee r0.x___, r0.x, r1.y, r0.y\n"
+"dcl_literal l14, 0x333BB645, 0x333BB645, 0x333BB645, 0x333BB645\n"
+"mad_ieee r0._y__, r1.x_neg(xyzw), r1.y, l14\n"
+"div_zeroop(infinity) r0.__z_, r0.z, r2.z\n"
+"dcl_literal l15, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r0.z, l15, r1.z\n"
+"dcl_literal l16, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r1.w, l16, r0.z\n"
+"dcl_literal l17, 0xB33BB645, 0xB33BB645, 0xB33BB645, 0xB33BB645\n"
+"add r0.x___, r0.x, l17\n"
+"dcl_literal l18, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l19, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mad_ieee r0.x___, r0.x_neg(xyzw), l18, l19\n"
+"and r0.___w, r0.w, r2.y\n"
+"cmov_logical r0.x___, r0.w, r0.z, r0.x\n"
+"add r0._y__, r1.x, r0.y_neg(xyzw)\n"
+"dcl_literal l20, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"add r0._y__, r0.y_neg(xyzw), l20\n"
+"cmov_logical r0.x___, r2.y, r0.x, r0.y\n"
+"mov r0._y__, r1.x_abs\n"
+"dcl_literal l21, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0._y__, l21, r0.y\n"
+"dcl_literal l22, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.__z_, r1.x, l22\n"
+"dcl_literal l23, 0x3F800000, 0xBF800000, 0x00000000, 0x00000000\n"
+"eq r1.xy__, r1.x, l23\n"
+"cmov_logical r0.x___, r0.y, r0.z, r0.x\n"
+"dcl_literal l24, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"cmov_logical r0.x___, r2.x, l24, r0.x\n"
+"dcl_literal l25, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r1.x, l25, r0.x\n"
+"dcl_literal l26, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"cmov_logical r0.x___, r1.y, l26, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_bitalign_1",
+"mdef(8)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bitalign r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bitalign_2",
+"mdef(9)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bitalign r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bitalign_3",
+"mdef(10)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bitalign r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bitalign_4",
+"mdef(11)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bitalign r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bytealign_1",
+"mdef(12)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bytealign r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bytealign_2",
+"mdef(13)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bytealign r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bytealign_3",
+"mdef(14)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bytealign r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_bytealign_4",
+"mdef(15)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"bytealign r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_cmovl_1",
+"mdef(16)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"cmov_logical r0.x, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_cmovl_2",
+"mdef(17)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"cmov_logical r0.xy, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_cmovl_3",
+"mdef(18)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"cmov_logical r0.xyz, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_cmovl_4",
+"mdef(19)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"cmov_logical r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_dfrexp",
+"mdef(20)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dfrexp r0, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_dldexp",
+"mdef(21)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dldexp r0.xy__, r0.xy, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_dmad",
+"mdef(22)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"dmad r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_dmax",
+"mdef(23)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dmax r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_dmin",
+"mdef(24)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dmin r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_dp2_ieee",
+"mdef(25)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp2_ieee r0.x___, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_dp3_ieee",
+"mdef(26)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp3_ieee r0.x___, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_dp4_ieee",
+"mdef(27)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp4_ieee r0.x___, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_drcp",
+"mdef(28)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"drcp_zeroop(infinity) r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_drsq",
+"mdef(29)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"drsq_zeroop(infinity) r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_fabs_1",
+"mdef(30)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0.x, r0.x_abs\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_fabs_2",
+"mdef(31)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0.xy, r0.xy_abs\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_fabs_3",
+"mdef(32)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0.xyz, r0.xyz_abs\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_fabs_4",
+"mdef(33)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0, r0_abs\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_hi_1",
+"mdef(34)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_hi r0.x, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_hi_2",
+"mdef(35)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_hi r0.xy, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_hi_3",
+"mdef(36)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_hi r0.xyz, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_hi_4",
+"mdef(37)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_hi r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_lo_1",
+"mdef(38)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_lo r0.x, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_lo_2",
+"mdef(39)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_lo r0.xy, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_lo_3",
+"mdef(40)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_lo r0.xyz, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_ffb_lo_4",
+"mdef(41)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"ffb_lo r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_near_f32",
+"mdef(42)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1.x, r0.x, l1.x\n"
+"ushr r2.x, r0.x, l2.x\n"
+"iand r2.x, r2.x, l3.x\n"
+"iadd r2.x, r2.x, l4.x\n"
+"iadd r3.x, l6.x, r2.x_neg(xyzw)\n"
+"imin r3.x, r3.x, l5.x\n"
+"imax r3.x, r3.x, l8.x\n"
+"ilt r4.x, l4.x, r2.x\n"
+"cmov_logical r4.x, r4.x, l7.x, l8.x\n"
+"ior r4.x, r1.x, r4.x\n"
+"iadd r13.x, l5.x, r3.x_neg(xyzw)\n"
+"ishl r4.x, r4.x, r13.x\n"
+"ushr r5.x, r0.x, l9.x\n"
+"iand r5.x, r5.x, l10.x\n"
+"ushr r6.x, r0.x, l11.x\n"
+"iand r6.x, r6.x, l12.x\n"
+"cmov_logical r7.x, r1.x, l14.x, l8.x\n"
+"ior r7.x, r7.x, r6.x\n"
+"ior r7.x, r7.x, l13.x\n"
+"ishl r8.x, r2.x, l15.x\n"
+"ior r8.x, r8.x, r6.x\n"
+"ior r9.x, l16.x, r6.x\n"
+"ushr r9.x, r9.x, r3.x\n"
+"ilt r10.x, r2.x, l6.x\n"
+"cmov_logical r10.x, r10.x, r9.x, r8.x\n"
+"iand r11.x, r10.x, l6.x\n"
+"ult r12.x, l17.x, r4.x\n"
+"cmov_logical r12.x, r12.x, l6.x, l8.x\n"
+"ieq r13.x, r4.x, l17.x\n"
+"cmov_logical r12.x, r13.x, r11.x, r12.x\n"
+"iadd r10.x, r10.x, r12.x\n"
+"ilt r13.x, l18.x, r2.x\n"
+"cmov_logical r10.x, r13.x, l13.x, r10.x\n"
+"ieq r13.x, r2.x, l19.x\n"
+"cmov_logical r10.x, r13.x, r7.x, r10.x\n"
+"ior r0.x, r5.x, r10.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_near_v2f32",
+"mdef(43)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1.xy, r0.xy, l1.xy\n"
+"ushr r2.xy, r0.xy, l2.xy\n"
+"iand r2.xy, r2.xy, l3.xy\n"
+"iadd r2.xy, r2.xy, l4.xy\n"
+"iadd r3.xy, l6.xy, r2.xy_neg(xyzw)\n"
+"imin r3.xy, r3.xy, l5.xy\n"
+"imax r3.xy, r3.xy, l8.xy\n"
+"ilt r4.xy, l4.xy, r2.xy\n"
+"cmov_logical r4.xy, r4.xy, l7.xy, l8.xy\n"
+"ior r4.xy, r1.xy, r4.xy\n"
+"iadd r13.xy, l5.xy, r3.xy_neg(xyzw)\n"
+"ishl r4.xy, r4.xy, r13.xy\n"
+"ushr r5.xy, r0.xy, l9.xy\n"
+"iand r5.xy, r5.xy, l10.xy\n"
+"ushr r6.xy, r0.xy, l11.xy\n"
+"iand r6.xy, r6.xy, l12.xy\n"
+"cmov_logical r7.xy, r1.xy, l14.xy, l8.xy\n"
+"ior r7.xy, r7.xy, r6.xy\n"
+"ior r7.xy, r7.xy, l13.xy\n"
+"ishl r8.xy, r2.xy, l15.xy\n"
+"ior r8.xy, r8.xy, r6.xy\n"
+"ior r9.xy, l16.xy, r6.xy\n"
+"ushr r9.xy, r9.xy, r3.xy\n"
+"ilt r10.xy, r2.xy, l6.xy\n"
+"cmov_logical r10.xy, r10.xy, r9.xy, r8.xy\n"
+"iand r11.xy, r10.xy, l6.xy\n"
+"ult r12.xy, l17.xy, r4.xy\n"
+"cmov_logical r12.xy, r12.xy, l6.xy, l8.xy\n"
+"ieq r13.xy, r4.xy, l17.xy\n"
+"cmov_logical r12.xy, r13.xy, r11.xy, r12.xy\n"
+"iadd r10.xy, r10.xy, r12.xy\n"
+"ilt r13.xy, l18.xy, r2.xy\n"
+"cmov_logical r10.xy, r13.xy, l13.xy, r10.xy\n"
+"ieq r13.xy, r2.xy, l19.xy\n"
+"cmov_logical r10.xy, r13.xy, r7.xy, r10.xy\n"
+"ior r0.xy, r5.xy, r10.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_near_v4f32",
+"mdef(44)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1, r0, l1\n"
+"ushr r2, r0, l2\n"
+"iand r2, r2, l3\n"
+"iadd r2, r2, l4\n"
+"iadd r3, l6, r2_neg(xyzw)\n"
+"imin r3, r3, l5\n"
+"imax r3, r3, l8\n"
+"ilt r4, l4, r2\n"
+"cmov_logical r4, r4, l7, l8\n"
+"ior r4, r1, r4\n"
+"iadd r13, l5, r3_neg(xyzw)\n"
+"ishl r4, r4, r13\n"
+"ushr r5, r0, l9\n"
+"iand r5, r5, l10\n"
+"ushr r6, r0, l11\n"
+"iand r6, r6, l12\n"
+"cmov_logical r7, r1, l14, l8\n"
+"ior r7, r7, r6\n"
+"ior r7, r7, l13\n"
+"ishl r8, r2, l15\n"
+"ior r8, r8, r6\n"
+"ior r9, l16, r6\n"
+"ushr r9, r9, r3\n"
+"ilt r10, r2, l6\n"
+"cmov_logical r10, r10, r9, r8\n"
+"iand r11, r10, l6\n"
+"ult r12, l17, r4\n"
+"cmov_logical r12, r12, l6, l8\n"
+"ieq r13, r4, l17\n"
+"cmov_logical r12, r13, r11, r12\n"
+"iadd r10, r10, r12\n"
+"ilt r13, l18, r2\n"
+"cmov_logical r10, r13, l13, r10\n"
+"ieq r13, r2, l19\n"
+"cmov_logical r10, r13, r7, r10\n"
+"ior r0, r5, r10\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_neg_inf_f32",
+"mdef(45)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 0x7bff, 0x7bff, 0x7bff, 0x7bff\n"
+"dcl_literal l19, 30, 30, 30, 30\n"
+"dcl_literal l20, 143, 143, 143, 143\n"
+"iand r1.x, r0.x, l1.x\n"
+"ushr r2.x, r0.x, l2.x\n"
+"iand r2.x, r2.x, l3.x\n"
+"iadd r2.x, r2.x, l4.x\n"
+"iadd r3.x, l6.x, r2.x_neg(xyzw)\n"
+"imin r3.x, r3.x, l5.x\n"
+"imax r3.x, r3.x, l8.x\n"
+"ilt r4.x, l4.x, r2.x\n"
+"cmov_logical r4.x, r4.x, l7.x, l8.x\n"
+"ior r4.x, r4.x, r1.x\n"
+"iadd r13.x, l5.x, r3.x_neg(xyzw)\n"
+"ishl r4.x, r4.x, r13.x\n"
+"ushr r5.x, r0.x, l9.x\n"
+"iand r5.x, r5.x, l10.x\n"
+"ushr r6.x, r0.x, l11.x\n"
+"iand r6.x, r6.x, l12.x\n"
+"cmov_logical r7.x, r1.x, l14.x, l8.x\n"
+"ior r7.x, r7.x, r6.x\n"
+"ior r7.x, r7.x, l13.x\n"
+"ishl r8.x, r2.x, l15.x\n"
+"ior r8.x, r8.x, r6.x\n"
+"ior r9.x, l16.x, r6.x\n"
+"ushr r9.x, r9.x, r3.x\n"
+"ilt r10.x, r2.x, l6.x\n"
+"cmov_logical r10.x, r10.x, r9.x, r8.x\n"
+"ushr r13.x, r5.x, l17.x\n"
+"ult r14.x, l8.x, r4.x\n"
+"iand r13.x, r13.x, r14.x\n"
+"iadd r10.x, r10.x, r13.x\n"
+"ushr r11.x, r5.x, l17.x\n"
+"iadd r11.x, r11.x, l18.x\n"
+"ilt r13.x, l19.x, r2.x\n"
+"cmov_logical r10.x, r13.x, r11.x, r10.x\n"
+"ieq r13.x, r2.x, l20.x\n"
+"cmov_logical r10.x, r13.x, r7.x, r10.x\n"
+"ior r0.x, r5.x, r10.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_neg_inf_v2f32",
+"mdef(46)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 0x7bff, 0x7bff, 0x7bff, 0x7bff\n"
+"dcl_literal l19, 30, 30, 30, 30\n"
+"dcl_literal l20, 143, 143, 143, 143\n"
+"iand r1.xy, r0.xy, l1.xy\n"
+"ushr r2.xy, r0.xy, l2.xy\n"
+"iand r2.xy, r2.xy, l3.xy\n"
+"iadd r2.xy, r2.xy, l4.xy\n"
+"iadd r3.xy, l6.xy, r2.xy_neg(xyzw)\n"
+"imin r3.xy, r3.xy, l5.xy\n"
+"imax r3.xy, r3.xy, l8.xy\n"
+"ilt r4.xy, l4.xy, r2.xy\n"
+"cmov_logical r4.xy, r4.xy, l7.xy, l8.xy\n"
+"ior r4.xy, r4.xy, r1.xy\n"
+"iadd r13.xy, l5.xy, r3.xy_neg(xyzw)\n"
+"ishl r4.xy, r4.xy, r13.xy\n"
+"ushr r5.xy, r0.xy, l9.xy\n"
+"iand r5.xy, r5.xy, l10.xy\n"
+"ushr r6.xy, r0.xy, l11.xy\n"
+"iand r6.xy, r6.xy, l12.xy\n"
+"cmov_logical r7.xy, r1.xy, l14.xy, l8.xy\n"
+"ior r7.xy, r7.xy, r6.xy\n"
+"ior r7.xy, r7.xy, l13.xy\n"
+"ishl r8.xy, r2.xy, l15.xy\n"
+"ior r8.xy, r8.xy, r6.xy\n"
+"ior r9.xy, l16.xy, r6.xy\n"
+"ushr r9.xy, r9.xy, r3.xy\n"
+"ilt r10.xy, r2.xy, l6.xy\n"
+"cmov_logical r10.xy, r10.xy, r9.xy, r8.xy\n"
+"ushr r13.xy, r5.xy, l17.xy\n"
+"ult r14.xy, l8.xy, r4.xy\n"
+"iand r13.xy, r13.xy, r14.xy\n"
+"iadd r10.xy, r10.xy, r13.xy\n"
+"ushr r11.xy, r5.xy, l17.xy\n"
+"iadd r11.xy, r11.xy, l18.xy\n"
+"ilt r13.xy, l19.xy, r2.xy\n"
+"cmov_logical r10.xy, r13.xy, r11.xy, r10.xy\n"
+"ieq r13.xy, r2.xy, l20.xy\n"
+"cmov_logical r10.xy, r13.xy, r7.xy, r10.xy\n"
+"ior r0.xy, r5.xy, r10.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_neg_inf_v4f32",
+"mdef(47)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 0x7bff, 0x7bff, 0x7bff, 0x7bff\n"
+"dcl_literal l19, 30, 30, 30, 30\n"
+"dcl_literal l20, 143, 143, 143, 143\n"
+"iand r1, r0, l1\n"
+"ushr r2, r0, l2\n"
+"iand r2, r2, l3\n"
+"iadd r2, r2, l4\n"
+"iadd r3, l6, r2_neg(xyzw)\n"
+"imin r3, r3, l5\n"
+"imax r3, r3, l8\n"
+"ilt r4, l4, r2\n"
+"cmov_logical r4, r4, l7, l8\n"
+"ior r4, r4, r1\n"
+"iadd r13, l5, r3_neg(xyzw)\n"
+"ishl r4, r4, r13\n"
+"ushr r5, r0, l9\n"
+"iand r5, r5, l10\n"
+"ushr r6, r0, l11\n"
+"iand r6, r6, l12\n"
+"cmov_logical r7, r1, l14, l8\n"
+"ior r7, r7, r6\n"
+"ior r7, r7, l13\n"
+"ishl r8, r2, l15\n"
+"ior r8, r8, r6\n"
+"ior r9, l16, r6\n"
+"ushr r9, r9, r3\n"
+"ilt r10, r2, l6\n"
+"cmov_logical r10, r10, r9, r8\n"
+"ushr r13, r5, l17\n"
+"ult r14, l8, r4\n"
+"iand r13, r13, r14\n"
+"iadd r10, r10, r13\n"
+"ushr r11, r5, l17\n"
+"iadd r11, r11, l18\n"
+"ilt r13, l19, r2\n"
+"cmov_logical r10, r13, r11, r10\n"
+"ieq r13, r2, l20\n"
+"cmov_logical r10, r13, r7, r10\n"
+"ior r0, r5, r10\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_plus_inf_f32",
+"mdef(48)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1.x, r0.x, l1.x\n"
+"ushr r2.x, r0.x, l2.x\n"
+"iand r2.x, r2.x, l3.x\n"
+"iadd r2.x, r2.x, l4.x\n"
+"iadd r3.x, l6.x, r2.x_neg(xyzw)\n"
+"imin r3.x, r3.x, l5.x\n"
+"imax r3.x, r3.x, l8.x\n"
+"ilt r4.x, l4.x, r2.x\n"
+"cmov_logical r4.x, r4.x, l7.x, l8.x\n"
+"ior r4.x, r1.x, r4.x\n"
+"iadd r13.x, l5.x, r3.x_neg(xyzw)\n"
+"ishl r4.x, r4.x, r13.x\n"
+"ushr r5.x, r0.x, l9.x\n"
+"iand r5.x, r5.x, l10.x\n"
+"ushr r6.x, r0.x, l11.x\n"
+"iand r6.x, r6.x, l12.x\n"
+"cmov_logical r7.x, r1.x, l14.x, l8.x\n"
+"ior r7.x, r7.x, r6.x\n"
+"ior r7.x, r7.x, l13.x\n"
+"ishl r8.x, r2.x, l15.x\n"
+"ior r8.x, r8.x, r6.x\n"
+"ior r9.x, l16.x, r6.x\n"
+"ushr r9.x, r9.x, r3.x\n"
+"ilt r10.x, r2.x, l6.x\n"
+"cmov_logical r10.x, r10.x, r9.x, r8.x\n"
+"ieq r13.x, l8.x, r5.x\n"
+"ult r14.x, l8.x, r4.x\n"
+"iand r13.x, r13.x, r14.x\n"
+"iand r13.x, r13.x, l6.x\n"
+"iadd r10.x, r10.x, r13.x\n"
+"ushr r11.x, r5.x, l17.x\n"
+"iadd r11.x, l13.x, r11.x_neg(xyzw)\n"
+"ilt r13.x, l18.x, r2.x\n"
+"cmov_logical r10.x, r13.x, r11.x, r10.x\n"
+"ieq r13.x, r2.x, l19.x\n"
+"cmov_logical r10.x, r13.x, r7.x, r10.x\n"
+"ior r0.x, r5.x, r10.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_plus_inf_v2f32",
+"mdef(49)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1.xy, r0.xy, l1.xy\n"
+"ushr r2.xy, r0.xy, l2.xy\n"
+"iand r2.xy, r2.xy, l3.xy\n"
+"iadd r2.xy, r2.xy, l4.xy\n"
+"iadd r3.xy, l6.xy, r2.xy_neg(xyzw)\n"
+"imin r3.xy, r3.xy, l5.xy\n"
+"imax r3.xy, r3.xy, l8.xy\n"
+"ilt r4.xy, l4.xy, r2.xy\n"
+"cmov_logical r4.xy, r4.xy, l7.xy, l8.xy\n"
+"ior r4.xy, r1.xy, r4.xy\n"
+"iadd r13.xy, l5.xy, r3.xy_neg(xyzw)\n"
+"ishl r4.xy, r4.xy, r13.xy\n"
+"ushr r5.xy, r0.xy, l9.xy\n"
+"iand r5.xy, r5.xy, l10.xy\n"
+"ushr r6.xy, r0.xy, l11.xy\n"
+"iand r6.xy, r6.xy, l12.xy\n"
+"cmov_logical r7.xy, r1.xy, l14.xy, l8.xy\n"
+"ior r7.xy, r7.xy, r6.xy\n"
+"ior r7.xy, r7.xy, l13.xy\n"
+"ishl r8.xy, r2.xy, l15.xy\n"
+"ior r8.xy, r8.xy, r6.xy\n"
+"ior r9.xy, l16.xy, r6.xy\n"
+"ushr r9.xy, r9.xy, r3.xy\n"
+"ilt r10.xy, r2.xy, l6.xy\n"
+"cmov_logical r10.xy, r10.xy, r9.xy, r8.xy\n"
+"ieq r13.xy, l8.xy, r5.xy\n"
+"ult r14.xy, l8.xy, r4.xy\n"
+"iand r13.xy, r13.xy, r14.xy\n"
+"iand r13.xy, r13.xy, l6.xy\n"
+"iadd r10.xy, r10.xy, r13.xy\n"
+"ushr r11.xy, r5.xy, l17.xy\n"
+"iadd r11.xy, l13.xy, r11.xy_neg(xyzw)\n"
+"ilt r13.xy, l18.xy, r2.xy\n"
+"cmov_logical r10.xy, r13.xy, r11.xy, r10.xy\n"
+"ieq r13.xy, r2.xy, l19.xy\n"
+"cmov_logical r10.xy, r13.xy, r7.xy, r10.xy\n"
+"ior r0.xy, r5.xy, r10.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_half_plus_inf_v4f32",
+"mdef(50)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n"
+"dcl_literal l2, 23, 23, 23, 23\n"
+"dcl_literal l3, 0xff, 0xff, 0xff, 0xff\n"
+"dcl_literal l4, 0xffffff90, 0xffffff90, 0xffffff90, 0xffffff90\n"
+"dcl_literal l5, 19, 19, 19, 19\n"
+"dcl_literal l6, 1, 1, 1, 1\n"
+"dcl_literal l7, 0x800000, 0x800000, 0x800000, 0x800000\n"
+"dcl_literal l8, 0, 0, 0, 0\n"
+"dcl_literal l9, 16, 16, 16, 16\n"
+"dcl_literal l10, 0x8000, 0x8000, 0x8000, 0x8000\n"
+"dcl_literal l11, 13, 13, 13, 13\n"
+"dcl_literal l12, 0x3ff, 0x3ff, 0x3ff, 0x3ff\n"
+"dcl_literal l13, 0x7c00, 0x7c00, 0x7c00, 0x7c00\n"
+"dcl_literal l14, 0x0200, 0x0200, 0x0200, 0x0200\n"
+"dcl_literal l15, 10, 10, 10, 10\n"
+"dcl_literal l16, 0x400, 0x400, 0x400, 0x400\n"
+"dcl_literal l17, 15, 15, 15, 15\n"
+"dcl_literal l18, 30, 30, 30, 30\n"
+"dcl_literal l19, 143, 143, 143, 143\n"
+"iand r1, r0, l1\n"
+"ushr r2, r0, l2\n"
+"iand r2, r2, l3\n"
+"iadd r2, r2, l4\n"
+"iadd r3, l6, r2_neg(xyzw)\n"
+"imin r3, r3, l5\n"
+"imax r3, r3, l8\n"
+"ilt r4, l4, r2\n"
+"cmov_logical r4, r4, l7, l8\n"
+"ior r4, r1, r4\n"
+"iadd r13, l5, r3_neg(xyzw)\n"
+"ishl r4, r4, r13\n"
+"ushr r5, r0, l9\n"
+"iand r5, r5, l10\n"
+"ushr r6, r0, l11\n"
+"iand r6, r6, l12\n"
+"cmov_logical r7, r1, l14, l8\n"
+"ior r7, r7, r6\n"
+"ior r7, r7, l13\n"
+"ishl r8, r2, l15\n"
+"ior r8, r8, r6\n"
+"ior r9, l16, r6\n"
+"ushr r9, r9, r3\n"
+"ilt r10, r2, l6\n"
+"cmov_logical r10, r10, r9, r8\n"
+"ieq r13, l8, r5\n"
+"ult r14, l8, r4\n"
+"iand r13, r13, r14\n"
+"iand r13, r13, l6\n"
+"iadd r10, r10, r13\n"
+"ushr r11, r5, l17\n"
+"iadd r11, l13, r11_neg(xyzw)\n"
+"ilt r13, l18, r2\n"
+"cmov_logical r10, r13, r11, r10\n"
+"ieq r13, r2, l19\n"
+"cmov_logical r10, r13, r7, r10\n"
+"ior r0, r5, r10\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_int_flr_f32",
+"mdef(51)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.x, r0.x\n"
+"ftoi r0.x, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_int_flr_v2f32",
+"mdef(52)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.xy, r0.xy\n"
+"ftoi r0.xy, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_float_to_int_flr_v4f32",
+"mdef(53)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0, r0\n"
+"ftoi r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_f_2_u4",
+"mdef(54)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"f_2_u4 r0.x___, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_global_id",
+"mdef(55)_out(1)_in(0)\n"
+"mov r0.xyz, vAbsTid.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_global_id_flat",
+"mdef(56)_out(1)_in(0)\n"
+"mov r0.x, vAbsTidFlat.x\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_group_id",
+"mdef(57)_out(1)_in(0)\n"
+"mov r0.xyz, vThreadGrpId.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_group_id_flat",
+"mdef(58)_out(1)_in(0)\n"
+"mov r0.x, vThreadGrpIdFlat.x\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_imad_1",
+"mdef(59)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"imad r0.x, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_imad_2",
+"mdef(60)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"imad r0.xy, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_imad_3",
+"mdef(61)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"imad r0.xyz, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_imad_4",
+"mdef(62)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"imad r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_imax_1",
+"mdef(63)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imax r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imax_2",
+"mdef(64)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imax r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imax_3",
+"mdef(65)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imax r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imax_4",
+"mdef(66)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imax r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imin_1",
+"mdef(67)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imin r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imin_2",
+"mdef(68)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imin r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imin_3",
+"mdef(69)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imin r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imin_4",
+"mdef(70)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imin r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_improved_div_f32",
+"mdef(71)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l1, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000\n"
+"div_zeroop(infinity) r2.x, l1.x, r1.x\n"
+"mul_ieee r3.x, r0.x, r2.x\n"
+"fma r4.x, r3.x, r1.x_neg(xyzw), r0.x\n"
+"fma r0.x, r4.x, r2.x, r3.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_improved_div_v2f32",
+"mdef(72)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l1, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000\n"
+"div_zeroop(infinity) r2.xy, l1.xy, r1.xy\n"
+"mul_ieee r3.xy, r0.xy, r2.xy\n"
+"fma r4.xy, r3.xy, r1.xy_neg(xyzw), r0.xy\n"
+"fma r0.xy, r4.xy, r2.xy, r3.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_improved_div_v4f32",
+"mdef(73)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l1, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000\n"
+"div_zeroop(infinity) r2, l1, r1\n"
+"mul_ieee r3, r0, r2\n"
+"fma r4, r3, r1_neg(xyzw), r0\n"
+"fma r0, r4, r2, r3\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imul_high_1",
+"mdef(74)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imul_high r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imul_high_2",
+"mdef(75)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imul_high r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imul_high_3",
+"mdef(76)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imul_high r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_imul_high_4",
+"mdef(77)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"imul_high r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_local_id",
+"mdef(78)_out(1)_in(0)\n"
+"mov r0.xyz, vTidInGrp.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_local_id_flat",
+"mdef(79)_out(1)_in(0)\n"
+"mov r0.x, vTidInGrpFlat.x\n"
+"mov out0, r0\n"
+"mend\n"
+,0,1
+},
+{ "__amdil_lpr_3",
+"mdef(80)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"lrp r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_lrp_1",
+"mdef(81)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"lrp r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_lrp_2",
+"mdef(82)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"lrp r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_lrp_4",
+"mdef(83)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"lrp r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_mad_ieee_1",
+"mdef(84)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r0.x, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_mad_ieee_2",
+"mdef(85)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r0.xy, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_mad_ieee_3",
+"mdef(86)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r0.xyz, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_mad_ieee_4",
+"mdef(87)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_max_ieee_1",
+"mdef(88)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"max_ieee r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_max_ieee_2",
+"mdef(89)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"max_ieee r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_max_ieee_3",
+"mdef(90)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"max_ieee r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_max_ieee_4",
+"mdef(91)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"max_ieee r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_min_ieee_1",
+"mdef(92)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"min_ieee r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_min_ieee_2",
+"mdef(93)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"min_ieee r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_min_ieee_3",
+"mdef(94)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"min_ieee r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_min_ieee_4",
+"mdef(95)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"min_ieee r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_rcp_1",
+"mdef(96)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rcp_zeroop(infinity) r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rcp_2",
+"mdef(97)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rcp_zeroop(infinity) r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rcp_3",
+"mdef(98)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rcp_zeroop(infinity) r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rcp_4",
+"mdef(99)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rcp_zeroop(infinity) r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_nearest_1",
+"mdef(100)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_nearest r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_nearest_2",
+"mdef(101)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_nearest r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_nearest_3",
+"mdef(102)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_nearest r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_nearest_4",
+"mdef(103)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_nearest r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_neginf_1",
+"mdef(104)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_neginf_2",
+"mdef(105)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_neginf_3",
+"mdef(106)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_neginf_4",
+"mdef(107)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_plusinf_1",
+"mdef(108)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_plusinf r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_plusinf_2",
+"mdef(109)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_plusinf r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_plusinf_3",
+"mdef(110)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_plusinf r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_plusinf_4",
+"mdef(111)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_plusinf r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_z_1",
+"mdef(112)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_z_2",
+"mdef(113)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_z_3",
+"mdef(114)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_round_z_4",
+"mdef(115)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rsq_vec_1",
+"mdef(116)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rsq_vec_2",
+"mdef(117)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rsq_vec_3",
+"mdef(118)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_rsq_vec_4",
+"mdef(119)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_sad4",
+"mdef(120)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sad4 r0.x___, r0, r1, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sadhi_1",
+"mdef(121)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sadhi r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sadhi_2",
+"mdef(122)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sadhi r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sadhi_3",
+"mdef(123)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sadhi r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sadhi_4",
+"mdef(124)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sadhi r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sad_1",
+"mdef(125)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sad r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sad_2",
+"mdef(126)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sad r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sad_3",
+"mdef(127)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sad r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sad_4",
+"mdef(128)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"sad r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_sqrt_vec_1",
+"mdef(129)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_sqrt_vec_2",
+"mdef(130)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_sqrt_vec_3",
+"mdef(131)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_sqrt_vec_4",
+"mdef(132)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_u4lerp_1",
+"mdef(133)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"u4lerp r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_u4lerp_2",
+"mdef(134)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"u4lerp r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_u4lerp_3",
+"mdef(135)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"u4lerp r0.xyz_, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_u4lerp_4",
+"mdef(136)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"u4lerp r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad24_1",
+"mdef(137)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad24 r0.x, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad24_2",
+"mdef(138)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad24 r0.xy, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad24_3",
+"mdef(139)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad24 r0.xyz, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad24_4",
+"mdef(140)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad24 r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad_1",
+"mdef(141)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad r0.x, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad_2",
+"mdef(142)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad r0.xy, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad_3",
+"mdef(143)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad r0.xyz, r0.xyz, r1.xyz, r2.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umad_4",
+"mdef(144)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"umad r0, r0, r1, r2\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__amdil_umax_1",
+"mdef(145)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umax r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umax_2",
+"mdef(146)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umax r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umax_3",
+"mdef(147)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umax r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umax_4",
+"mdef(148)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umax r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umin_1",
+"mdef(149)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umin r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umin_2",
+"mdef(150)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umin r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umin_3",
+"mdef(151)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umin r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umin_4",
+"mdef(152)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umin r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul24_1",
+"mdef(153)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul24 r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul24_2",
+"mdef(154)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul24 r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul24_3",
+"mdef(155)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul24 r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul24_4",
+"mdef(156)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul24 r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul_high_1",
+"mdef(157)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul_high r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul_high_2",
+"mdef(158)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul_high r0.xy, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul_high_3",
+"mdef(159)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul_high r0.xyz, r0.xyz, r1.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_umul_high_4",
+"mdef(160)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul_high r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__amdil_unpack0_1",
+"mdef(161)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack0 r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack0_2",
+"mdef(162)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack0 r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack0_3",
+"mdef(163)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack0 r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack0_4",
+"mdef(164)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack0 r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack1_1",
+"mdef(165)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack1 r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack1_2",
+"mdef(166)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack1 r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack1_3",
+"mdef(167)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"unpack1 r0.xyz_, r0.xyz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__amdil_unpack1_4", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(168)_out(1)_in(1)\n" /* macro header: number 168, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack1 r0, r0\n" /* 4-component variant: no component mask, all of r0 is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack2_1", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(169)_out(1)_in(1)\n" /* macro header: number 169, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack2 r0.x___, r0.x\n" /* 1-component variant: write mask x___ updates x only */
+"mov out0, r0\n" /* all of r0 is copied out; unwritten components are as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack2_2", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(170)_out(1)_in(1)\n" /* macro header: number 170, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack2 r0.xy__, r0.xy\n" /* 2-component variant: write mask xy__ updates x and y only */
+"mov out0, r0\n" /* all of r0 is copied out; unwritten components are as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack2_3", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(171)_out(1)_in(1)\n" /* macro header: number 171, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack2 r0.xyz_, r0.xyz\n" /* 3-component variant: write mask xyz_ updates x, y, z only */
+"mov out0, r0\n" /* all of r0 is copied out; w is left as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack2_4", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(172)_out(1)_in(1)\n" /* macro header: number 172, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack2 r0, r0\n" /* 4-component variant: no component mask, all of r0 is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack3_1", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(173)_out(1)_in(1)\n" /* macro header: number 173, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack3 r0.x___, r0.x\n" /* 1-component variant: write mask x___ updates x only */
+"mov out0, r0\n" /* all of r0 is copied out; unwritten components are as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack3_2", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(174)_out(1)_in(1)\n" /* macro header: number 174, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack3 r0.xy__, r0.xy\n" /* 2-component variant: write mask xy__ updates x and y only */
+"mov out0, r0\n" /* all of r0 is copied out; unwritten components are as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack3_3", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(175)_out(1)_in(1)\n" /* macro header: number 175, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack3 r0.xyz_, r0.xyz\n" /* 3-component variant: write mask xyz_ updates x, y, z only */
+"mov out0, r0\n" /* all of r0 is copied out; w is left as copied from in0 */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amdil_unpack3_4", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(176)_out(1)_in(1)\n" /* macro header: number 176, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"unpack3 r0, r0\n" /* 4-component variant: no component mask, all of r0 is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__amd_early_exit", /* entry layout as visible here: macro name, macro text, then two integers */
+"mdef(177)_out(1)_in(1)\n" /* macro header: number 177, one output, one input */
+"mov r0, in0\n" /* stage the input in temporary r0 */
+"ret_logical_non_zero r0\n" /* going by the mnemonic, returns when r0 is non-zero -- confirm exact semantics in the IL spec */
+"mov out0, r0\n" /* r0 is not modified above, so the output is the input value */
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__asinh_f32", /* entry layout as visible here: macro name, macro text, then two integers; only the x component is processed */
+"mdef(178)_out(1)_in(1)\n" /* macro header: number 178, one output, one input */
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = x with the sign bit cleared, i.e. the bit pattern of |x| */
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l1\n" /* r0.x = integer test x < 0, i.e. sign bit set; used at the end to restore the sign */
+"dcl_literal l2, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 is 1.0f */
+"mad_ieee r0.__z_, r0.y, r0.y, l2\n" /* r0.z = |x|*|x| + 1.0 */
+"sqrt_vec r0.__z_, r0.z\n"
+"add r0.__z_, r0.y, r0.z\n" /* r0.z = |x| + sqrt(x*x + 1) */
+"dcl_literal l3, 0x40000000, 0x46000000, 0x403EBF80, 0x00000000\n" /* thresholds: 2.0f, 8192.0f, about 2.98f */
+"ige r1.xyz_, l3, r0.y\n" /* integer compares on the bit patterns: r1.x/y/z = threshold >= |x| */
+"cmov_logical r0.__z_, r1.y, r0.z, r0.y\n" /* above 8192.0 the value fed onward is |x| itself */
+"dcl_literal l4, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.___w, r0.z, l4\n" /* r0.w = the 23 mantissa bits */
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l5\n" /* r0.z = biased exponent (shift right by 23) */
+"dcl_literal l6, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1._y__, l6, r0.w\n"
+"dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1._y__, r1.y, l7\n"
+"dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.___w, r1.y, l8\n"
+"dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1._y__, r1.y, l9\n"
+"ior r0.___w, r0.w, r1.w\n" /* rebuild a float from the mantissa with an adjusted exponent field */
+"dcl_literal l10, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n" /* -1.0f and +1.0f */
+"add r2.xy__, r0.w, l10\n" /* r2.x = m - 1, r2.y = m + 1 */
+"div_zeroop(infinity) r0.___w, r2.x, r2.y\n" /* r0.w = (m - 1) / (m + 1) */
+"mul_ieee r1.___w, r0.w, r0.w\n"
+"mul_ieee r2._y__, r1.w, r1.w\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x3E1CD04F, 0x3E178897\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x3E638E29, 0x3E3A3325\n"
+"mad_ieee r2.__zw, r2.y, l11, l12\n" /* two polynomial evaluations run side by side in the z and w lanes */
+"dcl_literal l13, 0x00000000, 0x00000000, 0x3ECCCCCD, 0x3E924925\n"
+"mad_ieee r2.__zw, r2.y, r2.zzzw, l13\n"
+"mul_ieee r2.__z_, r2.y, r2.z\n"
+"dcl_literal l14, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r2._y__, r2.y, r2.w, l14\n"
+"mad_ieee r1.___w, r1.w, r2.y, r2.z\n"
+"mul_ieee r2._y__, r2.x, r2.x\n"
+"dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0x3F000000 is 0.5f */
+"mad_ieee r1.___w, r2.y, l15, r1.w\n"
+"dcl_literal l16, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 is -127 */
+"iadd r0.__z_, r0.z, l16\n" /* remove the exponent bias */
+"iadd r0.__z_, r0.z, r1.y\n"
+"itof r0.__z_, r0.z\n"
+"dcl_literal l17, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* NOTE(review): looks like the low part of a split ln(2) constant -- verify */
+"mul_ieee r1._y__, r0.z, l17\n"
+"mad_ieee r0.___w, r0.w, r1.w, r1.y\n"
+"dcl_literal l18, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r2.y, l18, r0.w_neg(xyzw)\n"
+"add r0.___w, r2.x_neg(xyzw), r0.w\n"
+"dcl_literal l19, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* NOTE(review): looks like the high part of a split ln(2) constant -- verify */
+"mad_ieee r0.__z_, r0.z, l19, r0.w_neg(xyzw)\n" /* presumably the natural log of the value selected above -- verify */
+"dcl_literal l20, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* 0x3F317218 is ln(2) as a float */
+"add r0.___w, r0.z, l20\n"
+"dcl_literal l21, 0x00000000, 0x46000000, 0x00000000, 0x7F800000\n"
+"ilt r1._y_w, l21, r0.y\n" /* r1.y = |x| above 8192.0; r1.w = |x| bits above 0x7F800000 (NaN input) */
+"cmov_logical r0.__z_, r1.y, r0.w, r0.z\n" /* large inputs take the variant with ln(2) added */
+"mul_ieee r0.___w, r0.y, r0.y\n" /* r0.w = x*x for the polynomial path below */
+"dcl_literal l22, 0x359E003F, 0x30CF9BA3, 0x00000000, 0x00000000\n"
+"dcl_literal l23, 0xB9DA3F30, 0xB663DD90, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r0.w_neg(xyzw), l22, l23\n" /* x and y lanes carry two numerator polynomials in x*x */
+"dcl_literal l24, 0xBBA5E935, 0xB9151DC6, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r2.xyxx, r0.w, l24\n"
+"dcl_literal l25, 0xBC728443, 0xBA94FE2C, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r2.xyxx, r0.w, l25\n"
+"dcl_literal l26, 0xBC3CE6E4, 0xBAF316D1, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r2.xyxx, r0.w, l26\n"
+"mul_ieee r1._y__, r0.y, r0.w\n"
+"mul_ieee r2.xy__, r2.xyxx, r1.y\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x3A24BDC5, 0x3691AD45\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x3C4E7147, 0x39862A97\n"
+"mad_ieee r2.__zw, r0.w, l27, l28\n" /* z and w lanes carry the matching denominator polynomials */
+"dcl_literal l29, 0x00000000, 0x00000000, 0x3D86CE89, 0x3B559594\n"
+"mad_ieee r2.__zw, r2.zzzw, r0.w, l29\n"
+"dcl_literal l30, 0x00000000, 0x00000000, 0x3DF5A451, 0x3C40F989\n"
+"mad_ieee r2.__zw, r2.zzzw, r0.w, l30\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x3D8DAD2B, 0x3C36A8AF\n"
+"mad_ieee r2.__zw, r2.zzzw, r0.w, l31\n"
+"div_zeroop(infinity) r2.xy__, r2.xyxx, r2.zwzz\n" /* numerator / denominator for both coefficient sets */
+"cmov_logical r0.___w, r1.x, r2.x, r2.y\n" /* r1.x (|x| <= 2.0) selects which quotient is used */
+"add r0.___w, r0.y, r0.w\n"
+"cmov_logical r0.__z_, r1.z, r0.w, r0.z\n" /* r1.z (|x| <= about 2.98) prefers the rational path over the log path */
+"dcl_literal l32, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n" /* 0x32000000 is 2^-27 */
+"lt r0.___w, r0.y, l32\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000 is +infinity */
+"ieq r1.x___, r0.y, l33\n"
+"ior r0.___w, r0.w, r1.x\n"
+"cmov_logical r0._y__, r0.w, r0.y, r0.z\n" /* tiny or infinite |x| is passed through unchanged */
+"cmov_logical r0.x___, r0.x, r0.y_neg(xyzw), r0.y\n" /* negate the result when the input had its sign bit set */
+"dcl_literal l34, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r1.w, l34, r0.x\n" /* NaN input yields the quiet-NaN pattern 0x7FC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__asinpi_f32", /* entry layout as visible here: macro name, macro text, then two integers; only the x component is processed */
+"mdef(179)_out(1)_in(1)\n" /* macro header: number 179, one output, one input */
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x80000000, 0x7FFFFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.x, l0\n" /* r1.x = sign bit of x, r1.y = bit pattern of |x| */
+"\n"
+"dcl_literal l1, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l1, r1.y\n" /* r0.y = |x| bits above the infinity pattern, i.e. NaN input */
+"\n"
+"dcl_literal l2, 0x01490FDB, 0x01490FDB, 0x01490FDB, 0x01490FDB\n"
+"ige r0.__z_, l2, r1.y\n" /* r0.z = |x| bits at or below 0x01490FDB: very small inputs take the first branch */
+"if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.__z_, r0.x, l3\n" /* r0.z = exponent field of x */
+" if_logicalz r0.z\n" /* exponent field zero: denormal (or zero) input */
+" \n"
+" dcl_literal l4, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l4\n"
+" itof r0.___w, r0.w\n" /* convert the raw mantissa integer to float so it can be re-split below */
+" \n"
+" dcl_literal l5, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r0.w, l5\n"
+" \n"
+" dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r2.x, l6\n"
+" \n"
+" dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r0.___w, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r2.y, l8\n" /* mantissa with bit 23 set (0x00800000) */
+" \n"
+" dcl_literal l9, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.___w, l9, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2._y__, l10, r0.w\n"
+" \n"
+" dcl_literal l11, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r2.y, l11, r0.w\n" /* shift counts above 23 are replaced by 24 */
+" \n"
+" dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l12, r0.w\n"
+" ishr r2.__z_, r2.x, r0.w\n"
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l13\n"
+" iadd r0.___w, r2.x, r0.w\n"
+" cmov_logical r0.___w, r2.y, r2.z, r0.w\n" /* positive shift count: use the right-shifted mantissa, otherwise the exponent-adjusted form */
+" else\n"
+" \n"
+" dcl_literal l14, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.x___, r0.x, l14\n"
+" \n"
+" dcl_literal l15, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r0.z, l15\n"
+" \n"
+" dcl_literal l16, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000\n"
+" iadd r2.x___, r2.x, l16\n" /* adding 0x0C000000 raises the exponent field by 24 */
+" \n"
+" dcl_literal l17, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99\n"
+" iadd r0.__z_, r0.z, l17\n"
+" \n"
+" dcl_literal l18, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r0.__z_, l18, r0.z\n"
+" \n"
+" dcl_literal l19, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r0.z, l19, r2.x\n" /* exponent overflow is replaced by the infinity pattern */
+" endif\n"
+" \n"
+" dcl_literal l20, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n" /* 0x3EA2F983 is 1/pi as a float */
+" mul_ieee r0.__z_, r0.w, l20\n" /* multiply the up-scaled value by 1/pi */
+" \n"
+" dcl_literal l21, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r0.z, l21\n" /* r2.x = exponent field, r2.y = mantissa of the product */
+" if_logicalz r2.x\n"
+" itof r0.___w, r2.y\n"
+" \n"
+" dcl_literal l22, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r2.__zw, r0.w, l22\n"
+" \n"
+" dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r2.z, l23\n"
+" \n"
+" dcl_literal l24, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n" /* 0xFFFFFFE8 is -24 */
+" iadd r0.___w, r0.w, l24\n"
+" \n"
+" dcl_literal l25, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.__z_, r2.w, l25\n"
+" \n"
+" dcl_literal l26, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.___w, l26, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.___w, l27, r0.w\n"
+" \n"
+" dcl_literal l28, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r2.w, l28, r0.w\n"
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, l29, r0.w\n"
+" ishr r3.x___, r2.z, r0.w\n"
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l30\n"
+" iadd r0.___w, r2.z, r0.w\n"
+" cmov_logical r0.___w, r2.w, r3.x, r0.w\n"
+" else\n"
+" \n"
+" dcl_literal l31, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.__z_, r0.z, l31\n"
+" \n"
+" dcl_literal l32, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.x___, r2.x, l32\n"
+" \n"
+" dcl_literal l33, 0xF4000000, 0xF4000000, 0xF4000000, 0xF4000000\n"
+" iadd r0.__z_, r0.z, l33\n" /* adding 0xF4000000 lowers the exponent field by 24, undoing the earlier up-scaling */
+" \n"
+" dcl_literal l34, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69\n"
+" iadd r2.x___, r2.x, l34\n"
+" \n"
+" dcl_literal l35, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.x___, l35, r2.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l36, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.y, l36\n"
+" \n"
+" dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r2.__zw, l37, r2.x\n"
+" \n"
+" dcl_literal l38, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.x___, r2.w, l38, r2.x\n"
+" ishr r2.x___, r2.y, r2.x\n"
+" cmov_logical r0.___w, r2.z, r2.x, r0.z\n" /* r0.w carries this branch's result to the common tail */
+" endif\n"
+"else\n"
+" \n"
+" dcl_literal l39, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.__z_, r0.x, l39\n"
+" \n"
+" dcl_literal l40, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r0.z, l40\n"
+" \n"
+" dcl_literal l41, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 is -127 */
+" iadd r0.__z_, r0.z, l41\n" /* r0.z = unbiased exponent of x */
+" \n"
+" dcl_literal l42, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n" /* 0xFFFFFFFF is -1 */
+" ige r0.__z_, r0.z, l42\n" /* r0.z = exponent >= -1, i.e. |x| >= 0.5 */
+" \n"
+" dcl_literal l43, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r2.x___, r1.y_neg(xyzw), l43\n" /* r2.x = 1.0 - |x| */
+" \n"
+" dcl_literal l44, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mul_ieee r2.__z_, r2.x, l44\n" /* r2.z = (1 - |x|) * 0.5 */
+" sqrt_vec r2.xy__, r2.z\n"
+" mul_ieee r1.__z_, r1.y, r1.y\n" /* r1.z = x*x */
+" \n"
+" dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r1.___w, l45\n"
+" cmov_logical r2.xyz_, r0.z, r2.xyzx, r1.ywzy\n" /* |x| >= 0.5 uses sqrt((1-|x|)/2) and (1-|x|)/2; otherwise |x|, 0, x*x */
+" \n"
+" dcl_literal l46, 0x00000000, 0x00000000, 0x3B81CE6B, 0x3F561F0D\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0xBC5B3FE1, 0x3F8D6FA5\n"
+" mad_ieee r1.__zw, r2.z_neg(xyzw), l46, l47\n" /* z lane starts the numerator polynomial, w lane the denominator */
+" \n"
+" dcl_literal l48, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD\n"
+" mad_ieee r1.__z_, r1.z, r2.z, l48\n"
+" \n"
+" dcl_literal l49, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC\n"
+" mad_ieee r1.__z_, r1.z, r2.z, l49\n"
+" mul_ieee r1.__z_, r2.z, r1.z\n"
+" div_zeroop(infinity) r1.__z_, r1.z, r1.w\n" /* r1.z = rational correction term */
+" \n"
+" dcl_literal l50, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+" and r1.___w, r2.y, l50\n" /* keep only the upper 16 bits of the square root (a truncated head value) */
+" mad_ieee r2.__z_, r1.w_neg(xyzw), r1.w, r2.z\n"
+" add r2.___w, r2.y, r1.w\n"
+" div_zeroop(infinity) r2.__z_, r2.z, r2.w\n"
+" dp2_ieee r2._y__, r1.z, r2.y\n"
+" \n"
+" dcl_literal l51, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 0x40000000 is 2.0f */
+" \n"
+" dcl_literal l52, 0x33A22168, 0x33A22168, 0x33A22168, 0x33A22168\n"
+" mad_ieee r2.__z_, r2.z_neg(xyzw), l51, l52\n"
+" add r2._y__, r2.y, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l53, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" \n"
+" dcl_literal l54, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA\n" /* NOTE(review): one ulp below pi/4 (0x3F490FDB); presumably a deliberate split constant -- verify */
+" mad_ieee r1.___w, r1.w_neg(xyzw), l53, l54\n"
+" add r1.___w, r2.y, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l55, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA\n"
+" add r1.___w, r1.w_neg(xyzw), l55\n"
+" mad_ieee r1.__z_, r2.x, r1.z, r2.x\n" /* small-|x| form: |x| + |x| * correction */
+" cmov_logical r0.__z_, r0.z, r1.w, r1.z\n" /* pick the |x| >= 0.5 result or the small-|x| result */
+" \n"
+" dcl_literal l56, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n" /* 0x3EA2F983 is 1/pi as a float */
+" mul_ieee r0.___w, r0.z, l56\n" /* scale by 1/pi; r0.w carries this branch's result to the common tail */
+"endif\n"
+"\n"
+"dcl_literal l57, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0.__z_, r1.y, l57\n"
+"\n"
+"dcl_literal l58, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r0.__z_, r0.z, l58, r0.w\n" /* |x| == 1.0 gives exactly 0.5 */
+"ior r0.__z_, r1.x, r0.z\n" /* put the input's sign bit back */
+"cmov_logical r0.__z_, r1.y, r0.z, r0.x\n" /* zero input (|x| bits all clear) returns x itself */
+"\n"
+"dcl_literal l59, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l59\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* NaN input returns x with the 0x7FC00000 bits set */
+"\n"
+"dcl_literal l60, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"lt r0._y__, l60, r1.y\n"
+"\n"
+"dcl_literal l61, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r0.y, l61, r0.x\n" /* |x| > 1.0 returns the NaN pattern 0xFFC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__asin_f32", /* entry layout as visible here: macro name, macro text, then two integers; only the x component is processed */
+"mdef(180)_out(1)_in(1)\n" /* macro header: number 180, one output, one input */
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n" /* keep the original x in r1.x; r0 is reused as scratch */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r0._y__, l0\n"
+"dcl_literal l1, 0x00000000, 0x7FFFFFFF, 0x80000000, 0x7F800000\n"
+"and r1._yzw, r1.x, l1\n" /* r1.y = |x| bits, r1.z = sign bit, r1.w = exponent field */
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.w, l2\n"
+"dcl_literal l3, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 is -127 */
+"iadd r0.___w, r0.w, l3\n" /* r0.w = unbiased exponent of x */
+"dcl_literal l4, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n" /* 0xFFFFFFFF is -1 */
+"ige r0.___w, r0.w, l4\n" /* r0.w = exponent >= -1, i.e. |x| >= 0.5 */
+"dcl_literal l5, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.x___, r1.y_neg(xyzw), l5\n" /* r2.x = 1.0 - |x| */
+"dcl_literal l6, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r2.__z_, r2.x, l6\n" /* r2.z = (1 - |x|) * 0.5 */
+"sqrt_vec r2.xy__, r2.z\n"
+"mul_ieee r0.__z_, r1.y, r1.y\n" /* r0.z = x*x */
+"mov r0.x___, r1.y\n" /* r0.x = |x| bits from here on */
+"cmov_logical r2.xyz_, r0.w, r2.xyzx, r0.xyzx\n" /* |x| >= 0.5 uses sqrt((1-|x|)/2) and (1-|x|)/2; otherwise |x|, 0, x*x */
+"dcl_literal l7, 0x00000000, 0x3B81CE6B, 0x3F561F0D, 0x00000000\n"
+"dcl_literal l8, 0x00000000, 0xBC5B3FE1, 0x3F8D6FA5, 0x00000000\n"
+"mad_ieee r0._yz_, r2.z_neg(xyzw), l7, l8\n" /* y lane starts the numerator polynomial, z lane the denominator */
+"dcl_literal l9, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD, 0xBD678BDD\n"
+"mad_ieee r0._y__, r0.y, r2.z, l9\n"
+"dcl_literal l10, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC, 0x3E3C94DC\n"
+"mad_ieee r0._y__, r0.y, r2.z, l10\n"
+"mul_ieee r0._y__, r2.z, r0.y\n"
+"div_zeroop(infinity) r0._y__, r0.y, r0.z\n" /* r0.y = rational correction term */
+"dp2_ieee r0.__z_, r0.y, r2.y\n"
+"mad_ieee r0._y__, r2.x, r0.y, r2.x\n" /* small-|x| form: |x| + |x| * correction */
+"dcl_literal l11, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"and r1._y__, r2.y, l11\n" /* keep only the upper 16 bits of the square root (a truncated head value) */
+"mad_ieee r2.x___, r1.y_neg(xyzw), r1.y, r2.z\n"
+"add r2._y__, r2.y, r1.y\n"
+"dcl_literal l12, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 0x40000000 is 2.0f */
+"dcl_literal l13, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA\n" /* NOTE(review): one ulp below pi/4 (0x3F490FDB); presumably a deliberate split constant -- verify */
+"mad_ieee r1._y__, r1.y_neg(xyzw), l12, l13\n"
+"div_zeroop(infinity) r2.x___, r2.x, r2.y\n"
+"dcl_literal l14, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l15, 0x33A22168, 0x33A22168, 0x33A22168, 0x33A22168\n"
+"mad_ieee r2.x___, r2.x_neg(xyzw), l14, l15\n"
+"add r0.__z_, r0.z, r2.x_neg(xyzw)\n"
+"add r0.__z_, r1.y_neg(xyzw), r0.z\n"
+"dcl_literal l16, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA, 0x3F490FDA\n"
+"add r0.__z_, r0.z_neg(xyzw), l16\n"
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n" /* pick the |x| >= 0.5 result or the small-|x| result */
+"dcl_literal l17, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0.__z_, r1.x_abs, l17\n"
+"dcl_literal l18, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n" /* 0x3FC90FDB is pi/2 as a float */
+"cmov_logical r0._y__, r0.z, l18, r0.y\n" /* |x| == 1.0 gives exactly pi/2 */
+"dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l19, r1.w\n"
+"dcl_literal l20, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ilt r0.___w, r1.w, l20\n" /* NOTE(review): r1.w is the unshifted exponent field (multiples of 0x00800000), so 0 < r1.w < 8 can never hold -- confirm whether a shifted exponent was intended */
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l21, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.___w, r1.w, l21\n"
+"dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1._y__, r0.x, l22\n"
+"and r0.___w, r0.w, r1.y\n" /* exponent field zero but |x| bits non-zero: denormal input */
+"ior r0.__z_, r0.z, r0.w\n"
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* in those cases the result is |x| itself */
+"cmov_logical r0._y__, r1.z, r0.y_neg(xyzw), r0.y\n" /* negate the result when the input had its sign bit set */
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0.x___, l23, r0.x\n" /* r0.x = |x| bits above the infinity pattern, i.e. NaN input */
+"dcl_literal l24, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.__z_, r1.x, l24\n"
+"dcl_literal l25, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"lt r0.___w, l25, r1.x_abs\n"
+"cmov_logical r0.x___, r0.x, r0.z, r0.y\n" /* NaN input returns x with the 0x7FC00000 bits set */
+"dcl_literal l26, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r0.w, l26, r0.x\n" /* |x| > 1.0 returns the NaN pattern 0xFFC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* agrees with _in(1)/_out(1) above; presumably (input count, output count) -- confirm against the struct declaration */
+},
+{ "__atan2pi_f32",
+"mdef(181)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x80000000\n"
+"and r1, r0.xyxy, l0\n"
+"ult r0.__z_, r1.y, r1.x\n"
+"cmov_logical r2, r0.z, r1.yxyx, r1.xyxy\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7FFFFFFF, 0x7F800000, 0x7F800000\n"
+"and r3, r2.yzwx, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r4, r3.wzzw, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00000000\n"
+"ilt r5.xyz_, r3.xxyx, l3\n"
+"dcl_literal l4, 0xFFFFFF81, 0xFFFFFF9D, 0xFFFFFF9D, 0xFFFFFF81\n"
+"iadd r4, r4.xzwy, l4\n"
+"dcl_literal l5, 0x41E00000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r6.x_z_, l5\n"
+"mov r6._y_w, r4.yyyw\n"
+"cmov_logical r4._y_w, r5.yyyx, r6.yyyx, r6.wwwz\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x42280000, 0x41600000\n"
+"dcl_literal l7, 0xC1E00000, 0x00000000, 0x41600000, 0xC1600000\n"
+"cmov_logical r6.x_zw, r5.xxzy, l6, l7\n"
+"mov r5.x___, r6.x\n"
+"mov r5._y__, r4.z\n"
+"dcl_literal l8, 0x41E00000, 0x41E00000, 0x41E00000, 0x41E00000\n"
+"mov r5.___w, l8\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r4.__z_, l9\n"
+"cmov_logical r5.xyz_, r5.z, r5.xywx, r4.wxzw\n"
+"iadd r0.___w, r5.y, r4.y_neg(xyzw)\n"
+"dcl_literal l10, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83\n"
+"ilt r0.___w, r0.w, l10\n"
+"dcl_literal l11, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2\n"
+"iadd r6._y__, r4.y, l11\n"
+"dcl_literal l12, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n"
+"add r6.x___, r5.x, l12\n"
+"mov r4.x_z_, r5.xxzx\n"
+"cmov_logical r4, r0.w, r6, r4\n"
+"dcl_literal l13, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r0.___w, l13, r4.y\n"
+"dcl_literal l14, 0x00000000, 0xC1600000, 0xC1600000, 0x00000000\n"
+"add r2._yz_, r4.wwxw, l14\n"
+"cmov_logical r2._yz_, r0.w, r2.yyzy, r4.wwxw\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.___w, r4.z, l15\n"
+"if_logicalnz r0.w\n"
+" ftoi r0.___w, r4.z\n"
+" \n"
+" dcl_literal l16, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r4.x___, r2.x, l16\n"
+" itof r4.x___, r4.x\n"
+" cmov_logical r3._y__, r3.w, r3.y, r4.x\n"
+" \n"
+" dcl_literal l17, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4.x___, r0.w, l17\n"
+" cmov_logical r0.___w, r3.w, r0.w, r4.x\n"
+" \n"
+" dcl_literal l18, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r3.y, l18\n"
+" \n"
+" dcl_literal l19, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.___w, r4.x, l19\n"
+" iadd r3.___w, r3.w, r0.w\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l20\n"
+" iadd r0.___w, r3.y, r0.w\n"
+" \n"
+" dcl_literal l21, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3._y__, r3.w, l21\n"
+" \n"
+" dcl_literal l22, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.___w, l22, r3.y\n"
+" \n"
+" dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r3.w, l23, r0.w\n"
+" \n"
+" dcl_literal l24, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3._y__, l24, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l25, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r4.y, l25\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.x___, l26, r3.y\n"
+" \n"
+" dcl_literal l27, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r4.x, l27, r3.y\n"
+" \n"
+" dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r3._y__, r3.y, l28\n"
+" ishr r3.___w, r3.w, r3.y\n"
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l29, r3.y\n"
+" cmov_logical r2.x___, r3.y, r3.w, r0.w\n"
+"endif\n"
+"dcl_literal l30, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r3._y_w, r2.yyyz, l30\n"
+"if_logicalnz r3.y\n"
+" ftoi r0.___w, r2.y\n"
+" \n"
+" dcl_literal l31, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r2._y__, r2.w, l31\n"
+" itof r2._y__, r2.y\n"
+" cmov_logical r2._y__, r3.z, r3.x, r2.y\n"
+" \n"
+" dcl_literal l32, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3.x___, r0.w, l32\n"
+" cmov_logical r0.___w, r3.z, r0.w, r3.x\n"
+" \n"
+" dcl_literal l33, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.y, l33\n"
+" \n"
+" dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.x___, r3.x, l34\n"
+" iadd r3.x___, r3.x, r0.w\n"
+" \n"
+" dcl_literal l35, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l35\n"
+" iadd r0.___w, r2.y, r0.w\n"
+" \n"
+" dcl_literal l36, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2._y__, r3.x, l36\n"
+" \n"
+" dcl_literal l37, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.x___, l37, r2.y\n"
+" \n"
+" dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r3.x, l38, r0.w\n"
+" \n"
+" dcl_literal l39, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2._y__, l39, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l40, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.x___, r3.y, l40\n"
+" \n"
+" dcl_literal l41, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3._y__, l41, r2.y\n"
+" \n"
+" dcl_literal l42, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r3.y, l42, r2.y\n"
+" \n"
+" dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2._y__, r2.y, l43\n"
+" ishr r3.x___, r3.x, r2.y\n"
+" \n"
+" dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l44, r2.y\n"
+" cmov_logical r2.___w, r2.y, r3.x, r0.w\n"
+"endif\n"
+"div_zeroop(infinity) r0.___w, r2.x, r2.w\n"
+"ftoi r2.x___, r2.z\n"
+"dcl_literal l45, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.w, l45\n"
+"itof r2._y__, r4.w\n"
+"cmov_logical r2._y__, r4.z, r4.x, r2.y\n"
+"dcl_literal l46, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.__z_, r2.x, l46\n"
+"cmov_logical r2.x___, r4.z, r2.x, r2.z\n"
+"dcl_literal l47, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r2.__zw, r2.y, l47\n"
+"dcl_literal l48, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.z, l48\n"
+"iadd r2.__z_, r2.z, r2.x\n"
+"dcl_literal l49, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l49\n"
+"iadd r2.x___, r2.y, r2.x\n"
+"dcl_literal l50, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2._y__, r2.z, l50\n"
+"dcl_literal l51, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r2.__z_, l51, r2.y\n"
+"dcl_literal l52, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.x___, r2.z, l52, r2.x\n"
+"dcl_literal l53, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2._y__, l53, r2.y_neg(xyzw)\n"
+"dcl_literal l54, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r2.__z_, r2.w, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r2.___w, l55, r2.y\n"
+"dcl_literal l56, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2._y__, r2.w, l56, r2.y\n"
+"dcl_literal l57, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r2._y__, r2.y, l57\n"
+"ishr r2.__z_, r2.z, r2.y\n"
+"dcl_literal l58, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2._y__, l58, r2.y\n"
+"cmov_logical r2.x___, r2.y, r2.z, r2.x\n"
+"ior r2.x___, r4.y, r2.x\n"
+"cmov_logical r0.___w, r3.w, r2.x, r0.w\n"
+"dcl_literal l59, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r2.x___, r0.w, l59\n"
+"dcl_literal l60, 0x3EE00000, 0x3F300000, 0x3F980000, 0x401C0000\n"
+"uge r3, r2.x, l60\n"
+"dcl_literal l61, 0x00000000, 0x3F300000, 0x3F980000, 0x401C0000\n"
+"ult r2._yzw, r2.x, l61\n"
+"and r2._yzw, r3.xxyz, r2.yyzw\n"
+"dcl_literal l62, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l63, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"mad_ieee r3.x___, r2.x, l62, l63\n"
+"dcl_literal l64, 0x40000000, 0xBF800000, 0x3F800000, 0xBFC00000\n"
+"add r4, r2.x, l64\n"
+"div_zeroop(infinity) r3.x___, r3.x, r4.x\n"
+"cmov_logical r3.x___, r2.y, r3.x, r2.x\n"
+"dcl_literal l65, 0x3EED6338, 0x3EED6338, 0x3EED6338, 0x3EED6338\n"
+"and r2._y__, r2.y, l65\n"
+"div_zeroop(infinity) r3._y__, r4.y, r4.z\n"
+"cmov_logical r3.x___, r2.z, r3.y, r3.x\n"
+"dcl_literal l66, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+"cmov_logical r2._y__, r2.z, l66, r2.y\n"
+"dcl_literal l67, 0x3FC00000, 0x3FC00000, 0x3FC00000, 0x3FC00000\n"
+"dcl_literal l68, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r2.__z_, r2.x, l67, l68\n"
+"div_zeroop(infinity) r2.__z_, r4.w, r2.z\n"
+"cmov_logical r2.__z_, r2.w, r2.z, r3.x\n"
+"dcl_literal l69, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F\n"
+"cmov_logical r2._y__, r2.w, l69, r2.y\n"
+"dcl_literal l70, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"div_zeroop(infinity) r2.x___, l70, r2.x\n"
+"cmov_logical r2.x___, r3.w, r2.x, r2.z\n"
+"dcl_literal l71, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"cmov_logical r2._y__, r3.w, l71, r2.y\n"
+"mul_ieee r2.__z_, r2.x, r2.x\n"
+"mul_ieee r2.___w, r2.x, r2.z\n"
+"dcl_literal l72, 0x3B9A3B54, 0x3E993F1F, 0x00000000, 0x00000000\n"
+"dcl_literal l73, 0x3E44F0BA, 0x3F8E2C3D, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r2.z, l72, l73\n"
+"dcl_literal l74, 0x3E97D299, 0x3F63BBE5, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r3.xyxx, r2.z, l74\n"
+"mul_ieee r2.__z_, r2.w, r3.x\n"
+"div_zeroop(infinity) r2.__z_, r2.z, r3.y\n"
+"add r2.x___, r2.x_neg(xyzw), r2.z\n"
+"add r2.x___, r2.y, r2.x_neg(xyzw)\n"
+"dcl_literal l75, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ilt r2._y__, r0.w, l75\n"
+"cmov_logical r0.___w, r2.y, r0.w, r2.x\n"
+"dcl_literal l76, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.x___, r0.w, l76\n"
+"dcl_literal l77, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r2.x, l77\n"
+"dcl_literal l78, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"ilt r2.x___, r2.x, l78\n"
+"if_logicalnz r2.x\n"
+" \n"
+" dcl_literal l79, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r3, r0.w, l79\n"
+" itof r2._y__, r3.w\n"
+" cmov_logical r2._y__, r3.z, r3.x, r2.y\n"
+" \n"
+" dcl_literal l80, 0x00000000, 0x00000000, 0x0000001C, 0x0E000000\n"
+" \n"
+" dcl_literal l81, 0x00000000, 0x00000000, 0xFFFFFF87, 0xC3800000\n"
+" cmov_logical r2.__zw, r3.z, l80, l81\n"
+" \n"
+" dcl_literal l82, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r3.x_z_, r2.y, l82\n"
+" \n"
+" dcl_literal l83, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.x___, r3.x, l83\n"
+" iadd r2.__z_, r3.x, r2.z\n"
+" iadd r2._y__, r2.y, r2.w\n"
+" \n"
+" dcl_literal l84, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.__z_, r2.z, l84\n"
+" \n"
+" dcl_literal l85, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l85, r2.z\n"
+" \n"
+" dcl_literal l86, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2._y__, r2.w, l86, r2.y\n"
+" \n"
+" dcl_literal l87, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.__z_, l87, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l88, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r3.z, l88\n"
+" \n"
+" dcl_literal l89, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l89, r2.z\n"
+" \n"
+" dcl_literal l90, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.x, l90, r2.z\n"
+" \n"
+" dcl_literal l91, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.__z_, r2.z, l91\n"
+" ishr r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l92, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l92, r2.z\n"
+" cmov_logical r2._y__, r2.z, r2.w, r2.y\n"
+" ior r0.___w, r3.y, r2.y\n"
+"endif\n"
+"dcl_literal l93, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n"
+"mul_ieee r0.___w, r0.w, l93\n"
+"if_logicalnz r2.x\n"
+" \n"
+" dcl_literal l94, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r2, r0.w, l94\n"
+" itof r2.___w, r2.w\n"
+" cmov_logical r2.x___, r2.z, r2.x, r2.w\n"
+" \n"
+" dcl_literal l95, 0x00000000, 0x00000000, 0xFFFFFFE4, 0xF2000000\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0xFFFFFF4F, 0xA7800000\n"
+" cmov_logical r2.__zw, r2.z, l95, l96\n"
+" \n"
+" dcl_literal l97, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l97\n"
+" \n"
+" dcl_literal l98, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.x___, r3.x, l98\n"
+" iadd r2.__z_, r3.x, r2.z\n"
+" iadd r2.x___, r2.x, r2.w\n"
+" \n"
+" dcl_literal l99, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.__z_, r2.z, l99\n"
+" \n"
+" dcl_literal l100, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.__z_, l100, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l101, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r3.y, l101\n"
+" \n"
+" dcl_literal l102, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l102, r2.z\n"
+" \n"
+" dcl_literal l103, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.x, l103, r2.z\n"
+" \n"
+" dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.__z_, r2.z, l104\n"
+" ishr r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l105, r2.z\n"
+" cmov_logical r2.x___, r2.z, r2.w, r2.x\n"
+" ior r0.___w, r2.y, r2.x\n"
+"endif\n"
+"dcl_literal l106, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"add r2.x___, r0.w_neg(xyzw), l106\n"
+"cmov_logical r0.__z_, r0.z, r2.x, r0.w\n"
+"dcl_literal l107, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ushr r2.x___, r1.z, l107\n"
+"dcl_literal l108, 0x0000001E, 0x0000001E, 0x0000001E, 0x0000001E\n"
+"ushr r2._y__, r1.w, l108\n"
+"dcl_literal l109, 0x00000001, 0x00000002, 0x00000000, 0x00000000\n"
+"and r2.xy__, r2.xyxx, l109\n"
+"ior r0.___w, r2.y, r2.x\n"
+"dcl_literal l110, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r2.xyz_, r0.w, l110\n"
+"dcl_literal l111, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ixor r0.___w, r0.z, l111\n"
+"cmov_logical r0.__z_, r2.x, r0.w, r0.z\n"
+"dcl_literal l112, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.z_neg(xyzw), l112\n"
+"cmov_logical r0.__z_, r2.y, r0.w, r0.z\n"
+"dcl_literal l113, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.z, l113\n"
+"cmov_logical r0.__z_, r2.z, r0.w, r0.z\n"
+"dcl_literal l114, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ult r2.xy__, l114, r1.xyxx\n"
+"dcl_literal l115, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ieq r2.__zw, r1.xxxy, l115\n"
+"dcl_literal l116, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l117, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r3, r1.xyyx, l116, l117\n"
+"dcl_literal l118, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.xy__, r1.zwzz, l118\n"
+"inot r1.__zw, r1.xxxy\n"
+"and r0.___w, r3.x, r1.z\n"
+"and r4.xy__, r3.yzyy, r0.w\n"
+"and r4.__zw, r1.y, r4.xxxy\n"
+"dcl_literal l119, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r4.z, l119, r0.z\n"
+"and r0.___w, r3.x, r1.x\n"
+"and r3.x_z_, r3.yyzy, r0.w\n"
+"and r5.xy__, r1.y, r3.xzxx\n"
+"dcl_literal l120, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.__z_, r5.x, l120, r0.z\n"
+"and r4.xy__, r1.w, r4.xyxx\n"
+"dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r4.x, l121, r0.z\n"
+"and r3.x_z_, r1.w, r3.xxzx\n"
+"dcl_literal l122, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r3.x, l122, r0.z\n"
+"dcl_literal l123, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r4.w, l123, r0.z\n"
+"dcl_literal l124, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.__z_, r5.y, l124, r0.z\n"
+"dcl_literal l125, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r4.y, l125, r0.z\n"
+"dcl_literal l126, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r3.z, l126, r0.z\n"
+"inot r3.x_z_, r2.zzwz\n"
+"and r0.___w, r1.z, r3.x\n"
+"and r0.___w, r3.w, r0.w\n"
+"and r0.___w, r3.y, r0.w\n"
+"dcl_literal l127, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r0.__z_, r0.w, l127, r0.z\n"
+"and r0.___w, r1.x, r3.x\n"
+"and r0.___w, r3.w, r0.w\n"
+"and r0.___w, r3.y, r0.w\n"
+"dcl_literal l128, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"cmov_logical r0.__z_, r0.w, l128, r0.z\n"
+"and r0.___w, r3.w, r3.x\n"
+"and r3.x___, r1.z, r0.w\n"
+"and r3.x___, r2.w, r3.x\n"
+"and r3._y__, r1.y, r3.x\n"
+"dcl_literal l129, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r3.y, l129, r0.z\n"
+"and r0.___w, r1.x, r0.w\n"
+"and r0.___w, r2.w, r0.w\n"
+"and r3._y__, r1.y, r0.w\n"
+"dcl_literal l130, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.__z_, r3.y, l130, r0.z\n"
+"and r3.x___, r1.w, r3.x\n"
+"dcl_literal l131, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r3.x, l131, r0.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"dcl_literal l132, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r0.w, l132, r0.z\n"
+"and r0.___w, r2.z, r1.z\n"
+"and r1.__z_, r3.z, r0.w\n"
+"dcl_literal l133, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r0.__z_, r1.z, l133, r0.z\n"
+"and r1.x___, r2.z, r1.x\n"
+"and r1.__z_, r3.z, r1.x\n"
+"dcl_literal l134, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"cmov_logical r0.__z_, r1.z, l134, r0.z\n"
+"and r0.___w, r2.w, r0.w\n"
+"and r1.__z_, r1.y, r0.w\n"
+"dcl_literal l135, 0x3F400000, 0x3F400000, 0x3F400000, 0x3F400000\n"
+"cmov_logical r0.__z_, r1.z, l135, r0.z\n"
+"and r1.x___, r2.w, r1.x\n"
+"and r1._y__, r1.y, r1.x\n"
+"dcl_literal l136, 0xBF400000, 0xBF400000, 0xBF400000, 0xBF400000\n"
+"cmov_logical r0.__z_, r1.y, l136, r0.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"dcl_literal l137, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"cmov_logical r0.__z_, r0.w, l137, r0.z\n"
+"and r0.___w, r1.w, r1.x\n"
+"dcl_literal l138, 0xBE800000, 0xBE800000, 0xBE800000, 0xBE800000\n"
+"cmov_logical r0.__z_, r0.w, l138, r0.z\n"
+"cmov_logical r0.x___, r2.x, r0.x, r0.z\n"
+"cmov_logical r0.x___, r2.y, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__atan2_f32", /* table entry: macro name, IL macro text (adjacent string literals), then two integers */
+"mdef(182)_out(1)_in(2)\n" /* macro header: id 182, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x80000000\n" /* 0x7FFFFFFF clears bit 31, 0x80000000 keeps only bit 31 */
+"and r1, r0.xyxy, l0\n"
+"ult r0.__z_, r1.y, r1.x\n"
+"cmov_logical r2, r0.z, r1.yxyx, r1.xyxy\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7FFFFFFF, 0x7F800000, 0x7F800000\n"
+"and r3, r2.yzwx, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r4, r3.wzzw, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00000000\n"
+"ilt r5.xyz_, r3.xxyx, l3\n"
+"dcl_literal l4, 0xFFFFFF81, 0xFFFFFF9D, 0xFFFFFF9D, 0xFFFFFF81\n"
+"iadd r4, r4.xzwy, l4\n"
+"dcl_literal l5, 0x41E00000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r6.x_z_, l5\n"
+"mov r6._y_w, r4.yyyw\n"
+"cmov_logical r4._y_w, r5.yyyx, r6.yyyx, r6.wwwz\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x42280000, 0x41600000\n"
+"dcl_literal l7, 0xC1E00000, 0x00000000, 0x41600000, 0xC1600000\n"
+"cmov_logical r6.x_zw, r5.xxzy, l6, l7\n"
+"mov r5.x___, r6.x\n"
+"mov r5._y__, r4.z\n"
+"dcl_literal l8, 0x41E00000, 0x41E00000, 0x41E00000, 0x41E00000\n"
+"mov r5.___w, l8\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r4.__z_, l9\n"
+"cmov_logical r5.xyz_, r5.z, r5.xywx, r4.wxzw\n"
+"iadd r0.___w, r5.y, r4.y_neg(xyzw)\n"
+"dcl_literal l10, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83\n"
+"ilt r0.___w, r0.w, l10\n"
+"dcl_literal l11, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2\n"
+"iadd r6._y__, r4.y, l11\n"
+"dcl_literal l12, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n"
+"add r6.x___, r5.x, l12\n"
+"mov r4.x_z_, r5.xxzx\n"
+"cmov_logical r4, r0.w, r6, r4\n"
+"dcl_literal l13, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r0.___w, l13, r4.y\n"
+"dcl_literal l14, 0x00000000, 0xC1600000, 0xC1600000, 0x00000000\n"
+"add r2._yz_, r4.wwxw, l14\n"
+"cmov_logical r2._yz_, r0.w, r2.yyzy, r4.wwxw\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.___w, r4.z, l15\n"
+"if_logicalnz r0.w\n"
+" ftoi r0.___w, r4.z\n"
+" \n"
+" dcl_literal l16, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r4.x___, r2.x, l16\n"
+" itof r4.x___, r4.x\n"
+" cmov_logical r3._y__, r3.w, r3.y, r4.x\n"
+" \n"
+" dcl_literal l17, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4.x___, r0.w, l17\n"
+" cmov_logical r0.___w, r3.w, r0.w, r4.x\n"
+" \n"
+" dcl_literal l18, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r3.y, l18\n"
+" \n"
+" dcl_literal l19, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.___w, r4.x, l19\n"
+" iadd r3.___w, r3.w, r0.w\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l20\n"
+" iadd r0.___w, r3.y, r0.w\n"
+" \n"
+" dcl_literal l21, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3._y__, r3.w, l21\n"
+" \n"
+" dcl_literal l22, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.___w, l22, r3.y\n"
+" \n"
+" dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r3.w, l23, r0.w\n"
+" \n"
+" dcl_literal l24, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3._y__, l24, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l25, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r4.y, l25\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.x___, l26, r3.y\n"
+" \n"
+" dcl_literal l27, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r4.x, l27, r3.y\n"
+" \n"
+" dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r3._y__, r3.y, l28\n"
+" ishr r3.___w, r3.w, r3.y\n"
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l29, r3.y\n"
+" cmov_logical r2.x___, r3.y, r3.w, r0.w\n"
+"endif\n"
+"dcl_literal l30, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r3._y_w, r2.yyyz, l30\n"
+"if_logicalnz r3.y\n"
+" ftoi r0.___w, r2.y\n"
+" \n"
+" dcl_literal l31, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r2._y__, r2.w, l31\n"
+" itof r2._y__, r2.y\n"
+" cmov_logical r2._y__, r3.z, r3.x, r2.y\n"
+" \n"
+" dcl_literal l32, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3.x___, r0.w, l32\n"
+" cmov_logical r0.___w, r3.z, r0.w, r3.x\n"
+" \n"
+" dcl_literal l33, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.y, l33\n"
+" \n"
+" dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.x___, r3.x, l34\n"
+" iadd r3.x___, r3.x, r0.w\n"
+" \n"
+" dcl_literal l35, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l35\n"
+" iadd r0.___w, r2.y, r0.w\n"
+" \n"
+" dcl_literal l36, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2._y__, r3.x, l36\n"
+" \n"
+" dcl_literal l37, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.x___, l37, r2.y\n"
+" \n"
+" dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r3.x, l38, r0.w\n"
+" \n"
+" dcl_literal l39, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2._y__, l39, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l40, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.x___, r3.y, l40\n"
+" \n"
+" dcl_literal l41, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3._y__, l41, r2.y\n"
+" \n"
+" dcl_literal l42, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r3.y, l42, r2.y\n"
+" \n"
+" dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2._y__, r2.y, l43\n"
+" ishr r3.x___, r3.x, r2.y\n"
+" \n"
+" dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l44, r2.y\n"
+" cmov_logical r2.___w, r2.y, r3.x, r0.w\n"
+"endif\n"
+"div_zeroop(infinity) r0.___w, r2.x, r2.w\n"
+"ftoi r2.x___, r2.z\n"
+"dcl_literal l45, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.w, l45\n"
+"itof r2._y__, r4.w\n"
+"cmov_logical r2._y__, r4.z, r4.x, r2.y\n"
+"dcl_literal l46, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.__z_, r2.x, l46\n"
+"cmov_logical r2.x___, r4.z, r2.x, r2.z\n"
+"dcl_literal l47, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r2.__zw, r2.y, l47\n"
+"dcl_literal l48, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.z, l48\n"
+"iadd r2.__z_, r2.z, r2.x\n"
+"dcl_literal l49, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l49\n"
+"iadd r2.x___, r2.y, r2.x\n"
+"dcl_literal l50, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2._y__, r2.z, l50\n"
+"dcl_literal l51, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r2.__z_, l51, r2.y\n"
+"dcl_literal l52, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.x___, r2.z, l52, r2.x\n"
+"dcl_literal l53, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2._y__, l53, r2.y_neg(xyzw)\n"
+"dcl_literal l54, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r2.__z_, r2.w, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r2.___w, l55, r2.y\n"
+"dcl_literal l56, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2._y__, r2.w, l56, r2.y\n"
+"dcl_literal l57, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r2._y__, r2.y, l57\n"
+"ishr r2.__z_, r2.z, r2.y\n"
+"dcl_literal l58, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2._y__, l58, r2.y\n"
+"cmov_logical r2.x___, r2.y, r2.z, r2.x\n"
+"ior r2.x___, r4.y, r2.x\n"
+"cmov_logical r0.___w, r3.w, r2.x, r0.w\n"
+"dcl_literal l59, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r2.x___, r0.w, l59\n"
+"dcl_literal l60, 0x3EE00000, 0x3F300000, 0x3F980000, 0x401C0000\n" /* as IEEE-754 singles: 0.4375, 0.6875, 1.1875, 2.4375 */
+"uge r3, r2.x, l60\n"
+"dcl_literal l61, 0x00000000, 0x3F300000, 0x3F980000, 0x401C0000\n"
+"ult r2._yzw, r2.x, l61\n"
+"and r2._yzw, r3.xxyz, r2.yyzw\n"
+"dcl_literal l62, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l63, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"mad_ieee r3.x___, r2.x, l62, l63\n"
+"dcl_literal l64, 0x40000000, 0xBF800000, 0x3F800000, 0xBFC00000\n"
+"add r4, r2.x, l64\n"
+"div_zeroop(infinity) r3.x___, r3.x, r4.x\n"
+"cmov_logical r3.x___, r2.y, r3.x, r2.x\n"
+"dcl_literal l65, 0x3EED6338, 0x3EED6338, 0x3EED6338, 0x3EED6338\n" /* ~0.4636476 as an IEEE-754 single, i.e. atan(0.5) */
+"and r2._y__, r2.y, l65\n"
+"div_zeroop(infinity) r3._y__, r4.y, r4.z\n"
+"cmov_logical r3.x___, r2.z, r3.y, r3.x\n"
+"dcl_literal l66, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n" /* pi/4 as an IEEE-754 single */
+"cmov_logical r2._y__, r2.z, l66, r2.y\n"
+"dcl_literal l67, 0x3FC00000, 0x3FC00000, 0x3FC00000, 0x3FC00000\n"
+"dcl_literal l68, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r2.__z_, r2.x, l67, l68\n"
+"div_zeroop(infinity) r2.__z_, r4.w, r2.z\n"
+"cmov_logical r2.__z_, r2.w, r2.z, r3.x\n"
+"dcl_literal l69, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F\n" /* ~0.9827937 as an IEEE-754 single, i.e. atan(1.5) */
+"cmov_logical r2._y__, r2.w, l69, r2.y\n"
+"dcl_literal l70, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"div_zeroop(infinity) r2.x___, l70, r2.x\n"
+"cmov_logical r2.x___, r3.w, r2.x, r2.z\n"
+"dcl_literal l71, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n" /* pi/2 as an IEEE-754 single */
+"cmov_logical r2._y__, r3.w, l71, r2.y\n"
+"mul_ieee r2.__z_, r2.x, r2.x\n"
+"mul_ieee r2.___w, r2.x, r2.z\n"
+"dcl_literal l72, 0x3B9A3B54, 0x3E993F1F, 0x00000000, 0x00000000\n"
+"dcl_literal l73, 0x3E44F0BA, 0x3F8E2C3D, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r2.z, l72, l73\n"
+"dcl_literal l74, 0x3E97D299, 0x3F63BBE5, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r3.xyxx, r2.z, l74\n"
+"mul_ieee r2.__z_, r2.w, r3.x\n"
+"div_zeroop(infinity) r2.__z_, r2.z, r3.y\n"
+"add r2.x___, r2.x_neg(xyzw), r2.z\n"
+"add r2.x___, r2.y, r2.x_neg(xyzw)\n"
+"dcl_literal l75, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ilt r2._y__, r0.w, l75\n"
+"cmov_logical r0.___w, r2.y, r0.w, r2.x\n"
+"dcl_literal l76, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"add r2.x___, r0.w_neg(xyzw), l76\n"
+"cmov_logical r0.__z_, r0.z, r2.x, r0.w\n"
+"dcl_literal l77, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ushr r2.x___, r1.z, l77\n"
+"dcl_literal l78, 0x0000001E, 0x0000001E, 0x0000001E, 0x0000001E\n"
+"ushr r2._y__, r1.w, l78\n"
+"dcl_literal l79, 0x00000001, 0x00000002, 0x00000000, 0x00000000\n"
+"and r2.xy__, r2.xyxx, l79\n"
+"ior r0.___w, r2.y, r2.x\n"
+"dcl_literal l80, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r2.xyz_, r0.w, l80\n"
+"dcl_literal l81, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ixor r0.___w, r0.z, l81\n"
+"cmov_logical r0.__z_, r2.x, r0.w, r0.z\n"
+"dcl_literal l82, 0xA50D3132, 0xA50D3132, 0xA50D3132, 0xA50D3132\n"
+"add r0.___w, r0.z, l82\n"
+"dcl_literal l83, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n" /* pi as an IEEE-754 single */
+"add r0.___w, r0.w_neg(xyzw), l83\n"
+"cmov_logical r0.__z_, r2.y, r0.w, r0.z\n"
+"dcl_literal l84, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB\n" /* -pi as an IEEE-754 single */
+"add r0.___w, r0.z, l84\n"
+"cmov_logical r0.__z_, r2.z, r0.w, r0.z\n"
+"dcl_literal l85, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n" /* 0x7F800000 is the +infinity bit pattern */
+"ult r2.xy__, l85, r1.xyxx\n"
+"dcl_literal l86, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ieq r2.__zw, r1.xxxy, l86\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l88, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r3, r1.xyyx, l87, l88\n"
+"dcl_literal l89, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.xy__, r1.zwzz, l89\n"
+"inot r1.__zw, r1.xxxy\n"
+"and r0.___w, r3.x, r1.z\n"
+"and r4.xy__, r3.yzyy, r0.w\n"
+"and r4.__zw, r1.y, r4.xxxy\n"
+"dcl_literal l90, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"cmov_logical r0.__z_, r4.z, l90, r0.z\n"
+"and r0.___w, r3.x, r1.x\n"
+"and r3.x_z_, r3.yyzy, r0.w\n"
+"and r5.xy__, r1.y, r3.xzxx\n"
+"dcl_literal l91, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB\n"
+"cmov_logical r0.__z_, r5.x, l91, r0.z\n"
+"and r4.xy__, r1.w, r4.xyxx\n"
+"dcl_literal l92, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r4.x, l92, r0.z\n"
+"and r3.x_z_, r1.w, r3.xxzx\n"
+"dcl_literal l93, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r3.x, l93, r0.z\n"
+"dcl_literal l94, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"cmov_logical r0.__z_, r4.w, l94, r0.z\n"
+"dcl_literal l95, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB\n"
+"cmov_logical r0.__z_, r5.y, l95, r0.z\n"
+"dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r4.y, l96, r0.z\n"
+"dcl_literal l97, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r3.z, l97, r0.z\n"
+"inot r3.x_z_, r2.zzwz\n"
+"and r0.___w, r1.z, r3.x\n"
+"and r0.___w, r3.w, r0.w\n"
+"and r0.___w, r3.y, r0.w\n"
+"dcl_literal l98, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"cmov_logical r0.__z_, r0.w, l98, r0.z\n"
+"and r0.___w, r1.x, r3.x\n"
+"and r0.___w, r3.w, r0.w\n"
+"and r0.___w, r3.y, r0.w\n"
+"dcl_literal l99, 0xBFC90FDB, 0xBFC90FDB, 0xBFC90FDB, 0xBFC90FDB\n"
+"cmov_logical r0.__z_, r0.w, l99, r0.z\n"
+"and r0.___w, r3.w, r3.x\n"
+"and r3.x___, r1.z, r0.w\n"
+"and r3.x___, r2.w, r3.x\n"
+"and r3._y__, r1.y, r3.x\n"
+"dcl_literal l100, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"cmov_logical r0.__z_, r3.y, l100, r0.z\n"
+"and r0.___w, r1.x, r0.w\n"
+"and r0.___w, r2.w, r0.w\n"
+"and r3._y__, r1.y, r0.w\n"
+"dcl_literal l101, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB, 0xC0490FDB\n"
+"cmov_logical r0.__z_, r3.y, l101, r0.z\n"
+"and r3.x___, r1.w, r3.x\n"
+"dcl_literal l102, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r3.x, l102, r0.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"dcl_literal l103, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.__z_, r0.w, l103, r0.z\n"
+"and r0.___w, r2.z, r1.z\n"
+"and r1.__z_, r3.z, r0.w\n"
+"dcl_literal l104, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"cmov_logical r0.__z_, r1.z, l104, r0.z\n"
+"and r1.x___, r2.z, r1.x\n"
+"and r1.__z_, r3.z, r1.x\n"
+"dcl_literal l105, 0xBFC90FDB, 0xBFC90FDB, 0xBFC90FDB, 0xBFC90FDB\n"
+"cmov_logical r0.__z_, r1.z, l105, r0.z\n"
+"and r0.___w, r2.w, r0.w\n"
+"and r1.__z_, r1.y, r0.w\n"
+"dcl_literal l106, 0x4016CBE4, 0x4016CBE4, 0x4016CBE4, 0x4016CBE4\n" /* 3*pi/4 as an IEEE-754 single */
+"cmov_logical r0.__z_, r1.z, l106, r0.z\n"
+"and r1.x___, r2.w, r1.x\n"
+"and r1._y__, r1.y, r1.x\n"
+"dcl_literal l107, 0xC016CBE4, 0xC016CBE4, 0xC016CBE4, 0xC016CBE4\n"
+"cmov_logical r0.__z_, r1.y, l107, r0.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"dcl_literal l108, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+"cmov_logical r0.__z_, r0.w, l108, r0.z\n"
+"and r0.___w, r1.w, r1.x\n"
+"dcl_literal l109, 0xBF490FDB, 0xBF490FDB, 0xBF490FDB, 0xBF490FDB\n"
+"cmov_logical r0.__z_, r0.w, l109, r0.z\n"
+"cmov_logical r0.x___, r2.x, r0.x, r0.z\n"
+"cmov_logical r0.x___, r2.y, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably input count then output count (matches _in(2)/_out(1) in the mdef line) - confirm against the table's struct declaration */
+},
+{ "__atanh_f32", /* table entry: macro name, IL macro text (adjacent string literals), then two integers */
+"mdef(183)_out(1)_in(1)\n" /* macro header: id 183, one output, one input */
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n" /* write mask must be four contiguous characters; a stray space ("r1.x ___") was removed here */
+"dcl_literal l0, 0xFFFFFFE7, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r0.x_z_, l0\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n" /* mask that clears bit 31 */
+"and r1._y__, r1.x, l1\n"
+"add r1.__z_, r1.y, r1.y\n"
+"dcl_literal l2, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 as an IEEE-754 single */
+"add r1.___w, r1.y_neg(xyzw), l2\n"
+"div_zeroop(infinity) r1.__z_, r1.z, r1.w\n"
+"dcl_literal l3, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r1.z, l3\n"
+"dcl_literal l4, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r1.__z_, r0.w, l4\n"
+"dcl_literal l5, 0x4C000000, 0x4C000000, 0x4C000000, 0x4C000000\n"
+"mul_ieee r0._y__, r0.w, l5\n"
+"cmov_logical r0.xy__, r1.z, r0.xyxx, r0.zwzz\n"
+"dcl_literal l6, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.__z_, r0.y, l6\n"
+"dcl_literal l7, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r0.___w, l7, r0.z\n"
+"dcl_literal l8, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r0.___w, r0.w, l8\n"
+"dcl_literal l9, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.__z_, r0.w, l9\n"
+"dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r0.w, l10\n"
+"ior r0.__z_, r0.z, r1.z\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0xBF800000, 0x3F800000\n"
+"add r1.__zw, r0.z, l11\n"
+"div_zeroop(infinity) r0.__z_, r1.z, r1.w\n"
+"mul_ieee r1.___w, r0.z, r0.z\n"
+"mul_ieee r2.x___, r1.w, r1.w\n"
+"dcl_literal l12, 0x00000000, 0x3E1CD04F, 0x3E178897, 0x00000000\n"
+"dcl_literal l13, 0x00000000, 0x3E638E29, 0x3E3A3325, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, l12, l13\n"
+"dcl_literal l14, 0x00000000, 0x3ECCCCCD, 0x3E924925, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, r2.yyzy, l14\n"
+"mul_ieee r2._y__, r2.x, r2.y\n"
+"dcl_literal l15, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r2.x___, r2.x, r2.z, l15\n"
+"mad_ieee r1.___w, r1.w, r2.x, r2.y\n"
+"mul_ieee r2.x___, r1.z, r1.z\n"
+"dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.___w, r2.x, l16, r1.w\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0._y__, r0.y, l17\n"
+"iadd r0.x___, r0.y, r0.x\n"
+"dcl_literal l18, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.x___, l18, r0.x\n"
+"iadd r0.x___, r0.x, r0.w\n"
+"itof r0.x___, r0.x\n"
+"dcl_literal l19, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~9.058e-6 as an IEEE-754 single; looks like the low part of a split ln(2) - confirm */
+"mul_ieee r0._y__, r0.x, l19\n"
+"mad_ieee r0._y__, r0.z, r1.w, r0.y\n"
+"dcl_literal l20, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0._y__, r2.x, l20, r0.y_neg(xyzw)\n"
+"add r0._y__, r1.z_neg(xyzw), r0.y\n"
+"dcl_literal l21, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~0.693145752 as an IEEE-754 single; looks like the high part of a split ln(2) - confirm */
+"mad_ieee r0.x___, r0.x, l21, r0.y_neg(xyzw)\n"
+"dcl_literal l22, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r0.x___, r0.x, l22\n"
+"mul_ieee r0._y__, r1.y, r1.y\n"
+"dcl_literal l23, 0x00000000, 0x00000000, 0x3C18197E, 0x3EE7D7E1\n"
+"dcl_literal l24, 0x00000000, 0x00000000, 0xBE8FF9E7, 0xBFC6E215\n"
+"mad_ieee r0.__zw, r0.y, l23, l24\n"
+"dcl_literal l25, 0x00000000, 0x00000000, 0x3ECA00A9, 0x3F97807F\n"
+"mad_ieee r0.__zw, r0.zzzw, r0.y, l25\n"
+"mul_ieee r0._y__, r1.y, r0.y\n"
+"div_zeroop(infinity) r0.__z_, r0.z, r0.w\n"
+"mad_ieee r0._y__, r0.y, r0.z, r1.y\n"
+"dcl_literal l26, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"lt r0.__z_, r1.y, l26\n"
+"cmov_logical r0.x___, r0.z, r0.y, r0.x\n"
+"dcl_literal l27, 0x31800000, 0x31800000, 0x31800000, 0x31800000\n"
+"ige r0._y__, l27, r1.y\n"
+"cmov_logical r0.x___, r0.y, r1.y, r0.x\n"
+"dcl_literal l28, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ieq r0._y__, r1.y, l28\n"
+"dcl_literal l29, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r0.x___, r0.y, l29, r0.x\n"
+"dcl_literal l30, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, r1.x, l30\n"
+"dcl_literal l31, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"ior r0.__z_, r1.x, l31\n"
+"dcl_literal l32, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r1.y, l32\n"
+"and r0._y__, r0.y, r0.w\n"
+"cmov_logical r0.x___, r0.y, r0.x_neg(xyzw), r0.x\n"
+"dcl_literal l33, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ilt r0._y__, l33, r1.y\n"
+"dcl_literal l34, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, r1.y, l34\n"
+"dcl_literal l35, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1.x___, r1.y, l35\n"
+"and r0._y__, r0.y, r0.w\n"
+"dcl_literal l36, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l36, r0.x\n"
+"cmov_logical r0.x___, r1.x, r0.z, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two counts, both 1 here, consistent with _out(1)_in(1) in the mdef line - confirm field order against the table's struct declaration */
+},
+{ "__atanpi_f32", /* table entry: macro name, IL macro text (adjacent string literals), then two integers */
+"mdef(184)_out(1)_in(1)\n" /* macro header: id 184, one output, one input */
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x80000000, 0x00000000\n" /* y gets the input with bit 31 cleared, z gets bit 31 only */
+"and r0._yz_, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x01490FDB, 0x01490FDB, 0x01490FDB, 0x01490FDB\n"
+"ige r0.___w, l1, r0.y\n"
+"if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.___w, r0.x, l2\n"
+" if_logicalz r0.w\n"
+" \n"
+" dcl_literal l3, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.x___, r0.x, l3\n"
+" itof r1.x___, r1.x\n"
+" \n"
+" dcl_literal l4, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r1.x, l4\n"
+" \n"
+" dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l5\n"
+" \n"
+" dcl_literal l6, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r1.x___, r1.x, l6\n"
+" \n"
+" dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1._y__, r1.y, l7\n"
+" \n"
+" dcl_literal l8, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.x___, l8, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.__z_, l9, r1.x\n"
+" \n"
+" dcl_literal l10, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r1.z, l10, r1.x\n"
+" \n"
+" dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, l11, r1.x\n"
+" ishr r1.___w, r1.y, r1.x\n"
+" inegate r1.x___, r1.x\n"
+" \n"
+" dcl_literal l12, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.x___, r1.x, l12\n"
+" iadd r1.x___, r1.y, r1.x\n"
+" cmov_logical r1.x___, r1.z, r1.w, r1.x\n"
+" else\n"
+" \n"
+" dcl_literal l13, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1._y__, r0.x, l13\n"
+" \n"
+" dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r0.w, l14\n"
+" \n"
+" dcl_literal l15, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000\n"
+" iadd r1._y__, r1.y, l15\n"
+" \n"
+" dcl_literal l16, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99\n"
+" iadd r0.___w, r0.w, l16\n"
+" \n"
+" dcl_literal l17, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r0.___w, l17, r0.w\n"
+" \n"
+" dcl_literal l18, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r0.w, l18, r1.y\n"
+" endif\n"
+" \n"
+" dcl_literal l19, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n" /* 1/pi as an IEEE-754 single */
+" mul_ieee r0.___w, r1.x, l19\n"
+" \n"
+" dcl_literal l20, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.w, l20\n"
+" if_logicalz r1.x\n"
+" itof r1.__z_, r1.y\n"
+" \n"
+" dcl_literal l21, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r1.__zw, r1.z, l21\n"
+" \n"
+" dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l22\n"
+" \n"
+" dcl_literal l23, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r1.__z_, r1.z, l23\n"
+" \n"
+" dcl_literal l24, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l24\n"
+" \n"
+" dcl_literal l25, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.__z_, l25, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.x___, l26, r1.z\n"
+" \n"
+" dcl_literal l27, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.x, l27, r1.z\n"
+" \n"
+" dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l28, r1.z\n"
+" ishr r2._y__, r1.w, r1.z\n"
+" inegate r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l29, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l29\n"
+" iadd r1.__z_, r1.w, r1.z\n"
+" cmov_logical r1.__z_, r2.x, r2.y, r1.z\n"
+" else\n"
+" \n"
+" dcl_literal l30, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.___w, r0.w, l30\n"
+" \n"
+" dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l31\n"
+" \n"
+" dcl_literal l32, 0xF4000000, 0xF4000000, 0xF4000000, 0xF4000000\n"
+" iadd r0.___w, r0.w, l32\n"
+" \n"
+" dcl_literal l33, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69\n"
+" iadd r1.x___, r1.x, l33\n"
+" \n"
+" dcl_literal l34, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l34, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l35, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1._y__, r1.y, l35\n"
+" \n"
+" dcl_literal l36, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r2.xy__, l36, r1.x\n"
+" \n"
+" dcl_literal l37, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r2.y, l37, r1.x\n"
+" ishr r1.x___, r1.y, r1.x\n"
+" cmov_logical r1.__z_, r2.x, r1.x, r0.w\n"
+" endif\n"
+"else\n"
+" \n"
+" dcl_literal l38, 0x31800000, 0x31800000, 0x31800000, 0x31800000\n"
+" lt r0.___w, r0.y, l38\n"
+" if_logicalnz r0.w\n"
+" mov r0.___w, r0.y\n"
+" else\n"
+" \n"
+" dcl_literal l39, 0x3EE00000, 0x3F300000, 0x3F980000, 0x401C0000\n" /* as IEEE-754 singles: 0.4375, 0.6875, 1.1875, 2.4375 */
+" uge r2, r0.y, l39\n"
+" \n"
+" dcl_literal l40, 0x3F300000, 0x3F980000, 0x00000000, 0x401C0000\n"
+" ult r1.xy_w, r0.y, l40\n"
+" and r1.xy_w, r2.xyxz, r1.xyxw\n"
+" \n"
+" dcl_literal l41, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" \n"
+" dcl_literal l42, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" mad_ieee r2.x___, r0.y, l41, l42\n"
+" \n"
+" dcl_literal l43, 0x40000000, 0xBF800000, 0x3F800000, 0xBFC00000\n"
+" add r3, r0.y, l43\n"
+" div_zeroop(infinity) r2.x___, r2.x, r3.x\n"
+" cmov_logical r2.x___, r1.x, r2.x, r0.y\n"
+" \n"
+" dcl_literal l44, 0x3EED6338, 0x3EED6338, 0x3EED6338, 0x3EED6338\n" /* ~0.4636476 as an IEEE-754 single, i.e. atan(0.5) */
+" and r1.x___, r1.x, l44\n"
+" div_zeroop(infinity) r2._y__, r3.y, r3.z\n"
+" cmov_logical r2.x___, r1.y, r2.y, r2.x\n"
+" \n"
+" dcl_literal l45, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n" /* pi/4 as an IEEE-754 single */
+" cmov_logical r1.x___, r1.y, l45, r1.x\n"
+" \n"
+" dcl_literal l46, 0x3FC00000, 0x3FC00000, 0x3FC00000, 0x3FC00000\n"
+" \n"
+" dcl_literal l47, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" mad_ieee r1._y__, r0.y, l46, l47\n"
+" div_zeroop(infinity) r1._y__, r3.w, r1.y\n"
+" cmov_logical r1._y__, r1.w, r1.y, r2.x\n"
+" \n"
+" dcl_literal l48, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F\n" /* ~0.9827937 as an IEEE-754 single, i.e. atan(1.5) */
+" cmov_logical r1.x___, r1.w, l48, r1.x\n"
+" \n"
+" dcl_literal l49, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" div_zeroop(infinity) r1.___w, l49, r0.y\n"
+" cmov_logical r1._y__, r2.w, r1.w, r1.y\n"
+" \n"
+" dcl_literal l50, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n" /* pi/2 as an IEEE-754 single */
+" cmov_logical r1.x___, r2.w, l50, r1.x\n"
+" mul_ieee r1.___w, r1.y, r1.y\n"
+" mul_ieee r2.x___, r1.y, r1.w\n"
+" \n"
+" dcl_literal l51, 0x00000000, 0x3B9A3B54, 0x3E993F1F, 0x00000000\n"
+" \n"
+" dcl_literal l52, 0x00000000, 0x3E44F0BA, 0x3F8E2C3D, 0x00000000\n"
+" mad_ieee r2._yz_, r1.w, l51, l52\n"
+" \n"
+" dcl_literal l53, 0x00000000, 0x3E97D299, 0x3F63BBE5, 0x00000000\n"
+" mad_ieee r2._yz_, r2.yyzy, r1.w, l53\n"
+" mul_ieee r1.___w, r2.x, r2.y\n"
+" div_zeroop(infinity) r1.___w, r1.w, r2.z\n"
+" add r1._y__, r1.y_neg(xyzw), r1.w\n"
+" add r0.___w, r1.x, r1.y_neg(xyzw)\n"
+" endif\n"
+" \n"
+" dcl_literal l54, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983, 0x3EA2F983\n" /* 1/pi as an IEEE-754 single */
+" mul_ieee r1.__z_, r0.w, l54\n"
+"endif\n"
+"ior r0.__z_, r0.z, r1.z\n" /* ORs the saved bit 31 of the input (r0.z) into the computed value */
+"\n"
+"dcl_literal l55, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"ult r0._y__, l55, r0.y\n"
+"\n"
+"dcl_literal l56, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"ior r0.x___, r0.x, l56\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two counts, both 1 here, consistent with _out(1)_in(1) in the mdef line - confirm field order against the table's struct declaration */
+},
+{ "__atan_f32", /* table entry: macro name, IL macro text (adjacent string literals), then two integers */
+"mdef(185)_out(1)_in(1)\n" /* macro header: id 185, one output, one input */
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x80000000, 0x00000000\n" /* y gets the input with bit 31 cleared, z gets bit 31 only */
+"and r0._yz_, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"lt r0.___w, r0.y, l1\n"
+"if_logicalnz r0.w\n"
+" mov r0.___w, r0.y\n"
+"else\n"
+" \n"
+" dcl_literal l2, 0x3EE00000, 0x3F300000, 0x3F980000, 0x401C0000\n" /* as IEEE-754 singles: 0.4375, 0.6875, 1.1875, 2.4375 */
+" uge r1, r0.y, l2\n"
+" \n"
+" dcl_literal l3, 0x3F300000, 0x3F980000, 0x401C0000, 0x00000000\n"
+" ult r2.xyz_, r0.y, l3\n"
+" and r1.xyz_, r1.xyzx, r2.xyzx\n"
+" \n"
+" dcl_literal l4, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" \n"
+" dcl_literal l5, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" mad_ieee r2.x___, r0.y, l4, l5\n"
+" \n"
+" dcl_literal l6, 0x40000000, 0xBF800000, 0x3F800000, 0xBFC00000\n"
+" add r3, r0.y, l6\n"
+" div_zeroop(infinity) r2.x___, r2.x, r3.x\n"
+" cmov_logical r2.x___, r1.x, r2.x, r0.y\n"
+" \n"
+" dcl_literal l7, 0x3EED6338, 0x3EED6338, 0x3EED6338, 0x3EED6338\n" /* ~0.4636476 as an IEEE-754 single, i.e. atan(0.5) */
+" and r1.x___, r1.x, l7\n"
+" div_zeroop(infinity) r2._y__, r3.y, r3.z\n"
+" cmov_logical r2.x___, r1.y, r2.y, r2.x\n"
+" \n"
+" dcl_literal l8, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n" /* pi/4 as an IEEE-754 single */
+" cmov_logical r1.x___, r1.y, l8, r1.x\n"
+" \n"
+" dcl_literal l9, 0x3FC00000, 0x3FC00000, 0x3FC00000, 0x3FC00000\n"
+" \n"
+" dcl_literal l10, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" mad_ieee r1._y__, r0.y, l9, l10\n"
+" div_zeroop(infinity) r1._y__, r3.w, r1.y\n"
+" cmov_logical r1._y__, r1.z, r1.y, r2.x\n"
+" \n"
+" dcl_literal l11, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F, 0x3F7B985F\n" /* ~0.9827937 as an IEEE-754 single, i.e. atan(1.5) */
+" cmov_logical r1.x___, r1.z, l11, r1.x\n"
+" \n"
+" dcl_literal l12, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" div_zeroop(infinity) r1.__z_, l12, r0.y\n"
+" cmov_logical r1._y__, r1.w, r1.z, r1.y\n"
+" \n"
+" dcl_literal l13, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n" /* pi/2 as an IEEE-754 single */
+" cmov_logical r1.x___, r1.w, l13, r1.x\n"
+" mul_ieee r1.__z_, r1.y, r1.y\n"
+" mul_ieee r1.___w, r1.y, r1.z\n"
+" \n"
+" dcl_literal l14, 0x3B9A3B54, 0x3E993F1F, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l15, 0x3E44F0BA, 0x3F8E2C3D, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, l14, l15\n"
+" \n"
+" dcl_literal l16, 0x3E97D299, 0x3F63BBE5, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r2.xyxx, r1.z, l16\n"
+" mul_ieee r1.__z_, r1.w, r2.x\n"
+" div_zeroop(infinity) r1.__z_, r1.z, r2.y\n"
+" add r1._y__, r1.y_neg(xyzw), r1.z\n"
+" add r0.___w, r1.x, r1.y_neg(xyzw)\n"
+"endif\n"
+"ior r0.__z_, r0.z, r0.w\n" /* ORs the saved bit 31 of the input (r0.z) into the computed value */
+"\n"
+"dcl_literal l17, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"ult r0._y__, l17, r0.y\n"
+"\n"
+"dcl_literal l18, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"ior r0.x___, r0.x, l18\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two counts, both 1 here, consistent with _out(1)_in(1) in the mdef line - confirm field order against the table's struct declaration */
+},
+{ "__cbrt_f32", /* table entry: AMD IL macro text for a scalar f32 cube root; reads in0.x, result ends up in out0.x */
+"mdef(186)_out(1)_in(1)\n" /* macro 186, one output and one input; the trailing 1,1 of this entry repeats those counts */
+"mov r0, in0\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x007FFFFF\n"
+"and r0._yzw, r0.x, l0\n" /* r0.y = |x| bits, r0.z = exponent field, r0.w = mantissa field */
+"itof r0.___w, r0.w\n" /* mantissa field converted to float; only selected below when the exponent field is zero */
+"dcl_literal l1, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.w, l1\n" /* exponent (.x) and mantissa (.y) fields of that float */
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishr r0.___w, r1.x, l2\n" /* biased exponent as an integer */
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.x___, r1.y, l3\n" /* mantissa with bit 23 set */
+"dcl_literal l4, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n" /* 48 */
+"iadd r0.___w, r0.w, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+"iadd r0.___w, l5, r0.w_neg(xyzw)\n" /* 150 - (exponent + 48) */
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1._y__, l6, r0.w\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.___w, r1.y, l7, r0.w\n" /* cap that count at 24 */
+"inegate r1._y__, r0.w\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1._y__, r1.y, l8\n"
+"iadd r1._y__, r1.x, r1.y\n" /* candidate A: mantissa bits plus (-count << 23) */
+"ishr r1.x___, r1.x, r0.w\n" /* candidate B: mantissa bits shifted right by count */
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l9, r0.w\n"
+"cmov_logical r0.___w, r0.w, r1.x, r1.y\n" /* count > 0 ? B : A */
+"cmov_logical r0._y__, r0.z, r0.y, r0.w\n" /* exponent field nonzero ? keep |x| : use the rebuilt value */
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l11, 0xFFFFFFF0, 0xFFFFFFF0, 0xFFFFFFF0, 0xFFFFFFF0\n" /* -16 */
+"cmov_logical r0.__z_, r0.z, l10, l11\n" /* exponent correction: 0, or -16 when the exponent field was zero; NOTE(review): presumably undoes the +48 bias above (48/3) - confirm */
+"dcl_literal l12, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.y, l12\n" /* exponent/mantissa fields of the working value */
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.x, l13\n"
+"dcl_literal l14, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+"iadd r0.___w, r0.w, l14\n" /* unbiased exponent e */
+"itof r0.___w, r0.w\n"
+"dcl_literal l15, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n" /* ~1/3 */
+"mul_ieee r1.x___, r0.w, l15\n"
+"round_nearest r1.x___, r1.x\n" /* q = round(e / 3) */
+"dcl_literal l16, 0x40400000, 0x40400000, 0x40400000, 0x40400000\n" /* 3.0 */
+"mad_ieee r0.___w, r1.x_neg(xyzw), l16, r0.w\n" /* remainder e - 3q */
+"ftoi r1.x___, r1.x\n" /* q as an integer, reused near the end */
+"ftoi r0.___w, r0.w\n"
+"dcl_literal l17, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"iadd r0.___w, r0.w, l17\n"
+"dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.___w, r0.w, l18\n"
+"ior r0.___w, r1.y, r0.w\n" /* float with the original mantissa and exponent e - 3q */
+"dcl_literal l19, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1._y__, r0.w, l19\n" /* NOTE(review): from here to the mul_ieee into r0.w below this looks like ln() followed by exp(ln/3), with fdlibm-style logf/expf constants - confirm against the generator */
+"dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r0.w, l20\n"
+"dcl_literal l21, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.__z_, l21, r1.y\n"
+"dcl_literal l22, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.__z_, r1.z, l22\n" /* carry into bit 23 after adding 0x4AFB20 to the mantissa */
+"dcl_literal l23, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.___w, r1.z, l23\n"
+"dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.__z_, r1.z, l24\n" /* that carry as 0 or 1; added to the exponent further down */
+"ior r1._y__, r1.y, r1.w\n"
+"dcl_literal l25, 0x00000000, 0xBF800000, 0x00000000, 0x3F800000\n" /* .y = -1.0, .w = 1.0 */
+"add r1._y_w, r1.y, l25\n" /* r1.y = m - 1, r1.w = m + 1 */
+"div_zeroop(infinity) r1.___w, r1.y, r1.w\n" /* (m - 1) / (m + 1) */
+"mul_ieee r2.x___, r1.w, r1.w\n"
+"mul_ieee r2._y__, r2.x, r2.x\n"
+"dcl_literal l26, 0x00000000, 0x00000000, 0x3E1CD04F, 0x3E178897\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x3E638E29, 0x3E3A3325\n"
+"mad_ieee r2.__zw, r2.y, l26, l27\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x3ECCCCCD, 0x3E924925\n"
+"mad_ieee r2.__zw, r2.y, r2.zzzw, l28\n"
+"mul_ieee r2.__z_, r2.y, r2.z\n"
+"dcl_literal l29, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r2._y__, r2.y, r2.w, l29\n"
+"mad_ieee r2.x___, r2.x, r2.y, r2.z\n"
+"mul_ieee r2._y__, r1.y, r1.y\n"
+"dcl_literal l30, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"mad_ieee r2.x___, r2.y, l30, r2.x\n"
+"dcl_literal l31, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"mul_ieee r2._y__, r2.y, l31\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), r2.x, r2.y\n"
+"add r1._y__, r1.y_neg(xyzw), r1.w\n"
+"dcl_literal l32, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB\n" /* ~ -1/3 */
+"mul_ieee r1.___w, r1.y, l32\n"
+"mul_ieee r2.x___, r1.w, r1.w\n"
+"dcl_literal l33, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"dcl_literal l34, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r2._y__, r2.x, l33, l34\n" /* Horner evaluation in r2.x over the next four mads */
+"dcl_literal l35, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r2._y__, r2.x, r2.y, l35\n"
+"dcl_literal l36, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2._y__, r2.x, r2.y, l36\n"
+"dcl_literal l37, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r2._y__, r2.x, r2.y, l37\n"
+"mad_ieee r2.x___, r2.x_neg(xyzw), r2.y, r1.w\n"
+"mul_ieee r1.___w, r1.w, r2.x\n"
+"dcl_literal l38, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n" /* -2.0 */
+"add r2.x___, r2.x, l38\n"
+"div_zeroop(infinity) r1.___w, r1.w, r2.x\n"
+"dcl_literal l39, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB\n"
+"mad_ieee r1._y__, r1.y_neg(xyzw), l39, r1.w\n"
+"dcl_literal l40, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"add r1._y__, r1.y_neg(xyzw), l40\n" /* first factor of the final product */
+"dcl_literal l41, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+"iadd r0.___w, r0.w, l41\n"
+"iadd r0.___w, r0.w, r1.z\n" /* unbiased exponent plus the 0/1 carry computed above */
+"itof r0.___w, r0.w\n"
+"dcl_literal l42, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820\n" /* ~0.231049, i.e. ln(2)/3 */
+"mul_ieee r1.__z_, r0.w, l42\n"
+"mul_ieee r1.___w, r1.z, r1.z\n"
+"dcl_literal l43, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"dcl_literal l44, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r2.x___, r1.w, l43, l44\n" /* same coefficient chain as above, applied to the exponent part */
+"dcl_literal l45, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r2.x___, r1.w, r2.x, l45\n"
+"dcl_literal l46, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2.x___, r1.w, r2.x, l46\n"
+"dcl_literal l47, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r2.x___, r1.w, r2.x, l47\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), r2.x, r1.z\n"
+"mul_ieee r1.__z_, r1.z, r1.w\n"
+"dcl_literal l48, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n" /* -2.0 */
+"add r1.___w, r1.w, l48\n"
+"div_zeroop(infinity) r1.__z_, r1.z, r1.w\n"
+"dcl_literal l49, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820\n"
+"mad_ieee r0.___w, r0.w_neg(xyzw), l49, r1.z\n"
+"dcl_literal l50, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"add r0.___w, r0.w_neg(xyzw), l50\n" /* second factor of the final product */
+"mul_ieee r0.___w, r1.y, r0.w\n" /* product of the two factors */
+"dcl_literal l51, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r2, r0.w, l51\n" /* r2 = (magnitude, sign bit, exponent field, mantissa field) of the product */
+"iadd r0.__z_, r1.x, r0.z\n" /* power-of-two adjustment: integer q plus the 0/-16 correction */
+"dcl_literal l52, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 */
+"iadd r0.___w, r0.z, l52\n"
+"cmov_logical r0.__z_, r2.z, r0.z, r0.w\n" /* product has a zero exponent field ? adjustment - 149 : adjustment */
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.___w, r0.z, l53\n"
+"itof r1.x___, r2.w\n"
+"cmov_logical r1.x___, r2.z, r2.x, r1.x\n"
+"iadd r0.___w, r1.x, r0.w\n" /* add the adjustment directly into the exponent bits */
+"dcl_literal l54, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r1.x, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l55\n"
+"dcl_literal l56, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1._y__, r1.y, l56\n"
+"iadd r0.__z_, r1.x, r0.z\n"
+"dcl_literal l57, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+"iadd r0.__z_, r0.z, l57\n" /* unbiased exponent the scaled value would have */
+"dcl_literal l58, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.x___, l58, r0.z\n"
+"dcl_literal l59, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 */
+"iadd r0.__z_, l59, r0.z_neg(xyzw)\n" /* how far the exponent falls below -126 */
+"dcl_literal l60, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+"cmov_logical r0.___w, r1.x, l60, r0.w\n" /* exponent above 127 -> +inf bits */
+"dcl_literal l61, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.x___, l61, r0.z\n"
+"dcl_literal l62, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.__z_, r1.x, l62, r0.z\n" /* cap the right-shift count at 24 */
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.__z_, r0.z, l63\n"
+"ishr r1.x___, r1.y, r0.z\n"
+"dcl_literal l64, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l64, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r1.x, r0.w\n" /* positive shift count ? right-shifted mantissa : exponent-adjusted bits */
+"ior r0.__z_, r2.y, r0.z\n" /* OR in the sign bit of the product */
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l65\n" /* x negative, judged by the signed-integer view of its bits */
+"cmov_logical r0.__z_, r0.w, r0.z_neg(xyzw), r0.z\n" /* negate the result for negative x */
+"dcl_literal l66, 0x7F800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r1.xy__, r0.y, l66\n" /* r1.x = |x| is +inf, r1.y = |x| is zero */
+"dcl_literal l67, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0._y__, l67, r0.y\n" /* |x| bits above +inf, i.e. x is NaN */
+"ior r0.___w, r1.y, r1.x\n"
+"cmov_logical r0.__z_, r0.w, r0.x, r0.z\n" /* infinite or zero input is returned unchanged */
+"dcl_literal l68, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bits */
+"ior r0.x___, r0.x, l68\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* NaN input -> quieted input, otherwise the result */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ceil_f32", /* table entry: AMD IL macro text for a scalar f32 ceil; reads in0.x, result ends up in out0.x */
+"mdef(187)_out(1)_in(1)\n" /* macro 187, one output and one input; the trailing 1,1 of this entry repeats those counts */
+"mov r0, in0\n"
+"round_plusinf r0.x___, r0.x\n" /* the whole operation: round x toward +infinity */
+"mov out0, r0\n" /* only .x was modified; the other components pass through from in0 */
+"mend\n"
+,1,1
+},
+{ "__clamp_f32", /* table entry: AMD IL macro text for a scalar f32 clamp; value in in0.x, bounds in in1.x and in2.x */
+"mdef(188)_out(1)_in(3)\n" /* macro 188, one output and three inputs; the trailing 3,1 of this entry matches _in(3) and _out(1) */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"max r0.___w, r1.x, r0.x\n" /* raise the value to at least in1.x */
+"min r0.x___, r2.x, r0.w\n" /* then cap it at in2.x; the upper bound is applied last */
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__copysign_f32", /* table entry: AMD IL macro text for a scalar f32 copysign; magnitude from in0.x, sign from in1.x */
+"mdef(189)_out(1)_in(2)\n" /* macro 189, one output and two inputs; the trailing 2,1 of this entry matches _in(2) and _out(1) */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack both operands into r0.xy so a single and can mask them */
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x00000000, 0x00000000\n" /* .x = everything but the sign bit, .y = sign bit only */
+"and r0.xy__, r0.xyxx, l0\n" /* r0.x = magnitude bits of in0.x, r0.y = sign bit of in1.x */
+"ior r0.x___, r0.y, r0.x\n" /* combine them */
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__cosh_f32", /* table entry: AMD IL macro text for a scalar f32 hyperbolic cosine; reads in0.x, result ends up in out0.x */
+"mdef(190)_out(1)_in(1)\n" /* macro 190, one output and one input; the trailing 1,1 of this entry repeats those counts */
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = |x| bits */
+"\n"
+"dcl_literal l1, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* ~1.442695, i.e. log2(e) */
+"\n"
+"dcl_literal l2, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"mad_ieee r0.__z_, r0.y, l1, l2\n"
+"round_z r0.__z_, r0.z\n" /* k = |x| * log2(e) + 0.5, rounded toward zero */
+"\n"
+"dcl_literal l3, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~0.693138; NOTE(review): looks like the high part of a split ln(2) - confirm */
+"mad_ieee r0.___w, r0.z_neg(xyzw), l3, r0.y\n" /* |x| - k * l3 */
+"\n"
+"dcl_literal l4, 0x42000000, 0x42000000, 0x42000000, 0x42000000\n" /* 32.0 */
+"ge r1.x___, r0.z, l4\n"
+"\n"
+"dcl_literal l5, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0 */
+"add r1._y__, r0.z, l5\n"
+"cmov_logical r1.x___, r1.x, r1.y, r0.z\n" /* scaling exponent: k - 1 when k >= 32, else k */
+"\n"
+"dcl_literal l6, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~9.058e-6; NOTE(review): looks like the low part of the split ln(2) - confirm */
+"mad_ieee r1._y__, r0.z_neg(xyzw), l6, r0.w\n" /* reduced argument r */
+"mul_ieee r1.__z_, r1.y, r1.y\n" /* r^2 */
+"\n"
+"dcl_literal l7, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l8, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r1.___w, r1.z, l7, l8\n" /* Horner evaluation in r^2 over the next four mads */
+"\n"
+"dcl_literal l9, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r1.___w, r1.z, r1.w, l9\n"
+"\n"
+"dcl_literal l10, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r1.___w, r1.z, r1.w, l10\n"
+"\n"
+"dcl_literal l11, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r1.___w, r1.z, r1.w, l11\n"
+"mad_ieee r1.__z_, r1.z_neg(xyzw), r1.w, r1.y\n" /* c = r - r^2 * poly */
+"mul_ieee r1._y__, r1.y, r1.z\n" /* r * c */
+"\n"
+"dcl_literal l12, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0 */
+"add r1.__z_, r1.z_neg(xyzw), l12\n" /* 2 - c */
+"div_zeroop(infinity) r1._y__, r1.y, r1.z\n" /* r * c / (2 - c) */
+"\n"
+"dcl_literal l13, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r0.__z_, r0.z, l13, r1.y_neg(xyzw)\n"
+"add r0.__z_, r0.w_neg(xyzw), r0.z\n"
+"\n"
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"add r0.__z_, r0.z_neg(xyzw), l14\n" /* NOTE(review): presumably exp(r) at this point - confirm */
+"ftoi r0.___w, r1.x\n" /* scaling exponent as an integer */
+"mov r1._y__, r0.z_abs\n"
+"\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r1.y, l15\n" /* r1.z = exponent field, r1.w = mantissa field of |r0.z| */
+"if_logicalz r1.z\n" /* exponent field is zero: scale via the integer-converted mantissa */
+" itof r2.x___, r1.w\n"
+" \n"
+" dcl_literal l16, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r2.x, l16\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+" ishr r2.x___, r2.x, l17\n"
+" iadd r2.x___, r2.x, r0.w\n"
+" \n"
+" dcl_literal l18, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.y, l18\n"
+" \n"
+" dcl_literal l19, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r2.x___, l19, r2.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.__z_, l20, r2.x\n"
+" \n"
+" dcl_literal l21, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.x___, r2.z, l21, r2.x\n" /* cap the shift count at 24 */
+" \n"
+" dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l22, r2.x\n"
+" ishr r2.___w, r2.y, r2.x\n"
+" inegate r2.x___, r2.x\n"
+" \n"
+" dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.x___, r2.x, l23\n"
+" iadd r2.x___, r2.y, r2.x\n"
+" cmov_logical r2.x___, r2.z, r2.w, r2.x\n" /* positive count ? right-shifted mantissa : exponent-adjusted bits */
+"else\n" /* exponent field nonzero: add the scaling exponent into the exponent bits */
+" \n"
+" dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l24\n"
+" iadd r1.__z_, r1.z, r0.w\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l25\n"
+" iadd r0.___w, r1.y, r0.w\n" /* |value| bits with the scaling exponent added */
+" \n"
+" dcl_literal l26, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+" iadd r1._y__, r1.z, l26\n" /* unbiased exponent after scaling */
+" \n"
+" dcl_literal l27, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+" ilt r1.__z_, l27, r1.y\n"
+" \n"
+" dcl_literal l28, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+" cmov_logical r0.___w, r1.z, l28, r0.w\n" /* exponent above 127 -> +inf bits */
+" \n"
+" dcl_literal l29, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 */
+" iadd r1._y__, l29, r1.y_neg(xyzw)\n" /* how far the exponent falls below -126 */
+" \n"
+" dcl_literal l30, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.__z_, r1.w, l30\n"
+" \n"
+" dcl_literal l31, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r2._yz_, l31, r1.y\n" /* r2.y = shortfall > 0, r2.z = shortfall > 23 */
+" \n"
+" dcl_literal l32, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1._y__, r2.z, l32, r1.y\n"
+" ishr r1._y__, r1.z, r1.y\n"
+" cmov_logical r2.x___, r2.y, r1.y, r0.w\n" /* shortfall > 0 ? right-shifted mantissa : scaled bits */
+"endif\n"
+"\n"
+"dcl_literal l33, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 254.0 */
+"lt r0.___w, l33, r1.x\n"
+"\n"
+"dcl_literal l34, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r0.w, l34, r2.x\n" /* scaling exponent above 254 -> +inf bits, else the scaled magnitude */
+"\n"
+"dcl_literal l35, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.__z_, r0.z, l35\n"
+"cmov_logical r0.__z_, r0.z, r0.w_neg(xyzw), r0.w\n" /* restore the sign the unscaled value had */
+"\n"
+"dcl_literal l36, 0x41AEAC50, 0x41AEAC50, 0x41AEAC50, 0x41AEAC50\n" /* ~21.83 */
+"lt r0.___w, r0.y, l36\n"
+"\n"
+"dcl_literal l37, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"div_zeroop(infinity) r1.x___, l37, r0.z\n" /* 0.5 / v */
+"\n"
+"dcl_literal l38, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"mad_ieee r1.x___, r0.z, l38, r1.x\n" /* 0.5 * v + 0.5 / v */
+"cmov_logical r0.__z_, r0.w, r1.x, r0.z\n" /* |x| < ~21.83 ? two-term sum : v alone */
+"\n"
+"dcl_literal l39, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"uge r0.___w, r0.y, l39\n"
+"cmov_logical r0.__z_, r0.w, r0.x, r0.z\n" /* inf or NaN input: provisionally x; both cases are overridden below */
+"\n"
+"dcl_literal l40, 0x42B2D4FD, 0x42B2D4FD, 0x42B2D4FD, 0x42B2D4FD\n" /* ~89.416 */
+"ge r0.___w, r0.y, l40\n"
+"\n"
+"dcl_literal l41, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r0.w, l41, r0.z\n" /* |x| >= ~89.416 -> +inf */
+"\n"
+"dcl_literal l42, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0.___w, l42, r0.y\n" /* |x| bits above +inf, i.e. x is NaN */
+"\n"
+"dcl_literal l43, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bits */
+"ior r1.x___, r0.x, l43\n"
+"cmov_logical r0.__z_, r0.w, r1.x, r0.z\n" /* NaN input -> quieted input */
+"\n"
+"dcl_literal l44, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l44\n"
+"cmov_logical r0.__z_, r0.w, r0.y, r0.z\n" /* infinite input of either sign -> +inf */
+"\n"
+"dcl_literal l45, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"cmov_logical r0.x___, r0.y, r0.z, l45\n" /* |x| bits nonzero ? computed result : 1.0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__cospi_f32", /* table entry: AMD IL macro text for a scalar f32 cos(pi * x); reads in0.x, result ends up in out0.x */
+"mdef(191)_out(1)_in(1)\n" /* macro 191, one output and one input; the trailing 1,1 of this entry repeats those counts */
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = |x| bits */
+"frc r0.__z_, r0.y\n" /* fractional part of |x| */
+"add r0.___w, r0.y, r0.z_neg(xyzw)\n" /* integer part of |x| */
+"\n"
+"dcl_literal l1, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+"mul_ieee r0.___w, r0.w, l1\n"
+"frc r0.___w, r0.w\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r0.___w, r0.w, l2\n" /* r0.w = integer part of |x| is even */
+"\n"
+"dcl_literal l3, 0x00000000, 0x3F000000, 0x00000000, 0x00000000\n" /* .x = 0.0, .y = 0.5 */
+"ne r1.xy__, r0.z, l3\n"
+"and r1.x___, r1.y, r1.x\n" /* fraction is neither 0 nor 0.5 */
+"\n"
+"dcl_literal l4, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n" /* 2^-27 */
+"ige r1._y__, r0.y, l4\n" /* integer compare on the |x| bit pattern: |x| >= 2^-27 */
+"and r1.x___, r1.x, r1.y\n"
+"\n"
+"dcl_literal l5, 0x00000000, 0x7F800000, 0x32000000, 0x00000000\n" /* .y = +inf bits, .z = 2^-27 */
+"ilt r1._yz_, r0.y, l5\n" /* r1.y = |x| is finite, r1.z = |x| < 2^-27 */
+"and r1.x___, r1.x, r1.y\n" /* r1.x = general case: needs the polynomial path */
+"if_logicalnz r1.x\n"
+" add r1.x___, r0.x, r0.x\n"
+" round_nearest r1.x___, r1.x\n" /* n = round(2x) */
+" \n"
+" dcl_literal l6, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n" /* 0.25 */
+" mul_ieee r1._y__, r1.x, l6\n"
+" frc r1._y__, r1.y\n"
+" \n"
+" dcl_literal l7, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n" /* 4.0 */
+" mul_ieee r1._y__, r1.y, l7\n"
+" round_nearest r1._y__, r1.y\n" /* quadrant index: round(4 * frc(n / 4)) */
+" \n"
+" dcl_literal l8, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0 */
+" mad_ieee r1.x___, r0.x, l8, r1.x_neg(xyzw)\n" /* 2x - n */
+" \n"
+" dcl_literal l9, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n" /* pi/2 */
+" mul_ieee r1.x___, r1.x, l9\n" /* reduced angle t = (2x - n) * pi/2 */
+" mul_ieee r1.___w, r1.x, r1.x\n" /* t^2 */
+" mul_ieee r2.x___, r1.x, r1.w\n" /* t^3 */
+" \n"
+" dcl_literal l10, 0x00000000, 0x3636DF25, 0xB492923A, 0x00000000\n"
+" \n"
+" dcl_literal l11, 0x00000000, 0xB95009D4, 0x37D00AE2, 0x00000000\n"
+" mad_ieee r2._yz_, r1.w, l10, l11\n" /* two polynomials in t^2 evaluated side by side: .y feeds sine, .z feeds cosine */
+" \n"
+" dcl_literal l12, 0x00000000, 0x3C088887, 0xBAB60B60, 0x00000000\n"
+" mad_ieee r2._yz_, r1.w, r2.yyzy, l12\n"
+" \n"
+" dcl_literal l13, 0x00000000, 0xBE2AAAAB, 0x3D2AAAAB, 0x00000000\n" /* .y ~ -1/6, .z ~ 1/24 */
+" mad_ieee r2._yz_, r1.w, r2.yyzy, l13\n"
+" mad_ieee r1.x___, r2.x, r2.y, r1.x\n" /* sine term: t + t^3 * poly */
+" \n"
+" dcl_literal l14, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+" \n"
+" dcl_literal l15, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+" mad_ieee r2.x___, r1.w_neg(xyzw), l14, l15\n" /* 1 - t^2 / 2 */
+" mul_ieee r1.___w, r1.w, r1.w\n" /* t^4 */
+" mad_ieee r1.___w, r1.w, r2.z, r2.x\n" /* cosine term: 1 - t^2/2 + t^4 * poly */
+" \n"
+" dcl_literal l16, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n" /* 0.0, 1.0, 2.0, 3.0 */
+" eq r2, r1.y, l16\n" /* one mask per quadrant index 0..3 */
+" and r1._y__, r1.w, r2.x\n" /* quadrant 0: +cosine term, otherwise 0 */
+" cmov_logical r1._y__, r2.y, r1.x_neg(xyzw), r1.y\n" /* quadrant 1: -sine term */
+" cmov_logical r1._y__, r2.z, r1.w_neg(xyzw), r1.y\n" /* quadrant 2: -cosine term */
+" cmov_logical r1.x___, r2.w, r1.x, r1.y\n" /* quadrant 3: +sine term */
+"else\n"
+" \n"
+" dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r1.x___, l17\n" /* placeholder 0; special cases are resolved after the endif */
+"endif\n"
+"\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x3F000000\n" /* .y = 0.0, .w = 0.5 */
+"eq r1._y_w, r0.z, l18\n" /* r1.y = fraction is 0 (integer input), r1.w = fraction is 0.5 */
+"ior r0.__z_, r1.z, r1.y\n" /* tiny or integer input */
+"\n"
+"dcl_literal l19, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"\n"
+"dcl_literal l20, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0 */
+"cmov_logical r0.___w, r0.w, l19, l20\n" /* even integer part ? +1 : -1 */
+"\n"
+"dcl_literal l21, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.w, l21, r1.x\n" /* fraction exactly 0.5 -> 0 */
+"cmov_logical r0.__z_, r0.z, r0.w, r1.x\n" /* tiny or integer input ? the +/-1 value : polynomial or zero result */
+"\n"
+"dcl_literal l22, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+"ieq r0.___w, r0.y, l22\n"
+"\n"
+"dcl_literal l23, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* NaN bit pattern with the sign bit set */
+"cmov_logical r0.__z_, r0.w, l23, r0.z\n" /* infinite input -> NaN */
+"\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l24, r0.y\n" /* |x| bits above +inf, i.e. x is NaN */
+"\n"
+"dcl_literal l25, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bits */
+"ior r0.x___, r0.x, l25\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* NaN input -> quieted input, otherwise the result */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__cos_f32",
+"mdef(192)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ilt r0.__z_, r0.y, l1\n"
+"if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l2, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" mov r0.__z_, l2\n"
+"else\n"
+" \n"
+" dcl_literal l3, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680\n"
+" lt r0.___w, r0.y, l3\n"
+" if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l4, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n"
+" mul_ieee r0.___w, r0.y, l4\n"
+" round_nearest r0.___w, r0.w\n"
+" \n"
+" dcl_literal l5, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r1.x___, r0.w, l5\n"
+" add r1._y__, r0.w, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l6, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+" mul_ieee r1.__z_, r0.w, l6\n"
+" \n"
+" dcl_literal l7, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n"
+" mul_ieee r2.xyz_, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l8, r2.xyxx_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l9, r3.xyxx\n"
+" \n"
+" dcl_literal l10, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l10, r3.xyxx\n"
+" \n"
+" dcl_literal l11, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l11, r3.xyxx\n"
+" add r0.___w, r0.y, r2.x_neg(xyzw)\n"
+" add r1.___w, r0.y, r0.w_neg(xyzw)\n"
+" add r1.___w, r2.x_neg(xyzw), r1.w\n"
+" add r1.___w, r3.x_neg(xyzw), r1.w\n"
+" add r0.___w, r0.w, r1.w\n"
+" add r1.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r3.y_neg(xyzw), r0.w\n"
+" add r0.___w, r1.w, r0.w\n"
+" \n"
+" dcl_literal l12, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.___w, r1.x, l12, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l13, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.x, l13, r1.w\n"
+" \n"
+" dcl_literal l14, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.x___, r1.y, l14, r1.x\n"
+" \n"
+" dcl_literal l15, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1._y__, r1.y, l15, r1.x\n"
+" add r1.___w, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.z_neg(xyzw), r0.w\n"
+" add r1.x___, r1.w, r0.w\n"
+" frc r0.___w, r1.z\n"
+" \n"
+" dcl_literal l16, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+" mul_ieee r0.___w, r0.w, l16\n"
+" round_nearest r0.___w, r0.w\n"
+" mov r1._y__, r1.y_neg(xyzw)\n"
+" else\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r1.__z_, r0.y, l17\n"
+" \n"
+" dcl_literal l18, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.___w, r0.x, l18\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l19\n"
+" \n"
+" dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r1.w, l20\n"
+" \n"
+" dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r1.w, l21\n"
+" \n"
+" dcl_literal l22, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r3, l22, r1.w\n"
+" \n"
+" dcl_literal l23, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r4, l23, r2.x\n"
+" \n"
+" dcl_literal l24, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2._yz_, r4.yywy, l24\n"
+" \n"
+" dcl_literal l25, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r3.ywyy, l25\n"
+" iadd r2._yz_, r2.yyzy, r5.xxyx\n"
+" iadd r2._yz_, r4.xxzx, r2.yyzy\n"
+" \n"
+" dcl_literal l26, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x_z_, r2.yyzy, l26\n"
+" iadd r3.x_z_, r3.xxzx, r4.xxzx\n"
+" \n"
+" dcl_literal l27, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l27\n"
+" \n"
+" dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2._yz_, r2.yyzy, l28\n"
+" \n"
+" dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.ywyy, l29\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" ior r4.xy__, r2.yzyy, r4.xyxx\n"
+" iadd r5.__z_, r3.x, r4.y\n"
+" ult r2._y__, r5.z, r4.y\n"
+" \n"
+" dcl_literal l30, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r3.y, l30\n"
+" cmov_logical r2._y__, r2.y, r2.z, r3.y\n"
+" \n"
+" dcl_literal l31, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r3, l31, r1.w\n"
+" \n"
+" dcl_literal l32, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r6, l32, r2.x\n"
+" \n"
+" dcl_literal l33, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l33\n"
+" \n"
+" dcl_literal l34, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l34\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l35, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l35\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l36, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l36\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l37\n"
+" \n"
+" dcl_literal l38, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l38\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r5._y__, r2.y, r2.z\n"
+" ult r2._y__, r5.y, r2.z\n"
+" \n"
+" dcl_literal l39, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+" iadd r3.__zw, r3.xxxy, l39\n"
+" cmov_logical r2._y__, r2.y, r3.z, r3.x\n"
+" iadd r5.x___, r2.y, r2.w\n"
+" ult r2._y__, r5.x, r2.w\n"
+" cmov_logical r2._y__, r2.y, r3.w, r3.y\n"
+" \n"
+" dcl_literal l40, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r3, l40, r1.w\n"
+" \n"
+" dcl_literal l41, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r6, l41, r2.x\n"
+" \n"
+" dcl_literal l42, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l42\n"
+" \n"
+" dcl_literal l43, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l43\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l44, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l44\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l45, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l45\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l46, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l46\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l47\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r3.___w, r2.y, r2.z\n"
+" ult r2._y__, r3.w, r2.z\n"
+" \n"
+" dcl_literal l48, 0x00000001, 0x00000001, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r3.xyxx, l48\n"
+" cmov_logical r2._y__, r2.y, r6.x, r3.x\n"
+" iadd r3.__z_, r2.y, r2.w\n"
+" ult r2._y__, r3.z, r2.w\n"
+" cmov_logical r2._y__, r2.y, r6.y, r3.y\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x0000A2F9, 0x0000836E\n"
+" umul r2.__zw, l49, r1.w\n"
+" \n"
+" dcl_literal l50, 0x0000A2F9, 0x0000836E, 0x00000000, 0x00000000\n"
+" umul r3.xy__, l50, r2.x\n"
+" \n"
+" dcl_literal l51, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r3.y, l51\n"
+" \n"
+" dcl_literal l52, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r2.w, l52\n"
+" iadd r1.___w, r1.w, r2.x\n"
+" iadd r1.___w, r3.x, r1.w\n"
+" \n"
+" dcl_literal l53, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.x___, r1.w, l53\n"
+" iadd r2.x___, r2.z, r2.x\n"
+" \n"
+" dcl_literal l54, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__z_, r2.w, l54\n"
+" iadd r2.x___, r2.x, r2.z\n"
+" \n"
+" dcl_literal l55, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.___w, r1.w, l55\n"
+" \n"
+" dcl_literal l56, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r3.y, l56\n"
+" ior r1.___w, r1.w, r2.z\n"
+" iadd r2._y__, r2.y, r1.w\n"
+" ult r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l57, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r2.x, l57\n"
+" cmov_logical r1.___w, r1.w, r2.z, r2.x\n"
+" \n"
+" dcl_literal l58, 0xFFFFFF89, 0x00000000, 0xFFFFFF8A, 0x00000000\n"
+" iadd r2.x_z_, r1.z, l58\n"
+" \n"
+" dcl_literal l59, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r6.x___, r2.x, l59\n"
+" \n"
+" dcl_literal l60, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r1.__z_, r6.x, l60\n"
+" iadd r1.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l61, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l61, r6.x\n"
+" \n"
+" dcl_literal l62, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r7, r2.x, l62\n"
+" inegate r2.___w, r7.x\n"
+" \n"
+" dcl_literal l63, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r8.x___, r7.y, l63\n"
+" \n"
+" dcl_literal l64, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r8._y__, r7.z, l64\n"
+" \n"
+" dcl_literal l65, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r8.__z_, r7.w, l65\n"
+" inegate r7.xyz_, r8.xyzx\n"
+" \n"
+" dcl_literal l66, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r8, r2.x, l66\n"
+" \n"
+" dcl_literal l67, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r9.x___, r8.x, l67\n"
+" \n"
+" dcl_literal l68, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r9._yzw, r8.yyzw, l68\n"
+" inegate r8, r9\n"
+" and r2.x___, r1.w, r2.w\n"
+" and r3.xy__, r2.y, r7.xyxx\n"
+" ior r2.x___, r2.x, r3.x\n"
+" and r7._yzw, r3.zzzw, r7.yyzz\n"
+" ior r2.x___, r2.x, r7.y\n"
+" ior r2.x___, r7.w, r2.x\n"
+" and r9, r5.xxyy, r8.xyyz\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.z, r2.x\n"
+" and r7._y_w, r5.z, r8.zzzw\n"
+" ior r2.x___, r2.x, r7.y\n"
+" and r2.___w, r4.x, r8.w\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r2.___w, r1.w, r7.x\n"
+" ior r2.___w, r3.y, r2.w\n"
+" ior r2.___w, r7.z, r2.w\n"
+" and r3.x___, r3.w, r8.x\n"
+" ior r2.___w, r2.w, r3.x\n"
+" ior r2.___w, r9.y, r2.w\n"
+" ior r2.___w, r9.w, r2.w\n"
+" ior r2.___w, r7.w, r2.w\n"
+" \n"
+" dcl_literal l69, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r3.x___, l69, r1.z\n"
+" and r3.x___, r2.x, r3.x\n"
+" inegate r3._y__, r1.z\n"
+" \n"
+" dcl_literal l70, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+" iadd r6._y__, l70, r3.y\n"
+" \n"
+" dcl_literal l71, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ult r4._y__, l71, r1.z\n"
+" \n"
+" dcl_literal l72, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r1.z, l72\n"
+" \n"
+" dcl_literal l73, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r1.__z_, l73, r1.z\n"
+" and r1.__z_, r2.x, r1.z\n"
+" ior r1.__z_, r3.x, r1.z\n"
+" ushr r1.__z_, r1.z, r6.y\n"
+" ushr r2.x___, r3.x, r6.y\n"
+" \n"
+" dcl_literal l74, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" and r2.___w, r2.w, l74\n"
+" \n"
+" dcl_literal l75, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.___w, r2.w, l75\n"
+" ior r2.x___, r2.x, r2.w\n"
+" cmov_logical r7.__z_, r4.y, r1.z, r2.x\n"
+" \n"
+" dcl_literal l76, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r8.x___, r6.x, l76\n"
+" \n"
+" dcl_literal l77, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ishl r6.__z_, l77, r3.y\n"
+" \n"
+" dcl_literal l78, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.___w, r6.z, l78\n"
+" \n"
+" dcl_literal l79, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFFF\n"
+" mov r8._yzw, l79\n"
+" cmov_logical r6, r6.y, r6, r8\n"
+" \n"
+" dcl_literal l80, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r6.y, l80\n"
+" \n"
+" dcl_literal l81, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r1.__z_, l81, r1.z\n"
+" \n"
+" dcl_literal l82, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r2.x___, r1.z, l82\n"
+" switch r6.x\n"
+" case 0\n"
+" and r3.x___, r1.w, r1.z\n"
+" \n"
+" dcl_literal l83, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l84, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l83, l84\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l85, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l85\n"
+" \n"
+" dcl_literal l86, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l86\n"
+" \n"
+" dcl_literal l87, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.zzwz_neg(xyzw), l87\n"
+" \n"
+" dcl_literal l88, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r11.x___, r2.y_neg(xyzw), l88\n"
+" and r3._y__, r1.w, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.x___, r2.y\n"
+" mov r11._yz_, r3.zzwz\n"
+" mov r3._y__, r1.w\n"
+" endif\n"
+" and r8.x___, r2.x, r3.y\n"
+" mov r8._yzw, r11.xxyz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 1\n"
+" and r3.x___, r2.y, r1.z\n"
+" \n"
+" dcl_literal l89, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l89, l90\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l91, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l91\n"
+" \n"
+" dcl_literal l92, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l92\n"
+" \n"
+" dcl_literal l93, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.wwzw_neg(xyzw), l93\n"
+" and r3._y__, r2.y, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" mov r11.xy__, r11.zyzz\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.xy__, r3.zwzz\n"
+" mov r3._y__, r2.y\n"
+" endif\n"
+" and r11.__z_, r2.x, r3.y\n"
+" \n"
+" dcl_literal l94, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l94\n"
+" mov r8, r11.wzxy\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 2\n"
+" and r3.x___, r3.z, r1.z\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l95, l96\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l97, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l97\n"
+" \n"
+" dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6._y__, r3.w_neg(xyzw), l98\n"
+" and r4._y__, r3.z, r6.w\n"
+" iadd r6.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r5.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r5\n"
+" cmov_logical r11._yz_, r3.x, r6.xxyx, r3.zzwz\n"
+" and r11.x___, r2.x, r11.y\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l99\n"
+" mov r8, r11.wwxz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 3\n"
+" and r3.x___, r3.w, r1.z\n"
+" \n"
+" dcl_literal l100, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l100, l101\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l102, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l102\n"
+" and r4._y__, r3.w, r6.w\n"
+" iadd r4._y__, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" cmov_logical r3.x___, r3.x, r4.y, r3.w\n"
+" and r11.___w, r2.x, r3.x\n"
+" \n"
+" dcl_literal l103, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.xyz_, l103\n"
+" mov r8, r11\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 4\n"
+" and r3.x___, r5.x, r1.z\n"
+" \n"
+" dcl_literal l104, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l104, l105\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10._yz_, r5.yyzy_neg(xyzw), l106\n"
+" and r4._y__, r5.x, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" and r3.x___, r2.x, r10.x\n"
+" \n"
+" dcl_literal l107, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l107\n"
+" mov r9._yzw, r10.yyzw\n"
+" mov r9.x___, r3.x\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 5\n"
+" and r3.x___, r5.y, r1.z\n"
+" \n"
+" dcl_literal l108, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l109, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l108, l109\n"
+" inegate r10.__z_, r4.x\n"
+" \n"
+" dcl_literal l110, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.z_neg(xyzw), l110\n"
+" and r4._y__, r5.y, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r4.__zw, r5.yyyz\n"
+" cmov_logical r10.x_zw, r3.x, r10.xxyz, r4.zzwx\n"
+" and r10._y__, r2.x, r10.x\n"
+" \n"
+" dcl_literal l111, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.x___, l111\n"
+" \n"
+" dcl_literal l112, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l112\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 6\n"
+" and r3.x___, r5.z, r1.z\n"
+" \n"
+" dcl_literal l113, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l114, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l113, l114\n"
+" inegate r4.__z_, r4.x\n"
+" and r4.___w, r5.z, r6.w\n"
+" iadd r4._y__, r6.w, r4.w_neg(xyzw)\n"
+" mov r6.x___, r5.z\n"
+" mov r6._y__, r4.x\n"
+" cmov_logical r10._yz_, r3.x, r4.yyzy, r6.xxyx\n"
+" and r10.x___, r2.x, r10.y\n"
+" \n"
+" dcl_literal l115, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.___w, l115\n"
+" \n"
+" dcl_literal l116, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l116\n"
+" mov r9, r10.wwxz\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 7\n"
+" and r1.__z_, r4.x, r1.z\n"
+" \n"
+" dcl_literal l117, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l118, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r1.z, l117, l118\n"
+" and r3._y__, r4.x, r6.w\n"
+" iadd r3._y__, r6.z, r3.y_neg(xyzw)\n"
+" cmov_logical r1.__z_, r1.z, r3.y, r4.x\n"
+" and r6.___w, r2.x, r1.z\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r6.xyz_, l119\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l120\n"
+" mov r9, r6\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" default\n"
+" mov r8.__zw, r3.zzzw\n"
+" mov r8._y__, r2.y\n"
+" mov r8.x___, r1.w\n"
+" mov r9.___w, r4.x\n"
+" mov r9.xyz_, r5.xyzx\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r2.___w, l121\n"
+" break\n"
+" endswitch\n"
+" mov r3, r8\n"
+" mov r4, r9\n"
+" mov r1.__z_, r2.w\n"
+" \n"
+" dcl_literal l122, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5, r3, l122\n"
+" \n"
+" dcl_literal l123, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5, r5, l123\n"
+" \n"
+" dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r6, r5, l124\n"
+" \n"
+" dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r6, r6, l125\n"
+" \n"
+" dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l126\n"
+" \n"
+" dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r6, l127, r6_neg(xyzw)\n"
+" \n"
+" dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l128\n"
+" \n"
+" dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r6, r6, l129\n"
+" \n"
+" dcl_literal l130, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r6, r6, l130\n"
+" \n"
+" dcl_literal l131, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r5, r5, r6, l131\n"
+" \n"
+" dcl_literal l132, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r3, l132\n"
+" \n"
+" dcl_literal l133, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l133\n"
+" \n"
+" dcl_literal l134, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l134\n"
+" \n"
+" dcl_literal l135, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l135\n"
+" \n"
+" dcl_literal l136, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l136, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l137\n"
+" \n"
+" dcl_literal l138, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l138\n"
+" \n"
+" dcl_literal l139, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l139\n"
+" \n"
+" dcl_literal l140, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l140\n"
+" \n"
+" dcl_literal l141, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r8, r5, l141\n"
+" iadd r6, r5, r6\n"
+" cmov_logical r5, r8, r6, r5\n"
+" \n"
+" dcl_literal l142, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6, r4, l142\n"
+" \n"
+" dcl_literal l143, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r6, l143\n"
+" \n"
+" dcl_literal l144, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l144\n"
+" \n"
+" dcl_literal l145, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l145\n"
+" \n"
+" dcl_literal l146, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l146\n"
+" \n"
+" dcl_literal l147, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l147, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l148, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l148\n"
+" \n"
+" dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l149\n"
+" \n"
+" dcl_literal l150, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l150\n"
+" \n"
+" dcl_literal l151, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l151\n"
+" \n"
+" dcl_literal l152, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r8, r4, l152\n"
+" \n"
+" dcl_literal l153, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r9, r8, l153\n"
+" \n"
+" dcl_literal l154, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r9, r9, l154\n"
+" \n"
+" dcl_literal l155, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l155\n"
+" \n"
+" dcl_literal l156, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r9, l156, r9_neg(xyzw)\n"
+" \n"
+" dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l157\n"
+" \n"
+" dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r9, r9, l158\n"
+" \n"
+" dcl_literal l159, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r9, r9, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r8, r8, r9, l160\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r9, r6, l161\n"
+" iadd r8, r6, r8\n"
+" cmov_logical r6, r9, r8, r6\n"
+" \n"
+" dcl_literal l162, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ieq r1.___w, r5.x, l162\n"
+" \n"
+" dcl_literal l163, 0x00000020, 0x00000040, 0x00000000, 0x00000060\n"
+" iadd r2.xy_w, r5.yzyw, l163\n"
+" cmov_logical r1.___w, r1.w, r2.x, r5.x\n"
+" \n"
+" dcl_literal l164, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+" ieq r2.x___, r1.w, l164\n"
+" cmov_logical r1.___w, r2.x, r2.y, r1.w\n"
+" \n"
+" dcl_literal l165, 0x00000060, 0x00000060, 0x00000060, 0x00000060\n"
+" ieq r2.x___, r1.w, l165\n"
+" cmov_logical r1.___w, r2.x, r2.w, r1.w\n"
+" \n"
+" dcl_literal l166, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+" ieq r2.x___, r1.w, l166\n"
+" \n"
+" dcl_literal l167, 0x00000080, 0x000000A0, 0x000000C0, 0x000000E0\n"
+" iadd r5, r6, l167\n"
+" cmov_logical r1.___w, r2.x, r5.x, r1.w\n"
+" \n"
+" dcl_literal l168, 0x000000A0, 0x000000A0, 0x000000A0, 0x000000A0\n"
+" ieq r2.x___, r1.w, l168\n"
+" cmov_logical r1.___w, r2.x, r5.y, r1.w\n"
+" \n"
+" dcl_literal l169, 0x000000C0, 0x000000C0, 0x000000C0, 0x000000C0\n"
+" ieq r2.x___, r1.w, l169\n"
+" cmov_logical r1.___w, r2.x, r5.z, r1.w\n"
+" \n"
+" dcl_literal l170, 0x000000E0, 0x000000E0, 0x000000E0, 0x000000E0\n"
+" ieq r2.x___, r1.w, l170\n"
+" cmov_logical r1.___w, r2.x, r5.w, r1.w\n"
+" \n"
+" dcl_literal l171, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishr r2.x___, r1.w, l171\n"
+" \n"
+" dcl_literal l172, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r2._y__, r2.x, l172\n"
+" iadd r2._y__, r1.w, r2.y_neg(xyzw)\n"
+" iadd r1.___w, r2.z, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l173, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l173, r2.x\n"
+" \n"
+" dcl_literal l174, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r5, r2.x, l174\n"
+" inegate r2.__z_, r5.x\n"
+" \n"
+" dcl_literal l175, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r6.x__w, r5.y, l175\n"
+" \n"
+" dcl_literal l176, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r6._y__, r5.z, l176\n"
+" \n"
+" dcl_literal l177, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r6.__z_, r5.w, l177\n"
+" inegate r5, r6\n"
+" \n"
+" dcl_literal l178, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r6, r2.x, l178\n"
+" \n"
+" dcl_literal l179, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r8.x___, r6.x, l179\n"
+" \n"
+" dcl_literal l180, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r8._y__, r6.y, l180\n"
+" \n"
+" dcl_literal l181, 0x00000006, 0x00000006, 0x00000006, 0x00000006\n"
+" ushr r8.__z_, r6.z, l181\n"
+" \n"
+" dcl_literal l182, 0x00000007, 0x00000007, 0x00000007, 0x00000007\n"
+" ushr r8.___w, r6.w, l182\n"
+" inegate r6, r8\n"
+" and r2.x__w, r3.xxxy, r2.z\n"
+" and r8, r3.yzwz, r5\n"
+" ior r2.x__w, r2.xxxw, r8.xxxw\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" and r8, r4, r6\n"
+" ior r2.x___, r2.x, r8.x\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" ior r8._y_w, r8.w, r2.x\n"
+" and r2.x___, r3.w, r5.y\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.___w, r4.x, r5.z\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r9.xyz_, r4.yzwy, r6.xyzx\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.y, r2.x\n"
+" ior r8.x___, r9.z, r2.x\n"
+" \n"
+" dcl_literal l183, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r2.y, l183\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r8.y, r2.y\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l184, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2.___w, l184, r2.y_neg(xyzw)\n"
+" ushr r8.___w, r2.x, r2.w\n"
+" \n"
+" dcl_literal l185, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r3._y__, r2.y, l185\n"
+" ishl r2.x___, r2.x, r3.y\n"
+" ushr r3.x___, r2.x, r3.y\n"
+" else\n"
+" and r2.x___, r3.z, r2.z\n"
+" and r2.__z_, r3.w, r5.w\n"
+" ior r2.x___, r2.x, r2.z\n"
+" and r2.__zw, r4.xxxy, r5.yyyz\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.__zw, r4.zzzw, r6.xxxy\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r4._y__, r2.w, r2.x\n"
+" \n"
+" dcl_literal l186, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, l186, r2.y\n"
+" \n"
+" dcl_literal l187, 0xFFFFFFF8, 0x00000000, 0xFFFFFFF7, 0x00000000\n"
+" iadd r4.x_z_, r2.y, l187\n"
+" ishl r2.__z_, r8.y, r4.x\n"
+" ishl r2.___w, r8.x, r4.z\n"
+" \n"
+" dcl_literal l188, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r2._y__, l188, r2.y_neg(xyzw)\n"
+" ushr r2._y__, r8.x, r2.y\n"
+" ior r4.___w, r2.z, r2.y\n"
+" ushr r4.x___, r2.w, r4.z\n"
+" \n"
+" dcl_literal l189, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8.__z_, l189\n"
+" cmov_logical r8, r2.x, r4.yxzw, r8\n"
+" mov r3.xy__, r8.yzyy\n"
+" endif\n"
+" \n"
+" dcl_literal l190, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r8.w, l190\n"
+" \n"
+" dcl_literal l191, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.___w, r8.w, l191\n"
+" \n"
+" dcl_literal l192, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.___w, r2.w, l192\n"
+" \n"
+" dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l193\n"
+" \n"
+" dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.___w, l194, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l195\n"
+" \n"
+" dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r2.w, l196\n"
+" inegate r8._y__, r2.w\n"
+" \n"
+" dcl_literal l197, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r2.__z_, l197\n"
+" cmov_logical r2.xy__, r2.x, r2.yzyy, r8.wyww\n"
+" iadd r2._y__, r1.w, r2.y\n"
+" \n"
+" dcl_literal l198, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r2._y__, r2.y, l198\n"
+" \n"
+" dcl_literal l199, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l199\n"
+" ior r7.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l200, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r3.y, l200\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r3.x, r3.y\n"
+" ushr r2.x___, r2.x, r3.y\n"
+" \n"
+" dcl_literal l201, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2._y__, l201, r3.y_neg(xyzw)\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" else\n"
+" \n"
+" dcl_literal l202, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.__z_, l202, r3.y\n"
+" \n"
+" dcl_literal l203, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r2.___w, r3.y, l203\n"
+" ishl r2.___w, r3.x, r2.w\n"
+" \n"
+" dcl_literal l204, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r3._y__, l204, r3.y_neg(xyzw)\n"
+" ushr r3._y__, r8.x, r3.y\n"
+" ior r2.___w, r2.w, r3.y\n"
+" cmov_logical r2.x___, r2.z, r2.w, r3.x\n"
+" endif\n"
+" \n"
+" dcl_literal l205, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r1.___w, r1.w, l205\n"
+" \n"
+" dcl_literal l206, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l206\n"
+" \n"
+" dcl_literal l207, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.__z_, r2.x, l207\n"
+" \n"
+" dcl_literal l208, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.__z_, r2.z, l208\n"
+" \n"
+" dcl_literal l209, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l209\n"
+" \n"
+" dcl_literal l210, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.__z_, l210, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l211, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l211\n"
+" \n"
+" dcl_literal l212, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r2.z, l212\n"
+" inegate r2._y__, r2.z\n"
+" \n"
+" dcl_literal l213, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r3.__z_, l213\n"
+" cmov_logical r2._yz_, r3.x, r3.yyzy, r2.xxyx\n"
+" iadd r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l214, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r1.___w, r1.w, l214\n"
+" \n"
+" dcl_literal l215, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l215\n"
+" ior r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l216, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r7._y__, r2.x, r1.w, l216\n"
+" \n"
+" dcl_literal l217, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r7.z, l217\n"
+" mov r2.xy__, r7.xyxx_neg(xyzw)\n"
+" cmov_logical r2.xyz_, r1.z, r2.xyzx, r7.xyzx\n"
+" \n"
+" dcl_literal l218, 0x00000000, 0x00000000, 0xFFFFF000, 0x00000003\n"
+" and r1.__zw, r2.xxxz, l218\n"
+" add r2.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l219, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mul_ieee r1.x___, r2.x, l219\n"
+" \n"
+" dcl_literal l220, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r2.x___, r1.z, l220, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l221, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r1.z, l221, r2.x\n"
+" \n"
+" dcl_literal l222, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r1.__z_, r2.z, l222, r1.z\n"
+" \n"
+" dcl_literal l223, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r2.z, l223, r1.z\n"
+" utof r0.___w, r1.w\n"
+" \n"
+" dcl_literal l224, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mad_ieee r1._y__, r2.y, l224, r1.z\n"
+" endif\n"
+" mul_ieee r1.__zw, r1.xxxy, r1.x\n"
+" mul_ieee r2.x___, r1.x, r1.z\n"
+" \n"
+" dcl_literal l225, 0x00000000, 0x2F2EC9D3, 0xAD47D74E, 0x00000000\n"
+" \n"
+" dcl_literal l226, 0x00000000, 0xB2D72F34, 0x310F74F6, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, l225, l226\n"
+" \n"
+" dcl_literal l227, 0x00000000, 0x3636DF25, 0xB492923A, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l227\n"
+" \n"
+" dcl_literal l228, 0x00000000, 0xB95009D4, 0x37D00AE2, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l228\n"
+" \n"
+" dcl_literal l229, 0x00000000, 0x3C088887, 0xBAB60B60, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l229\n"
+" mul_ieee r2._y__, r2.x, r2.y\n"
+" \n"
+" dcl_literal l230, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r2._y__, r1.y, l230, r2.y_neg(xyzw)\n"
+" mad_ieee r1._y__, r1.z, r2.y, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l231, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mad_ieee r1._y__, r2.x_neg(xyzw), l231, r1.y\n"
+" add r1._y__, r1.x, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l232, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r2.x___, r1.z, r2.z, l232\n"
+" mul_ieee r2._y__, r1.z, r1.z\n"
+" \n"
+" dcl_literal l233, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.x___, r1.x, l233\n"
+" \n"
+" dcl_literal l234, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r2.__z_, r1.x, l234\n"
+" \n"
+" dcl_literal l235, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r2.___w, l235, r1.x\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l236, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r2.___w, r1.x, l236\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l237, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r1.x___, l237, r1.x\n"
+" \n"
+" dcl_literal l238, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r1.x___, r1.x, l238, r2.z\n"
+" \n"
+" dcl_literal l239, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1.__z_, r1.z, l239, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l240, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1.x___, r1.x_neg(xyzw), l240\n"
+" mad_ieee r1.___w, r2.x, r2.y, r1.w_neg(xyzw)\n"
+" add r1.__z_, r1.z, r1.w_neg(xyzw)\n"
+" add r1.x___, r1.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l241, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+" eq r2, r0.w, l241\n"
+" and r0.___w, r1.x, r2.x\n"
+" cmov_logical r0.___w, r2.y, r1.y_neg(xyzw), r0.w\n"
+" cmov_logical r0.___w, r2.z, r1.x_neg(xyzw), r0.w\n"
+" cmov_logical r0.__z_, r2.w, r1.y, r0.w\n"
+"endif\n"
+"\n"
+"dcl_literal l242, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l242\n"
+"\n"
+"dcl_literal l243, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.__z_, r0.w, l243, r0.z\n"
+"\n"
+"dcl_literal l244, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l244, r0.y\n"
+"\n"
+"dcl_literal l245, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l245\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__cross_4f32",  /* Macro table entry: name string, IL body (adjacent string literals), two trailing integers. */
+"mdef(193)_out(1)_in(2)\n"  /* Macro id 193 with one output and two inputs. Opcode/modifier meanings below are read from their names; confirm against the AMD IL spec. */
+"mov r0, in0\n"  /* r0 = first input */
+"mov r1, in1\n"  /* r1 = second input */
+"mov r2, r0\n"  /* These three moves exchange r0 and r1 using r2 as scratch. */
+"mov r0, r1\n"
+"mov r1, r2\n"  /* From here on r0 holds in1 and r1 holds in0. NOTE(review): confirm the intended operand order of the product. */
+"\n"
+"dcl_literal l1, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0x00000000\n"  /* Bit mask: keeps everything except the low 12 bits of x/y/z, zero for w. */
+"and r2.xyz_, r1.yzxy, l1\n"  /* r2 = masked ("high part") copy of r1.yzx */
+"mul_ieee r3.xyz_, r0.yzxy, r1.zxyz\n"  /* r3 = first product term, r0.yzx * r1.zxy */
+"\n"
+"dcl_literal l2, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0x00000000\n"  /* Same mask as l1. */
+"and r4.xyz_, r0.yzxy, l2\n"  /* r4 = high part of r0.yzx */
+"mad_ieee r5.xyz_, r4.xyzx, r2.yzxy, r3.xyzx_neg(xyzw)\n"  /* r5 = high*high - r3; presumably starts accumulating the rounding error of r3. */
+"add r6.xyz_, r1.yzxy, r2.xyzx_neg(xyzw)\n"  /* r6 = r1.yzx minus its high part, i.e. the remaining low part */
+"mad_ieee r5.xyz_, r4.xyzx, r6.yzxy, r5.xyzx\n"  /* r5 += high(r0) * low(r1) */
+"add r4.xyz_, r0.yzxy, r4.xyzx_neg(xyzw)\n"  /* r4 = low part of r0.yzx */
+"mov r0.xyz_, r0.zxyz_neg(xyzw)\n"  /* r0.xyz = r0.zxy with the _neg modifier applied; sets up the second (subtracted) product term. */
+"mad_ieee r5.xyz_, r4.xyzx, r2.yzxy, r5.xyzx\n"  /* r5 += low(r0) * high(r1) */
+"mad_ieee r4.xyz_, r4.xyzx, r6.yzxy, r5.xyzx\n"  /* r4 = low*low + r5: correction term for the first product */
+"mul_ieee r1.xyz_, r0.xyzx, r1.yzxy\n"  /* r1 = second product term (r0 was negated above) */
+"\n"
+"dcl_literal l3, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0x00000000\n"  /* Same mask as l1. */
+"and r5.xyz_, r0.xyzx, l3\n"  /* r5 = high part of the negated r0 */
+"add r0.xyz_, r0.xyzx, r5.xyzx_neg(xyzw)\n"  /* r0 = low part of the negated r0 */
+"mad_ieee r7.xyz_, r5.xyzx, r2.xyzx, r1.xyzx_neg(xyzw)\n"  /* Same high/low sequence as above, now against the second product r1. */
+"mad_ieee r5.xyz_, r5.xyzx, r6.xyzx, r7.xyzx\n"
+"mad_ieee r2.xyz_, r0.xyzx, r2.xyzx, r5.xyzx\n"
+"mad_ieee r0.xyz_, r0.xyzx, r6.xyzx, r2.xyzx\n"  /* r0 = correction term for the second product */
+"add r0.xyz_, r4.xyzx, r0.xyzx\n"  /* r0 = sum of both correction terms */
+"add r2.xyz_, r3.xyzx, r1.xyzx\n"  /* r2 = sum of the two product terms (the uncorrected result) */
+"add r4.xyz_, r3.xyzx, r2.xyzx_neg(xyzw)\n"  /* r4/r5 hold two candidate expressions for the rounding error of that sum. */
+"add r4.xyz_, r1.xyzx, r4.xyzx\n"  /* r4 = r1 + (r3 - r2) */
+"add r5.xyz_, r1.xyzx, r2.xyzx_neg(xyzw)\n"
+"lt r1.xyz_, r1.xyzx_abs, r3.xyzx_abs\n"  /* Per-component test: is r1 (with _abs) less than r3 (with _abs)? */
+"add r3.xyz_, r3.xyzx, r5.xyzx\n"  /* r3 = r3 + (r1 - r2) */
+"cmov_logical r1.xyz_, r1.xyzx, r4.xyzx, r3.xyzx\n"  /* Select r4 where the test held, otherwise r3. */
+"add r0.xyz_, r0.xyzx, r1.xyzx\n"  /* Fold the selected sum error into the accumulated corrections. */
+"add r0.xyz_, r2.xyzx, r0.xyzx\n"  /* Final xyz = uncorrected sum + total correction. */
+"mov out0, r0\n"  /* Only x/y/z were written after the swap, so out0.w carries the w that r0 held after the swap (from in1). */
+"mend\n"
+,2,1  /* In every entry visible in this part of the table these match the _in(N) and _out(N) counts of the mdef line (here 2 inputs, 1 output). */
+},
+{ "__ddiv_f64",  /* Macro table entry: name string, IL body, two trailing integers. */
+"mdef(194)_out(1)_in(2)\n"  /* Macro id 194 with one output and two inputs. Opcode semantics are read from their names; confirm against the AMD IL spec. */
+"mov r0, in0\n"  /* r0 = first input; judging by the result combination at the end, presumably the numerator. */
+"mov r1, in1\n"  /* r1 = second input; its frexp result is the one that gets inverted below. */
+"dcl_literal l9, 12, 2, 0x7ff00000, 0x00100000\n"  /* x = 12 and y = 2 are used as shift/multiply constants, z and w as comparison thresholds for the .y words. */
+"ixor r2.x, r0.y, r1.y\n"  /* XOR of the two .y words; reduced further below and applied to the result at the end. */
+"dadd r0.xy, r0.xy, r0.00\n"  /* NOTE(review): adds the `r0.00` operand to each input; presumably a zero swizzle, purpose not visible here. */
+"dadd r1.xy, r1.xy, r1.00\n"
+"ige r3.x, r0.y_abs, l9.z\n"  /* r3.x / r3.y: .y word (with _abs) >= 0x7ff00000, for each input */
+"ige r3.y, r1.y_abs, l9.z\n"
+"ilt r3.z, r0.y_abs, l9.w\n"  /* r3.z / r3.w: .y word (with _abs) < 0x00100000, for each input */
+"ilt r3.w, r1.y_abs, l9.w\n"
+"ixor r2.x, r2.x_abs, r2.x\n"  /* XOR of a value with its _abs form; presumably leaves only the sign bit. */
+"dfrexp r20, r0\n"  /* NOTE(review): later code reads .zw as a double operand and .y as an integer exponent; confirm dfrexp's output layout. */
+"dfrexp r21, r1\n"
+"ishl r0.z, r0.y, l9.x\n"  /* (.y << 12) | .x, then compared for equality with the `r0.0` operand: */
+"ishl r1.z, r1.y, l9.x\n"  /* a flag per input that feeds the class code built further down. */
+"ior r0.z, r0.z, r0.x\n"
+"ior r1.z, r1.z, r1.x\n"
+"ieq r0.z, r0.z, r0.0\n"
+"ieq r1.z, r1.z, r0.0\n"
+"mov r20.w, r20.wwww_abs\n"  /* Apply _abs to the .w word of both frexp results. */
+"mov r21.w, r21.wwww_abs\n"
+"dcl_literal l2, 0x16f0068e, 0x40075048, 0x00000000, 0xc0000000 \n"  /* Read as two doubles with the second word high: xy is about 2.914, zw is -2.0. */
+"dmad r22.xy, l2.zw, r21.zw, l2.xy\n"  /* r22 = l2.zw * r21.zw + l2.xy: linear starting estimate. */
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"  /* Four repetitions of: r23 = r21.zw * r22 + l2.zw ; r22 = r22 * r23 (with _neg). */
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"  /* Presumably Newton-style refinement of a reciprocal of r21.zw. */
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"
+"dcl_literal l6, 0x80000000, 0x7fffffff, 0xf8000000, 0xffffffff \n"  /* Only z and w are referenced in this macro. */
+"and r24.xy, r21.zw, l6.zwzw\n"  /* r24 = r21.zw with the low 27 bits of its first word cleared */
+"dadd r25.xy, r21.zw, r24.xy_neg(yw)\n"  /* r25 = r21.zw - r24: the part removed by the mask */
+"dmul r26.xy, r22.xy, r20.zw\n"  /* Round 1: partial term r26 = masked(r22 * r20.zw) ... */
+"and r26.xy, r26.xy, l6.zwzw\n"
+"dmad r20.zw, r26.xy, r24.xy_neg(yw), r20.zw\n"  /* ... then r20.zw is reduced by r26 * r24 and r26 * r25. */
+"dmad r20.zw, r26.xy, r25.xy_neg(yw), r20.zw\n"
+"dmul r27.xy, r22.xy, r20.zw\n"  /* Round 2: same steps, producing r27. */
+"and r27.xy, r27.xy, l6.zwzw\n"
+"dmad r20.zw, r27.xy, r24.xy_neg(yw), r20.zw\n"
+"dmad r20.zw, r27.xy, r25.xy_neg(yw), r20.zw\n"
+"dmul r28.xy, r22.xy, r20.zw\n"  /* Round 3: same steps, producing r28. */
+"and r28.xy, r28.xy, l6.zwzw\n"
+"dmad r20.zw, r28.xy, r24.xy_neg(yw), r20.zw\n"
+"dmad r20.zw, r28.xy, r25.xy_neg(yw), r20.zw\n"
+"dmul r29.xy, r22.xy, r20.zw\n"  /* Final partial term r29 (not masked). */
+"dadd r25.xy, r26.xy, r27.xy\n"  /* The dadd chain below combines r26..r29 pairwise, carrying the difference of each pair forward; */
+"dadd r26.xy, r25.xy, r26.xy_neg(yw)\n"  /* presumably an error-propagating summation of the partial terms. */
+"dadd r27.xy, r27.xy, r26.xy_neg(yw)\n"
+"dadd r26.xy, r27.xy, r28.xy\n"
+"dadd r27.xy, r26.xy, r27.xy_neg(yw)\n"
+"dadd r28.xy, r28.xy, r27.xy_neg(yw)\n"
+"dadd r27.xy, r28.xy, r29.xy\n"
+"dadd r28.xy, r27.xy, r28.xy_neg(yw)\n"
+"dadd r28.xy, r29.xy, r28.xy_neg(yw)\n"
+"dadd r24.xy, r25.xy, r26.xy\n"
+"dadd r25.xy, r24.xy, r25.xy_neg(yw)\n"
+"dadd r26.xy, r26.xy, r25.xy_neg(yw)\n"
+"dadd r25.xy, r26.xy, r27.xy\n"
+"dadd r26.xy, r25.xy, r26.xy_neg(yw)\n"
+"dadd r27.xy, r27.xy, r26.xy_neg(yw)\n"
+"dadd r26.xy, r27.xy, r28.xy\n"
+"dadd r25.xy, r25.xy, r26.xy\n"
+"ior r25.x, r25.x, l6.w\n"  /* ORs 0xffffffff (l6.w) into r25.x. NOTE(review): intent not evident from this chunk. */
+"dadd r10.xy, r24.xy, r25.xy\n"  /* r10 = combined result of the summation, before exponent scaling. */
+"imad r4.x, r3.x, l9.y, r3.z\n"  /* r4.x / r4.y: per-input code built from the r3 comparison results and the r0.z / r1.z flags, */
+"imad r4.y, r3.y, l9.y, r3.w\n"  /* each step multiplying by l9.y (2) and adding the next flag. */
+"imad r4.x, r4.x, l9.y, r0.z\n"
+"imad r4.y, r4.y, l9.y, r1.z\n"
+"ishl r4.x, r4.x, l9.y\n"  /* Codes are then shifted left by 2. */
+"ishl r4.y, r4.y, l9.y\n"
+"dcl_literal l7, 0x80008000, 0x66ff66ff, 0x26f326f3, 52\n"  /* x/y/z are constants shifted by the computed amount below; w = 52 is a shift count. */
+"dcl_literal l8, 0x0048c000, 0x00123000, 0x80000000, 31\n"  /* x/y are constants shifted right by the class codes; w = 31 is a shift count. z is not referenced here. */
+"ushr r4.z, l8.x, r4.x\n"
+"ushr r4.w, l8.y, r4.y\n"
+"ior r4.w, r4.w, r4.z\n"  /* r4.w = combined shift amount derived from both codes */
+"ishl r5.x, l7.x, r4.w\n"  /* Shift each l7 constant left by r4.w, then arithmetic-shift right by 31: */
+"ishl r5.y, l7.y, r4.w\n"  /* each r5 component becomes a replicated copy of one selected bit. */
+"ishl r5.z, l7.z, r4.w\n"
+"ishr r5.x, r5.x, l8.w\n"
+"ishr r5.y, r5.y, l8.w\n"
+"ishr r5.z, r5.z, l8.w\n"
+"ishl r5.y, r5.y, l7.w\n"  /* r5.y is additionally shifted left by l7.w (52). */
+"inegate r21.y, r21.y\n"  /* r20.y = r20.y - r21.y: difference of the two frexp exponent words. */
+"iadd r20.y, r20.y, r21.y\n"
+"dldexp r10.xy, r10.xy, r20.y\n"  /* Scale the result by that exponent difference. */
+"and r10.xy, r10.xy, r5.xx\n"  /* Apply the masks computed above: r5.x is ANDed in, r5.y and r5.z are ORed in. */
+"ior r10.y, r10.y, r5.y_abs\n"  /* NOTE(review): presumably this forces the special-case results; confirm the constant tables. */
+"ior r10.x, r10.x, r5.z\n"
+"ixor r10.y, r10.y_abs, r2.x\n"  /* XOR the value kept in r2.x into the .y word (taken with _abs). */
+"mov r0.xy, r10.xy\n"
+"mov out0, r0\n"  /* Only r0.xy carries the result; r0.zw still hold scratch values written above. */
+"mend\n"
+,2,1  /* In every entry visible in this part of the table these match the _in(N) and _out(N) counts of the mdef line. */
+},
+{ "__ddiv_f64_fma",  /* Macro table entry: name string, IL body, two trailing integers. */
+"mdef(195)_out(1)_in(2)\n"  /* Macro id 195 with one output and two inputs. Opcode semantics are read from their names; confirm against the AMD IL spec. */
+"mov r0, in0\n"  /* r0 = first input; judging by the products below, presumably the numerator. */
+"mov r1, in1\n"  /* r1 = second input; its frexp result is the one passed to drcp. */
+"dcl_literal l9, 12, 2, 0x7ff00000, 0x00100000\n"  /* x = 12 and y = 2 are used as shift/multiply constants, z and w as comparison thresholds for the .y words. */
+"ixor r2.x, r0.y, r1.y\n"  /* XOR of the two .y words; reduced further below and applied to the result at the end. */
+"ige r3.x, r0.y_abs, l9.z\n"  /* r3.x / r3.y: .y word (with _abs) >= 0x7ff00000, for each input */
+"ige r3.y, r1.y_abs, l9.z\n"
+"ilt r3.z, r0.y_abs, l9.w\n"  /* r3.z / r3.w: .y word (with _abs) < 0x00100000, for each input */
+"ilt r3.w, r1.y_abs, l9.w\n"
+"ixor r2.x, r2.x_abs, r2.x\n"  /* XOR of a value with its _abs form; presumably leaves only the sign bit. */
+"dfrexp r20, r0\n"  /* NOTE(review): later code reads .zw as a double operand and .y as an integer exponent; confirm dfrexp's output layout. */
+"dfrexp r21, r1\n"
+"ishl r0.z, r0.y, l9.x\n"  /* (.y << 12) | .x, then compared for equality with the `r0.0` operand: */
+"ishl r1.z, r1.y, l9.x\n"  /* a flag per input that feeds the class code built further down. */
+"ior r0.z, r0.z, r0.x\n"
+"ior r1.z, r1.z, r1.x\n"
+"ieq r0.z, r0.z, r0.0\n"
+"ieq r1.z, r1.z, r0.0\n"
+"mov r20.w, r20.wwww_abs\n"  /* Apply _abs to the .w word of both frexp results. */
+"mov r21.w, r21.wwww_abs\n"
+"dcl_literal l1, 0x00000000, 0x3ff00000, 0x00000001, 0 \n"  /* Read as a double with the second word high, xy is 1.0; z and w are not referenced in this macro. */
+"dcl_literal l2, 0x16f0068e, 0x40075048, 0x00000000, 0xc0000000 \n"  /* Only zw (-2.0 as a double) is referenced in this macro. */
+"dcl_literal l3, 0x00040000, 0x00000300, 0, 0 \n"  /* x is ORed into r23.y below; y (0x300) is the upper bound used by imin. */
+"drcp_zeroop(zero) r22.xy, r21.zw\n"  /* r22 = reciprocal estimate of r21.zw */
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"  /* Two repetitions of: r23 = r21.zw * r22 + l2.zw ; r22 = r22 * r23 (with _neg). */
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"  /* Presumably Newton-style refinement of the reciprocal. */
+"dmad r23.xy, r21.zw, r22.xy, l2.zw\n"
+"dmul r22.xy, r22.xy, r23.xy_neg(yw)\n"
+"dcl_literal l6, 0x80000000, 0x7fffffff, 0xf8000000, 0xffffffff \n"  /* Declared but not referenced in this macro. */
+"dmul r10.xy, r22.xy, r20.zw\n"  /* r10 = r22 * r20.zw: first quotient estimate */
+"dmad r23.xy, r21.zw, r10.xy_neg(yw), r20.zw\n"  /* r23 = r20.zw - r21.zw * r10: residual of that estimate */
+"dmad r10.xy, r22.xy, r23.xy, r10.xy\n"  /* r10 += r22 * residual */
+"dmad r23.xy, r21.zw, r22.xy_neg(yw), l1.xy\n"  /* r23 = 1.0 - r21.zw * r22: error of the reciprocal itself */
+"dmul r23.xy, r23.xy, r22.xy\n"
+"dmad r24.xy, r21.zw, r10.xy_neg(yw), r20.zw\n"  /* r24 = residual of the corrected quotient */
+"dmul r25.xy, r23.xy, r24.xy\n"
+"dmad r23.xy, r22.xy, r24.xy, r25.xy\n"  /* r23 = r22 * r24 + r25: low-order correction, added to r10 after scaling below. */
+"ior r23.y, r23.y, l3.x\n"  /* Sets bit 0x00040000 in r23.y. NOTE(review): intent not evident from this chunk. */
+"inegate r21.y, r21.y\n"  /* r20.x = r20.y - r21.y: difference of the two frexp exponent words. */
+"iadd r20.x, r20.y, r21.y\n"
+"imin r20.y, r20.x, l3.y\n"  /* r20.y = min(difference, 0x300); r20.x then keeps whatever is left over. */
+"inegate r21.y, r20.y\n"
+"iadd r20.x, r20.x, r21.y\n"
+"dldexp r11.xy, r10.xy, r20.y\n"  /* r11 = r10 scaled by r20.y; r12 scales it back by -r20.y. */
+"dldexp r12.xy, r11.xy, r21.y\n"
+"dadd r10.xy, r10.xy, r12.xy_neg(yw)\n"  /* r10 = what the scale/unscale round trip lost ... */
+"dadd r10.xy, r10.xy, r23.xy\n"  /* ... plus the low-order correction r23. */
+"dldexp r12.xy, l1.xy, r20.y\n"  /* r12 = 1.0 scaled by r20.y */
+"dmad r10.xy, r10.xy, r12.xy, r11.xy\n"  /* r10 = r10 * r12 + r11 */
+"dldexp r10.xy, r10.xy, r20.x\n"  /* Apply the remaining part of the exponent difference. */
+"imad r4.x, r3.x, l9.y, r3.z\n"  /* r4.x / r4.y: per-input code built from the r3 comparison results and the r0.z / r1.z flags, */
+"imad r4.y, r3.y, l9.y, r3.w\n"  /* each step multiplying by l9.y (2) and adding the next flag. */
+"imad r4.x, r4.x, l9.y, r0.z\n"
+"imad r4.y, r4.y, l9.y, r1.z\n"
+"ishl r4.x, r4.x, l9.y\n"  /* Codes are then shifted left by 2. */
+"ishl r4.y, r4.y, l9.y\n"
+"dcl_literal l7, 0x80008000, 0x66ff66ff, 0x26f326f3, 52\n"  /* x/y/z are constants shifted by the computed amount below; w = 52 is a shift count. */
+"dcl_literal l8, 0x0048c000, 0x00123000, 0x80000000, 31\n"  /* x/y are constants shifted right by the class codes; w = 31 is a shift count. z is not referenced here. */
+"ushr r4.z, l8.x, r4.x\n"
+"ushr r4.w, l8.y, r4.y\n"
+"ior r4.w, r4.w, r4.z\n"  /* r4.w = combined shift amount derived from both codes */
+"ishl r5.x, l7.x, r4.w\n"  /* Shift each l7 constant left by r4.w, then arithmetic-shift right by 31: */
+"ishl r5.y, l7.y, r4.w\n"  /* each r5 component becomes a replicated copy of one selected bit. */
+"ishl r5.z, l7.z, r4.w\n"
+"ishr r5.x, r5.x, l8.w\n"
+"ishr r5.y, r5.y, l8.w\n"
+"ishr r5.z, r5.z, l8.w\n"
+"ishl r5.y, r5.y, l7.w\n"  /* r5.y is additionally shifted left by l7.w (52). */
+"and r10.xy, r10.xy, r5.xx\n"  /* Apply the masks computed above: r5.x is ANDed in, r5.y and r5.z are ORed in. */
+"ior r10.y, r10.y, r5.y_abs\n"  /* NOTE(review): presumably this forces the special-case results; confirm the constant tables. */
+"ior r10.x, r10.x, r5.z\n"
+"ixor r10.y, r10.y_abs, r2.x\n"  /* XOR the value kept in r2.x into the .y word (taken with _abs). */
+"mov r0.xy, r10.xy\n"
+"mov out0, r0\n"  /* Only r0.xy carries the result; r0.zw still hold scratch values written above. */
+"mend\n"
+,2,1  /* In every entry visible in this part of the table these match the _in(N) and _out(N) counts of the mdef line. */
+},
+{ "__degrees_f32", /* Macro entry: scales the x component of the input by 180/pi. */
+"mdef(196)_out(1)_in(1)\n" /* macro id 196; one output, one input */
+"mov r0, in0\n"
+"dcl_literal l0, 0x42652EE1, 0x00000000, 0x00000000, 0x00000000\n" /* l0.x = 0x42652EE1, the float32 bit pattern of ~57.29578 (180/pi) */
+"mul_ieee r0.x___, r0.x, l0\n" /* only r0.x is written; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* NOTE(review): presumably input count, output count (agrees with in(1)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__distance_2f32", /* Macro entry: length of (in0.xy - in1.xy), returned in out0.x. */
+"mdef(197)_out(1)_in(2)\n" /* macro id 197; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r2._yz_, r0.xxyx, r1.xxyx_neg(xyzw)\n" /* r2.y = in0.x - in1.x, r2.z = in0.y - in1.y */
+"dp2_ieee r2._y__, r2.yzyy, r2.yzyy\n" /* r2.y = dot of the 2-component difference with itself */
+"sqrt_vec r0.x___, r2.y\n" /* only r0.x is written; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__distance_4f32", /* Macro entry: length of (in0 - in1) over all four components, returned in out0.x. */
+"mdef(198)_out(1)_in(2)\n" /* macro id 198; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r0, r0, r1_neg(xyzw)\n" /* r0 = in0 - in1 in every lane */
+"dp2_ieee r0.x___, r0.xyxx, r0.xyxx\n" /* r0.x = dx*dx + dy*dy */
+"mad_ieee r0.x___, r0.z, r0.z, r0.x\n" /* accumulate dz*dz */
+"mad_ieee r0.x___, r0.w, r0.w, r0.x\n" /* accumulate dw*dw */
+"sqrt_vec r0.x___, r0.x\n" /* r0.yzw still hold the per-lane differences */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__distance_f32", /* Macro entry: scalar distance |in0.x - in1.x|, returned in out0.x. */
+"mdef(199)_out(1)_in(2)\n" /* macro id 199; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r2._y__, r0.x, r1.x_neg(xyzw)\n" /* r2.y = in0.x - in1.x */
+"mov r0.x___, r2.y_abs\n" /* absolute value via the _abs source modifier; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__dot_2f32", /* Macro entry: dot product of in0.xy and in1.xy, returned in out0.x. */
+"mdef(200)_out(1)_in(2)\n" /* macro id 200; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp2_ieee r0.x___, r1.xyxx, r0.xyxx\n" /* only r0.x is written; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__dot_3f32", /* Macro entry: dot product of in0.xyz and in1.xyz, returned in out0.x. */
+"mdef(201)_out(1)_in(2)\n" /* macro id 201; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp2_ieee r0.x___, r1.xyxx, r0.xyxx\n" /* r0.x = x*x' + y*y' */
+"mad_ieee r0.x___, r1.z, r0.z, r0.x\n" /* add the z term; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__dot_4f32", /* Macro entry: dot product of all four components of in0 and in1, returned in out0.x. */
+"mdef(202)_out(1)_in(2)\n" /* macro id 202; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dp2_ieee r0.x___, r1.xyxx, r0.xyxx\n" /* r0.x = x*x' + y*y' */
+"mad_ieee r0.x___, r1.z, r0.z, r0.x\n" /* add the z term */
+"mad_ieee r0.x___, r1.w, r0.w, r0.x\n" /* add the w term; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__dot_f32", /* Macro entry: scalar "dot product", i.e. in0.x * in1.x, returned in out0.x. */
+"mdef(203)_out(1)_in(2)\n" /* macro id 203; one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mul_ieee r0.x___, r1.x, r0.x\n" /* only r0.x is written; r0.yzw still hold in0's values */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (agrees with in(2)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__erfc_f32", /* Macro entry named for single-precision erfc of in0.x; the scalar result is returned in out0.x (r0.yzw carry intermediates). */
+"mdef(204)_out(1)_in(1)\n" /* macro id 204; one output, one input */
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = bits of x with the sign bit cleared, i.e. |x| */
+"\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.__z_, r0.x_neg(xyzw), l1\n" /* r0.z = 1.0 - x (0x3F800000 is 1.0f) */
+"mul_ieee r0.___w, r0.x, r0.x\n" /* r0.w = x*x */
+"\n"
+"dcl_literal l2, 0xB7C756B1, 0xB684E21A, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l3, 0xBBBD1489, 0x390AEE49, 0x00000000, 0x00000000\n"
+"mad_ieee r1.xy__, r0.w, l2, l3\n" /* start of two Horner chains in x*x, one per lane (r1.x and r1.y) */
+"\n"
+"dcl_literal l4, 0xBCE9528F, 0x3BA68116, 0x00000000, 0x00000000\n"
+"mad_ieee r1.xy__, r0.w, r1.xyxx, l4\n"
+"\n"
+"dcl_literal l5, 0xBEA66BEB, 0x3D852A63, 0x00000000, 0x00000000\n"
+"mad_ieee r1.xy__, r0.w, r1.xyxx, l5\n"
+"\n"
+"dcl_literal l6, 0x3E0375D4, 0x3ECBBBCE, 0x00000000, 0x00000000\n"
+"mad_ieee r1.xy__, r0.w, r1.xyxx, l6\n"
+"\n"
+"dcl_literal l7, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r0.___w, r0.w, r1.y, l7\n"
+"div_zeroop(infinity) r0.___w, r1.x, r0.w\n" /* r0.w = ratio of the two polynomials */
+"mad_ieee r1.x___, r0.x, r0.w, r0.x\n"
+"\n"
+"dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.x___, r1.x_neg(xyzw), l8\n" /* r1.x = 1.0 - (x + x*ratio) */
+"\n"
+"\n"
+"dcl_literal l9, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"dcl_literal l10, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"add r1._y__, r0.x, l9\n"
+"mad_ieee r0.___w, r0.x, r0.w, r1.y\n"
+"add r0.___w, r0.w_neg(xyzw), l10\n" /* r0.w = 0.5 - (x*ratio + (x - 0.5)) */
+"\n"
+"mov r1._y__, r0.x_abs\n" /* r1.y = |x| */
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"\n"
+"dcl_literal l11, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r1.__z_, l11, r1.z\n" /* r1.z = 1.0 / (x*x) */
+"\n"
+"dcl_literal l12, 0xC11D077E, 0xBD777F97, 0xC3F1C275, 0xC1B38712\n"
+"\n"
+"dcl_literal l13, 0xC2A2932B, 0x40D23F7C, 0xC480230B, 0x43ED43A7\n"
+"mad_ieee r2, r1.z, l12, l13\n" /* start of four Horner chains in 1/(x*x), one per lane of r2 */
+"\n"
+"dcl_literal l14, 0xC3389AE7, 0x42D9451F, 0xC41F6441, 0x451F90CE\n"
+"mad_ieee r2, r1.z, r2, l14\n"
+"\n"
+"dcl_literal l15, 0xC322658C, 0x43D6810B, 0xC320A2EA, 0x4547FDBB\n"
+"mad_ieee r2, r1.z, r2, l15\n"
+"\n"
+"dcl_literal l16, 0xC2798057, 0x442158C9, 0xC18E104B, 0x44C01759\n"
+"mad_ieee r2, r1.z, r2, l16\n"
+"\n"
+"dcl_literal l17, 0xC128F022, 0x43D9486F, 0xBF4C9DD4, 0x43A2E571\n"
+"mad_ieee r2, r1.z, r2, l17\n"
+"\n"
+"dcl_literal l18, 0xBF31A0B7, 0x4309A863, 0xBC21A092, 0x41F2B459\n"
+"mad_ieee r2, r1.z, r2, l18\n"
+"\n"
+"dcl_literal l19, 0xBC21A093, 0x419D35CE, 0x00000000, 0x3F800000\n"
+"mad_ieee r2.xy_w, r1.z, r2.xyxw, l19\n"
+"\n"
+"dcl_literal l20, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r1.__z_, r1.z, r2.y, l20\n"
+"div_zeroop(infinity) r1.__z_, r2.x, r1.z\n"
+"div_zeroop(infinity) r1.___w, r2.z, r2.w\n"
+"\n"
+"dcl_literal l21, 0x4036DB6D, 0x3F580000, 0x41E00000, 0x00000000\n"
+"ilt r2.xyz_, r0.y, l21\n" /* integer compares of the |x| bits: below ~2.857 (x), below 0.84375 (y), below 28.0 (z) */
+"cmov_logical r1.__z_, r2.x, r1.z, r1.w\n" /* pick one of the two ratios computed above */
+"\n"
+"dcl_literal l22, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.___w, r1.y, l22\n" /* r1.w = |x| with the low 12 mantissa bits cleared */
+"\n"
+"dcl_literal l23, 0xBF100000, 0x41770000, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r1.w_neg(xyzw), r1.w, l23\n" /* r3.x = -r1.w*r1.w - 0.5625; r3.y = -r1.w*r1.w + 15.4375 */
+"\n"
+"dcl_literal l24, 0xC1800000, 0xC1800000, 0xC1800000, 0xC1800000\n"
+"lt r2.x___, r3.x, l24\n" /* r3.x < -16.0 ? */
+"\n"
+"dcl_literal l25, 0x00000000, 0x00000000, 0x33F1AADE, 0x3F800000\n"
+"mov r3.__zw, l25\n"
+"cmov_logical r2.x__w, r2.x, r3.yyyz, r3.xxxw\n"
+"\n"
+"dcl_literal l26, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.x___, r2.x, l26\n"
+"\n"
+"dcl_literal l27, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l28, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r3.x___, r3.x, l27, l28\n" /* r3.x = -0.5 if r2.x is negative, else +0.5 */
+"\n"
+"dcl_literal l29, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r3.x___, r2.x, l29, r3.x\n" /* r2.x * ~1.442695 (log2(e)) plus the +/-0.5 bias */
+"round_z r3.x___, r3.x\n" /* NOTE(review): bias followed by round_z looks like round-to-nearest of r2.x*log2(e), the usual exp() range reduction - confirm */
+"\n"
+"dcl_literal l30, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2.x___, r3.x_neg(xyzw), l30, r2.x\n" /* subtract n * ~0.693138 (0x3F317180) */
+"\n"
+"dcl_literal l31, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3._y__, r3.x_neg(xyzw), l31, r2.x\n" /* then n * ~9.058e-6 (0x3717F7D1); together the two constants approximate ln 2 */
+"mul_ieee r3.__z_, r3.y, r3.y\n"
+"\n"
+"dcl_literal l32, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l33, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3.___w, r3.z, l32, l33\n"
+"\n"
+"dcl_literal l34, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3.___w, r3.z, r3.w, l34\n"
+"\n"
+"dcl_literal l35, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3.___w, r3.z, r3.w, l35\n"
+"\n"
+"dcl_literal l36, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3.___w, r3.z, r3.w, l36\n"
+"mad_ieee r3.__z_, r3.z_neg(xyzw), r3.w, r3.y\n"
+"mul_ieee r3._y__, r3.y, r3.z\n"
+"\n"
+"dcl_literal l37, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3.__z_, r3.z_neg(xyzw), l37\n"
+"div_zeroop(infinity) r3._y__, r3.y, r3.z\n"
+"\n"
+"dcl_literal l38, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3._y__, r3.x, l38, r3.y_neg(xyzw)\n"
+"add r2.x___, r2.x_neg(xyzw), r3.y\n"
+"\n"
+"dcl_literal l39, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.x___, r2.x_neg(xyzw), l39\n"
+"ftoi r3.x___, r3.x\n" /* r3.x = n as an integer; it is added to the exponent field below */
+"\n"
+"dcl_literal l40, 0x00000000, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r3._yzw, r2.x, l40\n" /* split r2.x into sign (y), exponent field (z) and mantissa (w) */
+"if_logicalz r3.z\n" /* exponent field is zero */
+" itof r4.x___, r3.w\n"
+" \n"
+" dcl_literal l41, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.x, l41\n"
+" \n"
+" dcl_literal l42, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.x___, r4.x, l42\n"
+" iadd r4.x___, r4.x, r3.x\n"
+" \n"
+" dcl_literal l43, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.y, l43\n"
+" \n"
+" dcl_literal l44, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r4.x___, l44, r4.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l45, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l45, r4.x\n"
+" \n"
+" dcl_literal l46, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4.x___, r4.z, l46, r4.x\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.__z_, l47, r4.x\n"
+" ishr r4.___w, r4.y, r4.x\n"
+" inegate r4.x___, r4.x\n"
+" \n"
+" dcl_literal l48, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.x___, r4.x, l48\n"
+" iadd r4.x___, r4.y, r4.x\n"
+" cmov_logical r4.x___, r4.z, r4.w, r4.x\n"
+"else\n" /* exponent field is nonzero */
+" \n"
+" dcl_literal l49, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.x___, r2.x, l49\n"
+" \n"
+" dcl_literal l50, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r3.z, l50\n"
+" iadd r3.__z_, r3.z, r3.x\n"
+" \n"
+" dcl_literal l51, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.x___, r3.x, l51\n"
+" iadd r2.x___, r2.x, r3.x\n"
+" \n"
+" dcl_literal l52, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.z, l52\n"
+" \n"
+" dcl_literal l53, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.__z_, l53, r3.x\n"
+" \n"
+" dcl_literal l54, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.x___, r3.z, l54, r2.x\n"
+" \n"
+" dcl_literal l55, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l55, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l56, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r3.w, l56\n"
+" \n"
+" dcl_literal l57, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r4._yz_, l57, r3.x\n"
+" \n"
+" dcl_literal l58, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r4.z, l58, r3.x\n"
+" ishr r3.x___, r3.z, r3.x\n"
+" cmov_logical r4.x___, r4.y, r3.x, r2.x\n"
+"endif\n"
+"ior r2.x___, r3.y, r4.x\n" /* OR the sign bit saved in r3.y back onto the scaled magnitude */
+"add r3.x___, r1.y_neg(xyzw), r1.w\n"
+"add r1.___w, r1.y, r1.w\n"
+"mad_ieee r1.__z_, r3.x, r1.w, r1.z\n"
+"\n"
+"dcl_literal l59, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.___w, r1.z, l59\n" /* from here the same reduction/polynomial/exponent-rescale sequence as above is repeated with r1.z as the argument */
+"\n"
+"dcl_literal l60, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l61, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r1.___w, r1.w, l60, l61\n"
+"\n"
+"dcl_literal l62, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r1.___w, r1.z, l62, r1.w\n"
+"round_z r1.___w, r1.w\n"
+"\n"
+"dcl_literal l63, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r1.__z_, r1.w_neg(xyzw), l63, r1.z\n"
+"\n"
+"dcl_literal l64, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3.x___, r1.w_neg(xyzw), l64, r1.z\n"
+"mul_ieee r3._y__, r3.x, r3.x\n"
+"\n"
+"dcl_literal l65, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l66, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3.__z_, r3.y, l65, l66\n"
+"\n"
+"dcl_literal l67, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l67\n"
+"\n"
+"dcl_literal l68, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l68\n"
+"\n"
+"dcl_literal l69, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l69\n"
+"mad_ieee r3._y__, r3.y_neg(xyzw), r3.z, r3.x\n"
+"mul_ieee r3.x___, r3.x, r3.y\n"
+"\n"
+"dcl_literal l70, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3._y__, r3.y_neg(xyzw), l70\n"
+"div_zeroop(infinity) r3.x___, r3.x, r3.y\n"
+"\n"
+"dcl_literal l71, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3.x___, r1.w, l71, r3.x_neg(xyzw)\n"
+"add r1.__z_, r1.z_neg(xyzw), r3.x\n"
+"\n"
+"dcl_literal l72, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.__z_, r1.z_neg(xyzw), l72\n"
+"ftoi r3.x___, r1.w\n"
+"\n"
+"dcl_literal l73, 0x00000000, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r3._yzw, r1.z, l73\n"
+"if_logicalz r3.z\n"
+" itof r4.x___, r3.w\n"
+" \n"
+" dcl_literal l74, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.x, l74\n"
+" \n"
+" dcl_literal l75, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.x___, r4.x, l75\n"
+" iadd r4.x___, r4.x, r3.x\n"
+" \n"
+" dcl_literal l76, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.y, l76\n"
+" \n"
+" dcl_literal l77, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r4.x___, l77, r4.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l78, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l78, r4.x\n"
+" \n"
+" dcl_literal l79, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4.x___, r4.z, l79, r4.x\n"
+" \n"
+" dcl_literal l80, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.__z_, l80, r4.x\n"
+" ishr r4.___w, r4.y, r4.x\n"
+" inegate r4.x___, r4.x\n"
+" \n"
+" dcl_literal l81, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.x___, r4.x, l81\n"
+" iadd r4.x___, r4.y, r4.x\n"
+" cmov_logical r4.x___, r4.z, r4.w, r4.x\n"
+"else\n"
+" \n"
+" dcl_literal l82, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.__z_, r1.z, l82\n"
+" \n"
+" dcl_literal l83, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r3.z, l83\n"
+" iadd r3.__z_, r3.z, r3.x\n"
+" \n"
+" dcl_literal l84, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.x___, r3.x, l84\n"
+" iadd r1.__z_, r1.z, r3.x\n"
+" \n"
+" dcl_literal l85, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.z, l85\n"
+" \n"
+" dcl_literal l86, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.__z_, l86, r3.x\n"
+" \n"
+" dcl_literal l87, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.__z_, r3.z, l87, r1.z\n"
+" \n"
+" dcl_literal l88, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l88, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l89, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r3.w, l89\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r4._yz_, l90, r3.x\n"
+" \n"
+" dcl_literal l91, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r4.z, l91, r3.x\n"
+" ishr r3.x___, r3.z, r3.x\n"
+" cmov_logical r4.x___, r4.y, r3.x, r1.z\n"
+"endif\n"
+"\n"
+"dcl_literal l92, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r1.__z_, l92, r1.w\n" /* 254.0 < n ? */
+"\n"
+"dcl_literal l93, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r1.__z_, r1.z, l93, r4.x\n" /* if so, substitute the +infinity bit pattern */
+"ior r1.__z_, r3.y, r1.z\n"
+"\n"
+"dcl_literal l94, 0x41131CE0, 0x41131CE0, 0x41131CE0, 0x41131CE0\n"
+"ge r1.___w, r1.y, l94\n" /* |x| >= 0x41131CE0 (~9.1945) ? */
+"if_logicalnz r1.w\n"
+" mul_ieee r1.___w, r2.w, r1.z\n"
+" div_zeroop(infinity) r1.___w, r1.w, r1.y\n"
+" \n"
+" dcl_literal l95, 0x80000000, 0x7F800000, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l95\n"
+" if_logicalz r3.y\n"
+" \n"
+" dcl_literal l96, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r3.__z_, r2.x, l96\n"
+" itof r3.__z_, r3.z\n"
+" \n"
+" dcl_literal l97, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r3.__zw, r3.z, l97\n"
+" \n"
+" dcl_literal l98, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r3.z, l98\n"
+" \n"
+" dcl_literal l99, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r3.__z_, r3.z, l99\n"
+" \n"
+" dcl_literal l100, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r3.w, l100\n"
+" \n"
+" dcl_literal l101, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3.__z_, l101, r3.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l102, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.x___, l102, r3.z\n"
+" \n"
+" dcl_literal l103, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.__z_, r4.x, l103, r3.z\n"
+" \n"
+" dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.x___, l104, r3.z\n"
+" ishr r4._y__, r3.w, r3.z\n"
+" inegate r3.__z_, r3.z\n"
+" \n"
+" dcl_literal l105, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.__z_, r3.z, l105\n"
+" iadd r3.__z_, r3.w, r3.z\n"
+" cmov_logical r3.__z_, r4.x, r4.y, r3.z\n"
+" else\n"
+" \n"
+" dcl_literal l106, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r3.___w, r2.x, l106\n"
+" \n"
+" dcl_literal l107, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l107\n"
+" \n"
+" dcl_literal l108, 0x00000000, 0xFFFFFF99, 0x00000000, 0x0C000000\n"
+" iadd r3._y_w, r3.yyyw, l108\n"
+" \n"
+" dcl_literal l109, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l109, r3.y\n"
+" \n"
+" dcl_literal l110, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r3.__z_, r3.y, l110, r3.w\n"
+" endif\n"
+" ior r3.x___, r3.x, r3.z\n"
+" mul_ieee r1.___w, r1.w, r3.x\n"
+" \n"
+" dcl_literal l111, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3.xyz_, r1.w, l111\n"
+" if_logicalz r3.y\n"
+" itof r3.___w, r3.z\n"
+" \n"
+" dcl_literal l112, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r3.w, l112\n"
+" \n"
+" dcl_literal l113, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.___w, r4.x, l113\n"
+" \n"
+" dcl_literal l114, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r3.___w, r3.w, l114\n"
+" \n"
+" dcl_literal l115, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.x___, r4.y, l115\n"
+" \n"
+" dcl_literal l116, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3.___w, l116, r3.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l117, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4._y__, l117, r3.w\n"
+" \n"
+" dcl_literal l118, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.___w, r4.y, l118, r3.w\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, l119, r3.w\n"
+" ishr r4.__z_, r4.x, r3.w\n"
+" inegate r3.___w, r3.w\n"
+" \n"
+" dcl_literal l120, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.___w, r3.w, l120\n"
+" iadd r3.___w, r4.x, r3.w\n"
+" cmov_logical r3.___w, r4.y, r4.z, r3.w\n"
+" else\n"
+" \n"
+" dcl_literal l121, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.___w, r1.w, l121\n"
+" \n"
+" dcl_literal l122, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l122\n"
+" \n"
+" dcl_literal l123, 0xF4000000, 0xF4000000, 0xF4000000, 0xF4000000\n"
+" iadd r1.___w, r1.w, l123\n"
+" \n"
+" dcl_literal l124, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69\n"
+" iadd r3._y__, r3.y, l124\n"
+" \n"
+" dcl_literal l125, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3._y__, l125, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l126, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r3.z, l126\n"
+" \n"
+" dcl_literal l127, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r4.xy__, l127, r3.y\n"
+" \n"
+" dcl_literal l128, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r4.y, l128, r3.y\n"
+" ishr r3._y__, r3.z, r3.y\n"
+" cmov_logical r3.___w, r4.x, r3.y, r1.w\n"
+" endif\n"
+" ior r1.___w, r3.x, r3.w\n"
+"else\n"
+" mul_ieee r1.__z_, r2.x, r1.z\n"
+" mul_ieee r1.__z_, r2.w, r1.z\n"
+" \n"
+" dcl_literal l129, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l129, r0.x\n" /* integer compare: 0 < bits of x, i.e. x is positive */
+" div_zeroop(infinity) r1._y__, r1.z, r1.y\n"
+" \n"
+" dcl_literal l130, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" add r1.__z_, r1.y_neg(xyzw), l130\n" /* r1.z = 2.0 - r1.y */
+" cmov_logical r1.___w, r2.x, r1.y, r1.z\n" /* positive x takes r1.y, otherwise 2.0 - r1.y */
+"endif\n"
+"\n"
+"dcl_literal l131, 0x23800000, 0x3F580000, 0x40C00000, 0x41E00000\n"
+"ige r3, r0.y, l131\n" /* integer compares of the |x| bits: at least 2^-56 (x), 0.84375 (y), 6.0 (z), 28.0 (w) */
+"\n"
+"dcl_literal l132, 0x00000000, 0x3E800000, 0x00000000, 0x00000000\n"
+"ilt r1._yz_, r0.x, l132\n"
+"and r1._yz_, r3.xxzx, r1.yyzy\n"
+"cmov_logical r0.__z_, r1.y, r1.x, r0.z\n" /* r0.z accumulates the final result through the chain of selects below */
+"\n"
+"dcl_literal l133, 0x3E800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.xy__, r0.x, l133\n"
+"and r1.x___, r2.y, r1.x\n"
+"cmov_logical r0.__z_, r1.x, r0.w, r0.z\n"
+"and r0.___w, r2.z, r3.y\n"
+"cmov_logical r0.__z_, r0.w, r1.w, r0.z\n"
+"\n"
+"dcl_literal l134, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"cmov_logical r0.__z_, r1.z, l134, r0.z\n" /* select the constant 2.0 */
+"and r0.___w, r3.w, r1.y\n"
+"\n"
+"dcl_literal l135, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ushr r1.x___, r0.x, l135\n" /* r1.x = sign bit of x */
+"\n"
+"dcl_literal l136, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishl r1.x___, r1.x, l136\n"
+"\n"
+"dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r1._y__, r0.y, l137\n" /* |x| bits equal the infinity pattern */
+"\n"
+"dcl_literal l138, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r1.x___, r1.x, l138\n"
+"and r1.x___, r1.y, r1.x\n" /* x is +infinity */
+"ior r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l139, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r0.w, l139, r0.z\n" /* select the constant 0 */
+"\n"
+"dcl_literal l140, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l140, r0.y\n" /* |x| bits above the infinity pattern: the input is a NaN */
+"\n"
+"dcl_literal l141, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l141\n" /* x with the 0x7FC00000 (quiet NaN) bits set */
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* NaN input returns the quieted NaN, anything else the selected value in r0.z */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* NOTE(review): presumably input count, output count (agrees with in(1)/out(1) above) - confirm against the table's struct declaration */
+},
+{ "__erf_f32",
+"mdef(205)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00800011, 0x40C00000\n"
+"ilt r0.__zw, r0.y, l1\n"
+"if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l2, 0x80000000, 0x7F800000, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.x, l2\n"
+" if_logicalz r1.y\n"
+" \n"
+" dcl_literal l3, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.__z_, r0.x, l3\n"
+" itof r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l4, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r1.__zw, r1.z, l4\n"
+" \n"
+" dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l5\n"
+" \n"
+" dcl_literal l6, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r1.__z_, r1.z, l6\n"
+" \n"
+" dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l7\n"
+" \n"
+" dcl_literal l8, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.__z_, l8, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.x___, l9, r1.z\n"
+" \n"
+" dcl_literal l10, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.x, l10, r1.z\n"
+" \n"
+" dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l11, r1.z\n"
+" ishr r2._y__, r1.w, r1.z\n"
+" inegate r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l12, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l12\n"
+" iadd r1.__z_, r1.w, r1.z\n"
+" cmov_logical r1.__z_, r2.x, r2.y, r1.z\n"
+" else\n"
+" \n"
+" dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1._y__, r1.y, l13\n"
+" \n"
+" dcl_literal l14, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000\n"
+" iadd r1.___w, r0.y, l14\n"
+" \n"
+" dcl_literal l15, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99\n"
+" iadd r1._y__, r1.y, l15\n"
+" \n"
+" dcl_literal l16, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r1._y__, l16, r1.y\n"
+" \n"
+" dcl_literal l17, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.__z_, r1.y, l17, r1.w\n"
+" endif\n"
+" ior r1.x___, r1.x, r1.z\n"
+"else\n"
+" mov r1.x___, r0.x\n"
+"endif\n"
+"\n"
+"dcl_literal l18, 0x3F906EBB, 0x3F906EBB, 0x3F906EBB, 0x3F906EBB\n"
+"mul_ieee r1.x___, r1.x, l18\n"
+"if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l19, 0x00000000, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r1._yzw, r1.x, l19\n"
+" if_logicalz r1.z\n"
+" itof r0.__z_, r1.w\n"
+" \n"
+" dcl_literal l20, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r0.z, l20\n"
+" \n"
+" dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r2.x, l21\n"
+" \n"
+" dcl_literal l22, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r0.__z_, r0.z, l22\n"
+" \n"
+" dcl_literal l23, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r2.y, l23\n"
+" \n"
+" dcl_literal l24, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.__z_, l24, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2._y__, l25, r0.z\n"
+" \n"
+" dcl_literal l26, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.__z_, r2.y, l26, r0.z\n"
+" \n"
+" dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l27, r0.z\n"
+" ishr r2.__z_, r2.x, r0.z\n"
+" inegate r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l28, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.__z_, r0.z, l28\n"
+" iadd r0.__z_, r2.x, r0.z\n"
+" cmov_logical r0.__z_, r2.y, r2.z, r0.z\n"
+" else\n"
+" \n"
+" dcl_literal l29, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.x___, r1.x, l29\n"
+" \n"
+" dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l30\n"
+" \n"
+" dcl_literal l31, 0xF4000000, 0xF4000000, 0xF4000000, 0xF4000000\n"
+" iadd r2.x___, r2.x, l31\n"
+" \n"
+" dcl_literal l32, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69, 0xFFFFFF69\n"
+" iadd r1.__z_, r1.z, l32\n"
+" \n"
+" dcl_literal l33, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.__z_, l33, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l34, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l34\n"
+" \n"
+" dcl_literal l35, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r2._yz_, l35, r1.z\n"
+" \n"
+" dcl_literal l36, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.z, l36, r1.z\n"
+" ishr r1.__z_, r1.w, r1.z\n"
+" cmov_logical r0.__z_, r2.y, r1.z, r2.x\n"
+" endif\n"
+" ior r1.x___, r1.y, r0.z\n"
+"endif\n"
+"mul_ieee r0.__z_, r0.x, r0.x\n"
+"\n"
+"dcl_literal l37, 0x00000000, 0xB7C756B1, 0xB684E21A, 0x00000000\n"
+"\n"
+"dcl_literal l38, 0x00000000, 0xBBBD1489, 0x390AEE49, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z, l37, l38\n"
+"\n"
+"dcl_literal l39, 0x00000000, 0xBCE9528F, 0x3BA68116, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z, r1.yyzy, l39\n"
+"\n"
+"dcl_literal l40, 0x00000000, 0xBEA66BEB, 0x3D852A63, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z, r1.yyzy, l40\n"
+"\n"
+"dcl_literal l41, 0x00000000, 0x3E0375D4, 0x3ECBBBCE, 0x00000000\n"
+"mad_ieee r1._yz_, r0.z, r1.yyzy, l41\n"
+"\n"
+"dcl_literal l42, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r0.__z_, r0.z, r1.z, l42\n"
+"div_zeroop(infinity) r0.__z_, r1.y, r0.z\n"
+"mad_ieee r0.__z_, r0.x, r0.z, r0.x\n"
+"mov r1._y__, r0.x_abs\n"
+"\n"
+"dcl_literal l43, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.__z_, r1.y, l43\n"
+"\n"
+"dcl_literal l44, 0xBB0DF9C0, 0x3C445AA3, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l45, 0x3D1151B3, 0x3C5F6E13, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l44, l45\n"
+"\n"
+"dcl_literal l46, 0xBDE31CC2, 0x3E013307, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l46\n"
+"\n"
+"dcl_literal l47, 0x3EA2FE54, 0x3D931AE7, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l47\n"
+"\n"
+"dcl_literal l48, 0xBEBE9208, 0x3F0A5785, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l48\n"
+"\n"
+"dcl_literal l49, 0x3ED46805, 0x3DD9F331, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l49\n"
+"\n"
+"dcl_literal l50, 0x00000000, 0x00000000, 0xBB1ACDC6, 0x3F800000\n"
+"mad_ieee r1.__zw, r1.z, r2.xxxy, l50\n"
+"\n"
+"dcl_literal l51, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.x___, r0.x, l51\n"
+"div_zeroop(infinity) r1.__z_, r1.z, r1.w\n"
+"\n"
+"dcl_literal l52, 0x3F58560B, 0x3F58560B, 0x3F58560B, 0x3F58560B\n"
+"add r1.___w, r1.z, l52\n"
+"\n"
+"dcl_literal l53, 0xBF58560B, 0xBF58560B, 0xBF58560B, 0xBF58560B\n"
+"add r1.__z_, r1.z_neg(xyzw), l53\n"
+"cmov_logical r1.__z_, r2.x, r1.w, r1.z\n"
+"mul_ieee r1.___w, r1.y, r1.y\n"
+"\n"
+"dcl_literal l54, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r1.___w, l54, r1.w\n"
+"\n"
+"dcl_literal l55, 0xC11D077E, 0xBD777F97, 0xC3F1C275, 0xC1B38712\n"
+"\n"
+"dcl_literal l56, 0xC2A2932B, 0x40D23F7C, 0xC480230B, 0x43ED43A7\n"
+"mad_ieee r3, r1.w, l55, l56\n"
+"\n"
+"dcl_literal l57, 0xC3389AE7, 0x42D9451F, 0xC41F6441, 0x451F90CE\n"
+"mad_ieee r3, r1.w, r3, l57\n"
+"\n"
+"dcl_literal l58, 0xC322658C, 0x43D6810B, 0xC320A2EA, 0x4547FDBB\n"
+"mad_ieee r3, r1.w, r3, l58\n"
+"\n"
+"dcl_literal l59, 0xC2798057, 0x442158C9, 0xC18E104B, 0x44C01759\n"
+"mad_ieee r3, r1.w, r3, l59\n"
+"\n"
+"dcl_literal l60, 0xC128F022, 0x43D9486F, 0xBF4C9DD4, 0x43A2E571\n"
+"mad_ieee r3, r1.w, r3, l60\n"
+"\n"
+"dcl_literal l61, 0xBF31A0B7, 0x4309A863, 0xBC21A092, 0x41F2B459\n"
+"mad_ieee r3, r1.w, r3, l61\n"
+"\n"
+"dcl_literal l62, 0x00000000, 0xBC21A093, 0x419D35CE, 0x3F800000\n"
+"mad_ieee r2._yzw, r1.w, r3.xxyw, l62\n"
+"\n"
+"dcl_literal l63, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r1.___w, r1.w, r2.z, l63\n"
+"div_zeroop(infinity) r1.___w, r2.y, r1.w\n"
+"div_zeroop(infinity) r2._y__, r3.z, r2.w\n"
+"\n"
+"dcl_literal l64, 0x4036DB6E, 0x4036DB6E, 0x4036DB6E, 0x4036DB6E\n"
+"ige r2.__z_, r0.y, l64\n"
+"and r0.___w, r0.w, r2.z\n"
+"cmov_logical r0.___w, r0.w, r2.y, r1.w\n"
+"\n"
+"dcl_literal l65, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.___w, r1.y, l65\n"
+"\n"
+"dcl_literal l66, 0xBF100000, 0xBF100000, 0xBF100000, 0xBF100000\n"
+"mad_ieee r2._y__, r1.w_neg(xyzw), r1.w, l66\n"
+"\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.__z_, r2.y, l67\n"
+"\n"
+"dcl_literal l68, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l69, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r2.__z_, r2.z, l68, l69\n"
+"\n"
+"dcl_literal l70, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r2.__z_, r2.y, l70, r2.z\n"
+"round_z r2.__z_, r2.z\n"
+"\n"
+"dcl_literal l71, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2._y__, r2.z_neg(xyzw), l71, r2.y\n"
+"\n"
+"dcl_literal l72, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.___w, r2.z_neg(xyzw), l72, r2.y\n"
+"mul_ieee r3.x___, r2.w, r2.w\n"
+"\n"
+"dcl_literal l73, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l74, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3._y__, r3.x, l73, l74\n"
+"\n"
+"dcl_literal l75, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3._y__, r3.x, r3.y, l75\n"
+"\n"
+"dcl_literal l76, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3._y__, r3.x, r3.y, l76\n"
+"\n"
+"dcl_literal l77, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r3.x, r3.y, l77\n"
+"mad_ieee r3.x___, r3.x_neg(xyzw), r3.y, r2.w\n"
+"mul_ieee r2.___w, r2.w, r3.x\n"
+"\n"
+"dcl_literal l78, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3.x___, r3.x_neg(xyzw), l78\n"
+"div_zeroop(infinity) r2.___w, r2.w, r3.x\n"
+"\n"
+"dcl_literal l79, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.___w, r2.z, l79, r2.w_neg(xyzw)\n"
+"add r2._y__, r2.y_neg(xyzw), r2.w\n"
+"\n"
+"dcl_literal l80, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2._y__, r2.y_neg(xyzw), l80\n"
+"ftoi r2.__z_, r2.z\n"
+"\n"
+"dcl_literal l81, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r3.xyz_, r2.y, l81\n"
+"if_logicalz r3.y\n"
+" itof r2.___w, r3.z\n"
+" \n"
+" dcl_literal l82, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r2.w, l82\n"
+" \n"
+" dcl_literal l83, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r4.x, l83\n"
+" iadd r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l84, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r4.y, l84\n"
+" \n"
+" dcl_literal l85, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2.___w, l85, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l86, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.x___, l86, r2.w\n"
+" \n"
+" dcl_literal l87, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r4.x, l87, r2.w\n"
+" \n"
+" dcl_literal l88, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.x___, l88, r2.w\n"
+" ishr r4._y__, r3.w, r2.w\n"
+" inegate r2.___w, r2.w\n"
+" \n"
+" dcl_literal l89, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.___w, r2.w, l89\n"
+" iadd r2.___w, r3.w, r2.w\n"
+" cmov_logical r2.___w, r4.x, r4.y, r2.w\n"
+"else\n"
+" \n"
+" dcl_literal l90, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2._y__, r2.y, l90\n"
+" \n"
+" dcl_literal l91, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l91\n"
+" iadd r3._y__, r3.y, r2.z\n"
+" \n"
+" dcl_literal l92, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.__z_, r2.z, l92\n"
+" iadd r2._y__, r2.y, r2.z\n"
+" \n"
+" dcl_literal l93, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.__z_, r3.y, l93\n"
+" \n"
+" dcl_literal l94, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l94, r2.z\n"
+" \n"
+" dcl_literal l95, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2._y__, r3.y, l95, r2.y\n"
+" \n"
+" dcl_literal l96, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.__z_, l96, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l97, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.z, l97\n"
+" \n"
+" dcl_literal l98, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r3.__zw, l98, r2.z\n"
+" \n"
+" dcl_literal l99, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.w, l99, r2.z\n"
+" ishr r2.__z_, r3.y, r2.z\n"
+" cmov_logical r2.___w, r3.z, r2.z, r2.y\n"
+"endif\n"
+"ior r2._y__, r3.x, r2.w\n"
+"add r2.__z_, r1.y_neg(xyzw), r1.w\n"
+"add r1.___w, r1.y, r1.w\n"
+"mad_ieee r0.___w, r2.z, r1.w, r0.w\n"
+"\n"
+"dcl_literal l100, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.___w, r0.w, l100\n"
+"\n"
+"dcl_literal l101, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l102, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r1.___w, r1.w, l101, l102\n"
+"\n"
+"dcl_literal l103, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r1.___w, r0.w, l103, r1.w\n"
+"round_z r1.___w, r1.w\n"
+"\n"
+"dcl_literal l104, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0.___w, r1.w_neg(xyzw), l104, r0.w\n"
+"\n"
+"dcl_literal l105, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.__z_, r1.w_neg(xyzw), l105, r0.w\n"
+"mul_ieee r2.___w, r2.z, r2.z\n"
+"\n"
+"dcl_literal l106, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l107, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3.x___, r2.w, l106, l107\n"
+"\n"
+"dcl_literal l108, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3.x___, r2.w, r3.x, l108\n"
+"\n"
+"dcl_literal l109, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3.x___, r2.w, r3.x, l109\n"
+"\n"
+"dcl_literal l110, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3.x___, r2.w, r3.x, l110\n"
+"mad_ieee r2.___w, r2.w_neg(xyzw), r3.x, r2.z\n"
+"mul_ieee r2.__z_, r2.z, r2.w\n"
+"\n"
+"dcl_literal l111, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r2.___w, r2.w_neg(xyzw), l111\n"
+"div_zeroop(infinity) r2.__z_, r2.z, r2.w\n"
+"\n"
+"dcl_literal l112, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.__z_, r1.w, l112, r2.z_neg(xyzw)\n"
+"add r0.___w, r0.w_neg(xyzw), r2.z\n"
+"\n"
+"dcl_literal l113, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.w_neg(xyzw), l113\n"
+"ftoi r2.__z_, r1.w\n"
+"\n"
+"dcl_literal l114, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r3.xyz_, r0.w, l114\n"
+"if_logicalz r3.y\n"
+" itof r2.___w, r3.z\n"
+" \n"
+" dcl_literal l115, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r2.w, l115\n"
+" \n"
+" dcl_literal l116, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r4.x, l116\n"
+" iadd r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l117, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r4.y, l117\n"
+" \n"
+" dcl_literal l118, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2.___w, l118, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l119, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.x___, l119, r2.w\n"
+" \n"
+" dcl_literal l120, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r4.x, l120, r2.w\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.x___, l121, r2.w\n"
+" ishr r4._y__, r3.w, r2.w\n"
+" inegate r2.___w, r2.w\n"
+" \n"
+" dcl_literal l122, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.___w, r2.w, l122\n"
+" iadd r2.___w, r3.w, r2.w\n"
+" cmov_logical r2.___w, r4.x, r4.y, r2.w\n"
+"else\n"
+" \n"
+" dcl_literal l123, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.___w, r0.w, l123\n"
+" \n"
+" dcl_literal l124, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l124\n"
+" iadd r3._y__, r3.y, r2.z\n"
+" \n"
+" dcl_literal l125, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.__z_, r2.z, l125\n"
+" iadd r0.___w, r0.w, r2.z\n"
+" \n"
+" dcl_literal l126, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.__z_, r3.y, l126\n"
+" \n"
+" dcl_literal l127, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l127, r2.z\n"
+" \n"
+" dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r3.y, l128, r0.w\n"
+" \n"
+" dcl_literal l129, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.__z_, l129, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l130, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.z, l130\n"
+" \n"
+" dcl_literal l131, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r3.__zw, l131, r2.z\n"
+" \n"
+" dcl_literal l132, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.w, l132, r2.z\n"
+" ishr r2.__z_, r3.y, r2.z\n"
+" cmov_logical r2.___w, r3.z, r2.z, r0.w\n"
+"endif\n"
+"\n"
+"dcl_literal l133, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r0.___w, l133, r1.w\n"
+"\n"
+"dcl_literal l134, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r0.w, l134, r2.w\n"
+"ior r0.___w, r3.x, r0.w\n"
+"mul_ieee r0.___w, r2.y, r0.w\n"
+"div_zeroop(infinity) r0.___w, r0.w, r1.y\n"
+"\n"
+"dcl_literal l135, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.___w, r0.w_neg(xyzw), l135\n"
+"\n"
+"dcl_literal l136, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l136\n"
+"cmov_logical r0.___w, r2.x, r1.w, r0.w\n"
+"\n"
+"dcl_literal l137, 0x31800000, 0x3F580000, 0x3FA00000, 0x40C00000\n"
+"ige r3, r1.y, l137\n"
+"\n"
+"dcl_literal l138, 0x00000000, 0x3F580000, 0x3FA00000, 0x40C00000\n"
+"ilt r2._yzw, r1.y, l138\n"
+"and r2._yzw, r3.xxyz, r2.yyzw\n"
+"cmov_logical r0.__z_, r2.y, r0.z, r1.x\n"
+"cmov_logical r0.__z_, r2.z, r1.z, r0.z\n"
+"cmov_logical r0.__z_, r2.w, r0.w, r0.z\n"
+"\n"
+"dcl_literal l139, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"\n"
+"dcl_literal l140, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.___w, r2.x, l139, l140\n"
+"cmov_logical r0.__z_, r3.w, r0.w, r0.z\n"
+"\n"
+"dcl_literal l141, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l141\n"
+"\n"
+"dcl_literal l142, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r0.w, l142, r0.z\n"
+"\n"
+"dcl_literal l143, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l143, r0.y\n"
+"\n"
+"dcl_literal l144, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l144\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__exp10_f32", /* Table entry: macro name, AMDIL macro text (adjacent string literals), then two integers. Presumably the float base-10 exponential - confirm against the caller. */
+"mdef(206)_out(1)_in(1)\n" /* macro header: id 206, one output, one input */
+"mov r0, in0\n" /* r0.x holds the argument x for the rest of the macro */
+"dcl_literal l0, 0x40549A78, 0x40549A78, 0x40549A78, 0x40549A78\n" /* 0x40549A78 ~= 3.321928f, i.e. log2(10) */
+"mul_ieee r0._y__, r0.x, l0\n"
+"dcl_literal l1, 0xFFFFF000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n" /* masks: clear low 12 mantissa bits / magnitude / exponent field */
+"and r1.xyz_, r0.x, l1\n"
+"dcl_literal l2, 0x40549000, 0x40549000, 0x40549000, 0x40549000\n" /* l0 with its low 12 mantissa bits cleared (high part of the constant) */
+"mad_ieee r0.__z_, r1.x, l2, r0.y_neg(xyzw)\n"
+"add r0.___w, r0.x, r1.x_neg(xyzw)\n"
+"dcl_literal l3, 0x40549000, 0x40549000, 0x40549000, 0x40549000\n"
+"mad_ieee r0.__z_, r0.w, l3, r0.z\n"
+"dcl_literal l4, 0x3A2784BD, 0x3A2784BD, 0x3A2784BD, 0x3A2784BD\n" /* ~= 6.39e-4f, approximately l0 minus l2 (low part of the constant) */
+"mad_ieee r0.__z_, r1.x, l4, r0.z\n"
+"dcl_literal l5, 0x3A2784BD, 0x3A2784BD, 0x3A2784BD, 0x3A2784BD\n"
+"mad_ieee r0.__z_, r0.w, l5, r0.z\n"
+"add r0.___w, r0.y, r0.z\n"
+"add r0._y__, r0.y, r0.w_neg(xyzw)\n"
+"add r0._y__, r0.z, r0.y\n"
+"add r0.__z_, r0.w, r0.y\n"
+"round_nearest r0.__z_, r0.z\n" /* r0.z = nearest integer to the scaled argument */
+"add r1.x___, r0.w, r0.z_neg(xyzw)\n"
+"add r0.___w, r0.w, r1.x_neg(xyzw)\n"
+"add r0.___w, r0.z_neg(xyzw), r0.w\n"
+"add r0._y__, r0.y, r0.w\n"
+"add r0.___w, r1.x, r0.y\n"
+"add r1.x___, r1.x, r0.w_neg(xyzw)\n"
+"add r0._y__, r0.y, r1.x\n"
+"dcl_literal l6, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6f */
+"mul_ieee r1.x___, r0.w, l6\n"
+"dcl_literal l7, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.693146f; l7 + l6 ~= ln(2) */
+"mad_ieee r0._y__, r0.y, l7, r1.x\n"
+"dcl_literal l8, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.x___, r0.w, l8\n"
+"add r1.___w, r0.w, r1.x_neg(xyzw)\n"
+"dcl_literal l9, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mul_ieee r0.___w, r0.w, l9\n"
+"dcl_literal l10, 0x3F317000, 0x3F317000, 0x3F317000, 0x3F317000\n"
+"mad_ieee r2.x___, r1.x, l10, r0.w_neg(xyzw)\n"
+"dcl_literal l11, 0x3F317000, 0x3F317000, 0x3F317000, 0x3F317000\n"
+"mad_ieee r2.x___, r1.w, l11, r2.x\n"
+"dcl_literal l12, 0x37C00001, 0x37C00001, 0x37C00001, 0x37C00001\n"
+"mad_ieee r1.x___, r1.x, l12, r2.x\n"
+"dcl_literal l13, 0x37C00001, 0x37C00001, 0x37C00001, 0x37C00001\n"
+"mad_ieee r1.x___, r1.w, l13, r1.x\n"
+"add r0._y__, r0.y, r1.x\n"
+"add r1.x___, r0.w, r0.y\n"
+"add r0.___w, r0.w, r1.x_neg(xyzw)\n"
+"add r0._y__, r0.y, r0.w\n"
+"add r0.___w, r1.x, r0.y\n"
+"mul_ieee r1.___w, r0.w, r0.w\n" /* r1.w = square of the reduced argument; polynomial variable below */
+"dcl_literal l14, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"dcl_literal l15, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r2.x___, r1.w, l14, l15\n" /* start of a five-coefficient polynomial in r1.w, one multiply-add per coefficient */
+"dcl_literal l16, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r2.x___, r1.w, r2.x, l16\n"
+"dcl_literal l17, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2.x___, r1.w, r2.x, l17\n"
+"dcl_literal l18, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r2.x___, r1.w, r2.x, l18\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), r2.x, r0.w\n"
+"mul_ieee r0.___w, r0.w, r1.w\n"
+"dcl_literal l19, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"add r1.___w, r1.w_neg(xyzw), l19\n"
+"div_zeroop(infinity) r0.___w, r0.w, r1.w\n"
+"add r0._y__, r0.y_neg(xyzw), r0.w_neg(xyzw)\n"
+"add r0._y__, r1.x_neg(xyzw), r0.y\n"
+"dcl_literal l20, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"add r0._y__, r0.y_neg(xyzw), l20\n"
+"dcl_literal l21, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* masks: magnitude / sign bit / exponent field / mantissa field */
+"and r2, r0.y, l21\n"
+"ftoi r0._y__, r0.z\n" /* integer form of the rounded value; used below to adjust the exponent field with integer ops */
+"dcl_literal l22, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 254.0f */
+"lt r0.__z_, l22, r0.z\n"
+"dcl_literal l23, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 as a 32-bit integer */
+"iadd r0.___w, r0.y, l23\n"
+"cmov_logical r0._y__, r2.z, r0.y, r0.w\n"
+"dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the float mantissa field */
+"ishl r0.___w, r0.y, l24\n"
+"itof r1.x___, r2.w\n"
+"cmov_logical r1.x___, r2.z, r2.x, r1.x\n"
+"iadd r0.___w, r1.x, r0.w\n"
+"dcl_literal l25, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+"and r1.x__w, r1.x, l25\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l26\n"
+"dcl_literal l27, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, the implicit leading mantissa bit */
+"ior r1.___w, r1.w, l27\n"
+"iadd r0._y__, r1.x, r0.y\n"
+"dcl_literal l28, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+"iadd r0._y__, r0.y, l28\n"
+"dcl_literal l29, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.x___, l29, r0.y\n"
+"dcl_literal l30, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+"iadd r0._y__, l30, r0.y_neg(xyzw)\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r0.___w, r1.x, l31, r0.w\n"
+"dcl_literal l32, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.x___, l32, r0.y\n"
+"dcl_literal l33, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24: shift count is capped here when it exceeds 23 */
+"cmov_logical r0._y__, r1.x, l33, r0.y\n"
+"dcl_literal l34, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0._y__, r0.y, l34\n"
+"ishr r1.x___, r1.w, r0.y\n"
+"dcl_literal l35, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, l35, r0.y\n"
+"cmov_logical r0._y__, r0.y, r1.x, r0.w\n"
+"dcl_literal l36, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.z, l36, r0.y\n"
+"ior r0._y__, r2.y, r0.y\n"
+"dcl_literal l37, 0x421A209B, 0x421A209B, 0x421A209B, 0x421A209B\n" /* ~= 38.53f; at or above this (with finite x) the result is forced to +infinity */
+"ge r0.__z_, r0.x, l37\n"
+"dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, r1.y, l38\n" /* r0.w = magnitude bits of x are below the infinity pattern */
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l39, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.z, l39, r0.y\n"
+"dcl_literal l40, 0xC23369F4, 0xC23369F4, 0xC23369F4, 0xC23369F4\n" /* ~= -44.85f; below this (with finite x) the result is forced to zero */
+"lt r0.__z_, r0.x, l40\n"
+"and r0.__z_, r0.w, r0.z\n"
+"dcl_literal l41, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l41, r0.y\n"
+"dcl_literal l42, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l42, r1.y\n" /* magnitude bits above the infinity pattern, i.e. x is a NaN encoding */
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* in that case the input bits are passed through as the result */
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r0.x, l43\n"
+"dcl_literal l44, 0x7F800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r1.x_z_, r1.yyzy, l44\n"
+"dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r1.y, l45\n"
+"and r0.__z_, r0.z, r1.x\n"
+"dcl_literal l46, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l46, r0.y\n"
+"dcl_literal l47, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l47, r0.x\n"
+"and r0.__zw, r1.xxxz, r0.zzzw\n"
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n"
+"dcl_literal l48, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.__z_, r0.x, l48\n"
+"dcl_literal l49, 0x421A209A, 0x421A209A, 0x421A209A, 0x421A209A\n" /* one ulp below l37 */
+"eq r0.x___, r0.x, l49\n"
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n"
+"dcl_literal l50, 0x7F7FFFB3, 0x7F7FFFB3, 0x7F7FFFB3, 0x7F7FFFB3\n" /* a finite pattern just below 0x7F7FFFFF (largest finite float) */
+"cmov_logical r0.x___, r0.x, l50, r0.y\n" /* x exactly equal to l49 returns the fixed value l50; otherwise the computed r0.y */
+"mov out0, r0\n" /* result is in r0.x */
+"mend\n" /* end of macro text */
+,1,1 /* two trailing integer fields; both 1, matching _in(1)/_out(1) above - TODO confirm field meaning in the struct declaration */
+},
+{ "__exp2_f32", /* Table entry: macro name, AMDIL macro text (adjacent string literals), then two integers. Presumably the float base-2 exponential - confirm against the caller. */
+"mdef(207)_out(1)_in(1)\n" /* macro header: id 207, one output, one input */
+"mov r0, in0\n" /* r0.x holds the argument x for the rest of the macro */
+"round_nearest r0._y__, r0.x\n" /* r0.y = x rounded to the nearest integer */
+"add r0.__z_, r0.x, r0.y_neg(xyzw)\n" /* r0.z = x - r0.y, the remainder after removing the integer part */
+"dcl_literal l0, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* ~= 0.6931472f, i.e. ln(2) */
+"mul_ieee r0.___w, r0.z, l0\n"
+"mul_ieee r1.x___, r0.w, r0.w\n" /* r1.x = square of the scaled remainder; polynomial variable below */
+"dcl_literal l1, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"dcl_literal l2, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r1._y__, r1.x, l1, l2\n" /* start of a five-coefficient polynomial in r1.x, one multiply-add per coefficient */
+"dcl_literal l3, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r1._y__, r1.x, r1.y, l3\n"
+"dcl_literal l4, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r1._y__, r1.x, r1.y, l4\n"
+"dcl_literal l5, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r1._y__, r1.x, r1.y, l5\n"
+"mad_ieee r1.x___, r1.x_neg(xyzw), r1.y, r0.w\n"
+"mul_ieee r0.___w, r0.w, r1.x\n"
+"dcl_literal l6, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"add r1.x___, r1.x_neg(xyzw), l6\n"
+"div_zeroop(infinity) r0.___w, r0.w, r1.x\n"
+"dcl_literal l7, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6f */
+"mad_ieee r0.___w, r0.z_neg(xyzw), l7, r0.w_neg(xyzw)\n"
+"dcl_literal l8, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.693146f; l8 + l7 ~= ln(2) */
+"mad_ieee r0.__z_, r0.z_neg(xyzw), l8, r0.w\n"
+"dcl_literal l9, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"add r0.__z_, r0.z_neg(xyzw), l9\n"
+"dcl_literal l10, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* masks: magnitude / sign bit / exponent field / mantissa field */
+"and r1, r0.z, l10\n"
+"ftoi r0.__z_, r0.y\n" /* integer form of the rounded x; used below to adjust the exponent field with integer ops */
+"dcl_literal l11, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 254.0f */
+"lt r0._y__, l11, r0.y\n"
+"dcl_literal l12, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 as a 32-bit integer */
+"iadd r0.___w, r0.z, l12\n"
+"cmov_logical r0.__z_, r1.z, r0.z, r0.w\n"
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the float mantissa field */
+"ishl r0.___w, r0.z, l13\n"
+"itof r1.___w, r1.w\n"
+"cmov_logical r1.x___, r1.z, r1.x, r1.w\n"
+"iadd r0.___w, r1.x, r0.w\n"
+"dcl_literal l14, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r1.x_z_, r1.x, l14\n"
+"dcl_literal l15, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l15\n"
+"dcl_literal l16, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, the implicit leading mantissa bit */
+"ior r1.__z_, r1.z, l16\n"
+"iadd r0.__z_, r1.x, r0.z\n"
+"dcl_literal l17, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+"iadd r0.__z_, r0.z, l17\n"
+"dcl_literal l18, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.x___, l18, r0.z\n"
+"dcl_literal l19, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+"iadd r0.__z_, l19, r0.z_neg(xyzw)\n"
+"dcl_literal l20, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r0.___w, r1.x, l20, r0.w\n"
+"dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.x___, l21, r0.z\n"
+"dcl_literal l22, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24: shift count is capped here when it exceeds 23 */
+"cmov_logical r0.__z_, r1.x, l22, r0.z\n"
+"dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.__z_, r0.z, l23\n"
+"ishr r1.x___, r1.z, r0.z\n"
+"dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l24, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r1.x, r0.w\n"
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l25, r0.z\n"
+"ior r0._y__, r1.y, r0.y\n"
+"dcl_literal l26, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n" /* 128.0f; at or above this (with finite x) the result is forced to +infinity */
+"ge r0.__z_, r0.x, l26\n"
+"dcl_literal l27, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l27\n" /* r0.w = magnitude bits of x */
+"dcl_literal l28, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.x___, r0.w, l28\n" /* r1.x = magnitude bits are below the infinity pattern */
+"and r0.__z_, r0.z, r1.x\n"
+"dcl_literal l29, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.z, l29, r0.y\n"
+"dcl_literal l30, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n" /* -149.0f; below this (with finite x) the result is forced to zero */
+"lt r0.__z_, r0.x, l30\n"
+"and r0.__z_, r1.x, r0.z\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l31, r0.y\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l32, r0.w\n" /* magnitude bits above the infinity pattern, i.e. x is a NaN encoding */
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.w, l33\n" /* magnitude bits equal the infinity pattern */
+"dcl_literal l34, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit (quiet-NaN pattern) */
+"ior r1.x___, r0.x, l34\n"
+"cmov_logical r0._y__, r0.z, r1.x, r0.y\n" /* NaN input: result is x with the l34 bits set */
+"dcl_literal l35, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r0.x, l35\n"
+"and r0.__z_, r0.w, r0.z\n"
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l36, r0.y\n" /* infinite magnitude with sign bit set: result is zero */
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l37, r0.x\n"
+"and r0.__z_, r0.w, r0.z\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* positive infinity input is returned unchanged; otherwise the computed r0.y */
+"mov out0, r0\n" /* result is in r0.x */
+"mend\n" /* end of macro text */
+,1,1 /* two trailing integer fields; both 1, matching _in(1)/_out(1) above - TODO confirm field meaning in the struct declaration */
+},
+{ "__expm1_f32", /* Table entry: macro name, AMDIL macro text (adjacent string literals), then two integers. Presumably float exp(x) - 1 - confirm against the caller. */
+"mdef(208)_out(1)_in(1)\n" /* macro header: id 208, one output, one input */
+"mov r0, in0\n"
+"mov r3.x___, r0.x\n" /* r3.x keeps the original argument x; r0.x is reused as scratch below */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r0.x___, l0\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0xB717F7D1, 0xFFFFFFFF\n" /* z ~= -9.058e-6f, w = all ones (-1 as an integer) */
+"mov r1.__zw, l1\n"
+"dcl_literal l2, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= +9.058e-6f */
+"mov r2._y__, l2\n"
+"dcl_literal l3, 0xBF317180, 0x3F317180, 0x00000000, 0x00000000\n" /* x ~= -0.693146f, y ~= +0.693146f */
+"add r1.xy__, r3.x, l3\n"
+"mov r2.x___, r1.x\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF\n" /* z = sign-bit mask, w = magnitude mask */
+"and r0.__zw, r3.x, l4\n" /* r0.z = sign bit of x, r0.w = magnitude bits of x */
+"dcl_literal l5, 0x00000000, 0x3EB17218, 0x7F800000, 0x00000000\n" /* y ~= 0.346574f, z = infinity pattern */
+"ult r3._yz_, l5, r0.w\n" /* r3.z ends up true when the magnitude bits exceed the infinity pattern (NaN encoding) */
+"dcl_literal l6, 0x3F851592, 0x33000000, 0x7F800000, 0x00000000\n" /* x ~= 1.0397f, y = 2^-25, z = infinity pattern */
+"ult r4.xyz_, r0.w, l6\n"
+"and r1.x___, r3.y, r4.x\n"
+"dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x7F800000\n"
+"ieq r4.x__w, r0.zzzw, l7\n" /* r4.x = sign bit clear, r4.w = magnitude equals the infinity pattern */
+"and r2.___w, r1.x, r4.x\n"
+"dcl_literal l8, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r2.__z_, r2.w, l8\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.___w, r0.z, l9\n" /* r2.w = sign bit set */
+"and r1.x___, r1.x, r2.w\n"
+"cmov_logical r1.xyz_, r1.x, r1.yzwy, r2.xyzx\n"
+"dcl_literal l10, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n" /* -0.5f */
+"dcl_literal l11, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* +0.5f */
+"cmov_logical r0.__z_, r0.z, l10, l11\n" /* pick -0.5 when the sign bit is set, else +0.5 */
+"dcl_literal l12, 0x3F851592, 0x42B17218, 0x4195B844, 0x00000000\n" /* x ~= 1.0397f, y ~= 88.7228f, z ~= 18.715f */
+"uge r2.xyz_, r0.w, l12\n"
+"dcl_literal l13, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* ~= 1.442695f, i.e. 1/ln(2) */
+"mad_ieee r0.__z_, r3.x, l13, r0.z\n"
+"round_z r0.___w, r0.z\n" /* round toward zero after the +/-0.5 bias above */
+"ftoi r5.__z_, r0.z\n"
+"dcl_literal l14, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.693146f */
+"mad_ieee r5.x___, r0.w_neg(xyzw), l14, r3.x\n"
+"dcl_literal l15, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6f; l14 + l15 ~= ln(2) */
+"mul_ieee r5._y__, r0.w, l15\n"
+"cmov_logical r1.xyz_, r2.x, r5.xyzx, r1.xyzx\n"
+"and r0.__zw, r4.z, r2.yyyz\n"
+"add r2._y__, r1.y_neg(xyzw), r1.x\n"
+"add r1.x___, r1.x, r2.y_neg(xyzw)\n"
+"add r2.x___, r1.y_neg(xyzw), r1.x\n"
+"mov r0._y__, r3.x\n"
+"cmov_logical r0.xy__, r3.y, r2.xyxx, r0.xyxx\n"
+"dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"mul_ieee r1.x___, r0.y, l16\n"
+"mul_ieee r1._y__, r0.y, r1.x\n" /* r1.y = r0.y * (0.5 * r0.y); polynomial variable below */
+"dcl_literal l17, 0xB457EDBB, 0xB457EDBB, 0xB457EDBB, 0xB457EDBB\n"
+"dcl_literal l18, 0x36867E54, 0x36867E54, 0x36867E54, 0x36867E54\n"
+"mad_ieee r1.___w, r1.y, l17, l18\n" /* start of a polynomial in r1.y, one multiply-add per coefficient, ending with constant term 1.0 */
+"dcl_literal l19, 0xB8A670CD, 0xB8A670CD, 0xB8A670CD, 0xB8A670CD\n"
+"mad_ieee r1.___w, r1.y, r1.w, l19\n"
+"dcl_literal l20, 0x3AD00D01, 0x3AD00D01, 0x3AD00D01, 0x3AD00D01\n"
+"mad_ieee r1.___w, r1.y, r1.w, l20\n"
+"dcl_literal l21, 0xBD088889, 0xBD088889, 0xBD088889, 0xBD088889\n"
+"mad_ieee r1.___w, r1.y, r1.w, l21\n"
+"dcl_literal l22, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"mad_ieee r1.___w, r1.y, r1.w, l22\n"
+"dcl_literal l23, 0x40400000, 0x40400000, 0x40400000, 0x40400000\n" /* 3.0f */
+"mad_ieee r2.x___, r1.w_neg(xyzw), r1.x, l23\n"
+"add r1.___w, r1.w, r2.x_neg(xyzw)\n"
+"dcl_literal l24, 0x40C00000, 0x40C00000, 0x40C00000, 0x40C00000\n" /* 6.0f */
+"mad_ieee r2.x___, r0.y_neg(xyzw), r2.x, l24\n"
+"div_zeroop(infinity) r1.___w, r1.w, r2.x\n"
+"mad_ieee r2.x___, r1.y, r1.w, r0.x_neg(xyzw)\n"
+"mul_ieee r1.___w, r1.y, r1.w\n"
+"mad_ieee r1._y__, r0.y, r1.w, r1.y_neg(xyzw)\n"
+"mad_ieee r0.x___, r0.y, r2.x, r0.x_neg(xyzw)\n"
+"mad_ieee r0.x___, r0.y_neg(xyzw), r1.x, r0.x\n"
+"dcl_literal l25, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 = float exponent bias */
+"iadd r1.x___, l25, r1.z_neg(xyzw)\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the float mantissa field */
+"ishl r1.x___, r1.x, l26\n" /* (127 - r1.z) placed in the exponent field, i.e. the bit pattern of 2^(-r1.z) when in range */
+"add r1.x___, r0.x, r1.x\n"
+"add r1.x___, r0.y, r1.x_neg(xyzw)\n"
+"dcl_literal l27, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.x___, r1.x, l27\n"
+"add r1.___w, r0.y_neg(xyzw), r0.x\n"
+"add r0.x___, r0.y, r0.x_neg(xyzw)\n"
+"add r0._y__, r0.y, r1.y_neg(xyzw)\n"
+"dcl_literal l28, 0x01000000, 0x01000000, 0x01000000, 0x01000000\n"
+"ushr r1._y__, l28, r1.z\n"
+"dcl_literal l29, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1._y__, l29, r1.y_neg(xyzw)\n" /* integer subtract on the bit pattern of 1.0f */
+"add r1._y__, r1.w_neg(xyzw), r1.y\n"
+"dcl_literal l30, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.___w, r1.w_neg(xyzw), l30\n"
+"dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ige r2.x___, r1.z, l31\n"
+"cmov_logical r1.x___, r2.x, r1.x, r1.y\n"
+"dcl_literal l32, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* masks: magnitude / sign bit / exponent field / mantissa field */
+"and r5, r1.x, l32\n"
+"itof r1.x___, r1.z\n"
+"dcl_literal l33, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n" /* 1024.0f */
+"lt r1._y__, l33, r1.x_abs\n"
+"ftoi r2.x___, r1.x_abs\n"
+"dcl_literal l34, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.x___, r1.x, l34\n"
+"dcl_literal l35, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n" /* 1024 as an integer: magnitude is clamped to this */
+"cmov_logical r1._y__, r1.y, l35, r2.x\n"
+"inegate r2.x___, r1.y\n"
+"cmov_logical r1.x___, r1.x, r2.x, r1.y\n"
+"dcl_literal l36, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 as a 32-bit integer */
+"iadd r1._y__, r1.x, l36\n"
+"cmov_logical r2.x___, r5.z, r1.x, r1.y\n"
+"dcl_literal l37, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2._y__, r2.x, l37\n"
+"itof r2.__z_, r5.w\n"
+"cmov_logical r2.__z_, r5.z, r5.x, r2.z\n"
+"iadd r2._y__, r2.z, r2.y\n"
+"dcl_literal l38, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r3._y_w, r2.z, l38\n"
+"dcl_literal l39, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r3.y, l39\n"
+"dcl_literal l40, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, the implicit leading mantissa bit */
+"ior r3._y__, r3.w, l40\n"
+"iadd r2.x___, r2.z, r2.x\n"
+"dcl_literal l41, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+"iadd r2.x___, r2.x, l41\n"
+"dcl_literal l42, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r2.__z_, l42, r2.x\n"
+"dcl_literal l43, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+"iadd r2.x___, l43, r2.x_neg(xyzw)\n"
+"dcl_literal l44, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r2._y__, r2.z, l44, r2.y\n"
+"dcl_literal l45, 0x00000017, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r5.x_z_, l45, r2.x\n"
+"dcl_literal l46, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24: shift count is capped here when it exceeds 23 */
+"cmov_logical r2.__z_, r5.x, l46, r2.x\n"
+"dcl_literal l47, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r2.x, l47\n"
+"dcl_literal l48, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.x, l48, r2.z\n"
+"ishr r2.x___, r3.y, r2.x\n"
+"cmov_logical r2.x___, r5.z, r2.x, r2.y\n"
+"itof r2._y__, r1.x\n"
+"dcl_literal l49, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n" /* 300.0f */
+"lt r2._y__, l49, r2.y\n"
+"dcl_literal l50, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.x___, r2.y, l50, r2.x\n"
+"ior r2.__z_, r5.y, r2.x\n"
+"dcl_literal l51, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r3._y__, l51, r2.x\n"
+"cmov_logical r2.x___, r3.y, r2.z, r2.x\n"
+"dcl_literal l52, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* same four masks as l32; the sequence below repeats the l32..l51 steps on r1.w */
+"and r5, r1.w, l52\n"
+"cmov_logical r1.x___, r5.z, r1.x, r1.y\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1._y__, r1.x, l53\n"
+"itof r1.___w, r5.w\n"
+"cmov_logical r1.___w, r5.z, r5.x, r1.w\n"
+"iadd r1._y__, r1.w, r1.y\n"
+"dcl_literal l54, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r3._y_w, r1.w, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.___w, r3.y, l55\n"
+"dcl_literal l56, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r2.__z_, r3.w, l56\n"
+"iadd r1.x___, r1.w, r1.x\n"
+"dcl_literal l57, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r1.x___, r1.x, l57\n"
+"dcl_literal l58, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r1.___w, l58, r1.x\n"
+"dcl_literal l59, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r1.x___, l59, r1.x_neg(xyzw)\n"
+"dcl_literal l60, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r1._y__, r1.w, l60, r1.y\n"
+"dcl_literal l61, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+"ilt r3._y_w, l61, r1.x\n"
+"dcl_literal l62, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r1.___w, r3.y, l62, r1.x\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r1.x, l63\n"
+"dcl_literal l64, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.x, l64, r1.w\n"
+"ishr r1.x___, r2.z, r1.x\n"
+"cmov_logical r1.x___, r3.w, r1.x, r1.y\n"
+"dcl_literal l65, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r1.x___, r2.y, l65, r1.x\n"
+"ior r1._y__, r5.y, r1.x\n"
+"dcl_literal l66, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1.___w, l66, r1.x\n"
+"cmov_logical r1.x___, r1.w, r1.y, r1.x\n"
+"dcl_literal l67, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1._y__, r1.x, l67\n"
+"dcl_literal l68, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0f */
+"add r1.___w, r1.x, l68\n"
+"cmov_logical r1.x___, r1.y, r1.x, r1.w\n" /* subtract 1.0 unless the integer compare against the infinity pattern is true */
+"dcl_literal l69, 0xFFFFFFFE, 0xFFFFFFFE, 0xFFFFFFFE, 0xFFFFFFFE\n" /* -2 as a 32-bit integer */
+"ige r1._y__, l69, r1.z\n"
+"dcl_literal l70, 0x00000038, 0x00000038, 0x00000038, 0x00000038\n" /* 56 */
+"ilt r1.___w, l70, r1.z\n"
+"ior r1._y__, r1.y, r1.w\n" /* true when r1.z <= -2 or r1.z > 56 */
+"cmov_logical r1.x___, r1.y, r1.x, r2.x\n"
+"dcl_literal l71, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"dcl_literal l72, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n" /* -0.5f */
+"mad_ieee r0.x___, r0.x, l71, l72\n"
+"dcl_literal l73, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n" /* -1 as a 32-bit integer */
+"ieq r1._y__, r1.z, l73\n"
+"cmov_logical r0.x___, r1.y, r0.x, r1.x\n" /* r1.z == -1 uses the 0.5*r0.x - 0.5 form computed just above */
+"cmov_logical r0.x___, r1.z, r0.x, r0.y\n" /* r1.z == 0 uses r0.y instead */
+"cmov_logical r0.x___, r4.y, r3.x, r0.x\n" /* magnitude below 2^-25: return x itself */
+"dcl_literal l74, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.z, l74, r0.x\n" /* finite magnitude at or above ~88.72: +infinity pattern (sign handled by the l75 select below) */
+"and r0._y__, r2.w, r0.w\n"
+"and r0.__z_, r4.w, r2.w\n"
+"and r0.___w, r4.x, r4.w\n"
+"dcl_literal l75, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.x___, r0.y, l75, r0.x\n" /* negative finite x with magnitude at or above ~18.715: -1.0 */
+"dcl_literal l76, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.w, l76, r0.x\n" /* positive infinity input: +infinity */
+"dcl_literal l77, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.x___, r0.z, l77, r0.x\n" /* negative infinity input: -1.0 */
+"dcl_literal l78, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit (quiet-NaN pattern) */
+"cmov_logical r0.x___, r3.z, l78, r0.x\n" /* NaN input (r3.z from the l5 compare): return the l78 pattern */
+"mov out0, r0\n" /* result is in r0.x */
+"mend\n" /* end of macro text */
+,1,1 /* two trailing integer fields; both 1, matching _in(1)/_out(1) above - TODO confirm field meaning in the struct declaration */
+},
+{ "__exp_f32", /* Table entry: macro name, AMDIL macro text (adjacent string literals), then two integers. Presumably the float natural exponential - confirm against the caller. */
+"mdef(209)_out(1)_in(1)\n" /* macro header: id 209, one output, one input */
+"mov r0, in0\n" /* r0.x holds the argument x for the rest of the macro */
+"dcl_literal l0, 0x00000000, 0x00000000, 0xC2CFF1B5, 0x00000000\n" /* y = 0.0f, z ~= -103.97f */
+"lt r0._yz_, r0.x, l0\n" /* r0.y = (x < 0), r0.z = (x < ~-103.97); r0.z later forces a zero result */
+"dcl_literal l1, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n" /* -0.5f */
+"dcl_literal l2, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* +0.5f */
+"cmov_logical r0._y__, r0.y, l1, l2\n" /* pick -0.5 for negative x, else +0.5 */
+"dcl_literal l3, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* ~= 1.442695f, i.e. 1/ln(2) */
+"mad_ieee r0._y__, r0.x, l3, r0.y\n"
+"round_z r0._y__, r0.y\n" /* round toward zero after the +/-0.5 bias: r0.y = x/ln(2) rounded to an integer */
+"dcl_literal l4, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.693146f */
+"mad_ieee r0.___w, r0.y_neg(xyzw), l4, r0.x\n"
+"dcl_literal l5, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6f; l4 + l5 ~= ln(2) */
+"mad_ieee r1.x___, r0.y_neg(xyzw), l5, r0.w\n" /* r1.x = x - r0.y*ln(2), subtracted in two steps */
+"mul_ieee r1._y__, r1.x, r1.x\n" /* r1.y = square of the reduced argument; polynomial variable below */
+"dcl_literal l6, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"dcl_literal l7, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r1.__z_, r1.y, l6, l7\n" /* start of a five-coefficient polynomial in r1.y, one multiply-add per coefficient */
+"dcl_literal l8, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r1.__z_, r1.y, r1.z, l8\n"
+"dcl_literal l9, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r1.__z_, r1.y, r1.z, l9\n"
+"dcl_literal l10, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r1.__z_, r1.y, r1.z, l10\n"
+"mad_ieee r1._y__, r1.y_neg(xyzw), r1.z, r1.x\n"
+"mul_ieee r1.x___, r1.x, r1.y\n"
+"dcl_literal l11, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"add r1._y__, r1.y_neg(xyzw), l11\n"
+"div_zeroop(infinity) r1.x___, r1.x, r1.y\n"
+"dcl_literal l12, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1.x___, r0.y, l12, r1.x_neg(xyzw)\n"
+"add r0.___w, r0.w_neg(xyzw), r1.x\n"
+"dcl_literal l13, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"add r0.___w, r0.w_neg(xyzw), l13\n"
+"dcl_literal l14, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* masks: magnitude / sign bit / exponent field / mantissa field */
+"and r1, r0.w, l14\n"
+"ftoi r0.___w, r0.y\n" /* integer form of the rounded value; used below to adjust the exponent field with integer ops */
+"dcl_literal l15, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 254.0f */
+"lt r0._y__, l15, r0.y\n"
+"dcl_literal l16, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 as a 32-bit integer */
+"iadd r2.x___, r0.w, l16\n"
+"cmov_logical r0.___w, r1.z, r0.w, r2.x\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the float mantissa field */
+"ishl r2.x___, r0.w, l17\n"
+"itof r1.___w, r1.w\n"
+"cmov_logical r1.x___, r1.z, r1.x, r1.w\n"
+"iadd r1.__z_, r1.x, r2.x\n"
+"dcl_literal l18, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+"and r1.x__w, r1.x, l18\n"
+"dcl_literal l19, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l19\n"
+"dcl_literal l20, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, the implicit leading mantissa bit */
+"ior r1.___w, r1.w, l20\n"
+"iadd r0.___w, r1.x, r0.w\n"
+"dcl_literal l21, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+"iadd r0.___w, r0.w, l21\n"
+"dcl_literal l22, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.x___, l22, r0.w\n"
+"dcl_literal l23, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+"iadd r0.___w, l23, r0.w_neg(xyzw)\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r1.x___, r1.x, l24, r1.z\n"
+"dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.__z_, l25, r0.w\n"
+"dcl_literal l26, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24: shift count is capped here when it exceeds 23 */
+"cmov_logical r0.___w, r1.z, l26, r0.w\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.___w, r0.w, l27\n"
+"ishr r1.__z_, r1.w, r0.w\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l28, r0.w\n"
+"cmov_logical r0.___w, r0.w, r1.z, r1.x\n"
+"dcl_literal l29, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l29, r0.w\n"
+"ior r0._y__, r1.y, r0.y\n"
+"dcl_literal l30, 0x42B17218, 0x42B17218, 0x42B17218, 0x42B17218\n" /* ~= 88.7228f; at or above this (with finite x) the result is forced to +infinity */
+"ge r0.___w, r0.x, l30\n"
+"dcl_literal l31, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r1.x___, r0.x, l31\n" /* r1.x = magnitude bits of x */
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1._y__, r1.x, l32\n" /* r1.y = magnitude bits are below the infinity pattern */
+"and r0.__zw, r0.zzzw, r1.y\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.w, l33, r0.y\n"
+"dcl_literal l34, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l34, r0.y\n" /* finite x below ~-103.97: result is zero */
+"dcl_literal l35, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l35, r1.x\n" /* magnitude bits above the infinity pattern, i.e. x is a NaN encoding */
+"dcl_literal l36, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r1.x, l36\n" /* magnitude bits equal the infinity pattern */
+"dcl_literal l37, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit (quiet-NaN pattern) */
+"ior r1.x___, r0.x, l37\n"
+"cmov_logical r0._y__, r0.z, r1.x, r0.y\n" /* NaN input: result is x with the l37 bits set */
+"dcl_literal l38, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r0.x, l38\n"
+"and r0.__z_, r0.w, r0.z\n"
+"dcl_literal l39, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l39, r0.y\n" /* infinite magnitude with sign bit set: result is zero */
+"dcl_literal l40, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l40, r0.x\n"
+"and r0.__z_, r0.w, r0.z\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* positive infinity input is returned unchanged; otherwise the computed r0.y */
+"mov out0, r0\n" /* result is in r0.x */
+"mend\n" /* end of macro text */
+,1,1 /* two trailing integer fields; both 1, matching _in(1)/_out(1) above - TODO confirm field meaning in the struct declaration */
+},
+{ "__fabs_f32", /* IL macro 210: one input, one output; clears bit 31 of the x component */
+"mdef(210)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n" /* every bit except bit 31, the IEEE-754 single-precision sign bit */
+"and r0.x___, r0.x, l0\n" /* only .x is masked; r0.yzw keep the values copied from in0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header; presumably input and output counts -- confirm against the table's struct */
+},
+{ "__fast_distance_2f32", /* IL macro 211: two inputs, one output; square root of the summed squared x/y differences, written to r0.x */
+"mdef(211)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r2._yz_, r0.xxyx, r1.xxyx_neg(xyzw)\n" /* r2.y = in0.x - in1.x, r2.z = in0.y - in1.y (second operand carries the neg modifier) */
+"dp2_ieee r2._y__, r2.yzyy, r2.yzyy\n" /* the swizzle moves the two differences into the first two lanes */
+"sqrt_vec r0.x___, r2.y\n"
+"mov out0, r0\n" /* r0.yzw still hold in0's components */
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fast_distance_4f32", /* IL macro 212: two inputs, one output; square root of the summed squared differences of all four lanes, written to r0.x */
+"mdef(212)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r0, r0, r1_neg(xyzw)\n" /* lane-wise in0 - in1 */
+"dp2_ieee r0.x___, r0.xyxx, r0.xyxx\n" /* x and y lanes go through the 2-component dot product */
+"mad_ieee r0.x___, r0.z, r0.z, r0.x\n" /* accumulate z*z */
+"mad_ieee r0.x___, r0.w, r0.w, r0.x\n" /* accumulate w*w */
+"sqrt_vec r0.x___, r0.x\n"
+"mov out0, r0\n" /* r0.yzw hold the per-lane differences, not the distance */
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fast_distance_f32", /* IL macro 213: two inputs, one output; scalar case, absolute value of in0.x - in1.x */
+"mdef(213)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"add r2._y__, r0.x, r1.x_neg(xyzw)\n" /* r2.y = in0.x - in1.x */
+"mov r0.x___, r2.y_abs\n" /* _abs source modifier applied during the copy */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fast_length_2f32", /* IL macro 214: one input, one output; sqrt of the 2-component dot product of in0.xy with itself */
+"mdef(214)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r1._y__, r0.xyxx, r0.xyxx\n" /* r1.y is used as the scratch destination */
+"sqrt_vec r0.x___, r1.y\n"
+"mov out0, r0\n" /* r0.yzw still hold in0's components */
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fast_length_4f32", /* IL macro 215: one input, one output; sqrt of the sum of squares of all four lanes of in0 */
+"mdef(215)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r0.x___, r0.xyxx, r0.xyxx\n" /* x and y lanes go through the 2-component dot product */
+"mad_ieee r0.x___, r0.z, r0.z, r0.x\n" /* accumulate z*z */
+"mad_ieee r0.x___, r0.w, r0.w, r0.x\n" /* accumulate w*w */
+"sqrt_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fast_length_f32", /* IL macro 216: one input, one output; scalar case, copies in0.x through the _abs modifier */
+"mdef(216)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0.x___, r0.x_abs\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fast_normalize_2f32", /* IL macro 217: one input, one output; scales in0.xy by rsq of its squared length, with zero-length and NaN special cases */
+"mdef(217)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r2.x___, r0.xyxx, r0.xyxx\n" /* r2.x = squared length of in0.xy */
+"rsq_vec r2._y__, r2.x\n"
+"mov r2.__zw, r0.xxxy_abs\n" /* r2.z = |in0.x|, r2.w = |in0.y| */
+"\n"
+"dcl_literal l7, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n" /* .zw hold the bit pattern of +infinity */
+"ilt r2.__zw, l7, r2.zzzw\n" /* integer compare: set where the magnitude bits exceed the infinity pattern (NaN) */
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+"and r2.__zw, r2.zzzw, l8\n" /* reduce each flag to 0 or 1 */
+"iadd r2.__z_, r2.z, r2.w\n" /* r2.z = number of flagged lanes */
+"mul_ieee r2._y_w, r0.xxxy, r2.y\n" /* r2.y = in0.x * rsq, r2.w = in0.y * rsq */
+"\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.x___, r2.x, l9\n" /* squared length == 0 */
+"\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.xy__, r2.x, l10, r2.ywyy\n" /* zero-length input yields zeros, otherwise the scaled pair */
+"\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.__z_, l11, r2.z\n" /* at least one lane was flagged */
+"\n"
+"dcl_literal l12, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n" /* .xy hold a quiet-NaN bit pattern */
+"cmov_logical r0.xy__, r2.z, l12, r2.xyxx\n"
+"\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n" /* declared here so the macro no longer reads an undeclared literal; the generator had emitted an empty line in this slot */
+"mov r0.__zw, l13\n" /* the z/w lanes are not part of the 2-component result; they are now defined as zero */
+"\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fast_normalize_4f32", /* IL macro 218: one input, one output; scales in0 by rsq of its squared length, with zero-length and NaN special cases */
+"mdef(218)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r2.x___, r0.xyxx, r0.xyxx\n" /* x*x + y*y */
+"mad_ieee r2.x___, r0.z, r0.z, r2.x\n" /* accumulate z*z */
+"mad_ieee r2.x___, r0.w, r0.w, r2.x\n" /* accumulate w*w; r2.x = squared length */
+"rsq_vec r2._y__, r2.x\n"
+"mov r3, r0_abs\n" /* per-lane magnitudes */
+"\n"
+"dcl_literal l14, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* bit pattern of +infinity */
+"ilt r3, l14, r3\n" /* integer compare: set where the magnitude bits exceed the infinity pattern (NaN) */
+"\n"
+"dcl_literal l15, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3, r3, l15\n" /* reduce each flag to 0 or 1 */
+"iadd r2.__z_, r3.x, r3.y\n"
+"iadd r2.__z_, r2.z, r3.z\n"
+"iadd r2.__z_, r2.z, r3.w\n" /* r2.z = number of flagged lanes */
+"mul_ieee r0, r0, r2.y\n" /* scale every lane by the reciprocal square root */
+"\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.x___, r2.x, l16\n" /* squared length == 0 */
+"\n"
+"dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0, r2.x, l17, r0\n" /* zero-length input yields all zeros */
+"\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, l18, r2.z\n" /* at least one lane was flagged */
+"\n"
+"dcl_literal l19, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"cmov_logical r0, r2.x, l19, r0\n" /* any flagged lane turns the whole result into the NaN pattern */
+"\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fast_normalize_f32", /* IL macro 219: one input, one output; scalar case, maps in0.x to -1.0, +1.0 or in0.x itself */
+"mdef(219)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r2.x___, r0.x_abs\n" /* r2.x = |in0.x| */
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2._y__, r0.x, l0\n" /* float compare: in0.x < 0 */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r2.__z_, l1, r2.x\n" /* unsigned compare: magnitude bits are non-zero */
+"and r2._y__, r2.y, r2.z\n"
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* bit pattern of +infinity */
+"uge r2.x___, l2, r2.x\n" /* unsigned compare: magnitude bits do not exceed the infinity pattern (not NaN) */
+"and r2._y__, r2.y, r2.x\n" /* negative, non-zero and not NaN */
+"\n"
+"dcl_literal l3, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0f */
+"cmov_logical r2._y__, r2.y, l3, r0.x\n" /* -1.0 for the negative case, otherwise the input passes through */
+"\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, l4, r0.x\n" /* float compare: 0 < in0.x */
+"and r2.__z_, r2.z, r2.w\n"
+"and r2.x___, r2.x, r2.z\n" /* positive, non-zero and not NaN */
+"\n"
+"dcl_literal l5, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* +1.0f */
+"cmov_logical r0.x___, r2.x, l5, r2.y\n"
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n" /* declared here so the macro no longer reads an undeclared literal; the generator had emitted an empty line in this slot */
+"mov r0._yzw, l6\n" /* lanes outside the scalar result are now defined as zero */
+"\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fdim_f32", /* IL macro 220: two inputs, one output; selects between a scaled integer-assisted path and a direct subtract path, then patches NaN inputs */
+"mdef(220)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack both operands into r0.xy */
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7F800000, 0x7F800000\n" /* .xy: everything but the sign bit, .zw: exponent field */
+"and r1, r0.xyxy, l0\n" /* r1.xy = magnitude bits, r1.zw = exponent bits of the two operands */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__zw, l1, r1.xxxy\n" /* r0.z / r0.w: magnitude bits exceed the infinity pattern (operand is NaN) */
+"\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the mantissa field */
+"ishr r1.__zw, r1.zzzw, l2\n" /* biased exponents as small integers */
+"\n"
+"dcl_literal l3, 0x00000018, 0x00000018, 0x00000000, 0x00000000\n"
+"ige r2.xy__, l3, r1.zwzz\n" /* 24 >= biased exponent, per operand */
+"and r2.x___, r2.y, r2.x\n" /* taken branch requires both operands to have a biased exponent of at most 24 */
+"if_logicalnz r2.x\n"
+" \n"
+" dcl_literal l4, 0x80000000, 0x7F800000, 0x007FFFFF, 0x80000000\n" /* sign, exponent, mantissa masks for in0; sign mask for in1 */
+" and r2, r0.xxxy, l4\n"
+" \n"
+" dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.x___, r2.y, l5\n"
+" itof r2.__z_, r2.z\n" /* mantissa bits converted as an integer to float */
+" \n"
+" dcl_literal l6, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3._yz_, r2.z, l6\n"
+" \n"
+" dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r3.y, l7\n"
+" \n"
+" dcl_literal l8, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+" iadd r2.__z_, r2.z, l8\n"
+" \n"
+" dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, the position of the implicit leading one */
+" ior r3._y__, r3.z, l9\n"
+" \n"
+" dcl_literal l10, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2.__z_, l10, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.__z_, l11, r2.z\n"
+" \n"
+" dcl_literal l12, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.z, l12, r2.z\n" /* shift amounts above 23 are replaced by 24 */
+" \n"
+" dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.__z_, l13, r2.z\n"
+" ishr r3.___w, r3.y, r2.z\n"
+" inegate r2.__z_, r2.z\n"
+" \n"
+" dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.__z_, r2.z, l14\n"
+" iadd r2.__z_, r3.y, r2.z\n"
+" cmov_logical r2.__z_, r3.z, r3.w, r2.z\n"
+" \n"
+" dcl_literal l15, 0x18000000, 0x18000000, 0x00000000, 0x00000000\n" /* integer add of 0x30 << 23 to the bit patterns */
+" iadd r1.xy__, r1.xyxx, l15\n"
+" \n"
+" dcl_literal l16, 0xFFFFFFB1, 0xFFFFFFB1, 0xFFFFFFB1, 0xFFFFFFB1\n"
+" iadd r3.x___, r3.x, l16\n"
+" \n"
+" dcl_literal l17, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.x___, l17, r3.x\n"
+" \n"
+" dcl_literal l18, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r3.x, l18, r1.x\n"
+" cmov_logical r1.x___, r2.y, r1.x, r2.z\n" /* non-zero exponent field keeps the adjusted bits, otherwise the rebuilt value is used */
+" ior r1.x___, r2.x, r1.x\n" /* restore in0's sign bit */
+" \n"
+" dcl_literal l19, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r0.y, l19\n" /* same sequence repeated for the second operand */
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r2.x, l20\n"
+" itof r2._y__, r2.y\n"
+" \n"
+" dcl_literal l21, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.y, l21\n"
+" \n"
+" dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r3.x, l22\n"
+" \n"
+" dcl_literal l23, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+" iadd r2._y__, r2.y, l23\n"
+" \n"
+" dcl_literal l24, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.x___, r3.y, l24\n"
+" \n"
+" dcl_literal l25, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2._y__, l25, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3._y__, l26, r2.y\n"
+" \n"
+" dcl_literal l27, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r3.y, l27, r2.y\n"
+" \n"
+" dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l28, r2.y\n"
+" ishr r3.__z_, r3.x, r2.y\n"
+" inegate r2._y__, r2.y\n"
+" \n"
+" dcl_literal l29, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l29\n"
+" iadd r2._y__, r3.x, r2.y\n"
+" cmov_logical r2._y__, r3.y, r3.z, r2.y\n"
+" \n"
+" dcl_literal l30, 0xFFFFFFB1, 0xFFFFFFB1, 0xFFFFFFB1, 0xFFFFFFB1\n"
+" iadd r2.__z_, r2.z, l30\n"
+" \n"
+" dcl_literal l31, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.__z_, l31, r2.z\n"
+" \n"
+" dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1._y__, r2.z, l32, r1.y\n"
+" cmov_logical r1._y__, r2.x, r1.y, r2.y\n"
+" ior r1._y__, r2.w, r1.y\n" /* restore in1's sign bit (r2.w was masked out of r0.y earlier) */
+" lt r2.x___, r1.y, r1.x\n" /* float compare of the two adjusted operands */
+" add r1.x___, r1.x, r1.y_neg(xyzw)\n" /* adjusted in0 minus adjusted in1 */
+" and r1.x___, r2.x, r1.x\n" /* keep the difference only where the compare was true, else zero bits */
+" \n"
+" dcl_literal l33, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* magnitude, sign, exponent, mantissa masks */
+" and r2, r1.x, l33\n"
+" \n"
+" dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r2.z, l34\n"
+" itof r1._y__, r2.w\n"
+" \n"
+" dcl_literal l35, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r1.y, l35\n"
+" \n"
+" dcl_literal l36, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1._y__, r3.x, l36\n"
+" \n"
+" dcl_literal l37, 0xFFFFFFD0, 0xFFFFFFD0, 0xFFFFFFD0, 0xFFFFFFD0\n" /* -0x30, the opposite of the 0x30 added on the way in */
+" iadd r1._y__, r1.y, l37\n"
+" \n"
+" dcl_literal l38, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.x___, r3.y, l38\n"
+" \n"
+" dcl_literal l39, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1._y__, l39, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l40, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3._y__, l40, r1.y\n"
+" \n"
+" dcl_literal l41, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1._y__, r3.y, l41, r1.y\n"
+" \n"
+" dcl_literal l42, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l42, r1.y\n"
+" ishr r3.__z_, r3.x, r1.y\n"
+" inegate r1._y__, r1.y\n"
+" \n"
+" dcl_literal l43, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1._y__, r1.y, l43\n"
+" iadd r1._y__, r3.x, r1.y\n"
+" cmov_logical r1._y__, r3.y, r3.z, r1.y\n"
+" \n"
+" dcl_literal l44, 0xE8000000, 0xE8000000, 0xE8000000, 0xE8000000\n" /* two's-complement negation of 0x18000000, undoing the earlier integer add */
+" iadd r2.x___, r2.x, l44\n"
+" \n"
+" dcl_literal l45, 0xFFFFFF51, 0xFFFFFF51, 0xFFFFFF51, 0xFFFFFF51\n"
+" iadd r1.x___, r1.x, l45\n"
+" \n"
+" dcl_literal l46, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l46, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l47, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r2.w, l47\n"
+" \n"
+" dcl_literal l48, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l48, r1.x\n"
+" \n"
+" dcl_literal l49, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r3.x, l49, r1.x\n"
+" ishr r2.___w, r2.w, r1.x\n"
+" \n"
+" dcl_literal l50, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, l50, r1.x\n"
+" cmov_logical r1.x___, r1.x, r2.w, r2.x\n"
+" cmov_logical r1.x___, r2.z, r1.x, r1.y\n"
+" ior r1.x___, r2.y, r1.x\n" /* re-attach the sign bit of the difference */
+"else\n"
+" \n"
+" dcl_literal l51, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1._yz_, r1.zzwz, r0.xxyx, l51\n" /* operands whose exponent field is zero are replaced by zero bits */
+" lt r1.___w, r1.z, r1.y\n" /* float compare: second operand < first operand */
+" add r1._y__, r1.z_neg(xyzw), r1.y\n" /* first operand minus second operand */
+" and r1.x___, r1.w, r1.y\n" /* difference where the compare was true, zero bits otherwise */
+"endif\n"
+"\n"
+"dcl_literal l52, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n" /* quiet-NaN bit pattern for the x and y lanes */
+"ior r0.xy__, r0.xyxx, l52\n"
+"cmov_logical r0.x___, r0.z, r0.x, r1.x\n" /* in0 was flagged NaN: result is in0 with the pattern ORed in */
+"cmov_logical r0.x___, r0.w, r0.y, r0.x\n" /* in1 was flagged NaN: result is in1 with the pattern ORed in (takes precedence) */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fdiv_f32", /* IL macro 221: two inputs, one output; divides via rcp * mul on re-biased operands and re-applies the exponent difference with integer adds */
+"mdef(221)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l1, 0x7f800000, 0x7f800000, 0x807fffff, 0x807fffff\n" /* .xy: exponent field, .zw: sign + mantissa; literal numbering in this macro starts at l1 */
+"dcl_literal l2, 0x7f800000, 0x7f800000, 0, 0\n"
+"dcl_literal l3, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n" /* sign bit */
+"dcl_literal l4, 0x3f800000, 0x3f800000, 0, 0\n" /* bit pattern of 1.0f, i.e. exponent field of 127 */
+"dcl_literal l5, 0, 0, 0, 0\n"
+"dcl_literal l6, 0x7fffffff, 0x80000000, 0x7fffffff, 0x80000000\n"
+"dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* declared but not referenced by any instruction in this macro */
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the mantissa field */
+"dcl_literal l9, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff\n"
+"mov r2.x___, r0.x\n"
+"mov r2._y__, r1.x\n" /* r2.xy = (in0.x, in1.x) */
+"and r3.xyzw, r2.xyxy, l1\n" /* r3.xy = exponent bits, r3.zw = sign + mantissa bits */
+"ieq r4.xyzw, r3.xyxy, l2\n" /* r4.xy: exponent field all ones; r4.zw: exponent field zero */
+"and r5.xy__, r2.xy, l3\n" /* signs only */
+"ior r3.__zw, r3.zwzw, l4.xyxy\n" /* operands with their exponent field replaced by 127 */
+"cmov_logical r3.__zw, r4.zwzw, r5.xyxy, r3.zwzw\n" /* zero exponent field: use a signed zero instead */
+"cmov_logical r3.__zw, r4.xyxy, r2.xyxy, r3.zwzw\n" /* all-ones exponent field: use the original operand */
+"ior r5.xy__, r4.xz, r4.yw\n"
+"ior r5.x___, r5.x, r5.y\n" /* r5.x = any operand hit one of the two special exponent cases */
+"inegate r5.__z_, r3.yyyy\n"
+"iadd r3.x___, r3.x, r5.z\n" /* exponent bits of in0 minus exponent bits of in1 */
+"cmov_logical r3.x___, r5.xxxx, l5, r3.xxxx\n" /* no exponent correction in the special cases */
+"rcp_zeroop(infinity) r2._y__, r3.ww\n"
+"mul_ieee r2.x___, r3.z, r2.y\n" /* quotient of the re-biased operands */
+"and r2.__zw, r2.xxxx, l6.xyzw\n" /* r2.z = magnitude bits, r2.w = sign bit of that quotient */
+"ishr r6.x___, r2.z, l8\n"
+"ishr r6._y__, r3.xxxx, l8\n"
+"iadd r2.xy__, r2.xzxz, r3.xxxx\n" /* add the exponent difference straight into the bit pattern */
+"iadd r6.x___, r6.x, r6.y\n" /* resulting biased exponent as an integer */
+"ige r4.__z_, l5, r6.x\n" /* exponent <= 0 */
+"ior r4._y__, r2.wwww, l1\n" /* sign ORed with the infinity pattern */
+"ige r4.x, r6.x, l9\n" /* exponent >= 255 */
+"cmov_logical r4.x, r5.x, l5, r4.x\n" /* special-case operands never take the exponent >= 255 override */
+"cmov_logical r2.x, r4.z, r2.w, r2.x\n" /* exponent <= 0: signed zero */
+"cmov_logical r0.x, r4.x, r4.y, r2.x\n" /* exponent >= 255: signed infinity pattern */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__floor_f32", /* IL macro 222: one input, one output; applies round_neginf to the x component */
+"mdef(222)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_neginf r0.x___, r0.x\n" /* only .x is rounded; r0.yzw keep the values copied from in0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches in(1)/out(1) in the mdef header */
+},
+{ "__fmax_f32", /* IL macro 223: two inputs, one output; picks between the operands with integer compares on their bit patterns plus explicit NaN handling */
+"mdef(223)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__,r1.x\n" /* pack both operands into r0.xy */
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x80000000\n" /* .xy: magnitude mask, .zw: sign mask */
+"and r1, r0.xyxy, l0\n" /* r1.xy = magnitude bits, r1.zw = sign bits */
+"ixor r0.__z_, r1.w, r1.z\n" /* non-zero when the two signs differ */
+"dcl_literal l1, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n" /* bit pattern of +infinity */
+"ilt r1.xy__, l1, r1.xyxx\n" /* per-operand flag: magnitude bits exceed the infinity pattern (NaN) */
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.z, l2\n"
+"inot r0.___w, r0.z\n" /* r0.z = signs differ, r0.w = signs equal */
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r0.x, l3\n" /* first operand's bit pattern is non-negative as a signed integer */
+"and r1.__zw, r0.zzzw, r1.z\n"
+"ilt r2.xy__, r0.yxyy, r0.xyxx\n" /* r2.x: second < first, r2.y: first < second, both as signed integers */
+"and r1.___w, r1.w, r2.x\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r0.x, l4\n" /* first operand's sign bit is set */
+"and r0.__zw, r0.zzzw, r2.x\n"
+"and r0.___w, r2.y, r0.w\n" /* both sign bits set: the integer ordering is used the other way round */
+"ior r0.___w, r1.w, r0.w\n"
+"ior r0.___w, r1.z, r0.w\n"
+"cmov_logical r0.___w, r0.w, r0.x, r0.y\n" /* candidate result before NaN handling */
+"inot r1.__zw, r1.yyyx\n"
+"and r1.x_zw, r1.yyzw, r1.xxxy\n" /* r1.x: both NaN; r1.z: only first is NaN; r1.w: only second is NaN */
+"ior r0.__z_, r0.z, r1.z\n"
+"cmov_logical r0._y__, r0.z, r0.y, r0.w\n"
+"cmov_logical r0.x___, r1.w, r0.x, r0.y\n" /* only the second operand is NaN: return the first */
+"dcl_literal l5, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"cmov_logical r0.x___, r1.x, l5, r0.x\n" /* both operands NaN: return the NaN pattern */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fma_f32", /* IL macro 224: three inputs, one output; builds a*b+c from split-operand products and compensated adds, falling back to plain mad_ieee or to c in special cases */
+"mdef(224)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r3.x, r0.x, r1.x, r2.x\n" /* plain hardware multiply-add, kept as the fallback result */
+"mov r4.x, r2.x\n" /* untouched copy of the addend */
+"dcl_literal l1, 23, 23, 23, 23\n" /* width of the mantissa field */
+"ishr r24.x, r0.x, l1.x\n"
+"ishr r25.x, r1.x, l1.x\n"
+"ishr r26.x, r2.x, l1.x\n"
+"dcl_literal l2, 0xff, 0xff, 0xff, 0xff\n"
+"iand r24.x, r24.x, l2.x\n"
+"iand r25.x, r25.x, l2.x\n"
+"iand r26.x, r26.x, l2.x\n" /* r24/r25/r26 = biased exponents of the three operands */
+"dcl_literal l3, -127, -127, -127, -127\n"
+"iadd r24.x, r24.x, l3.x\n"
+"iadd r25.x, r25.x, l3.x\n"
+"iadd r26.x, r26.x, l3.x\n" /* now unbiased */
+"iadd r27.x, r24.x, r25.x\n" /* exponent sum of the two multiplicands */
+"iadd r28.x, r26.x, r27.x_neg(xyzw)\n" /* addend exponent relative to that sum */
+"dcl_literal l4, 127, 127, 127, 127\n"
+"iadd r28.x, r28.x, l4.x\n"
+"ishl r28.x, r28.x, l1.x\n" /* re-biased and shifted back into the exponent field position */
+"ieq r29.x, r24.x, l3.x\n"
+"ieq r30.x, r25.x, l3.x\n"
+"ieq r31.x, r26.x, l3.x\n" /* unbiased exponent == -127, i.e. exponent field was 0 */
+"dcl_literal l5, 128, 128, 128, 128\n"
+"ieq r32.x, r24.x, l5.x\n"
+"ieq r33.x, r25.x, l5.x\n"
+"ieq r34.x, r26.x, l5.x\n" /* unbiased exponent == 128, i.e. exponent field was 255 */
+"ior r29.x, r29.x, r32.x\n"
+"ior r30.x, r30.x, r33.x\n"
+"ior r31.x, r31.x, r34.x\n"
+"ior r35.x, r29.x, r30.x\n"
+"ior r35.x, r35.x, r31.x\n" /* r35 = any operand has an exponent field of 0 or 255 */
+"ior r36.x, r32.x, r33.x\n"
+"inot r36.x, r36.x\n"
+"iand r36.x, r36.x, r34.x\n" /* r36 = addend has exponent field 255 while neither multiplicand does */
+"dcl_literal l6, 0x807fffff, 0x807fffff, 0x807fffff, 0x807fffff\n" /* sign + mantissa mask */
+"iand r37.x, r0.x, l6.x\n"
+"iand r38.x, r1.x, l6.x\n"
+"iand r39.x, r2.x, l6.x\n"
+"dcl_literal l7, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000\n" /* exponent field of 127 */
+"ior r37.x, r37.x, l7.x\n"
+"ior r38.x, r38.x, l7.x\n" /* multiplicands with their exponent field replaced by 127 */
+"ior r39.x, r39.x, r28.x\n" /* addend with the relative exponent computed above */
+"mov r0.x, r37.x\n"
+"mov r1.x, r38.x\n"
+"mov r2.x, r39.x\n"
+"dcl_literal l8, 0xfffff000, 0xfffff000, 0xfffff000, 0xfffff000\n" /* clears the low 12 mantissa bits */
+"iand r5.x, r37.x, l8.x\n"
+"iand r6.x, r38.x, l8.x\n" /* high parts of the multiplicands */
+"add r7.x, r0.x, r5.x_neg(xyzw)\n"
+"add r8.x, r1.x, r6.x_neg(xyzw)\n" /* low parts */
+"mul_ieee r9.x, r0.x, r1.x\n" /* rounded product */
+"mad_ieee r10.x, r5.x, r6.x, r9.x_neg(xyzw)\n"
+"mad_ieee r10.x, r5.x, r8.x, r10.x\n"
+"mad_ieee r10.x, r7.x, r6.x, r10.x\n"
+"mad_ieee r10.x, r7.x, r8.x, r10.x\n" /* partial products summed against -r9: the product's rounding residue */
+"add r11.x, r9.x, r2.x\n" /* product + addend */
+"add r12.x, r9.x, r11.x_neg(xyzw)\n"
+"add r12.x, r12.x, r2.x\n"
+"add r13.x, r2.x, r11.x_neg(xyzw)\n"
+"add r13.x, r13.x, r9.x\n" /* two candidate residues of that add, one per operand ordering */
+"dcl_literal l9, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff\n"
+"iand r40.x, r9.x, l9.x\n"
+"iand r41.x, r2.x, l9.x\n"
+"ilt r42.x, r41.x, r40.x\n" /* magnitude of addend < magnitude of product */
+"cmov_logical r14.x, r42.x, r12.x, r13.x\n"
+"add r15.x, r14.x, r10.x\n" /* same residue-tracking add applied to the two residues */
+"add r16.x, r14.x, r15.x_neg(xyzw)\n"
+"add r16.x, r16.x, r10.x\n"
+"add r17.x, r10.x, r15.x_neg(xyzw)\n"
+"add r17.x, r17.x, r14.x\n"
+"iand r40.x, r14.x, l9.x\n"
+"iand r41.x, r10.x, l9.x\n"
+"ilt r42.x, r41.x, r40.x\n"
+"cmov_logical r18.x, r42.x, r16.x, r17.x\n"
+"iadd r40.x, r27.x, r26.x_neg(xyzw)\n" /* product exponent minus addend exponent */
+"dcl_literal l10, 60, 60, 60, 60\n"
+"ilt r42.x, r40.x, l10.x\n"
+"cmov_logical r19.x, r42.x, r11.x, r9.x\n"
+"cmov_logical r20.x, r42.x, r15.x, r10.x\n"
+"cmov_logical r21.x, r42.x, r18.x, r4.x\n" /* exponent gap of 60 or more: use the product terms and the raw addend */
+"dcl_literal l11, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000\n" /* exponent field mask */
+"iand r43.x, r19.x, l11.x\n"
+"iand r44.x, r20.x, l11.x\n"
+"iadd r45.x, r43.x, r44.x_neg(xyzw)\n"
+"dcl_literal l12, 0x0c000000, 0x0c000000, 0x0c000000, 0x0c000000\n" /* 24 << 23: an exponent difference of exactly 24 */
+"ieq r45.x, r45.x, l12.x\n"
+"dcl_literal l13, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff\n" /* mantissa mask */
+"iand r43.x, r20.x, l13.x\n"
+"dcl_literal l14, 0, 0, 0, 0\n"
+"ieq r43.x, r43.x, l14.x\n" /* r20's mantissa field is zero */
+"iand r44.x, r21.x, l9.x\n"
+"ine r44.x, r44.x, l14.x\n" /* r21 is non-zero */
+"iand r45.x, r45.x, r43.x\n"
+"iand r45.x, r45.x, r44.x\n" /* all three conditions hold: looks like a rounding-tie nudge -- NOTE(review): confirm intent */
+"dcl_literal l15, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n" /* sign bit */
+"iand r46.x, r21.x, l15.x\n"
+"iand r47.x, r20.x, l11.x\n"
+"dcl_literal l16, 0xf4800000, 0xf4800000, 0xf4800000, 0xf4800000\n" /* integer add of -(23 << 23): lowers the exponent field by 23 */
+"iadd r47.x, r47.x, l16.x\n"
+"ior r21.x, r46.x, r47.x\n" /* value with r21's sign and r20's exponent lowered by 23 */
+"add r22.x, r20.x, r21.x\n"
+"cmov_logical r20.x, r45.x, r22.x, r20.x\n"
+"add r23.x, r19.x, r20.x\n" /* final sum in the normalised exponent range */
+"ishr r48.x, r23.x, l1.x\n"
+"iand r48.x, r48.x, l2.x\n"
+"iadd r48.x, r48.x, l3.x\n"
+"iadd r48.x, r48.x, r27.x\n"
+"iadd r48.x, r48.x, l4.x\n" /* biased exponent after adding back the multiplicands' exponent sum */
+"iand r49.x, r48.x, l2.x\n"
+"ishl r49.x, r49.x, l1.x\n"
+"iand r50.x, r23.x, l6.x\n"
+"ior r50.x, r50.x, r49.x\n" /* sign + mantissa of the sum with the restored exponent field */
+"iand r40.x, r23.x, l9.x\n"
+"ieq r42.x, r40.x, l14.x\n"
+"cmov_logical r23.x, r42.x, r23.x, r50.x\n" /* a zero sum is left as is */
+"iand r51.x, r23.x, l15.x\n"
+"ige r42.x, l14.x, r48.x\n"
+"cmov_logical r23.x, r42.x, r51.x, r23.x\n" /* restored exponent <= 0: signed zero */
+"ior r51.x, r51.x, l11.x\n"
+"dcl_literal l17, 254, 254, 254, 254\n"
+"ilt r42.x, l17.x, r48.x\n"
+"cmov_logical r23.x, r42.x, r51.x, r23.x\n" /* restored exponent > 254: signed infinity pattern */
+"iadd r40.x, r26.x, r27.x_neg(xyzw)\n" /* addend exponent minus product exponent */
+"dcl_literal l18, 30, 30, 30, 30\n"
+"ilt r42.x, l18.x, r40.x\n"
+"cmov_logical r23.x, r42.x, r4.x, r23.x\n" /* addend exponent more than 30 above the product's: return the addend */
+"cmov_logical r23.x, r35.x, r3.x, r23.x\n" /* any exponent field of 0 or 255: return the plain mad_ieee result */
+"cmov_logical r23.x, r36.x, r4.x, r23.x\n" /* addend-only exponent field of 255: return the addend */
+"mov r0.x, r23.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* matches in(3)/out(1) in the mdef header */
+},
+{ "__fma_f64", /* IL macro 225: three inputs, one output; single dmad on the xy register pairs */
+"mdef(225)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"dmad r0.xy__, r0.xy, r1.xy, r2.xy\n" /* each operand occupies the .xy pair; presumably one 64-bit value per pair -- confirm against the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* matches in(3)/out(1) in the mdef header */
+},
+{ "__fmin_f32", /* IL macro 226: two inputs, one output; picks between the operands with integer compares on their bit patterns plus explicit NaN handling */
+"mdef(226)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack both operands into r0.xy */
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x80000000\n" /* .xy: magnitude mask, .zw: sign mask */
+"and r1, r0.xyxy, l0\n" /* r1.xy = magnitude bits, r1.zw = sign bits */
+"ixor r0.__z_, r1.w, r1.z\n" /* non-zero when the two signs differ */
+"dcl_literal l1, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n" /* bit pattern of +infinity */
+"ilt r1.xy__, l1, r1.xyxx\n" /* per-operand flag: magnitude bits exceed the infinity pattern (NaN) */
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.z, l2\n"
+"inot r0.___w, r0.z\n" /* r0.z = signs differ, r0.w = signs equal */
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r0.x, l3\n" /* first operand's bit pattern is non-negative as a signed integer */
+"and r1.__zw, r0.zzzw, r1.z\n"
+"ilt r2.xy__, r0.xyxx, r0.yxyy\n" /* r2.x: first < second, r2.y: second < first, both as signed integers */
+"and r1.___w, r1.w, r2.x\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r0.x, l4\n" /* first operand's sign bit is set */
+"and r0.__zw, r0.zzzw, r2.x\n"
+"and r0.___w, r2.y, r0.w\n" /* both sign bits set: the integer ordering is used the other way round */
+"ior r0.___w, r1.w, r0.w\n"
+"ior r0.__z_, r0.z, r0.w\n"
+"cmov_logical r0.__z_, r0.z, r0.x, r0.y\n" /* candidate result before NaN handling */
+"inot r2.xy__, r1.yxyy\n"
+"and r2.xy__, r1.xyxx, r2.xyxx\n" /* r2.x: only first is NaN; r2.y: only second is NaN */
+"and r0.___w, r1.y, r1.x\n" /* both operands NaN */
+"ior r1.x___, r1.z, r2.x\n"
+"cmov_logical r0._y__, r1.x, r0.y, r0.z\n"
+"cmov_logical r0._y__, r2.y, r0.x, r0.y\n" /* only the second operand is NaN: take the first */
+"dcl_literal l5, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"ior r0.x___, r0.x, l5\n"
+"cmov_logical r0.x___, r0.w, r0.x, r0.y\n" /* both NaN: first operand with the NaN pattern ORed in */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2)/out(1) in the mdef header */
+},
+{ "__fmod_f32",
+"mdef(227)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x7F800000\n"
+"and r1, r0.xyxy, l0\n"
+"dcl_literal l1, 0x7F800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.xyz_, r1.xyxx, l1\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r0.__z_, l2, r1.x\n"
+"and r0.__z_, r2.x, r0.z\n"
+"dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.w, l3\n"
+"dcl_literal l4, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ilt r1.___w, r0.w, l4\n"
+"and r1.___w, r2.y, r1.w\n"
+"dcl_literal l5, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"iadd r0.___w, l5, r0.w_neg(xyzw)\n"
+"and r0.___w, r1.w, r0.w\n"
+"itof r1.___w, r0.w\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r1.___w, r1.w, l6\n"
+"if_logicalnz r1.w\n"
+" \n"
+" dcl_literal l7, 0x7FFFFFFF, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r2.xy_w, r0.y, l7\n"
+" itof r2.___w, r2.w\n"
+" cmov_logical r2.x___, r2.y, r2.x, r2.w\n"
+" \n"
+" dcl_literal l8, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r0.w, l8\n"
+" cmov_logical r0.___w, r2.y, r0.w, r2.w\n"
+" \n"
+" dcl_literal l9, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r2._y_w, r2.x, l9\n"
+" \n"
+" dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r2.y, l10\n"
+" iadd r2._y__, r2.y, r0.w\n"
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l11\n"
+" iadd r0.___w, r2.x, r0.w\n"
+" \n"
+" dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.x___, r2.y, l12\n"
+" \n"
+" dcl_literal l13, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2._y__, l13, r2.x\n"
+" \n"
+" dcl_literal l14, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r2.y, l14, r0.w\n"
+" \n"
+" dcl_literal l15, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.x___, l15, r2.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l16, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.w, l16\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.___w, l17, r2.x\n"
+" \n"
+" dcl_literal l18, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.x___, r2.w, l18, r2.x\n"
+" \n"
+" dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.x___, r2.x, l19\n"
+" ishr r2._y__, r2.y, r2.x\n"
+" \n"
+" dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l20, r2.x\n"
+" cmov_logical r0.___w, r2.x, r2.y, r0.w\n"
+"else\n"
+" mov r0.___w, r1.y\n"
+"endif\n"
+"dcl_literal l21, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.x___, r0.w, l21\n"
+"dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r2.x, l22\n"
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2._y__, r0.x, l23\n"
+"dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2._y__, r2.y, l24\n"
+"ige r2.___w, r2.x, r2.y\n"
+"iadd r2.x___, r2.y, r2.x_neg(xyzw)\n"
+"itof r2.x___, r2.x\n"
+"dcl_literal l25, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mul_ieee r2.x___, r2.x, l25\n"
+"round_neginf r2.x___, r2.x\n"
+"dcl_literal l26, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.w, l26, r2.x\n"
+"ftoi r2._y__, r2.x\n"
+"dcl_literal l27, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imul r2.___w, r2.y, l27\n"
+"dcl_literal l28, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r2.___w, r2.w, l28\n"
+"dcl_literal l29, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imad r2._y__, r2.y, l29, r2.w_neg(xyzw)\n"
+"dcl_literal l30, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r2.___w, r2.w, l30\n"
+"dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.___w, r2.w, l31\n"
+"mul_ieee r2.___w, r0.w, r2.w\n"
+"dcl_literal l32, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r2._y__, r2.y, l32\n"
+"dcl_literal l33, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2._y__, r2.y, l33\n"
+"mul_ieee r2._y__, r2.w, r2.y\n"
+"dcl_literal l34, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.___w, r2.x, l34\n"
+"cmov_logical r0.___w, r2.w, r0.w, r2.y\n"
+"dcl_literal l35, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"dcl_literal l36, 0x39800000, 0x39800000, 0x39800000, 0x39800000\n"
+"cmov_logical r2._y__, r2.w, l35, l36\n"
+"dcl_literal l37, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.___w, r0.w, l37\n"
+"dcl_literal l38, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.___w, r2.w, l38\n"
+"dcl_literal l39, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.___w, r2.w, l39\n"
+"dcl_literal l40, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3.x___, r0.w, l40\n"
+"dcl_literal l41, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r3._y__, l41, r2.w\n"
+"dcl_literal l42, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r2.___w, l42, r2.w\n"
+"and r2.___w, r3.y, r2.w\n"
+"dcl_literal l43, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3._y__, r2.w, l43\n"
+"dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.__z_, l44, r2.x\n"
+"and r2.___w, r2.w, r3.z\n"
+"cmov_logical r2.___w, r2.w, r3.x, r0.w\n"
+"mov r3.x___, r1.x\n"
+"mov r3.__z_, r2.w\n"
+"mov r3.___w, r0.w\n"
+"mov r4.x___, r3.y\n"
+"dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r4._y__, l45\n"
+"whileloop\n"
+" ge r4.__z_, r4.y, r2.x\n"
+" break_logicalnz r4.z\n"
+" div_zeroop(infinity) r4.__z_, r3.x, r3.z\n"
+" \n"
+" dcl_literal l46, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+" mul_ieee r4.___w, r4.z, l46\n"
+" cmov_logical r4.__z_, r4.x, r4.w, r4.z\n"
+" round_neginf r4.__z_, r4.z\n"
+" \n"
+" dcl_literal l47, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r4.___w, r3.w, l47\n"
+" add r5.x___, r3.w, r4.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l48, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5._y__, r4.z, l48\n"
+" add r5.__z_, r4.z, r5.y_neg(xyzw)\n"
+" mul_ieee r4.__z_, r3.w, r4.z\n"
+" mad_ieee r5.___w, r4.w, r5.y, r4.z_neg(xyzw)\n"
+" mad_ieee r4.___w, r4.w, r5.z, r5.w\n"
+" mad_ieee r4.___w, r5.x, r5.y, r4.w\n"
+" mad_ieee r4.___w, r5.x, r5.z, r4.w\n"
+" add r5.x___, r3.x, r4.z_neg(xyzw)\n"
+" add r5._y__, r3.x, r5.x_neg(xyzw)\n"
+" add r4.__z_, r4.z_neg(xyzw), r5.y\n"
+" add r4.__z_, r4.w_neg(xyzw), r4.z\n"
+" add r4.__z_, r5.x, r4.z\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r4.___w, r4.z, l49\n"
+" add r5.x___, r3.w, r4.z\n"
+" cmov_logical r4.__z_, r4.w, r5.x, r4.z\n"
+" \n"
+" dcl_literal l50, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r4.___w, r4.z, l50\n"
+" add r5.x___, r3.w, r4.z\n"
+" cmov_logical r4.__z_, r4.w, r5.x, r4.z\n"
+" ge r4.___w, r4.z, r3.w\n"
+" add r5.x___, r3.w_neg(xyzw), r4.z\n"
+" cmov_logical r3.x___, r4.w, r5.x, r4.z\n"
+" mul_ieee r3.___w, r2.y, r3.w\n"
+" \n"
+" dcl_literal l51, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r4._y__, r4.y, l51\n"
+" mov r3.x_zw, r3.xxww\n"
+" \n"
+" dcl_literal l52, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.x___, l52\n"
+"endloop\n"
+"mov r2.xy_w, r3.xzxz\n"
+"dcl_literal l53, 0x7F800000, 0x7F800000, 0xFFFFF000, 0x00000000\n"
+"and r3.xyz_, r2.xywx, l53\n"
+"dcl_literal l54, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.xy__, r3.xyxx, l54\n"
+"dcl_literal l55, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, r3.y, l55\n"
+"dcl_literal l56, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r2._y__, r2.w, l56\n"
+"dcl_literal l57, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r3._y__, l57, r0.w\n"
+"dcl_literal l58, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r0.___w, l58, r0.w\n"
+"and r0.___w, r3.y, r0.w\n"
+"cmov_logical r2._y__, r0.w, r2.y, r2.w\n"
+"div_zeroop(infinity) r2._y__, r2.x, r2.y\n"
+"dcl_literal l59, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3._y__, r2.y, l59\n"
+"cmov_logical r0.___w, r0.w, r3.y, r2.y\n"
+"round_neginf r0.___w, r0.w\n"
+"add r2._y__, r2.w, r3.z_neg(xyzw)\n"
+"dcl_literal l60, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3._y__, r0.w, l60\n"
+"add r3.___w, r0.w, r3.y_neg(xyzw)\n"
+"mul_ieee r0.___w, r2.w, r0.w\n"
+"mad_ieee r4.x___, r3.z, r3.y, r0.w_neg(xyzw)\n"
+"mad_ieee r3.__z_, r3.z, r3.w, r4.x\n"
+"mad_ieee r3._y__, r2.y, r3.y, r3.z\n"
+"mad_ieee r2._y__, r2.y, r3.w, r3.y\n"
+"add r3._y__, r2.x, r0.w_neg(xyzw)\n"
+"add r3.__z_, r2.x, r3.y_neg(xyzw)\n"
+"add r0.___w, r0.w_neg(xyzw), r3.z\n"
+"add r0.___w, r2.y_neg(xyzw), r0.w\n"
+"add r0.___w, r3.y, r0.w\n"
+"dcl_literal l61, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2._y__, r0.w, l61\n"
+"add r3._y__, r2.w, r0.w\n"
+"cmov_logical r0.___w, r2.y, r3.y, r0.w\n"
+"dcl_literal l62, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2._y__, r0.w, l62\n"
+"add r3._y__, r2.w, r0.w\n"
+"cmov_logical r0.___w, r2.y, r3.y, r0.w\n"
+"ge r2._y__, r0.w, r2.w\n"
+"add r2.___w, r2.w_neg(xyzw), r0.w\n"
+"cmov_logical r0.___w, r2.y, r2.w, r0.w\n"
+"cmov_logical r0.___w, r3.x, r0.w, r2.x\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.x___, r0.w, l63\n"
+"and r1.___w, r1.w, r2.x\n"
+"if_logicalnz r1.w\n"
+" \n"
+" dcl_literal l64, 0x7FFFFFFF, 0x7F800000, 0x80000000, 0x007FFFFF\n"
+" and r3, r0.w, l64\n"
+" \n"
+" dcl_literal l65, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r1.___w, r3.x, l65\n"
+" \n"
+" dcl_literal l66, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r1.___w, r1.w, l66\n"
+" \n"
+" dcl_literal l67, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r1.___w, r1.w, l67\n"
+" \n"
+" dcl_literal l68, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r1.___w, l68, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l69, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r1.___w, r1.w, l69\n"
+" \n"
+" dcl_literal l70, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r1.___w, r1.w, l70\n"
+" \n"
+" dcl_literal l71, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r1.___w, r3.x, r1.w, l71\n"
+" \n"
+" dcl_literal l72, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.___w, r1.w_neg(xyzw), l72\n"
+" \n"
+" dcl_literal l73, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r2.x___, r3.x, l73\n"
+" \n"
+" dcl_literal l74, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r2._y__, r3.x, l74\n"
+" and r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l75, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r3.y, l75\n"
+" \n"
+" dcl_literal l76, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2._y__, r2.y, l76\n"
+" cmov_logical r1.___w, r2.x, r1.w, r2.y\n"
+" \n"
+" dcl_literal l77, 0x7FFFFFFF, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r2.xy_w, r0.y, l77\n"
+" \n"
+" dcl_literal l78, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r0._y__, r2.x, l78\n"
+" \n"
+" dcl_literal l79, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r0._y__, r0.y, l79\n"
+" \n"
+" dcl_literal l80, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0._y__, r0.y, l80\n"
+" \n"
+" dcl_literal l81, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r0._y__, l81, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l82, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0._y__, r0.y, l82\n"
+" \n"
+" dcl_literal l83, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r0._y__, r0.y, l83\n"
+" \n"
+" dcl_literal l84, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r0._y__, r2.x, r0.y, l84\n"
+" \n"
+" dcl_literal l85, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0._y__, r0.y_neg(xyzw), l85\n"
+" \n"
+" dcl_literal l86, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r4.x___, r2.x, l86\n"
+" \n"
+" dcl_literal l87, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r4._y__, r2.x, l87\n"
+" and r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l88, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4._y__, r2.y, l88\n"
+" \n"
+" dcl_literal l89, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4._y__, r4.y, l89\n"
+" cmov_logical r0._y__, r4.x, r0.y, r4.y\n"
+" inegate r4.x___, r0.y\n"
+" itof r4._y__, r4.x\n"
+" itof r2.___w, r2.w\n"
+" cmov_logical r2.x___, r2.y, r2.x, r2.w\n"
+" \n"
+" dcl_literal l90, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r4.x, l90\n"
+" cmov_logical r2._y__, r2.y, r4.x, r2.w\n"
+" \n"
+" dcl_literal l91, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r4.__zw, r2.x, l91\n"
+" \n"
+" dcl_literal l92, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r4.z, l92\n"
+" iadd r2.___w, r2.w, r2.y\n"
+" \n"
+" dcl_literal l93, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l93\n"
+" iadd r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l94, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2._y__, r2.w, l94\n"
+" \n"
+" dcl_literal l95, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l95, r2.y\n"
+" \n"
+" dcl_literal l96, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.x___, r2.w, l96, r2.x\n"
+" \n"
+" dcl_literal l97, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2._y__, l97, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l98, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r4.w, l98\n"
+" \n"
+" dcl_literal l99, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l99, r2.y\n"
+" \n"
+" dcl_literal l100, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r4.z, l100, r2.y\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2._y__, r2.y, l101\n"
+" ishr r2.___w, r2.w, r2.y\n"
+" \n"
+" dcl_literal l102, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l102, r2.y\n"
+" cmov_logical r2.x___, r2.y, r2.w, r2.x\n"
+" \n"
+" dcl_literal l103, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2._y__, l103, r4.y\n"
+" \n"
+" dcl_literal l104, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.x___, r2.y, l104, r2.x\n"
+" inegate r2._y__, r1.w\n"
+" itof r2.___w, r2.y\n"
+" itof r3.___w, r3.w\n"
+" cmov_logical r3.x___, r3.y, r3.x, r3.w\n"
+" \n"
+" dcl_literal l105, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3.___w, r2.y, l105\n"
+" cmov_logical r2._y__, r3.y, r2.y, r3.w\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r3._y_w, r3.x, l106\n"
+" \n"
+" dcl_literal l107, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l107\n"
+" iadd r3._y__, r3.y, r2.y\n"
+" \n"
+" dcl_literal l108, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l108\n"
+" iadd r2._y__, r3.x, r2.y\n"
+" \n"
+" dcl_literal l109, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.y, l109\n"
+" \n"
+" dcl_literal l110, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l110, r3.x\n"
+" \n"
+" dcl_literal l111, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2._y__, r3.y, l111, r2.y\n"
+" \n"
+" dcl_literal l112, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l112, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l113, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.w, l113\n"
+" \n"
+" dcl_literal l114, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.___w, l114, r3.x\n"
+" \n"
+" dcl_literal l115, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r3.w, l115, r3.x\n"
+" \n"
+" dcl_literal l116, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r3.x___, r3.x, l116\n"
+" ishr r3._y__, r3.y, r3.x\n"
+" \n"
+" dcl_literal l117, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.x___, l117, r3.x\n"
+" cmov_logical r2._y__, r3.x, r3.y, r2.y\n"
+" \n"
+" dcl_literal l118, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2.___w, l118, r2.w\n"
+" \n"
+" dcl_literal l119, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2._y__, r2.w, l119, r2.y\n"
+" ior r2._y__, r3.z, r2.y\n"
+" iadd r1.___w, r1.w, r4.x\n"
+" mov r2.___w, r2.y\n"
+" mov r3.x___, r1.w\n"
+" whileloop\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r3._y__, l120, r3.x\n"
+" break_logicalnz r3.y\n"
+" ge r3._y__, r2.w, r2.x\n"
+" add r3.__z_, r2.x_neg(xyzw), r2.w\n"
+" cmov_logical r3._y__, r3.y, r3.z, r2.w\n"
+" add r2.___w, r3.y, r3.y\n"
+" \n"
+" dcl_literal l121, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r3.x___, r3.x, l121\n"
+" endloop\n"
+" ge r2._y__, r2.w, r2.x\n"
+" add r2.x___, r2.x_neg(xyzw), r2.w\n"
+" cmov_logical r2.x___, r2.y, r2.x, r2.w\n"
+" itof r2._y__, r0.y\n"
+" \n"
+" dcl_literal l122, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r3, r2.x, l122\n"
+" itof r2.x___, r3.w\n"
+" cmov_logical r2.x___, r3.z, r3.x, r2.x\n"
+" \n"
+" dcl_literal l123, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r0.y, l123\n"
+" cmov_logical r0._y__, r3.z, r0.y, r2.w\n"
+" \n"
+" dcl_literal l124, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r3.x_z_, r2.x, l124\n"
+" \n"
+" dcl_literal l125, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r3.x, l125\n"
+" iadd r2.___w, r2.w, r0.y\n"
+" \n"
+" dcl_literal l126, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l126\n"
+" iadd r0._y__, r2.x, r0.y\n"
+" \n"
+" dcl_literal l127, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.x___, r2.w, l127\n"
+" \n"
+" dcl_literal l128, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l128, r2.x\n"
+" \n"
+" dcl_literal l129, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.w, l129, r0.y\n"
+" \n"
+" dcl_literal l130, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.x___, l130, r2.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l131, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r3.z, l131\n"
+" \n"
+" dcl_literal l132, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l132, r2.x\n"
+" \n"
+" dcl_literal l133, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.x___, r3.x, l133, r2.x\n"
+" \n"
+" dcl_literal l134, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.x___, r2.x, l134\n"
+" ishr r2.___w, r2.w, r2.x\n"
+" \n"
+" dcl_literal l135, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l135, r2.x\n"
+" cmov_logical r0._y__, r2.x, r2.w, r0.y\n"
+" \n"
+" dcl_literal l136, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2.x___, l136, r2.y\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.x, l137, r0.y\n"
+" ior r0._y__, r3.y, r0.y\n"
+" \n"
+" dcl_literal l138, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r1.___w, r1.w, l138\n"
+" cmov_logical r0.___w, r1.w, r0.y, r0.w\n"
+"endif\n"
+"dcl_literal l139, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ixor r0._y__, r0.w, l139\n"
+"cmov_logical r0._y__, r1.z, r0.y, r0.w\n"
+"dcl_literal l140, 0x7F800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r1.x_zw, r1.yyxy, l140\n"
+"dcl_literal l141, 0x7F800000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.xy__, l141, r1.y\n"
+"and r0.___w, r1.z, r2.y\n"
+"inot r1._y__, r2.x\n"
+"and r0.___w, r0.w, r1.y\n"
+"cmov_logical r0._y__, r0.w, r0.x, r0.y\n"
+"inot r0.___w, r0.z\n"
+"ior r0.___w, r1.w, r0.w\n"
+"ior r0.___w, r2.x, r0.w\n"
+"dcl_literal l142, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.w, l142, r0.y\n"
+"and r0.__z_, r2.z, r0.z\n"
+"and r0.__z_, r1.x, r0.z\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__fract_2f32f32", /* IL macro 228: one input, one output.
+ * Splits in0.x into a fractional part (out0.x) and an integral part
+ * (out0.y).  The input is first rounded toward zero; when the remainder
+ * is negative, 1.0 is added to the remainder and 1.0 is subtracted from
+ * the integral part.  Special cases are then patched in, in the order
+ * they appear below:
+ *  - remainder + 1.0 == 1.0         -> fraction 0x3F7FFFFF
+ *  - fraction == 0 and x < 0       -> fraction 0x80000000 (-0.0)
+ *  - negative denormal input       -> (0x3F7FFFFF, 0xBF800000)
+ *  - 0 < x < 1 (integer compares)  -> (x, 0)
+ *  - magnitude bits all zero       -> (x, x)
+ *  - x is +inf / -inf              -> (0x00000000, x) / (0x80000000, x)
+ *  - magnitude bits > 0x7F800000   -> (0x7FC00000, 0x7FC00000), i.e. NaN
+ * NOTE(review): the name suggests an OpenCL-style fract(x, &floor)
+ * helper -- confirm against the caller.
+ */
+"mdef(228)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r1._y__, r0.x\n" /* r1.y = x rounded toward zero */
+"add r1.x___, r0.x, r1.y_neg(xyzw)\n" /* r1.x = x - r1.y */
+"dcl_literal l0, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r2._yz_, r1.yyxy, l0\n" /* r2.y = r1.y - 1.0, r2.z = r1.x + 1.0 */
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0.___w, r2.z, l1\n"
+"dcl_literal l2, 0x3F7FFFFF, 0x3F7FFFFF, 0x3F7FFFFF, 0x3F7FFFFF\n"
+"cmov_logical r2.x___, r0.w, l2, r2.z\n" /* keep the fraction strictly below 1.0 */
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r1.x, l3\n"
+"cmov_logical r1._yz_, r0.w, r2.xxyx, r1.xxyx\n" /* r1.y = fraction, r1.z = integral part */
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r0.___w, r1.y, l4\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.___w, r0.x, l5\n"
+"and r0.___w, r0.w, r1.w\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r1.x___, r0.w, l6, r1.y\n" /* fraction now lives in r1.x */
+"dcl_literal l7, 0x00000000, 0x7FFFFFFF, 0x00000000, 0x7F800000\n"
+"and r1._y_w, r0.x, l7\n" /* r1.y = magnitude bits, r1.w = exponent bits */
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.___w, r1.w, l8\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.___w, r1.y, l9\n"
+"and r0.___w, r0.w, r1.w\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.___w, r0.x, l10\n"
+"and r0.___w, r0.w, r1.w\n" /* zero exponent, non-zero magnitude, sign bit set */
+"dcl_literal l11, 0x3F7FFFFF, 0x00000000, 0xBF800000, 0x00000000\n"
+"cmov_logical r1.x_z_, r0.w, l11, r1.xxzx\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x80000000, 0x00000000\n"
+"mov r0._yz_, l12\n" /* r0.y = +0, r0.z = -0, used by the selects below */
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l13, r0.x\n"
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ilt r1.___w, r1.y, l14\n"
+"and r0.___w, r0.w, r1.w\n"
+"cmov_logical r1.x_z_, r0.w, r0.xxyx, r1.xxzx\n"
+"cmov_logical r1.x_z_, r1.y, r1.xxzx, r0.x\n"
+"dcl_literal l15, 0x7F800000, 0xFF800000, 0x00000000, 0x00000000\n"
+"ieq r2.xy__, r0.x, l15\n" /* r2.x: x is +inf, r2.y: x is -inf */
+"dcl_literal l16, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, l16, r1.y\n" /* magnitude above the infinity pattern: NaN */
+"cmov_logical r1.xy__, r2.x, r0.yxyy, r1.xzxx\n"
+"cmov_logical r0.xy__, r2.y, r0.zxzz, r1.xyxx\n"
+"dcl_literal l17, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r0.w, l17, r0.xyxx\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__frexp_2f32f32", /* IL macro 229: one input, one output.
+ * Decomposes in0.x into a significand (out0.x) and an integer exponent
+ * (out0.y).  The significand is built by OR-ing 0x3F000000 (the bit
+ * pattern of 0.5) into the sign and mantissa bits; the exponent is the
+ * biased exponent field minus 126.  Inputs whose magnitude bits are
+ * non-zero but below 0x00800000 (denormals) are first rebuilt through
+ * an itof of the magnitude bits and receive an extra -25 exponent
+ * correction.  Zero magnitude yields (0, 0); magnitude bits of
+ * 0x7F800000 or above (inf/NaN) return r0.x as it stands with
+ * exponent 0.
+ */
+"mdef(229)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000\n"
+"and r0._yz_, r0.x, l0\n" /* r0.y = sign bit, r0.z = magnitude bits */
+"itof r0.___w, r0.z\n" /* magnitude bits reinterpreted as an integer and converted */
+"dcl_literal l1, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.w, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.x, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.x___, r1.y, l3\n"
+"dcl_literal l4, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r0.___w, r0.w, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.___w, l5, r0.w_neg(xyzw)\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1._y__, l6, r0.w\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.___w, r1.y, l7, r0.w\n"
+"inegate r1._y__, r0.w\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1._y__, r1.y, l8\n"
+"iadd r1._y__, r1.x, r1.y\n"
+"ishr r1.x___, r1.x, r0.w\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l9, r0.w\n"
+"cmov_logical r0.___w, r0.w, r1.x, r1.y\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.x___, r0.z, l10\n"
+"dcl_literal l11, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r0.__z_, r0.z, l11\n"
+"and r0.__z_, r1.x, r0.z\n" /* r0.z = input is a non-zero denormal */
+"cmov_logical r0.x___, r0.z, r0.w, r0.x\n"
+"dcl_literal l12, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n"
+"and r0.__z_, r0.z, l12\n" /* -25 when the denormal flag is set, else 0 */
+"ior r0._y__, r0.y, r0.x\n"
+"dcl_literal l13, 0x00000000, 0x7FFFFFFF, 0x00000000, 0x807FFFFF\n"
+"and r0._y_w, r0.y, l13\n" /* r0.y = magnitude bits, r0.w = sign + mantissa bits */
+"dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r0.y, l14\n"
+"iadd r0.__z_, r1.x, r0.z\n"
+"dcl_literal l15, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.__z_, l15, r0.z\n" /* subtract 126 from the biased exponent */
+"dcl_literal l16, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r0.y, r0.z, l16\n"
+"dcl_literal l17, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1.x___, r0.y, l17\n" /* r1.x = inf or NaN */
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r1.x, l18, r0.z\n"
+"dcl_literal l19, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.__z_, r0.w, l19\n"
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.y, r0.z, l20\n"
+"cmov_logical r0.x___, r1.x, r0.x, r0.y\n"
+"mov r0._y__, r2.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ftol_i64", /* IL macro 230: one input, one output.
+ * Converts the float in in0.x to a signed 64-bit integer returned as
+ * two dwords: out0.x = low word, out0.y = high word.  The 24-bit
+ * significand (mantissa | 0x00800000) is shifted right by 23 - e when
+ * the unbiased exponent e is in 0..23, otherwise left by e - 23 across
+ * the word pair (the shift count is clamped to 64 with imin).  If the
+ * sign bit is set the pair is negated: inot on both words, +1 on the
+ * low word, carry into the high word detected with ult.  When e < 0
+ * both result words are zero.
+ */
+"mdef(230)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x7F800000, 0x80000000, 0x007FFFFF, 0x00000000\n"
+"and r0.xyz_, r0.x, l0\n" /* r0.x = exponent bits, r0.y = sign bit, r0.z = mantissa */
+"dcl_literal l1, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l1\n"
+"dcl_literal l2, 0xFFFFFF81, 0x00000000, 0x00000000, 0xFFFFFF6A\n"
+"iadd r0.x__w, r0.x, l2\n" /* r0.x = e (bias 127 removed), r0.w = e - 23 */
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r0.x, l3\n"
+"dcl_literal l4, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ige r1._y__, l4, r0.x\n"
+"and r1.x___, r1.x, r1.y\n" /* r1.x = 0 <= e <= 23: right-shift case */
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"iadd r1._y__, l5, r0.x_neg(xyzw)\n"
+"cmov_logical r0.___w, r1.x, r1.y, r0.w\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l6\n" /* r0.x = e < 0 */
+"dcl_literal l7, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"imin r0.___w, r0.w, l7\n"
+"dcl_literal l8, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r1._y__, r0.w, l8\n"
+"dcl_literal l9, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r0.w, l9\n" /* r1.z = shift count is 32 or more */
+"cmov_logical r1._y__, r1.z, r1.y, r0.w\n"
+"dcl_literal l10, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r1.___w, l10, r1.y_neg(xyzw)\n"
+"dcl_literal l11, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.__z_, r0.z, l11\n" /* restore the implicit leading one */
+"ushr r1.___w, r0.z, r1.w\n"
+"ishl r1._y__, r0.z, r1.y\n"
+"cmov_logical r1.___w, r1.z, r1.y, r1.w\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l12, r1.y\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2._y__, r0.w, r1.w, l13\n" /* r2.xy = left-shifted (low, high) pair */
+"cmov_logical r2.x___, r0.w, r1.y, r0.z\n"
+"dcl_literal l14, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"imin r1._y__, r0.w, l14\n"
+"dcl_literal l15, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r0.___w, r0.w, l15\n"
+"ushr r0.__z_, r0.z, r1.y\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l16, r0.z\n"
+"dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r1.__z_, l17\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__zw, r0.w, l18, r1.yyyz\n"
+"cmov_logical r0.__zw, r1.x, r0.zzzw, r2.xxxy\n" /* r0.zw = unsigned (low, high) magnitude */
+"inot r1.xy__, r0.zwzz\n"
+"dcl_literal l19, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r1.__z_, r1.x, l19\n"
+"ult r1.x___, r1.z, r1.x\n" /* carry out of the low word */
+"cmov_logical r1.__z_, r0.y, r1.z, r0.z\n"
+"dcl_literal l20, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.__z_, r1.x, l20\n"
+"iadd r0.__z_, r1.y, r0.z\n"
+"cmov_logical r1.___w, r0.y, r0.z, r0.w\n"
+"dcl_literal l21, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r0.x, l21, r1.zwzz\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ftoul_i64", /* IL macro 231: one input, one output.
+ * Converts the float in in0.x to an unsigned 64-bit integer returned
+ * as two dwords: out0.x = low word, out0.y = high word.  The 24-bit
+ * significand (mantissa | 0x00800000) is shifted right by 23 - e when
+ * the unbiased exponent e is in 0..23, otherwise left by e - 23 across
+ * the word pair (the shift count is clamped to 64 with imin).  Both
+ * result words are zero when e < 0 or when the sign bit is set.
+ */
+"mdef(231)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x7F800000, 0x007FFFFF, 0x80000000, 0x00000000\n"
+"and r0.xyz_, r0.x, l0\n" /* r0.x = exponent bits, r0.y = mantissa, r0.z = sign bit */
+"dcl_literal l1, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l1\n"
+"dcl_literal l2, 0xFFFFFF81, 0x00000000, 0x00000000, 0xFFFFFF6A\n"
+"iadd r0.x__w, r0.x, l2\n" /* r0.x = e (bias 127 removed), r0.w = e - 23 */
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r0.x, l3\n"
+"dcl_literal l4, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ige r1._y__, l4, r0.x\n"
+"and r1.x___, r1.x, r1.y\n" /* r1.x = 0 <= e <= 23: right-shift case */
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"iadd r1._y__, l5, r0.x_neg(xyzw)\n"
+"cmov_logical r0.___w, r1.x, r1.y, r0.w\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l6\n" /* r0.x = e < 0 */
+"dcl_literal l7, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"imin r0.___w, r0.w, l7\n"
+"dcl_literal l8, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r1._y__, r0.w, l8\n"
+"dcl_literal l9, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r0.w, l9\n" /* r1.z = shift count is 32 or more */
+"cmov_logical r1._y__, r1.z, r1.y, r0.w\n"
+"dcl_literal l10, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r1.___w, l10, r1.y_neg(xyzw)\n"
+"dcl_literal l11, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0._y__, r0.y, l11\n" /* restore the implicit leading one */
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.z, l12\n" /* r0.z = input has its sign bit set */
+"ushr r1.___w, r0.y, r1.w\n"
+"ishl r1._y__, r0.y, r1.y\n"
+"cmov_logical r1.___w, r1.z, r1.y, r1.w\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l13, r1.y\n"
+"dcl_literal l14, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2._y__, r0.w, r1.w, l14\n" /* r2.xy = left-shifted (low, high) pair */
+"cmov_logical r2.x___, r0.w, r1.y, r0.y\n"
+"dcl_literal l15, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"imin r1._y__, r0.w, l15\n"
+"dcl_literal l16, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r0.___w, r0.w, l16\n"
+"ushr r0._y__, r0.y, r1.y\n"
+"dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l17, r0.y\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r1.__z_, l18\n"
+"dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y_w, r0.w, l19, r1.yyyz\n"
+"cmov_logical r0._y_w, r1.x, r0.yyyw, r2.xxxy\n" /* r0.y = low word, r0.w = high word */
+"ior r0.x___, r0.x, r0.z\n" /* zero the result when e < 0 or the input is negative */
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r0.x, l20, r0.ywyy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ftz_2f32", /* IL macro 232: one input, one output.
+ * Flushes the x and y components of in0 to zero when their exponent
+ * field is all zeros (denormal or zero), keeping only the sign bit;
+ * other values pass through unchanged.  z and w are copied untouched.
+ * Destination masks are written with all four component slots, as on
+ * every other instruction in this table.
+ */
+"mdef(232)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x7F800000, 0xFFFFFFFF, 0x80000000, 0x00000000\n"
+"iand r1.xy__, r0.xy, l1.xxxx\n" /* isolate the exponent bits */
+"cmov_logical r1.xy__, r1.xy, l1.yyyy, l1.zzzz\n" /* non-zero exponent: keep all bits, else sign only */
+"iand r0.xy__, r0.xy, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ftz_4f32", /* IL macro 233: one input, one output.
+ * Flushes each of the four components of in0 to zero when its exponent
+ * field is all zeros (denormal or zero), keeping only the sign bit;
+ * other values pass through unchanged.
+ */
+"mdef(233)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x7F800000, 0xFFFFFFFF, 0x80000000, 0x00000000\n"
+"iand r1, r0, l1.xxxx\n" /* isolate the exponent bits */
+"cmov_logical r1, r1, l1.yyyy, l1.zzzz\n" /* non-zero exponent: keep all bits, else sign only */
+"iand r0, r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__ftz_f32", /* IL macro 234: one input, one output.
+ * Flushes in0.x to zero when its exponent field is all zeros (denormal
+ * or zero), keeping only the sign bit; other values pass through
+ * unchanged.  The remaining components are copied untouched.
+ */
+"mdef(234)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x7F800000, 0xFFFFFFFF, 0x80000000, 0x00000000\n"
+"iand r1.x___, r0.x, l1.x\n" /* isolate the exponent bits */
+"cmov_logical r1.x___, r1.x, l1.y, l1.z\n" /* non-zero exponent: keep all bits, else sign only */
+"iand r0.x___, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__half_cos_f32", /* IL macro 235: one input, one output.
+ * Cosine approximation of in0.x, result in out0.x.  Works on the
+ * magnitude a = in0.x & 0x7FFFFFFF (held in r0.y) and picks the result
+ * by range:
+ *  - 0x3A000000 <= a < 0x3F490FDB (about pi/4): a polynomial in a*a is
+ *    evaluated directly (first if-block);
+ *  - 0x3F490FDB <= a <= 0x47800000 (65536.0): n = round_nearest(a *
+ *    0x3F22F983) (about 2/pi) is computed, n times a pi/2 constant
+ *    split into several parts is subtracted from a, two polynomial
+ *    values are computed on the reduced argument, and one of them,
+ *    possibly negated, is chosen from n modulo 4 (second if-block);
+ *  - a < 0x3A000000, or 65536.0 < a with a below the infinity
+ *    pattern: 1.0;
+ *  - a == 0x7F800000 (infinity): 0xFFC00000 (a NaN pattern);
+ *  - a above 0x7F800000 as an integer (NaN input): 0x7FC00000.
+ * NOTE(review): the two polynomial values in the second block are
+ * presumably the sine and cosine of the reduced argument -- confirm.
+ */
+"mdef(235)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n"
+"dcl_literal l1, 0x3A000000, 0x3A000000, 0x3A000000, 0x3A000000\n"
+"ige r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+"lt r0.___w, r0.y, l2\n"
+"and r0.__z_, r0.z, r0.w\n"
+"if_logicalnz r0.z\n" /* small-argument path: no range reduction */
+" mul_ieee r0.__z_, r0.y, r0.y\n"
+" \n"
+" dcl_literal l3, 0xAD47D74E, 0xAD47D74E, 0xAD47D74E, 0xAD47D74E\n"
+" \n"
+" dcl_literal l4, 0x310F74F6, 0x310F74F6, 0x310F74F6, 0x310F74F6\n"
+" mad_ieee r0.___w, r0.z, l3, l4\n"
+" \n"
+" dcl_literal l5, 0xB492923A, 0xB492923A, 0xB492923A, 0xB492923A\n"
+" mad_ieee r0.___w, r0.z, r0.w, l5\n"
+" \n"
+" dcl_literal l6, 0x37D00AE2, 0x37D00AE2, 0x37D00AE2, 0x37D00AE2\n"
+" mad_ieee r0.___w, r0.z, r0.w, l6\n"
+" \n"
+" dcl_literal l7, 0xBAB60B60, 0xBAB60B60, 0xBAB60B60, 0xBAB60B60\n"
+" mad_ieee r0.___w, r0.z, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r0.___w, r0.z, r0.w, l8\n"
+" mul_ieee r1.x___, r0.z, r0.z\n"
+" \n"
+" dcl_literal l9, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1._y__, r0.x, l9\n"
+" \n"
+" dcl_literal l10, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r1.__z_, r1.y, l10\n"
+" \n"
+" dcl_literal l11, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r1.___w, l11, r1.y\n"
+" and r1.__z_, r1.z, r1.w\n"
+" \n"
+" dcl_literal l12, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r1.___w, r1.y, l12\n"
+" and r1.__z_, r1.z, r1.w\n"
+" \n"
+" dcl_literal l13, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r1._y__, l13, r1.y\n"
+" \n"
+" dcl_literal l14, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r1._y__, r1.y, l14, r1.z\n"
+" \n"
+" dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r0.__z_, r0.z, l15, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1._y__, r1.y_neg(xyzw), l16\n"
+" mad_ieee r0.__z_, r0.w_neg(xyzw), r1.x, r0.z\n"
+" add r0.__z_, r1.y, r0.z_neg(xyzw)\n"
+"else\n"
+" \n"
+" dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.__z_, l17\n"
+"endif\n"
+"dcl_literal l18, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+"ge r0.___w, r0.y, l18\n"
+"dcl_literal l19, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n"
+"ge r1.x___, l19, r0.y\n"
+"and r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n" /* range-reduction path */
+" \n"
+" dcl_literal l20, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n"
+" mul_ieee r0.___w, r0.y, l20\n"
+" round_nearest r0.___w, r0.w\n" /* r0.w = n, the multiple to remove */
+" \n"
+" dcl_literal l21, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r1.x___, r0.w, l21\n" /* split n into a high part (r1.x) and the rest (r1.y) */
+" add r1._y__, r0.w, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l22, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+" mul_ieee r1.__z_, r0.w, l22\n" /* n * 0.25, used further down to derive n modulo 4 */
+" \n"
+" dcl_literal l23, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n"
+" mul_ieee r2.xyz_, r0.w, l23\n"
+" \n"
+" dcl_literal l24, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l24, r2.xyxx_neg(xyzw)\n"
+" \n"
+" dcl_literal l25, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l25, r3.xyxx\n"
+" \n"
+" dcl_literal l26, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l26, r3.xyxx\n"
+" \n"
+" dcl_literal l27, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l27, r3.xyxx\n"
+" add r0.___w, r0.y, r2.x_neg(xyzw)\n"
+" add r1.___w, r0.y, r0.w_neg(xyzw)\n"
+" add r1.___w, r2.x_neg(xyzw), r1.w\n"
+" add r1.___w, r3.x_neg(xyzw), r1.w\n"
+" add r0.___w, r0.w, r1.w\n"
+" add r1.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r3.y_neg(xyzw), r0.w\n"
+" add r0.___w, r1.w, r0.w\n"
+" \n"
+" dcl_literal l28, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.___w, r1.x, l28, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l29, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.x, l29, r1.w\n"
+" \n"
+" dcl_literal l30, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.x___, r1.y, l30, r1.x\n"
+" \n"
+" dcl_literal l31, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.y, l31, r1.x\n"
+" add r1._y__, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.y_neg(xyzw)\n"
+" add r0.___w, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r1.y, r0.w\n"
+" frc r1._y__, r1.z\n"
+" \n"
+" dcl_literal l32, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+" mul_ieee r1._y__, r1.y, l32\n"
+" round_nearest r1._y__, r1.y\n" /* r1.y = n modulo 4, compared against 0..3 below */
+" mul_ieee r1.__z_, r0.w, r0.w\n"
+" mul_ieee r1.___w, r0.w, r1.z\n"
+" \n"
+" dcl_literal l33, 0x2F2EC9D3, 0xAD47D74E, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l34, 0xB2D72F34, 0x310F74F6, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, l33, l34\n" /* two polynomials advanced side by side in x and y */
+" \n"
+" dcl_literal l35, 0x3636DF25, 0xB492923A, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l35\n"
+" \n"
+" dcl_literal l36, 0xB95009D4, 0x37D00AE2, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l36\n"
+" \n"
+" dcl_literal l37, 0x3C088887, 0xBAB60B60, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l37\n"
+" mul_ieee r2.x___, r1.w, r2.x\n"
+" \n"
+" dcl_literal l38, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+" mad_ieee r2.x___, r1.x, l38, r2.x_neg(xyzw)\n"
+" mad_ieee r2.x___, r1.z, r2.x, r1.x\n"
+" \n"
+" dcl_literal l39, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mad_ieee r1.___w, r1.w_neg(xyzw), l39, r2.x\n"
+" add r1.___w, r0.w, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l40, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r2.x___, r1.z, r2.y, l40\n"
+" mul_ieee r2._y__, r1.z, r1.z\n"
+" \n"
+" dcl_literal l41, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.__z_, r0.w, l41\n"
+" \n"
+" dcl_literal l42, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r2.___w, r2.z, l42\n"
+" \n"
+" dcl_literal l43, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r3.x___, l43, r2.z\n"
+" and r2.___w, r2.w, r3.x\n"
+" \n"
+" dcl_literal l44, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r3.x___, r2.z, l44\n"
+" and r2.___w, r2.w, r3.x\n"
+" \n"
+" dcl_literal l45, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r2.__z_, l45, r2.z\n"
+" \n"
+" dcl_literal l46, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r2.__z_, r2.z, l46, r2.w\n"
+" \n"
+" dcl_literal l47, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1.__z_, r1.z, l47, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l48, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r2.__z_, r2.z_neg(xyzw), l48\n"
+" mul_ieee r0.___w, r1.x_neg(xyzw), r0.w\n"
+" mad_ieee r0.___w, r2.x, r2.y, r0.w_neg(xyzw)\n"
+" add r0.___w, r1.z, r0.w_neg(xyzw)\n"
+" add r0.___w, r2.z, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+" eq r2, r1.y, l49\n" /* one flag per value 0.0, 1.0, 2.0, 3.0 */
+" and r1.x___, r0.w, r2.x\n"
+" cmov_logical r1.x___, r2.y, r1.w_neg(xyzw), r1.x\n"
+" cmov_logical r0.___w, r2.z, r0.w_neg(xyzw), r1.x\n"
+" cmov_logical r0.__z_, r2.w, r1.w, r0.w\n"
+"endif\n"
+"dcl_literal l50, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n"
+"lt r0.___w, l50, r0.y\n"
+"dcl_literal l51, 0x3A000000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, r0.y, l51\n"
+"and r0.___w, r0.w, r1.y\n"
+"ior r0.___w, r1.x, r0.w\n"
+"dcl_literal l52, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r0.w, l52, r0.z\n" /* tiny or very large finite magnitude: result 1.0 */
+"dcl_literal l53, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l53\n"
+"dcl_literal l54, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.__z_, r0.w, l54, r0.z\n" /* infinite input */
+"dcl_literal l55, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l55, r0.y\n"
+"dcl_literal l56, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l56, r0.z\n" /* NaN input, otherwise the value selected above */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__half_divide_f32",
+"mdef(236)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7F800000, 0x7F800000\n"
+"and r1, r0.yxyx, l0\n"
+"dcl_literal l1, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2, r1.wzzw, l1\n"
+"dcl_literal l2, 0x00800000, 0x00800000, 0x00800000, 0x00000000\n"
+"ilt r3.xyz_, r1.xxyx, l2\n"
+"dcl_literal l3, 0xFFFFFF81, 0xFFFFFF9D, 0xFFFFFF81, 0xFFFFFF9D\n"
+"iadd r2, r2.yzxw, l3\n"
+"mov r4.x_z_, r2.yyxy\n"
+"dcl_literal l4, 0x00000000, 0x41E00000, 0x00000000, 0x00000000\n"
+"mov r4._y_w, l4\n"
+"cmov_logical r2.xy__, r3.xyxx, r4.xyxx, r4.zwzz\n"
+"dcl_literal l5, 0x00000000, 0x42280000, 0x41600000, 0x00000000\n"
+"dcl_literal l6, 0xC1E00000, 0x41600000, 0xC1600000, 0x00000000\n"
+"cmov_logical r4.xyz_, r3.xzyx, l5, l6\n"
+"mov r3.x___, r2.w\n"
+"dcl_literal l7, 0x41E00000, 0x41E00000, 0x41E00000, 0x41E00000\n"
+"mov r3._y__, l7\n"
+"mov r3.___w, r4.x\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r2.___w, l8\n"
+"cmov_logical r3.xy_w, r3.z, r3.xyxw, r2.zwzy\n"
+"iadd r0.__z_, r3.x, r2.x_neg(xyzw)\n"
+"dcl_literal l9, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83, 0xFFFFFF83\n"
+"ilt r0.__z_, r0.z, l9\n"
+"dcl_literal l10, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r0.___w, l10, r2.x\n"
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l11, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2, 0xFFFFFFF2\n"
+"iadd r4.x___, r2.x, l11\n"
+"dcl_literal l12, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n"
+"add r4.___w, r3.w, l12\n"
+"mov r3.x_z_, r2.xxyx\n"
+"cmov_logical r2, r0.z, r4, r3\n"
+"dcl_literal l13, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r0.__z_, l13, r2.x\n"
+"dcl_literal l14, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r0.___w, l14, r2.x\n"
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l15, 0xC1600000, 0xC1600000, 0x00000000, 0x00000000\n"
+"add r3.xy__, r2.zwzz, l15\n"
+"cmov_logical r0.__zw, r0.z, r3.xxxy, r2.zzzw\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r2.x___, r2.y, l16\n"
+"if_logicalnz r2.x\n"
+" ftoi r2.x___, r2.y\n"
+" \n"
+" dcl_literal l17, 0x00000000, 0x80000000, 0x007FFFFF, 0x00000000\n"
+" and r2._yz_, r0.x, l17\n"
+" itof r2.__z_, r2.z\n"
+" cmov_logical r1._y__, r1.w, r1.y, r2.z\n"
+" \n"
+" dcl_literal l18, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.__z_, r2.x, l18\n"
+" cmov_logical r1.___w, r1.w, r2.x, r2.z\n"
+" \n"
+" dcl_literal l19, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r2.x_z_, r1.y, l19\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.x___, r2.x, l20\n"
+" iadd r2.x___, r2.x, r1.w\n"
+" \n"
+" dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l21\n"
+" iadd r1._y__, r1.y, r1.w\n"
+" \n"
+" dcl_literal l22, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.___w, r2.x, l22\n"
+" \n"
+" dcl_literal l23, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.x___, l23, r1.w\n"
+" \n"
+" dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1._y__, r2.x, l24, r1.y\n"
+" \n"
+" dcl_literal l25, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.___w, l25, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l26, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r2.z, l26\n"
+" \n"
+" dcl_literal l27, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r2.__zw, l27, r1.w\n"
+" \n"
+" dcl_literal l28, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r2.z, l28, r1.w\n"
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.___w, r1.w, l29\n"
+" \n"
+" dcl_literal l30, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.___w, r1.w, l30, r2.z\n"
+" ishr r1.___w, r2.x, r1.w\n"
+" cmov_logical r1._y__, r2.w, r1.w, r1.y\n"
+" ior r0.x___, r2.y, r1.y\n"
+"endif\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r1._y_w, r0.zzzw, l31\n"
+"if_logicalnz r1.y\n"
+" ftoi r1._y__, r0.z_abs\n"
+" \n"
+" dcl_literal l32, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r0.__z_, r0.z, l32\n"
+" inegate r2.x___, r1.y\n"
+" cmov_logical r0.__z_, r0.z, r2.x, r1.y\n"
+" \n"
+" dcl_literal l33, 0x80000000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r0.y, l33\n"
+" itof r1._y__, r2.y\n"
+" cmov_logical r1.x___, r1.z, r1.x, r1.y\n"
+" \n"
+" dcl_literal l34, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r1._y__, r0.z, l34\n"
+" cmov_logical r0.__z_, r1.z, r0.z, r1.y\n"
+" \n"
+" dcl_literal l35, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r1._yz_, r1.x, l35\n"
+" \n"
+" dcl_literal l36, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1._y__, r1.y, l36\n"
+" iadd r1._y__, r1.y, r0.z\n"
+" \n"
+" dcl_literal l37, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.__z_, r0.z, l37\n"
+" iadd r0.__z_, r1.x, r0.z\n"
+" \n"
+" dcl_literal l38, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.x___, r1.y, l38\n"
+" \n"
+" dcl_literal l39, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r1._y__, l39, r1.x\n"
+" \n"
+" dcl_literal l40, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.__z_, r1.y, l40, r0.z\n"
+" \n"
+" dcl_literal l41, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l41, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l42, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1._y__, r1.z, l42\n"
+" \n"
+" dcl_literal l43, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r2._yz_, l43, r1.x\n"
+" \n"
+" dcl_literal l44, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.y, l44, r1.x\n"
+" \n"
+" dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, r1.x, l45\n"
+" \n"
+" dcl_literal l46, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.x___, r1.x, l46, r1.z\n"
+" ishr r1.x___, r1.y, r1.x\n"
+" cmov_logical r0.__z_, r2.z, r1.x, r0.z\n"
+" ior r0._y__, r2.x, r0.z\n"
+"endif\n"
+"dcl_literal l47, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r0._y__, l47, r0.y\n"
+"mul_ieee r0.x___, r0.x, r0.y\n"
+"dcl_literal l48, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r1.x___, r0.x, l48\n"
+"dcl_literal l49, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1._y__, r1.x, l49\n"
+"and r1._y__, r1.w, r1.y\n"
+"if_logicalnz r1.y\n"
+" ftoi r1._y__, r0.w_abs\n"
+" \n"
+" dcl_literal l50, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r0.___w, r0.w, l50\n"
+" inegate r1.__z_, r1.y\n"
+" cmov_logical r0.___w, r0.w, r1.z, r1.y\n"
+" \n"
+" dcl_literal l51, 0x00000000, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r1._yzw, r0.x, l51\n"
+" itof r1.___w, r1.w\n"
+" cmov_logical r1.x___, r1.z, r1.x, r1.w\n"
+" \n"
+" dcl_literal l52, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r1.___w, r0.w, l52\n"
+" cmov_logical r0.___w, r1.z, r0.w, r1.w\n"
+" \n"
+" dcl_literal l53, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r1.__zw, r1.x, l53\n"
+" \n"
+" dcl_literal l54, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l54\n"
+" iadd r1.__z_, r1.z, r0.w\n"
+" \n"
+" dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l55\n"
+" iadd r0.___w, r1.x, r0.w\n"
+" \n"
+" dcl_literal l56, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.x___, r1.z, l56\n"
+" \n"
+" dcl_literal l57, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r1.__z_, l57, r1.x\n"
+" \n"
+" dcl_literal l58, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r1.z, l58, r0.w\n"
+" \n"
+" dcl_literal l59, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l59, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l60, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.__z_, r1.w, l60\n"
+" \n"
+" dcl_literal l61, 0x00000017, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.xy__, l61, r1.x\n"
+" \n"
+" dcl_literal l62, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.___w, r2.x, l62, r1.x\n"
+" \n"
+" dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, r1.x, l63\n"
+" \n"
+" dcl_literal l64, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.x___, r1.x, l64, r1.w\n"
+" ishr r1.x___, r1.z, r1.x\n"
+" cmov_logical r0.___w, r2.y, r1.x, r0.w\n"
+" ior r0.x___, r1.y, r0.w\n"
+"endif\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__half_exp10_f32", /* Table entry named __half_exp10_f32: AMD IL macro text that evaluates 2^(x*log2(10)) and then patches special inputs. */
+"mdef(237)_out(1)_in(1)\n" /* macro 237, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"dcl_literal l0, 0x421A209B, 0x421A209B, 0x421A209B, 0x421A209B\n" /* ~= 38.5318, about 128*log10(2) */
+"ge r0._y__, r0.x, l0\n" /* r0.y = (x >= l0), float compare */
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x7F800000\n" /* .z: everything but the sign bit, .w: exponent field */
+"and r0.__zw, r0.x, l1\n" /* r0.z = |x| bits, r0.w = exponent bits */
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+"ilt r1.x___, r0.z, l2\n" /* r1.x = |x| bits below +inf, i.e. x is finite */
+"and r0._y__, r0.y, r1.x\n" /* r0.y = finite overflow case */
+"dcl_literal l3, 0x40549A78, 0x40549A78, 0x40549A78, 0x40549A78\n" /* ~= 3.321928, log2(10) */
+"mul_ieee r1._y__, r0.x, l3\n"
+"exp_vec r1._y__, r1.y\n" /* r1.y = 2^(x*log2(10)), presuming exp_vec is a base-2 exponential */
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l4, r1.y\n" /* overflow ? +inf : computed value */
+"dcl_literal l5, 0xC23369F4, 0xC23369F4, 0xC23369F4, 0xC23369F4\n" /* ~= -44.8535, about -149*log10(2) */
+"lt r1._y__, r0.x, l5\n"
+"and r1.x___, r1.x, r1.y\n" /* r1.x = finite underflow case */
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r1.x, l6, r0.y\n" /* underflow -> 0 */
+"dcl_literal l7, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit */
+"ior r1.x___, r0.x, l7\n" /* x with the l7 bits forced on */
+"dcl_literal l8, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1._y__, l8, r0.z\n" /* r1.y = |x| bits above +inf, i.e. x is NaN */
+"cmov_logical r0._y__, r1.y, r1.x, r0.y\n" /* NaN input -> x | 0x7FC00000 */
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.x, l9\n" /* r1.x = x bits negative as a signed integer (sign bit set) */
+"dcl_literal l10, 0x00000000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ieq r1._yz_, r0.zzwz, l10\n" /* r1.y = (|x| bits == +inf), r1.z = (exponent field == 0) */
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.z, l11\n" /* r0.z = (|x| bits != 0) */
+"and r0.___w, r1.x, r1.y\n" /* r0.w = x is -inf */
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l12, r0.y\n" /* -inf -> 0 */
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l13, r0.x\n" /* r0.w = x bits positive as a signed integer */
+"and r0.__zw, r1.zzzy, r0.zzzw\n" /* r0.z = nonzero value with zero exponent field, r0.w = x is +inf */
+"cmov_logical r0._y__, r0.w, r0.x, r0.y\n" /* +inf -> x */
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0 */
+"add r0.___w, r0.x, l14\n" /* r0.w = x + 1.0 */
+"dcl_literal l15, 0x421A209A, 0x421A209A, 0x421A209A, 0x421A209A\n" /* bit pattern one below l0 */
+"eq r0.x___, r0.x, l15\n" /* r0.x = (x == l15); x itself is no longer needed after this */
+"cmov_logical r0._y__, r0.z, r0.w, r0.y\n" /* zero-exponent nonzero input -> x + 1.0 */
+"dcl_literal l16, 0x7F7FFFB3, 0x7F7FFFB3, 0x7F7FFFB3, 0x7F7FFFB3\n" /* finite value slightly below 0x7F7FFFFF (largest finite float) */
+"cmov_logical r0.x___, r0.x, l16, r0.y\n" /* result in r0.x: l16 for the exact l15 input, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_exp2_f32", /* Table entry named __half_exp2_f32: AMD IL macro text that evaluates exp_vec(x) and then patches special inputs. */
+"mdef(238)_out(1)_in(1)\n" /* macro 238, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"\n"
+"dcl_literal l0, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n" /* 128.0 */
+"ge r0._y__, r0.x, l0\n" /* r0.y = (x >= 128.0), float compare */
+"\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__z_, r0.x, l1\n" /* r0.z = |x| bits (sign cleared) */
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+"ilt r0.___w, r0.z, l2\n" /* r0.w = |x| bits below +inf, i.e. x is finite */
+"and r0._y__, r0.y, r0.w\n" /* r0.y = finite overflow case */
+"exp_vec r1.x___, r0.x\n" /* r1.x = 2^x, presuming exp_vec is a base-2 exponential */
+"\n"
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l3, r1.x\n" /* overflow ? +inf : computed value */
+"\n"
+"dcl_literal l4, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n" /* -149.0 */
+"lt r1.x___, r0.x, l4\n"
+"and r0.___w, r0.w, r1.x\n" /* r0.w = finite underflow case */
+"\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l5, r0.y\n" /* underflow -> 0 */
+"\n"
+"dcl_literal l6, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, l6, r0.z\n" /* r0.w = |x| bits above +inf, i.e. x is NaN */
+"\n"
+"dcl_literal l7, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.z, l7\n" /* r0.z = |x| is infinite */
+"\n"
+"dcl_literal l8, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit */
+"ior r1.x___, r0.x, l8\n"
+"cmov_logical r0._y__, r0.w, r1.x, r0.y\n" /* NaN input -> x | 0x7FC00000 */
+"\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l9\n" /* r0.w = sign bit set (signed integer compare) */
+"and r0.___w, r0.z, r0.w\n" /* r0.w = x is -inf */
+"\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l10, r0.y\n" /* -inf -> 0 */
+"\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l11, r0.x\n" /* r0.w = x bits positive as a signed integer */
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = x is +inf */
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* result in r0.x: +inf passes through, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_exp_f32", /* Table entry named __half_exp_f32: AMD IL macro text that evaluates exp_vec(x*log2(e)) and then patches special inputs. */
+"mdef(239)_out(1)_in(1)\n" /* macro 239, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"\n"
+"dcl_literal l0, 0x42B17218, 0x42B17218, 0x42B17218, 0x42B17218\n" /* ~= 88.7228, about 128*ln(2) */
+"ge r0._y__, r0.x, l0\n" /* r0.y = (x >= l0), float compare */
+"\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__z_, r0.x, l1\n" /* r0.z = |x| bits (sign cleared) */
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +inf bit pattern */
+"ilt r0.___w, r0.z, l2\n" /* r0.w = |x| bits below +inf, i.e. x is finite */
+"and r0._y__, r0.y, r0.w\n" /* r0.y = finite overflow case */
+"\n"
+"dcl_literal l3, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* ~= 1.442695, log2(e) */
+"mul_ieee r1.x___, r0.x, l3\n"
+"exp_vec r1.x___, r1.x\n" /* r1.x = 2^(x*log2(e)), presuming exp_vec is a base-2 exponential */
+"\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l4, r1.x\n" /* overflow ? +inf : computed value */
+"\n"
+"dcl_literal l5, 0xC2CFF1B5, 0xC2CFF1B5, 0xC2CFF1B5, 0xC2CFF1B5\n" /* ~= -103.97, about -150*ln(2) */
+"lt r1.x___, r0.x, l5\n"
+"and r0.___w, r0.w, r1.x\n" /* r0.w = finite underflow case */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l6, r0.y\n" /* underflow -> 0 */
+"\n"
+"dcl_literal l7, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, l7, r0.z\n" /* r0.w = |x| bits above +inf, i.e. x is NaN */
+"\n"
+"dcl_literal l8, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.z, l8\n" /* r0.z = |x| is infinite */
+"\n"
+"dcl_literal l9, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus top mantissa bit */
+"ior r1.x___, r0.x, l9\n"
+"cmov_logical r0._y__, r0.w, r1.x, r0.y\n" /* NaN input -> x | 0x7FC00000 */
+"\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l10\n" /* r0.w = sign bit set (signed integer compare) */
+"and r0.___w, r0.z, r0.w\n" /* r0.w = x is -inf */
+"\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l11, r0.y\n" /* -inf -> 0 */
+"\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l12, r0.x\n" /* r0.w = x bits positive as a signed integer */
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = x is +inf */
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* result in r0.x: +inf passes through, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_log10_f32", /* Table entry named __half_log10_f32: AMD IL macro text for a base-10 logarithm of in0.x, with fix-ups for zero, negative, +inf and NaN inputs. */
+"mdef(240)_out(1)_in(1)\n" /* macro 240, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = |x| bits (sign cleared) */
+"dcl_literal l1, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n" /* ~= 0.999756 */
+"ige r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x3F801000, 0x3F801000, 0x3F801000, 0x3F801000\n" /* ~= 1.000488 */
+"ige r0.___w, l2, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = |x| lies inside the window [l1, l2] around 1.0 */
+"if_logicalnz r0.z\n" /* near-1 path: polynomial evaluation, log_vec is not used here */
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = mantissa width */
+" ishr r0.__z_, r0.x, l3\n" /* bits above the mantissa */
+" \n"
+" dcl_literal l4, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+" iadd r0.__z_, r0.z, l4\n" /* remove the exponent bias */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l5\n" /* r0.w = mantissa field */
+" \n"
+" dcl_literal l6, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n" /* 0x00800000 - l6 = 0x003504E0, close to the mantissa field of sqrt(2) */
+" iadd r1.x___, l6, r0.w\n"
+" \n"
+" dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" and r1.x___, r1.x, l7\n" /* r1.x = 0x00800000 when mantissa + l6 carries into bit 23, else 0 */
+" \n"
+" dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ixor r1._y__, r1.x, l8\n" /* exponent bits of 1.0, or of 0.5 when the carry occurred */
+" ior r0.___w, r0.w, r1.y\n" /* r0.w = m, the mantissa rebuilt with that exponent */
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l9\n" /* carry as 0 or 1 */
+" iadd r0.__z_, r0.z, r1.x\n"
+" itof r0.__z_, r0.z\n" /* r0.z = k, adjusted exponent as float */
+" \n"
+" dcl_literal l10, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n" /* (-1.0, +1.0) */
+" add r1.xy__, r0.w, l10\n" /* r1.x = f = m - 1, r1.y = m + 1 */
+" div_zeroop(infinity) r0.___w, r1.x, r1.y\n" /* s = f / (m + 1) */
+" mul_ieee r1._y__, r0.w, r0.w\n" /* z = s*s */
+" mul_ieee r1.__z_, r1.y, r1.y\n" /* w = z*z */
+" \n"
+" dcl_literal l11, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n" /* NOTE(review): l11..l14 look like the fdlibm logf Lg1..Lg7 coefficients -- confirm */
+" \n"
+" dcl_literal l12, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, l11, l12\n" /* two polynomials in w evaluated side by side in .x and .y */
+" \n"
+" dcl_literal l13, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l13\n"
+" mul_ieee r1.___w, r1.z, r2.x\n" /* w * (.x polynomial) */
+" \n"
+" dcl_literal l14, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n" /* ~= 0.6666667 */
+" mad_ieee r1.__z_, r1.z, r2.y, l14\n"
+" mad_ieee r1._y__, r1.y, r1.z, r1.w\n" /* R = z*(.y chain) + w*(.x chain) */
+" mul_ieee r1.__z_, r1.x, r1.x\n" /* f*f */
+" \n"
+" dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+" mul_ieee r1.___w, r1.z, l15\n" /* hfsq = 0.5*f*f */
+" \n"
+" dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1._y__, r1.z, l16, r1.y\n" /* hfsq + R */
+" mad_ieee r0.___w, r0.w_neg(xyzw), r1.y, r1.w\n" /* hfsq - s*(hfsq + R) */
+" add r0.___w, r1.x_neg(xyzw), r0.w\n" /* ... - f */
+" \n"
+" dcl_literal l17, 0x3EDE5BD9, 0x3EDE5BD9, 0x3EDE5BD9, 0x3EDE5BD9\n" /* ~= 0.4342945, log10(e) */
+" mul_ieee r0.___w, r0.w, l17\n"
+" \n"
+" dcl_literal l18, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* ~= 0.30103, log10(2) */
+" mad_ieee r0.__z_, r0.z, l18, r0.w_neg(xyzw)\n" /* r0.z = k*log10(2) - r0.w */
+"endif\n"
+"dcl_literal l19, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n"
+"ilt r0.___w, r0.y, l19\n" /* |x| below the window */
+"dcl_literal l20, 0x3F801000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, l20, r0.y\n" /* r1.x = |x| above the window, r1.y = |x| bits above +inf (NaN); r1.y is read again at the very end */
+"ior r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n" /* path for inputs outside the window: based on log_vec */
+" \n"
+" dcl_literal l21, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l21\n"
+" itof r0.___w, r0.w\n" /* mantissa field converted int -> float; NOTE(review): looks like renormalisation of denormal inputs -- confirm */
+" \n"
+" dcl_literal l22, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r1.x_z_, r0.w, l22\n" /* exponent and mantissa fields of the converted value */
+" \n"
+" dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r1.x, l23\n"
+" \n"
+" dcl_literal l24, 0x0000001C, 0x0000001C, 0x0000001C, 0x0000001C\n" /* 28 */
+" iadd r0.___w, r0.w, l24\n"
+" \n"
+" dcl_literal l25, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.x___, r1.z, l25\n" /* mantissa with bit 23 set */
+" \n"
+" dcl_literal l26, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r0.___w, l26, r0.w_neg(xyzw)\n" /* 150 - (exponent + 28) */
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.__z_, l27, r0.w\n"
+" \n"
+" dcl_literal l28, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r1.z, l28, r0.w\n" /* values above 23 are replaced by 24 */
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, l29, r0.w\n" /* r1.z = amount is positive */
+" ishr r1.___w, r1.x, r0.w\n" /* candidate A: mantissa shifted right by the amount */
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l30\n"
+" iadd r0.___w, r1.x, r0.w\n" /* candidate B: mantissa plus (-amount << 23) */
+" cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* positive amount ? A : B */
+" \n"
+" dcl_literal l31, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit pattern of the smallest normal float */
+" ilt r1.x___, r0.x, l31\n" /* r1.x = x bits below l31 as a signed integer */
+" \n"
+" dcl_literal l32, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n" /* -28.0 */
+" and r1.__z_, r1.x, l32\n" /* r1.z = -28.0 when r1.x is set, else 0 */
+" cmov_logical r0.___w, r1.x, r0.w, r0.x\n" /* rebuilt value when r1.x is set, otherwise x itself */
+" log_vec r0.___w, r0.w\n" /* presumably log2 of the selected value */
+" add r0.___w, r1.z, r0.w\n" /* apply the -28 offset when it was selected */
+" \n"
+" dcl_literal l33, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* ~= 0.30103, log10(2) */
+" mul_ieee r0.__z_, r0.w, l33\n" /* r0.z = r0.w * log10(2) */
+"endif\n"
+"dcl_literal l34, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n" /* -inf bit pattern */
+"cmov_logical r0.__z_, r0.y, r0.z, l34\n" /* |x| bits == 0 -> -inf */
+"dcl_literal l35, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l35\n" /* sign bit set */
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0._y__, r0.y, l36\n"
+"and r0._y__, r0.w, r0.y\n" /* r0.y = negative and nonzero */
+"dcl_literal l37, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* NaN bit pattern with sign set */
+"cmov_logical r0._y__, r0.y, l37, r0.z\n" /* negative nonzero input -> l37 */
+"dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.x, l38\n" /* x is +inf */
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* +inf passes through */
+"dcl_literal l39, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l39\n"
+"cmov_logical r0.x___, r1.y, r0.x, r0.y\n" /* result in r0.x: NaN input -> x | 0x7FC00000, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_log2_f32", /* Table entry named __half_log2_f32: AMD IL macro text for a base-2 logarithm of in0.x, with fix-ups for zero, negative, +inf and NaN inputs. */
+"mdef(241)_out(1)_in(1)\n" /* macro 241, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = |x| bits (sign cleared) */
+"dcl_literal l1, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n" /* ~= 0.999756 */
+"ige r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x3F801000, 0x3F801000, 0x3F801000, 0x3F801000\n" /* ~= 1.000488 */
+"ige r0.___w, l2, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = |x| lies inside the window [l1, l2] around 1.0 */
+"if_logicalnz r0.z\n" /* near-1 path: polynomial evaluation, log_vec is not used here */
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = mantissa width */
+" ishr r0.__z_, r0.x, l3\n" /* bits above the mantissa */
+" \n"
+" dcl_literal l4, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+" iadd r0.__z_, r0.z, l4\n" /* remove the exponent bias */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l5\n" /* r0.w = mantissa field */
+" \n"
+" dcl_literal l6, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n" /* 0x00800000 - l6 = 0x003504E0, close to the mantissa field of sqrt(2) */
+" iadd r1.x___, l6, r0.w\n"
+" \n"
+" dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" and r1.x___, r1.x, l7\n" /* r1.x = 0x00800000 when mantissa + l6 carries into bit 23, else 0 */
+" \n"
+" dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ixor r1._y__, r1.x, l8\n" /* exponent bits of 1.0, or of 0.5 when the carry occurred */
+" ior r0.___w, r0.w, r1.y\n" /* r0.w = m, the mantissa rebuilt with that exponent */
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l9\n" /* carry as 0 or 1 */
+" iadd r0.__z_, r0.z, r1.x\n"
+" itof r0.__z_, r0.z\n" /* r0.z = k, adjusted exponent as float */
+" \n"
+" dcl_literal l10, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n" /* (-1.0, +1.0) */
+" add r1.xy__, r0.w, l10\n" /* r1.x = f = m - 1, r1.y = m + 1 */
+" div_zeroop(infinity) r0.___w, r1.x, r1.y\n" /* s = f / (m + 1) */
+" mul_ieee r1._y__, r0.w, r0.w\n" /* z = s*s */
+" mul_ieee r1.__z_, r1.y, r1.y\n" /* w = z*z */
+" \n"
+" dcl_literal l11, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n" /* NOTE(review): l11..l14 look like the fdlibm logf Lg1..Lg7 coefficients -- confirm */
+" \n"
+" dcl_literal l12, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, l11, l12\n" /* two polynomials in w evaluated side by side in .x and .y */
+" \n"
+" dcl_literal l13, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l13\n"
+" mul_ieee r1.___w, r1.z, r2.x\n" /* w * (.x polynomial) */
+" \n"
+" dcl_literal l14, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n" /* ~= 0.6666667 */
+" mad_ieee r1.__z_, r1.z, r2.y, l14\n"
+" mad_ieee r1._y__, r1.y, r1.z, r1.w\n" /* R = z*(.y chain) + w*(.x chain) */
+" mul_ieee r1.__z_, r1.x, r1.x\n" /* f*f */
+" \n"
+" dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+" mul_ieee r1.___w, r1.z, l15\n" /* hfsq = 0.5*f*f */
+" \n"
+" dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1._y__, r1.z, l16, r1.y\n" /* hfsq + R */
+" mad_ieee r0.___w, r0.w_neg(xyzw), r1.y, r1.w\n" /* hfsq - s*(hfsq + R) */
+" add r0.___w, r1.x_neg(xyzw), r0.w\n" /* ... - f */
+" \n"
+" dcl_literal l17, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* ~= 1.442695, log2(e) */
+" mad_ieee r0.__z_, r0.w_neg(xyzw), l17, r0.z\n" /* r0.z = k - r0.w*log2(e) */
+"endif\n"
+"dcl_literal l18, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n"
+"ilt r0.___w, r0.y, l18\n" /* |x| below the window */
+"dcl_literal l19, 0x3F801000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, l19, r0.y\n" /* r1.x = |x| above the window, r1.y = |x| bits above +inf (NaN); r1.y is read again at the very end */
+"ior r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n" /* path for inputs outside the window: based on log_vec */
+" \n"
+" dcl_literal l20, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l20\n"
+" itof r0.___w, r0.w\n" /* mantissa field converted int -> float; NOTE(review): looks like renormalisation of denormal inputs -- confirm */
+" \n"
+" dcl_literal l21, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r1.x_z_, r0.w, l21\n" /* exponent and mantissa fields of the converted value */
+" \n"
+" dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r1.x, l22\n"
+" \n"
+" dcl_literal l23, 0x0000001C, 0x0000001C, 0x0000001C, 0x0000001C\n" /* 28 */
+" iadd r0.___w, r0.w, l23\n"
+" \n"
+" dcl_literal l24, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.x___, r1.z, l24\n" /* mantissa with bit 23 set */
+" \n"
+" dcl_literal l25, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r0.___w, l25, r0.w_neg(xyzw)\n" /* 150 - (exponent + 28) */
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.__z_, l26, r0.w\n"
+" \n"
+" dcl_literal l27, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r1.z, l27, r0.w\n" /* values above 23 are replaced by 24 */
+" \n"
+" dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, l28, r0.w\n" /* r1.z = amount is positive */
+" ishr r1.___w, r1.x, r0.w\n" /* candidate A: mantissa shifted right by the amount */
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l29, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l29\n"
+" iadd r0.___w, r1.x, r0.w\n" /* candidate B: mantissa plus (-amount << 23) */
+" cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* positive amount ? A : B */
+" \n"
+" dcl_literal l30, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit pattern of the smallest normal float */
+" ilt r1.x___, r0.x, l30\n" /* r1.x = x bits below l30 as a signed integer */
+" \n"
+" dcl_literal l31, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n" /* -28.0 */
+" and r1.__z_, r1.x, l31\n" /* r1.z = -28.0 when r1.x is set, else 0 */
+" cmov_logical r0.___w, r1.x, r0.w, r0.x\n" /* rebuilt value when r1.x is set, otherwise x itself */
+" log_vec r0.___w, r0.w\n" /* presumably log2 of the selected value */
+" add r0.__z_, r1.z, r0.w\n" /* r0.z = log_vec result plus the -28 offset when it was selected */
+"endif\n"
+"dcl_literal l32, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n" /* -inf bit pattern */
+"cmov_logical r0.__z_, r0.y, r0.z, l32\n" /* |x| bits == 0 -> -inf */
+"dcl_literal l33, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l33\n" /* sign bit set */
+"dcl_literal l34, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0._y__, r0.y, l34\n"
+"and r0._y__, r0.w, r0.y\n" /* r0.y = negative and nonzero */
+"dcl_literal l35, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* NaN bit pattern with sign set */
+"cmov_logical r0._y__, r0.y, l35, r0.z\n" /* negative nonzero input -> l35 */
+"dcl_literal l36, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.x, l36\n" /* x is +inf */
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* +inf passes through */
+"dcl_literal l37, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.__z_, r0.x, l37\n"
+"cmov_logical r0.x___, r1.y, r0.z, r0.y\n" /* result in r0.x: NaN input -> x | 0x7FC00000, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_log_f32", /* Table entry named __half_log_f32: AMD IL macro text for a natural logarithm of in0.x, with fix-ups for zero, negative, +inf and NaN inputs. */
+"mdef(242)_out(1)_in(1)\n" /* macro 242, one output, one input. Line notes assume usual AMD IL opcode semantics -- TODO confirm against the IL spec. */
+"mov r0, in0\n" /* r0.x = x */
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = |x| bits (sign cleared) */
+"dcl_literal l1, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n" /* ~= 0.999756 */
+"ige r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x3F801000, 0x3F801000, 0x3F801000, 0x3F801000\n" /* ~= 1.000488 */
+"ige r0.___w, l2, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = |x| lies inside the window [l1, l2] around 1.0 */
+"if_logicalnz r0.z\n" /* near-1 path: polynomial evaluation, log_vec is not used here */
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = mantissa width */
+" ishr r0.__z_, r0.x, l3\n" /* bits above the mantissa */
+" \n"
+" dcl_literal l4, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+" iadd r0.__z_, r0.z, l4\n" /* remove the exponent bias */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l5\n" /* r0.w = mantissa field */
+" \n"
+" dcl_literal l6, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n" /* 0x00800000 - l6 = 0x003504E0, close to the mantissa field of sqrt(2) */
+" iadd r1.x___, l6, r0.w\n"
+" \n"
+" dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" and r1.x___, r1.x, l7\n" /* r1.x = 0x00800000 when mantissa + l6 carries into bit 23, else 0 */
+" \n"
+" dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ixor r1._y__, r1.x, l8\n" /* exponent bits of 1.0, or of 0.5 when the carry occurred */
+" ior r0.___w, r0.w, r1.y\n" /* r0.w = m, the mantissa rebuilt with that exponent */
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l9\n" /* carry as 0 or 1 */
+" iadd r0.__z_, r0.z, r1.x\n"
+" itof r0.__z_, r0.z\n" /* r0.z = k, adjusted exponent as float */
+" \n"
+" dcl_literal l10, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n" /* (-1.0, +1.0) */
+" add r1.xy__, r0.w, l10\n" /* r1.x = f = m - 1, r1.y = m + 1 */
+" div_zeroop(infinity) r0.___w, r1.x, r1.y\n" /* s = f / (m + 1) */
+" mul_ieee r1._y__, r0.w, r0.w\n" /* z = s*s */
+" mul_ieee r1.__z_, r1.y, r1.y\n" /* w = z*z */
+" \n"
+" dcl_literal l11, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n" /* NOTE(review): l11..l14 look like the fdlibm logf Lg1..Lg7 coefficients -- confirm */
+" \n"
+" dcl_literal l12, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, l11, l12\n" /* two polynomials in w evaluated side by side in .x and .y */
+" \n"
+" dcl_literal l13, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.z, r2.xyxx, l13\n"
+" mul_ieee r1.___w, r1.z, r2.x\n" /* w * (.x polynomial) */
+" \n"
+" dcl_literal l14, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n" /* ~= 0.6666667 */
+" mad_ieee r1.__z_, r1.z, r2.y, l14\n"
+" mad_ieee r1._y__, r1.y, r1.z, r1.w\n" /* R = z*(.y chain) + w*(.x chain) */
+" mul_ieee r1.__z_, r1.x, r1.x\n" /* f*f */
+" \n"
+" dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5 */
+" mad_ieee r1._y__, r1.z, l15, r1.y\n" /* hfsq + R, with hfsq = 0.5*f*f */
+" \n"
+" dcl_literal l16, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6; l16 + l18 ~= ln(2) */
+" mul_ieee r1.___w, r0.z, l16\n" /* k * l16 (low part of ln(2)) */
+" mad_ieee r0.___w, r0.w, r1.y, r1.w\n" /* s*(hfsq + R) + k*l16 */
+" \n"
+" dcl_literal l17, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r0.___w, r1.z, l17, r0.w_neg(xyzw)\n" /* hfsq - (s*(hfsq + R) + k*l16) */
+" add r0.___w, r1.x_neg(xyzw), r0.w\n" /* ... - f */
+" \n"
+" dcl_literal l18, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.693138, high part of ln(2) */
+" mad_ieee r0.__z_, r0.z, l18, r0.w_neg(xyzw)\n" /* r0.z = k*l18 - r0.w */
+"endif\n"
+"dcl_literal l19, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000, 0x3F7FF000\n"
+"ilt r0.___w, r0.y, l19\n" /* |x| below the window */
+"dcl_literal l20, 0x3F801000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, l20, r0.y\n" /* r1.x = |x| above the window, r1.y = |x| bits above +inf (NaN); r1.y is read again at the very end */
+"ior r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n" /* path for inputs outside the window: based on log_vec */
+" \n"
+" dcl_literal l21, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.x, l21\n"
+" itof r0.___w, r0.w\n" /* mantissa field converted int -> float; NOTE(review): looks like renormalisation of denormal inputs -- confirm */
+" \n"
+" dcl_literal l22, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r1.x_z_, r0.w, l22\n" /* exponent and mantissa fields of the converted value */
+" \n"
+" dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r1.x, l23\n"
+" \n"
+" dcl_literal l24, 0x0000001C, 0x0000001C, 0x0000001C, 0x0000001C\n" /* 28 */
+" iadd r0.___w, r0.w, l24\n"
+" \n"
+" dcl_literal l25, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.x___, r1.z, l25\n" /* mantissa with bit 23 set */
+" \n"
+" dcl_literal l26, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r0.___w, l26, r0.w_neg(xyzw)\n" /* 150 - (exponent + 28) */
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.__z_, l27, r0.w\n"
+" \n"
+" dcl_literal l28, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r1.z, l28, r0.w\n" /* values above 23 are replaced by 24 */
+" \n"
+" dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, l29, r0.w\n" /* r1.z = amount is positive */
+" ishr r1.___w, r1.x, r0.w\n" /* candidate A: mantissa shifted right by the amount */
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l30\n"
+" iadd r0.___w, r1.x, r0.w\n" /* candidate B: mantissa plus (-amount << 23) */
+" cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* positive amount ? A : B */
+" \n"
+" dcl_literal l31, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit pattern of the smallest normal float */
+" ilt r1.x___, r0.x, l31\n" /* r1.x = x bits below l31 as a signed integer */
+" \n"
+" dcl_literal l32, 0xC1E00000, 0xC1E00000, 0xC1E00000, 0xC1E00000\n" /* -28.0 */
+" and r1.__z_, r1.x, l32\n" /* r1.z = -28.0 when r1.x is set, else 0 */
+" cmov_logical r0.___w, r1.x, r0.w, r0.x\n" /* rebuilt value when r1.x is set, otherwise x itself */
+" log_vec r0.___w, r0.w\n" /* presumably log2 of the selected value */
+" add r0.___w, r1.z, r0.w\n" /* apply the -28 offset when it was selected */
+" \n"
+" dcl_literal l33, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* ~= 0.693147, ln(2) */
+" mul_ieee r0.__z_, r0.w, l33\n" /* r0.z = r0.w * ln(2) */
+"endif\n"
+"dcl_literal l34, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n" /* -inf bit pattern */
+"cmov_logical r0.__z_, r0.y, r0.z, l34\n" /* |x| bits == 0 -> -inf */
+"dcl_literal l35, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l35\n" /* sign bit set */
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0._y__, r0.y, l36\n"
+"and r0._y__, r0.w, r0.y\n" /* r0.y = negative and nonzero */
+"dcl_literal l37, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* NaN bit pattern with sign set */
+"cmov_logical r0._y__, r0.y, l37, r0.z\n" /* negative nonzero input -> l37 */
+"dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.x, l38\n" /* x is +inf */
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* +inf passes through */
+"dcl_literal l39, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l39\n"
+"cmov_logical r0.x___, r1.y, r0.x, r0.y\n" /* result in r0.x: NaN input -> x | 0x7FC00000, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; their meaning comes from the table's struct definition, which is not visible in this chunk */
+},
+{ "__half_powr_f32",
+"mdef(243)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x7FFFFFFF, 0x80000000\n"
+"and r1, r0.xxyy, l0\n"
+"itof r0.x___, r1.x\n"
+"dcl_literal l1, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r0.x_z_, r0.x, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.__z_, r0.z, l3\n"
+"dcl_literal l4, 0x00000024, 0x00000024, 0x00000024, 0x00000024\n"
+"iadd r0.x___, r0.x, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.x___, l5, r0.x_neg(xyzw)\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r0.___w, l6, r0.x\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.x___, r0.w, l7, r0.x\n"
+"inegate r0.___w, r0.x\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.___w, r0.w, l8\n"
+"iadd r0.___w, r0.z, r0.w\n"
+"ishr r0.__z_, r0.z, r0.x\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, l9, r0.x\n"
+"cmov_logical r0.x___, r0.x, r0.z, r0.w\n"
+"dcl_literal l10, 0x00800000, 0x2E800000, 0x3F800000, 0x00000000\n"
+"ilt r2.xyz_, r1.xzxx, l10\n"
+"cmov_logical r0.x___, r2.x, r0.x, r1.x\n"
+"dcl_literal l11, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r0.x_z_, r0.x, l11\n"
+"dcl_literal l12, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.__z_, r0.z, l12\n"
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l13\n"
+"dcl_literal l14, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r0.z, l14, l15\n"
+"round_z r2.___w, r0.w\n"
+"ftoi r0.___w, r0.w\n"
+"dcl_literal l16, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r0.__z_, r2.w_neg(xyzw), l16, r0.z\n"
+"add r3.x___, r0.z, cb0[1].x\n"
+"add r3._y__, r0.z, r3.x_neg(xyzw)\n"
+"dcl_literal l17, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mul_ieee r3.__z_, r2.w, l17\n"
+"dcl_literal l18, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.___w, r3.z, l18\n"
+"dcl_literal l19, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r2.___w, r2.w, l19, r3.w_neg(xyzw)\n"
+"div_zeroop(infinity) r0.__z_, r0.z, r3.z\n"
+"dcl_literal l20, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r4.x___, r0.z, l20\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r2.w, r3.x\n"
+"add r4.x___, r0.z, r4.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x\n"
+"mad_ieee r2.___w, r4.x, r2.w, r3.x\n"
+"add r2.___w, r3.y, r2.w_neg(xyzw)\n"
+"div_zeroop(infinity) r2.___w, r2.w, r3.z\n"
+"add r3.x___, r0.z, r2.w\n"
+"dcl_literal l21, 0x3E000000, 0x3E000000, 0x3E000000, 0x3E000000\n"
+"dcl_literal l22, 0x3E124925, 0x3E124925, 0x3E124925, 0x3E124925\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), l21, l22\n"
+"dcl_literal l23, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l23\n"
+"dcl_literal l24, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l24\n"
+"dcl_literal l25, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l25\n"
+"dcl_literal l26, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l26\n"
+"dcl_literal l27, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l27\n"
+"mul_ieee r3.__z_, r0.z_neg(xyzw), r0.z\n"
+"add r0.__z_, r0.z, r3.x_neg(xyzw)\n"
+"mad_ieee r3.___w, r3.y, r3.z, r3.x\n"
+"add r3.x___, r3.x, r3.w_neg(xyzw)\n"
+"mad_ieee r3.x___, r3.y, r3.z, r3.x\n"
+"add r0.__z_, r2.w, r0.z\n"
+"add r0.__z_, r3.x, r0.z\n"
+"add r2.___w, r3.w, r0.z\n"
+"add r3.x___, r3.w, r2.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x\n"
+"dcl_literal l28, 0x23BBBE88, 0x23BBBE88, 0x23BBBE88, 0x23BBBE88\n"
+"mul_ieee r3.x___, r2.w, l28\n"
+"dcl_literal l29, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r0.__z_, r0.z, l29, r3.x\n"
+"dcl_literal l30, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r2.w, l30\n"
+"add r3._y__, r2.w, r3.x_neg(xyzw)\n"
+"dcl_literal l31, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mul_ieee r2.___w, r2.w, l31\n"
+"dcl_literal l32, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.x, l32, r2.w_neg(xyzw)\n"
+"dcl_literal l33, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.y, l33, r3.z\n"
+"dcl_literal l34, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.x, l34, r3.z\n"
+"dcl_literal l35, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.y, l35, r3.x\n"
+"add r0.__z_, r0.z, r3.x\n"
+"add r3.x___, r2.w, r0.z\n"
+"add r2.___w, r2.w, r3.x_neg(xyzw)\n"
+"add r0.__z_, r0.z, r2.w\n"
+"dcl_literal l36, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0\n"
+"iadd r0.___w, r0.w, l36\n"
+"dcl_literal l37, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r2.___w, r0.w, l37\n"
+"dcl_literal l38, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l39, 0x00000000, 0x2FB85A45, 0x312C77EC, 0x31FD14FD\n"
+"cmov_logical r4, r2.w, l38, l39\n"
+"dcl_literal l40, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r5, r2.w, l40\n"
+"dcl_literal l41, 0x31D64899, 0x2F2DABBA, 0x31CDA79E, 0x309E2B87\n"
+"cmov_logical r4, r5.x, l41, r4\n"
+"dcl_literal l42, 0x324FDEB4, 0x3201781E, 0x31DB4EC9, 0x31C32597\n"
+"cmov_logical r4, r5.y, l42, r4\n"
+"dcl_literal l43, 0x32481340, 0x32D6985C, 0x3230E074, 0x326A4CDF\n"
+"cmov_logical r4, r5.z, l43, r4\n"
+"dcl_literal l44, 0x323CD1B9, 0x32CFDEB4, 0x3259D0CE, 0x31458715\n"
+"cmov_logical r4, r5.w, l44, r4\n"
+"dcl_literal l45, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+"ieq r6, r2.w, l45\n"
+"dcl_literal l46, 0x3211F171, 0x3227F605, 0x32B9C415, 0x325295B5\n"
+"cmov_logical r4, r6.x, l46, r4\n"
+"dcl_literal l47, 0x2F4BA83C, 0x30F3AA69, 0x320CB06D, 0x32B97998\n"
+"cmov_logical r4, r6.y, l47, r4\n"
+"dcl_literal l48, 0x30D63AA6, 0x33550F2A, 0x3374AE80, 0x3321393E\n"
+"cmov_logical r4, r6.z, l48, r4\n"
+"dcl_literal l49, 0x3267EF5A, 0x32AB49CA, 0x3355010B, 0x3374C355\n"
+"cmov_logical r4, r6.w, l49, r4\n"
+"dcl_literal l50, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+"ieq r7, r2.w, l50\n"
+"dcl_literal l51, 0x32BCD1B9, 0x3261151F, 0x325313A6, 0x3320F04D\n"
+"cmov_logical r4, r7.x, l51, r4\n"
+"dcl_literal l52, 0x32E4788D, 0x32A76195, 0x3332703B, 0x3349A817\n"
+"cmov_logical r4, r7.y, l52, r4\n"
+"dcl_literal l53, 0x332DF384, 0x32B06EF9, 0x33300016, 0x331565B0\n"
+"cmov_logical r4, r7.z, l53, r4\n"
+"dcl_literal l54, 0x336A8086, 0x331C70C1, 0x332C00A7, 0x3257990D\n"
+"cmov_logical r4, r7.w, l54, r4\n"
+"dcl_literal l55, 0x00000000, 0x0000000D, 0x0000000E, 0x0000000F\n"
+"ieq r3._yzw, r2.w, l55\n"
+"dcl_literal l56, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l57, 0x00000000, 0x3CB73CB4, 0x3D35D69B, 0x3D8759C4\n"
+"cmov_logical r8, r2.w, l56, l57\n"
+"dcl_literal l58, 0x32F5532E, 0x336633F3, 0x31B86815, 0x33654999\n"
+"cmov_logical r4, r3.y, l58, r4\n"
+"dcl_literal l59, 0x32D26089, 0x2FCBA83C, 0x32B3FF57, 0x32E0E014\n"
+"cmov_logical r4, r3.z, l59, r4\n"
+"dcl_literal l60, 0x331B1354, 0x330D9D4B, 0x32BCF065, 0x330BBE12\n"
+"cmov_logical r4, r3.w, l60, r4\n"
+"dcl_literal l61, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"and r2.___w, r0.w, l61\n"
+"dcl_literal l62, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r0.___w, r0.w, l62\n"
+"dcl_literal l63, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r9.xyz_, r2.w, l63\n"
+"cmov_logical r2.___w, r9.x, r4.y, r4.x\n"
+"cmov_logical r2.___w, r9.y, r4.z, r2.w\n"
+"cmov_logical r2.___w, r9.z, r4.w, r2.w\n"
+"dcl_literal l64, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r0.w, l64, r2.w\n"
+"add r4.x___, r3.x, r2.w\n"
+"add r3.x___, r3.x, r4.x_neg(xyzw)\n"
+"add r2.___w, r2.w, r3.x\n"
+"add r0.__z_, r0.z, r2.w\n"
+"add r2.___w, r4.x, r0.z\n"
+"add r3.x___, r4.x, r2.w_neg(xyzw)\n"
+"dcl_literal l65, 0x3DB31FB7, 0x3DDE4212, 0x3E0462C4, 0x3E19574F\n"
+"cmov_logical r4, r5.x, l65, r8\n"
+"dcl_literal l66, 0x3E2E00D1, 0x3E42615E, 0x3E567AF1, 0x3E6A4F72\n"
+"cmov_logical r4, r5.y, l66, r4\n"
+"dcl_literal l67, 0x3E7DE0B5, 0x3E88983E, 0x3E92203D, 0x3E9B8926\n"
+"cmov_logical r4, r5.z, l67, r4\n"
+"dcl_literal l68, 0x3EA4D3C2, 0x3EAE00D1, 0x3EB7110E, 0x3EC0052B\n"
+"cmov_logical r4, r5.w, l68, r4\n"
+"dcl_literal l69, 0x3EC8DDD4, 0x3ED19BB0, 0x3EDA3F5F, 0x3EE2C97D\n"
+"cmov_logical r4, r6.x, l69, r4\n"
+"dcl_literal l70, 0x3EEB3A9F, 0x3EF39355, 0x3EFBD42B, 0x3F01FED4\n"
+"cmov_logical r4, r6.y, l70, r4\n"
+"dcl_literal l71, 0x3F060828, 0x3F0A064F, 0x3F0DF988, 0x3F11E20E\n"
+"cmov_logical r4, r6.z, l71, r4\n"
+"dcl_literal l72, 0x3F15C01A, 0x3F1993E3, 0x3F1D5D9F, 0x3F211D83\n"
+"cmov_logical r4, r6.w, l72, r4\n"
+"dcl_literal l73, 0x3F24D3C2, 0x3F28808C, 0x3F2C2411, 0x3F2FBE7F\n"
+"cmov_logical r4, r7.x, l73, r4\n"
+"dcl_literal l74, 0x3F335004, 0x3F36D8CB, 0x3F3A58FE, 0x3F3DD0C7\n"
+"cmov_logical r4, r7.y, l74, r4\n"
+"dcl_literal l75, 0x3F41404E, 0x3F44A7BA, 0x3F480730, 0x3F4B5ED6\n"
+"cmov_logical r4, r7.z, l75, r4\n"
+"dcl_literal l76, 0x3F4EAECF, 0x3F51F73F, 0x3F553847, 0x3F587209\n"
+"cmov_logical r4, r7.w, l76, r4\n"
+"dcl_literal l77, 0x3F5BA4A4, 0x3F5ED038, 0x3F61F4E5, 0x3F6512C6\n"
+"cmov_logical r4, r3.y, l77, r4\n"
+"dcl_literal l78, 0x3F6829FB, 0x3F6B3A9F, 0x3F6E44CD, 0x3F7148A1\n"
+"cmov_logical r4, r3.z, l78, r4\n"
+"dcl_literal l79, 0x3F744635, 0x3F773DA3, 0x3F7A2F04, 0x3F7D1A70\n"
+"cmov_logical r4, r3.w, l79, r4\n"
+"cmov_logical r3._y__, r9.x, r4.y, r4.x\n"
+"cmov_logical r3._y__, r9.y, r4.z, r3.y\n"
+"cmov_logical r3._y__, r9.z, r4.w, r3.y\n"
+"dcl_literal l80, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.___w, r0.w, l80, r3.y\n"
+"add r3._y__, r2.w, r0.w\n"
+"add r0.___w, r0.w, r3.y_neg(xyzw)\n"
+"add r0.___w, r2.w, r0.w\n"
+"add r0.__z_, r0.z, r3.x\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r3.y, r0.z\n"
+"add r2.___w, r3.y, r0.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r2.w\n"
+"dcl_literal l81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.x___, r0.x, l81\n"
+"dcl_literal l82, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC\n"
+"and r2.x___, r2.x, l82\n"
+"iadd r0.x___, r0.x, r2.x\n"
+"itof r0.x___, r0.x\n"
+"add r2.x___, r0.w, r0.x\n"
+"add r0.x___, r0.x, r2.x_neg(xyzw)\n"
+"add r0.x___, r0.w, r0.x\n"
+"add r0.x___, r0.z, r0.x\n"
+"add r0.__z_, r2.x, r0.x\n"
+"add r0.___w, r2.x, r0.z_neg(xyzw)\n"
+"dcl_literal l83, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2.x___, r0.z, l83\n"
+"mul_ieee r2.___w, r0.z, r0.y\n"
+"dcl_literal l84, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r0.y, l84\n"
+"mad_ieee r3._y__, r2.x, r3.x, r2.w_neg(xyzw)\n"
+"add r3.__z_, r0.y, r3.x_neg(xyzw)\n"
+"mad_ieee r3._y__, r2.x, r3.z, r3.y\n"
+"add r2.x___, r0.z, r2.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r2.x, r3.x, r3.y\n"
+"mad_ieee r2.x___, r2.x, r3.z, r3.x\n"
+"add r0.x___, r0.x, r0.w\n"
+"mad_ieee r0.___w, r0.x, r0.y, r2.x\n"
+"add r0.x___, r0.z, r0.x\n"
+"add r0.__z_, r2.w, r0.w\n"
+"add r2.x___, r2.w, r0.z_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.x\n"
+"add r2.x___, r0.z, r0.w\n"
+"round_z r2.___w, r2.x\n"
+"ftoi r2.x___, r2.x\n"
+"add r3.x___, r0.z, r2.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x_neg(xyzw)\n"
+"add r0.__z_, r2.w_neg(xyzw), r0.z\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r3.x, r0.z\n"
+"add r3.x___, r3.x, r0.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x\n"
+"add r0.__z_, r0.w, r0.z\n"
+"dcl_literal l85, 0x42000000, 0x42000000, 0x00000000, 0x00000000\n"
+"dcl_literal l86, 0x3F000000, 0xBF000000, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r0.z, l85, l86\n"
+"ftoi r3.xy__, r3.xyxx\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0x3F317218, 0x42000000\n"
+"mul_ieee r3.__zw, r0.z, l87\n"
+"dcl_literal l88, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, l88, r3.w\n"
+"cmov_logical r0.___w, r0.w, r3.x, r3.y\n"
+"itof r3.x___, r0.w\n"
+"dcl_literal l89, 0x3D000000, 0x3D000000, 0x3D000000, 0x3D000000\n"
+"mad_ieee r3.x___, r3.x_neg(xyzw), l89, r0.z\n"
+"dcl_literal l90, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820\n"
+"dcl_literal l91, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r3.x, l90, l91\n"
+"dcl_literal l92, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mul_ieee r3.x___, r3.x, l92\n"
+"mul_ieee r3.___w, r3.x, r3.x\n"
+"mad_ieee r3.x___, r3.w, r3.y, r3.x\n"
+"dcl_literal l93, 0x00000000, 0x0000001F, 0x00000000, 0x00000003\n"
+"and r3._y_w, r0.w, l93\n"
+"iadd r0.___w, r0.w, r3.y_neg(xyzw)\n"
+"dcl_literal l94, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r4.xyz_, r3.w, l94\n"
+"dcl_literal l95, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r3._y__, r3.y, l95\n"
+"dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l97, 0x3F800000, 0x3F82C000, 0x3F858000, 0x3F888000\n"
+"cmov_logical r5, r3.y, l96, l97\n"
+"dcl_literal l98, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r6, r3.y, l98\n"
+"dcl_literal l99, 0x3F8B8000, 0x3F8E8000, 0x3F91C000, 0x3F94C000\n"
+"cmov_logical r5, r6.x, l99, r5\n"
+"dcl_literal l100, 0x3F980000, 0x3F9B8000, 0x3F9EC000, 0x3FA24000\n"
+"cmov_logical r5, r6.y, l100, r5\n"
+"dcl_literal l101, 0x3FA5C000, 0x3FA98000, 0x3FAD4000, 0x3FB10000\n"
+"cmov_logical r5, r6.z, l101, r5\n"
+"dcl_literal l102, 0x3FB50000, 0x3FB8C000, 0x3FBD0000, 0x3FC10000\n"
+"cmov_logical r5, r6.w, l102, r5\n"
+"dcl_literal l103, 0x00000005, 0x00000006, 0x00000007, 0x00000000\n"
+"ieq r7.xyz_, r3.y, l103\n"
+"dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l105, 0x00000000, 0x39D86988, 0x3AAB0D9F, 0x3A407404\n"
+"cmov_logical r8, r3.y, l104, l105\n"
+"dcl_literal l106, 0x3FC54000, 0x3FC98000, 0x3FCE0000, 0x3FD28000\n"
+"cmov_logical r5, r7.x, l106, r5\n"
+"dcl_literal l107, 0x3FD74000, 0x3FDBC000, 0x3FE0C000, 0x3FE58000\n"
+"cmov_logical r5, r7.y, l107, r5\n"
+"dcl_literal l108, 0x3FEAC000, 0x3FEFC000, 0x3FF50000, 0x3FFA8000\n"
+"cmov_logical r5, r7.z, l108, r5\n"
+"cmov_logical r3._y__, r4.x, r5.y, r5.x\n"
+"cmov_logical r3._y__, r4.y, r5.z, r3.y\n"
+"cmov_logical r3._y__, r4.z, r5.w, r3.y\n"
+"dcl_literal l109, 0x3A2E0F1E, 0x3A90E62D, 0x38F4DCE0, 0x3AD3BEA3\n"
+"cmov_logical r5, r6.x, l109, r8\n"
+"dcl_literal l110, 0x3ADFC146, 0x39D39B9C, 0x3AD4C982, 0x3AC10C0C\n"
+"cmov_logical r5, r6.y, l110, r5\n"
+"dcl_literal l111, 0x3AFB5AA6, 0x3A856AD3, 0x3A41F752, 0x3A8FD607\n"
+"cmov_logical r5, r6.z, l111, r5\n"
+"dcl_literal l112, 0x391E6678, 0x3AEEBD1D, 0x398A39F4, 0x3AB13329\n"
+"cmov_logical r5, r6.w, l112, r5\n"
+"dcl_literal l113, 0x3A9CA845, 0x3AE6F619, 0x3A923054, 0x3AA07647\n"
+"cmov_logical r5, r7.x, l113, r5\n"
+"dcl_literal l114, 0x391F9958, 0x3AEEDE5F, 0x39CDEEC0, 0x3AE41B9D\n"
+"cmov_logical r5, r7.y, l114, r5\n"
+"dcl_literal l115, 0x37C6E7C0, 0x3A92E66F, 0x3A95F454, 0x38ECB6D0\n"
+"cmov_logical r5, r7.z, l115, r5\n"
+"cmov_logical r3.___w, r4.x, r5.y, r5.x\n"
+"cmov_logical r3.___w, r4.y, r5.z, r3.w\n"
+"cmov_logical r3.___w, r4.z, r5.w, r3.w\n"
+"add r4.x___, r3.y, r3.w\n"
+"mad_ieee r3.x___, r4.x, r3.x, r3.w\n"
+"add r3.x___, r3.y, r3.x\n"
+"dcl_literal l116, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r3.x, l116\n"
+"dcl_literal l117, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ixor r3.x___, r0.w, l117\n"
+"imax r0.___w, r0.w, r0.w_neg(xyzw)\n"
+"dcl_literal l118, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r3.x___, r3.x, l118\n"
+"dcl_literal l119, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"udiv r0.___w, r0.w, l119\n"
+"inegate r3._y__, r0.w\n"
+"cmov_logical r0.___w, r3.x, r3.y, r0.w\n"
+"iadd r0.___w, r0.w, r2.x\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l120, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n"
+"lt r2.x___, l120, r0.w_abs\n"
+"ftoi r3.x___, r0.w_abs\n"
+"dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r0.w, l121\n"
+"dcl_literal l122, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n"
+"cmov_logical r2.x___, r2.x, l122, r3.x\n"
+"inegate r3.x___, r2.x\n"
+"cmov_logical r0.___w, r0.w, r3.x, r2.x\n"
+"dcl_literal l123, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.x___, r0.w, l123\n"
+"cmov_logical r2.x___, r4.z, r0.w, r2.x\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l124, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.x___, r2.x, l124\n"
+"itof r3._y__, r4.w\n"
+"cmov_logical r3._y__, r4.z, r4.x, r3.y\n"
+"iadd r3.x___, r3.y, r3.x\n"
+"dcl_literal l125, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r3._y_w, r3.y, l125\n"
+"dcl_literal l126, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3._y__, r3.y, l126\n"
+"dcl_literal l127, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.___w, r3.w, l127\n"
+"iadd r2.x___, r3.y, r2.x\n"
+"dcl_literal l128, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l128\n"
+"dcl_literal l129, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3._y__, l129, r2.x\n"
+"dcl_literal l130, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.x___, l130, r2.x_neg(xyzw)\n"
+"dcl_literal l131, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r3.x___, r3.y, l131, r3.x\n"
+"dcl_literal l132, 0x00000017, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.x_z_, l132, r2.x\n"
+"dcl_literal l133, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r3._y__, r4.x, l133, r2.x\n"
+"dcl_literal l134, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r2.x, l134\n"
+"dcl_literal l135, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.x, l135, r3.y\n"
+"ishr r2.x___, r3.w, r2.x\n"
+"cmov_logical r2.x___, r4.z, r2.x, r3.x\n"
+"dcl_literal l136, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n"
+"lt r0.___w, l136, r0.w\n"
+"dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r0.w, l137, r2.x\n"
+"ior r0.___w, r4.y, r0.w\n"
+"dcl_literal l138, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n"
+"lt r2.x___, l138, r2.w_abs\n"
+"ftoi r3.x___, r2.w_abs\n"
+"dcl_literal l139, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, r2.w, l139\n"
+"dcl_literal l140, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n"
+"cmov_logical r2.x___, r2.x, l140, r3.x\n"
+"inegate r3.x___, r2.x\n"
+"cmov_logical r2.x___, r2.w, r3.x, r2.x\n"
+"dcl_literal l141, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.___w, r2.x, l141\n"
+"dcl_literal l142, 0x7FFFFFFF, 0x0FFFF000, 0x00000000, 0x00000000\n"
+"and r3.xy__, r0.z, l142\n"
+"dcl_literal l143, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r3.___w, r3.y, l143, r3.z_neg(xyzw)\n"
+"dcl_literal l144, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r3.___w, r3.y, l144, r3.w\n"
+"add r3._y__, r0.z, r3.y_neg(xyzw)\n"
+"dcl_literal l145, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B\n"
+"lt r3.x___, r3.x, l145\n"
+"dcl_literal l146, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r3.___w, r3.y, l146, r3.w\n"
+"dcl_literal l147, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r3._y__, r3.y, l147, r3.w\n"
+"dcl_literal l148, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r0.__z_, r0.z, l148, r3.y\n"
+"dcl_literal l149, 0x34904540, 0x3A1BECC5, 0x00000000, 0x00000000\n"
+"dcl_literal l150, 0x3CC2F57D, 0xBC42EEB9, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r0.z, l149, l150\n"
+"dcl_literal l151, 0x37874471, 0x3DDB50CA, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r4.xyxx, r0.z, l151\n"
+"dcl_literal l152, 0x3F7FE378, 0xBEFFE15B, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r4.xyxx, r0.z, l152\n"
+"dcl_literal l153, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378\n"
+"mad_ieee r0.__z_, r4.y, r0.z, l153\n"
+"div_zeroop(infinity) r0.__z_, r4.x, r0.z\n"
+"mul_ieee r3._y__, r3.y, r0.z\n"
+"mad_ieee r0.__z_, r0.z, r3.z, r3.y\n"
+"dcl_literal l154, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.__z_, r0.z, l154\n"
+"dcl_literal l155, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.z, l155\n"
+"cmov_logical r0.__z_, r4.z, r2.x, r2.w\n"
+"itof r2.x___, r2.x\n"
+"dcl_literal l156, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.___w, r0.z, l156\n"
+"itof r3._y__, r4.w\n"
+"cmov_logical r3._y__, r4.z, r4.x, r3.y\n"
+"iadd r2.___w, r3.y, r2.w\n"
+"dcl_literal l157, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r3._yz_, r3.y, l157\n"
+"dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3._y__, r3.y, l158\n"
+"dcl_literal l159, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.__z_, r3.z, l159\n"
+"iadd r0.__z_, r3.y, r0.z\n"
+"dcl_literal l160, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.__z_, r0.z, l160\n"
+"dcl_literal l161, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3._y__, l161, r0.z\n"
+"dcl_literal l162, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.__z_, l162, r0.z_neg(xyzw)\n"
+"dcl_literal l163, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.___w, r3.y, l163, r2.w\n"
+"dcl_literal l164, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+"ilt r3._y_w, l164, r0.z\n"
+"dcl_literal l165, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r3._y__, r3.y, l165, r0.z\n"
+"dcl_literal l166, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r0.z, l166\n"
+"dcl_literal l167, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r0.z, l167, r3.y\n"
+"ishr r0.__z_, r3.z, r0.z\n"
+"cmov_logical r0.__z_, r3.w, r0.z, r2.w\n"
+"dcl_literal l168, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n"
+"lt r2.x___, l168, r2.x\n"
+"dcl_literal l169, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r2.x, l169, r0.z\n"
+"ior r0.__z_, r4.y, r0.z\n"
+"cmov_logical r0.__z_, r3.x, r0.z, r0.w\n"
+"dcl_literal l170, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.y, l170\n"
+"cmov_logical r0.__z_, r2.y, r0.w, r0.z\n"
+"dcl_literal l171, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0.___w, r0.y, l171\n"
+"mul_ieee r0.x___, r0.y, r0.x\n"
+"cmov_logical r0._y__, r0.w, r1.x, r0.z\n"
+"dcl_literal l172, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"lt r0.__z_, l172, r0.x\n"
+"dcl_literal l173, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l173\n"
+"dcl_literal l174, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n"
+"lt r0.x___, r0.x, l174\n"
+"dcl_literal l175, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r0.___w, l175, r0.w\n"
+"and r0.x_z_, r0.xxzx, r0.w\n"
+"dcl_literal l176, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.z, l176, r0.y\n"
+"dcl_literal l177, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l177, r0.y\n"
+"dcl_literal l178, 0x00000000, 0x80000000, 0x80000000, 0x00000000\n"
+"ine r0._yz_, r1.yywy, l178\n"
+"inot r1._y_w, r0.zzzy\n"
+"dcl_literal l179, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l180, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r3, r1.xzzx, l179, l180\n"
+"and r2.xy__, r1.ywyy, r3.xwxx\n"
+"and r0.___w, r3.z, r2.x\n"
+"dcl_literal l181, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.w, l181, r0.x\n"
+"and r2.x__w, r0.zzzy, r3.xxxw\n"
+"and r0.___w, r3.z, r2.x\n"
+"dcl_literal l182, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.w, l182, r0.x\n"
+"dcl_literal l183, 0x7F800000, 0x7F800000, 0x3F800000, 0x00000000\n"
+"ilt r4.xyz_, l183, r1.xzxx\n"
+"inot r0.___w, r4.x\n"
+"dcl_literal l184, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ieq r1.__zw, r1.xxxz, l184\n"
+"dcl_literal l185, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r1.x___, r1.x, l185\n"
+"inot r5.xy__, r1.zwzz\n"
+"and r0.___w, r0.w, r5.x\n"
+"and r2.x___, r2.w, r0.w\n"
+"and r0.___w, r4.z, r0.w\n"
+"ior r2.___w, r4.y, r4.x\n"
+"and r2.x___, r3.y, r2.x\n"
+"dcl_literal l186, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l186, r0.x\n"
+"ior r2.x___, r3.x, r1.z\n"
+"and r2.x___, r3.y, r2.x\n"
+"dcl_literal l187, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r2.x, l187, r0.x\n"
+"and r1.x___, r0.y, r1.x\n"
+"and r2.x___, r5.y, r1.x\n"
+"and r1.x___, r1.w, r1.x\n"
+"and r2.x___, r3.z, r2.x\n"
+"dcl_literal l188, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l188, r0.x\n"
+"dcl_literal l189, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r1.x, l189, r0.x\n"
+"and r1.x_z_, r0.yyzy, r1.z\n"
+"and r0._y__, r1.y, r1.x\n"
+"and r1.x___, r3.z, r1.z\n"
+"and r0._y__, r3.z, r0.y\n"
+"and r1.__z_, r2.z, r3.w\n"
+"dcl_literal l190, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.y, l190, r0.x\n"
+"and r0._y__, r1.w, r1.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"and r1.__z_, r1.y, r0.y\n"
+"and r0._yz_, r0.z, r0.yywy\n"
+"and r0.___w, r1.y, r0.w\n"
+"dcl_literal l191, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.z, l191, r0.x\n"
+"ior r0._y__, r0.y, r0.w\n"
+"dcl_literal l192, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.y, l192, r0.x\n"
+"ior r0._y__, r1.x, r0.z\n"
+"dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.y, l193, r0.x\n"
+"ior r0._y__, r2.y, r2.w\n"
+"dcl_literal l194, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l194, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__half_recip_f32", /* Macro table entry: first field is the macro name; the adjacent string literals that follow form its IL body. */
+"mdef(244)_out(1)_in(1)\n" /* IL macro header: mdef(244) (presumably the macro number), one output, one input. */
+"mov r0, in0\n" /* Load the single input into r0. */
+"mov r1.x___, r0.x\n" /* Keep a copy of the input's x component in r1.x; it is read again by the final selects. */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x41E00000, 0x00000000\n"
+"mov r0.__zw, l0\n"
+"dcl_literal l1, 0x7FFFFFFF, 0x7F800000, 0x80000000, 0x007FFFFF\n" /* These four masks match the IEEE-754 binary32 magnitude, exponent, sign and mantissa bit fields. */
+"and r2, r1.x, l1\n" /* r2.xyzw = input bits ANDed with each component of l1. */
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1._y__, r2.x, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r1.__z_, r2.x, l3\n"
+"and r1._y__, r1.y, r1.z\n" /* r1.y = (r2.x != 0) AND (r2.x < 0x00800000), both as integer compares. */
+"dcl_literal l4, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.__z_, r2.y, l4\n" /* Shift the masked exponent bits right by 0x17 (23). */
+"dcl_literal l5, 0xFFFFFF81, 0xFFFFFF9D, 0x00000000, 0x00000000\n"
+"iadd r0.xy__, r1.z, l5\n"
+"cmov_logical r0.xy__, r1.y, r0.yzyy, r0.xwxx\n"
+"dcl_literal l6, 0x41600000, 0x41600000, 0x41600000, 0x41600000\n"
+"dcl_literal l7, 0xC1600000, 0xC1600000, 0xC1600000, 0xC1600000\n"
+"cmov_logical r0.__z_, r1.y, l6, l7\n"
+"dcl_literal l8, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r0.___w, l8, r0.x\n"
+"dcl_literal l9, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r0.x___, l9, r0.x\n"
+"and r0.x___, r0.w, r0.x\n"
+"cmov_logical r0.x___, r0.x, r0.z, r0.y\n"
+"ftoi r0._y__, r0.x\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.x___, r0.x, l10\n"
+"dcl_literal l11, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r0.__z_, r0.y, l11\n"
+"cmov_logical r0.___w, r2.y, r0.y, r0.z\n"
+"dcl_literal l12, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1._y__, r0.w, l12\n"
+"itof r1.__z_, r2.w\n"
+"cmov_logical r1.__z_, r2.y, r2.x, r1.z\n"
+"iadd r1._y__, r1.z, r1.y\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r1.z, l13\n"
+"dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.__z_, r1.z, l14\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.___w, r1.w, l15\n"
+"iadd r0.___w, r1.z, r0.w\n"
+"dcl_literal l16, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, r0.w, l16\n"
+"dcl_literal l17, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r1.__z_, l17, r0.w\n"
+"dcl_literal l18, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.___w, l18, r0.w_neg(xyzw)\n"
+"dcl_literal l19, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r1._y__, r1.z, l19, r1.y\n"
+"dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.__z_, l20, r0.w\n"
+"dcl_literal l21, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.___w, r1.z, l21, r0.w\n"
+"dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.___w, r0.w, l22\n"
+"ishr r1.__z_, r1.w, r0.w\n"
+"dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l23, r0.w\n"
+"cmov_logical r0.___w, r0.x, r1.z, r1.y\n"
+"ior r0.___w, r2.z, r0.w\n" /* OR the masked sign bit (r2.z) back into the value. */
+"cmov_logical r0.___w, r0.x, r0.w, r1.x\n" /* Select between the value built above and the untouched input copy in r1.x. */
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 is the binary32 bit pattern of 1.0. */
+"div_zeroop(infinity) r0.___w, l24, r0.w\n" /* Divide using l24 and r0.w; presumably l24 / r0.w, i.e. the reciprocal itself -- TODO confirm operand order against the IL spec. */
+"dcl_literal l25, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r1, r0.w, l25\n" /* Split the quotient's bits with the masks in l25. */
+"cmov_logical r0._y__, r1.z, r0.y, r0.z\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.__z_, r0.y, l26\n"
+"itof r1.___w, r1.w\n"
+"cmov_logical r1.__z_, r1.z, r1.x, r1.w\n"
+"iadd r0.__z_, r1.z, r0.z\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r1.z, l27\n"
+"dcl_literal l28, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.__z_, r1.z, l28\n"
+"dcl_literal l29, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.___w, r1.w, l29\n"
+"iadd r0._y__, r1.z, r0.y\n"
+"dcl_literal l30, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0._y__, r0.y, l30\n"
+"dcl_literal l31, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r1.__z_, l31, r0.y\n"
+"dcl_literal l32, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0._y__, l32, r0.y_neg(xyzw)\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r1.z, l33, r0.z\n"
+"dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r1.__z_, l34, r0.y\n"
+"dcl_literal l35, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0._y__, r1.z, l35, r0.y\n"
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0._y__, r0.y, l36\n"
+"ishr r1.__z_, r1.w, r0.y\n"
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, l37, r0.y\n"
+"cmov_logical r0._y__, r0.y, r1.z, r0.z\n"
+"ior r0._y__, r1.y, r0.y\n"
+"dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, r1.x, l38\n"
+"and r0.x___, r0.x, r0.z\n"
+"cmov_logical r0.x___, r0.x, r0.y, r0.w\n" /* Final select into r0.x between the post-processed value (r0.y) and the raw quotient (r0.w). */
+"mov out0, r0\n" /* Publish r0 as the macro's single output. */
+"mend\n" /* End of the macro body text. */
+,1,1 /* Two trailing integer fields; presumably the input/output counts matching _in(1)/_out(1) -- TODO confirm against the table's struct declaration. */
+},
+{ "__half_rsqrt_f32", /* Macro table entry: first field is the macro name; the adjacent string literals that follow form its IL body. */
+"mdef(245)_out(1)_in(1)\n" /* IL macro header: mdef(245) (presumably the macro number), one output, one input. */
+"mov r0, in0\n" /* Load the single input into r0; its x component is the operand. */
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n" /* y/z masks match the IEEE-754 binary32 magnitude and exponent bit fields. */
+"and r0._yz_, r0.x, l0\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.__z_, r0.z, l1\n" /* r0.z = (masked exponent bits == 0). */
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r0.y, l2\n" /* r0.w = (magnitude bits != 0). */
+"and r0.__z_, r0.z, r0.w\n" /* Test 1: zero exponent field but non-zero magnitude. */
+"\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.x, l3\n"
+"and r0.___w, r0.w, r1.x\n" /* Test 2: non-zero magnitude AND the ilt result on the raw input bits vs. 0. */
+"ior r0.__z_, r0.z, r0.w\n"
+"\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l4, r0.y\n" /* Test 3: integer compare of 0x7F800000 against the magnitude bits. */
+"ior r0.__z_, r0.z, r0.y\n"
+"if_logicalnz r0.z\n" /* Branch taken when the OR of the three tests above is non-zero. */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.__z_, r0.x, l5\n" /* Keep only the low 23 bits of the input. */
+" itof r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l6, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.z, l6\n"
+" \n"
+" dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r1.x, l7\n"
+" \n"
+" dcl_literal l8, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r0.__z_, r0.z, l8\n"
+" \n"
+" dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.x___, r1.y, l9\n"
+" \n"
+" dcl_literal l10, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.__z_, l10, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1._y__, l11, r0.z\n"
+" \n"
+" dcl_literal l12, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.__z_, r1.y, l12, r0.z\n"
+" \n"
+" dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1._y__, l13, r0.z\n"
+" ishr r1.__z_, r1.x, r0.z\n"
+" inegate r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.__z_, r0.z, l14\n"
+" iadd r0.__z_, r1.x, r0.z\n"
+" cmov_logical r0.__z_, r1.y, r1.z, r0.z\n"
+" rsq_vec r0.__z_, r0.z\n" /* rsq_vec applied to the value assembled in r0.z by the integer steps above. */
+" \n"
+" dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r1.x___, r0.z, l15\n"
+" if_logicalz r1.x\n" /* Inner branch taken when the rsq_vec result has all-zero exponent bits. */
+" \n"
+" dcl_literal l16, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1._y__, r0.z, l16\n"
+" itof r1._y__, r1.y\n"
+" \n"
+" dcl_literal l17, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r1._yz_, r1.y, l17\n"
+" \n"
+" dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1._y__, r1.y, l18\n"
+" \n"
+" dcl_literal l19, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+" iadd r1._y__, r1.y, l19\n"
+" \n"
+" dcl_literal l20, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.__z_, r1.z, l20\n"
+" \n"
+" dcl_literal l21, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1._y__, l21, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.___w, l22, r1.y\n"
+" \n"
+" dcl_literal l23, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1._y__, r1.w, l23, r1.y\n"
+" \n"
+" dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.___w, l24, r1.y\n"
+" ishr r2.x___, r1.z, r1.y\n"
+" inegate r1._y__, r1.y\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1._y__, r1.y, l25\n"
+" iadd r1._y__, r1.z, r1.y\n"
+" cmov_logical r1._y__, r1.w, r2.x, r1.y\n"
+" else\n"
+" \n"
+" dcl_literal l26, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.__z_, r0.z, l26\n"
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l27\n"
+" \n"
+" dcl_literal l28, 0x06000000, 0x06000000, 0x06000000, 0x06000000\n"
+" iadd r0.__z_, r0.z, l28\n" /* Integer add of 0x06000000 to the bits; presumably an exponent adjustment by 12 (0x06000000 >> 23) -- TODO confirm. */
+" \n"
+" dcl_literal l29, 0xFFFFFF8D, 0xFFFFFF8D, 0xFFFFFF8D, 0xFFFFFF8D\n"
+" iadd r1.x___, r1.x, l29\n"
+" \n"
+" dcl_literal l30, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r1.x___, l30, r1.x\n"
+" \n"
+" dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1._y__, r1.x, l31, r0.z\n" /* Substitute 0x7F800000 (binary32 +infinity bit pattern) when the compare above is true. */
+" endif\n"
+" \n"
+" dcl_literal l32, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* 0xFFC00000: sign, all exponent bits and top mantissa bit set (a binary32 NaN encoding). */
+" cmov_logical r0.__z_, r0.w, l32, r1.y\n" /* Use l32 when test 2 (r0.w) was true, else the value computed in r1.y. */
+" \n"
+" dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+" ior r0.___w, r0.x, l33\n" /* OR the input bits with 0x7FC00000. */
+" cmov_logical r0.x___, r0.y, r0.w, r0.z\n" /* Use that ORed value when test 3 (r0.y) was true, else r0.z. */
+"else\n"
+" rsq_vec r0.x___, r0.x\n" /* None of the three tests fired: apply rsq_vec directly to the input's x component. */
+"endif\n"
+"mov out0, r0\n" /* Publish r0 as the macro's single output. */
+"mend\n" /* End of the macro body text. */
+,1,1 /* Two trailing integer fields; presumably the input/output counts matching _in(1)/_out(1) -- TODO confirm against the table's struct declaration. */
+},
+{ "__half_sin_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(246)_out(1)_in(1)\n" /* macro id 246, declared with one output and one input */
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = input bits with the sign bit masked off */
+"dcl_literal l1, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n" /* 0x3F490FDB is ~0.7853982f (pi/4) */
+"ge r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n" /* 0x47800000 is 65536.0f */
+"ge r0.___w, l2, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = (r0.y >= l1) AND (l2 >= r0.y) */
+"if_logicalnz r0.z\n" /* branch for inputs inside [l1, l2] */
+" \n"
+" dcl_literal l3, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n" /* 0x3F22F983 is ~0.6366198f (2/pi) */
+" mul_ieee r0.__z_, r0.y, l3\n"
+" round_nearest r0.__z_, r0.z\n" /* r0.z = round(r0.y * l3); presumably the multiple of pi/2 to remove - TODO confirm */
+" \n"
+" dcl_literal l4, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r0.___w, r0.z, l4\n" /* r0.w = r0.z with its low 12 bits cleared */
+" add r1.x___, r0.z, r0.w_neg(xyzw)\n" /* r1.x = r0.z - r0.w, the part removed by the mask */
+" \n"
+" dcl_literal l5, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n" /* 0x3E800000 is 0.25f */
+" mul_ieee r1._y__, r0.z, l5\n"
+" \n"
+" dcl_literal l6, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n" /* NOTE(review): looks like pi/2 split into three terms of decreasing magnitude - confirm */
+" mul_ieee r2.xyz_, r0.z, l6\n"
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x3FC90000, 0x33A22000\n"
+" mad_ieee r1.__zw, r0.w, l7, r2.xxxy_neg(xyzw)\n"
+" \n"
+" dcl_literal l8, 0x00000000, 0x00000000, 0x39FDA000, 0x2C340000\n"
+" mad_ieee r1.__zw, r0.w, l8, r1.zzzw\n"
+" \n"
+" dcl_literal l9, 0x00000000, 0x00000000, 0x3FC90000, 0x33A22000\n"
+" mad_ieee r1.__zw, r1.x, l9, r1.zzzw\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x39FDA000, 0x2C340000\n"
+" mad_ieee r1.__zw, r1.x, l10, r1.zzzw\n"
+" add r0.__z_, r0.y, r2.x_neg(xyzw)\n" /* r0.z = r0.y - r2.x; the following adds appear to track rounding error of each subtraction - TODO confirm */
+" add r2.___w, r0.y, r0.z_neg(xyzw)\n"
+" add r2.x___, r2.x_neg(xyzw), r2.w\n"
+" add r1.__z_, r1.z_neg(xyzw), r2.x\n"
+" add r0.__z_, r0.z, r1.z\n"
+" add r1.__z_, r2.y_neg(xyzw), r0.z\n"
+" add r0.__z_, r0.z, r1.z_neg(xyzw)\n"
+" add r0.__z_, r2.y_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.w_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.z, r0.z\n"
+" \n"
+" dcl_literal l11, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.__z_, r0.w, l11, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l12, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r0.___w, r0.w, l12, r1.z\n"
+" \n"
+" dcl_literal l13, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r0.___w, r1.x, l13, r0.w\n"
+" \n"
+" dcl_literal l14, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r0.___w, r1.x, l14, r0.w\n"
+" add r1.x___, r2.z_neg(xyzw), r0.z\n"
+" add r0.__z_, r0.z, r1.x_neg(xyzw)\n"
+" add r0.__z_, r2.z_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.x, r0.z\n"
+" frc r1.x___, r1.y\n" /* r1.y still holds (rounded multiple) * 0.25 from the l5 multiply */
+" \n"
+" dcl_literal l15, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n" /* 0x40800000 is 4.0f */
+" mul_ieee r1.x___, r1.x, l15\n"
+" round_nearest r1.x___, r1.x\n" /* r1.x = round(frac(r1.y) * 4.0); compared against 0..3 further down */
+" mul_ieee r1._y__, r0.z, r0.z\n" /* r1.y = r0.z squared */
+" mul_ieee r1.__z_, r0.z, r1.y\n" /* r1.z = r0.z cubed */
+" \n"
+" dcl_literal l16, 0x2F2EC9D3, 0xAD47D74E, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l17, 0xB2D72F34, 0x310F74F6, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.y, l16, l17\n" /* start of two nested multiply-add chains in r1.y, one in .x and one in .y */
+" \n"
+" dcl_literal l18, 0x3636DF25, 0xB492923A, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.y, r2.xyxx, l18\n"
+" \n"
+" dcl_literal l19, 0xB95009D4, 0x37D00AE2, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.y, r2.xyxx, l19\n"
+" \n"
+" dcl_literal l20, 0x3C088887, 0xBAB60B60, 0x00000000, 0x00000000\n"
+" mad_ieee r2.xy__, r1.y, r2.xyxx, l20\n"
+" mul_ieee r1.___w, r1.z, r2.x\n"
+" \n"
+" dcl_literal l21, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n" /* 0xBF000000 is -0.5f */
+" mad_ieee r1.___w, r0.w, l21, r1.w_neg(xyzw)\n"
+" mad_ieee r1.___w, r1.y, r1.w, r0.w\n"
+" \n"
+" dcl_literal l22, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mad_ieee r1.__z_, r1.z_neg(xyzw), l22, r1.w\n"
+" add r1.__z_, r0.z, r1.z_neg(xyzw)\n" /* r1.z: first candidate result (selected for r1.x == 0, negated for r1.x == 2) */
+" \n"
+" dcl_literal l23, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r1.___w, r1.y, r2.y, l23\n"
+" mul_ieee r2.x___, r1.y, r1.y\n"
+" \n"
+" dcl_literal l24, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2._y__, r0.z, l24\n" /* r2.y = r0.z bits with the sign bit cleared */
+" \n"
+" dcl_literal l25, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r2.__z_, r2.y, l25\n" /* integer compares on the float bit pattern */
+" \n"
+" dcl_literal l26, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r2.___w, l26, r2.y\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l27, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r2.___w, r2.y, l27\n" /* integer add of 0xFF000000 to the bit pattern (equivalent to subtracting 0x01000000) */
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l28, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r2._y__, l28, r2.y\n"
+" \n"
+" dcl_literal l29, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r2._y__, r2.y, l29, r2.z\n"
+" \n"
+" dcl_literal l30, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0x3F000000 is 0.5f */
+" mad_ieee r1._y__, r1.y, l30, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l31, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 is 1.0f */
+" add r2._y__, r2.y_neg(xyzw), l31\n"
+" mul_ieee r0.__z_, r0.w_neg(xyzw), r0.z\n"
+" mad_ieee r0.__z_, r1.w, r2.x, r0.z_neg(xyzw)\n"
+" add r0.__z_, r1.y, r0.z_neg(xyzw)\n"
+" add r0.__z_, r2.y, r0.z_neg(xyzw)\n" /* r0.z: second candidate result (selected for r1.x == 1, negated for r1.x == 3) */
+" \n"
+" dcl_literal l32, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n" /* 0.0f, 1.0f, 2.0f, 3.0f */
+" eq r2, r1.x, l32\n" /* one compare result per component: r1.x == 0, 1, 2, 3 */
+" \n"
+" dcl_literal l33, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ge r0.___w, r0.y, l33\n"
+" and r2, r2, r0.w\n"
+" and r0.___w, r1.z, r2.x\n"
+" cmov_logical r0.___w, r2.y, r0.z, r0.w\n"
+" cmov_logical r0.___w, r2.z, r1.z_neg(xyzw), r0.w\n"
+" cmov_logical r0.__z_, r2.w, r0.z_neg(xyzw), r0.w\n" /* r0.z = candidate chosen by the r1.x compare results */
+"else\n" /* range test above failed */
+" \n"
+" dcl_literal l34, 0x3A000000, 0x3A000000, 0x3A000000, 0x3A000000\n" /* 0x3A000000 is 2^-11 as a float */
+" ige r0.___w, r0.y, l34\n"
+" \n"
+" dcl_literal l35, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+" lt r1.x___, r0.y, l35\n"
+" and r0.___w, r0.w, r1.x\n" /* r0.w = (r0.y bits >= l34) AND (r0.y < l35) */
+" mul_ieee r1.x___, r0.y, r0.y\n"
+" mul_ieee r1._y__, r0.y, r1.x\n"
+" \n"
+" dcl_literal l36, 0x2F2EC9D3, 0x2F2EC9D3, 0x2F2EC9D3, 0x2F2EC9D3\n"
+" \n"
+" dcl_literal l37, 0xB2D72F34, 0xB2D72F34, 0xB2D72F34, 0xB2D72F34\n"
+" mad_ieee r1.__z_, r1.x, l36, l37\n" /* same constants as the .x chain of l16..l20 in the other branch */
+" \n"
+" dcl_literal l38, 0x3636DF25, 0x3636DF25, 0x3636DF25, 0x3636DF25\n"
+" mad_ieee r1.__z_, r1.x, r1.z, l38\n"
+" \n"
+" dcl_literal l39, 0xB95009D4, 0xB95009D4, 0xB95009D4, 0xB95009D4\n"
+" mad_ieee r1.__z_, r1.x, r1.z, l39\n"
+" \n"
+" dcl_literal l40, 0x3C088887, 0x3C088887, 0x3C088887, 0x3C088887\n"
+" mad_ieee r1.__z_, r1.x, r1.z, l40\n"
+" mul_ieee r1.__z_, r1.y, r1.z\n"
+" \n"
+" dcl_literal l41, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mul_ieee r1._y__, r1.y, l41\n"
+" mad_ieee r1.x___, r1.x, r1.z_neg(xyzw), r1.y_neg(xyzw)\n"
+" add r1.x___, r0.y, r1.x_neg(xyzw)\n"
+" cmov_logical r0.__z_, r0.w, r1.x, r0.y\n" /* r0.z = computed value when r0.w is set, otherwise r0.y unchanged */
+"endif\n"
+"dcl_literal l42, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n"
+"lt r0.___w, l42, r0.y\n"
+"dcl_literal l43, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000 is the +infinity bit pattern */
+"ilt r1.x___, r0.y, l43\n"
+"and r0.___w, r0.w, r1.x\n"
+"dcl_literal l44, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r0.w, l44, r0.z\n" /* r0.y above 65536.0 but below the infinity pattern: r0.z replaced by l44 (1.0f) */
+"dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l45\n" /* signed integer test of the original input bits against zero */
+"cmov_logical r0.x___, r0.x, r0.z_neg(xyzw), r0.z\n" /* negate r0.z when the input had its sign bit set */
+"dcl_literal l46, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.y, l46\n"
+"dcl_literal l47, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r0.z, l47, r0.x\n" /* r0.y equal to the infinity pattern: result bits 0xFFC00000 */
+"dcl_literal l48, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l48, r0.y\n"
+"dcl_literal l49, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l49, r0.x\n" /* r0.y above the infinity pattern (NaN encodings): result bits 0x7FC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__half_sqrt_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(247)_out(1)_in(1)\n" /* macro id 247, declared with one output and one input */
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = input bits with the sign bit masked off */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.y, l1\n" /* r0.z = magnitude bits are non-zero */
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000 is the +infinity bit pattern */
+"ilt r0._y__, l2, r0.y\n" /* r0.y = magnitude bits above the infinity pattern (NaN encodings) */
+"\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l3\n" /* r0.w = input bits negative as a signed integer (sign bit set) */
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = non-zero magnitude AND sign bit set */
+"sqrt_vec r0.___w, r0.x\n"
+"\n"
+"dcl_literal l4, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.__z_, r0.z, l4, r0.w\n" /* negative non-zero input: 0xFFC00000, otherwise the sqrt_vec result */
+"\n"
+"dcl_literal l5, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.___w, r0.x, l5\n" /* input bits OR 0x7FC00000 */
+"cmov_logical r0.x___, r0.y, r0.w, r0.z\n" /* NaN-encoded input: the OR-ed bits, otherwise r0.z */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__half_tan_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(248)_out(1)_in(1)\n" /* macro id 248, declared with one output and one input */
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.x, l0\n" /* r0.y = input bits with the sign bit masked off */
+"dcl_literal l1, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n" /* 0x3F490FDB is ~0.7853982f (pi/4) */
+"ge r0.__z_, r0.y, l1\n"
+"dcl_literal l2, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n" /* 0x47800000 is 65536.0f */
+"ge r0.___w, l2, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = (r0.y >= l1) AND (l2 >= r0.y) */
+"if_logicalnz r0.z\n" /* branch for inputs inside [l1, l2] */
+" \n"
+" dcl_literal l3, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n" /* 0x3F22F983 is ~0.6366198f (2/pi) */
+" mul_ieee r0.__z_, r0.y, l3\n"
+" round_nearest r0.__z_, r0.z\n" /* r0.z = round(r0.y * l3); presumably the multiple of pi/2 to remove - TODO confirm */
+" \n"
+" dcl_literal l4, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r0.___w, r0.z, l4\n" /* r0.w = r0.z with its low 12 bits cleared */
+" add r1.x___, r0.z, r0.w_neg(xyzw)\n" /* r1.x = r0.z - r0.w, the part removed by the mask */
+" \n"
+" dcl_literal l5, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n" /* 0x3E800000 is 0.25f */
+" mul_ieee r1._y__, r0.z, l5\n"
+" \n"
+" dcl_literal l6, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n" /* NOTE(review): looks like pi/2 split into three terms of decreasing magnitude - confirm */
+" mul_ieee r2.xyz_, r0.z, l6\n"
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x3FC90000, 0x33A22000\n"
+" mad_ieee r1.__zw, r0.w, l7, r2.xxxy_neg(xyzw)\n"
+" \n"
+" dcl_literal l8, 0x00000000, 0x00000000, 0x39FDA000, 0x2C340000\n"
+" mad_ieee r1.__zw, r0.w, l8, r1.zzzw\n"
+" \n"
+" dcl_literal l9, 0x00000000, 0x00000000, 0x3FC90000, 0x33A22000\n"
+" mad_ieee r1.__zw, r1.x, l9, r1.zzzw\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x39FDA000, 0x2C340000\n"
+" mad_ieee r1.__zw, r1.x, l10, r1.zzzw\n"
+" add r0.__z_, r0.y, r2.x_neg(xyzw)\n" /* r0.z = r0.y - r2.x; the following adds appear to track rounding error of each subtraction - TODO confirm */
+" add r2.___w, r0.y, r0.z_neg(xyzw)\n"
+" add r2.x___, r2.x_neg(xyzw), r2.w\n"
+" add r1.__z_, r1.z_neg(xyzw), r2.x\n"
+" add r0.__z_, r0.z, r1.z\n"
+" add r1.__z_, r2.y_neg(xyzw), r0.z\n"
+" add r0.__z_, r0.z, r1.z_neg(xyzw)\n"
+" add r0.__z_, r2.y_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.w_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.z, r0.z\n"
+" \n"
+" dcl_literal l11, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.__z_, r0.w, l11, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l12, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r0.___w, r0.w, l12, r1.z\n"
+" \n"
+" dcl_literal l13, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r0.___w, r1.x, l13, r0.w\n"
+" \n"
+" dcl_literal l14, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r0.___w, r1.x, l14, r0.w\n"
+" add r1.x___, r2.z_neg(xyzw), r0.z\n"
+" add r0.__z_, r0.z, r1.x_neg(xyzw)\n"
+" add r0.__z_, r2.z_neg(xyzw), r0.z\n"
+" add r0.__z_, r1.x, r0.z\n"
+" frc r1.x___, r1.y\n" /* r1.y still holds (rounded multiple) * 0.25 from the l5 multiply */
+" \n"
+" dcl_literal l15, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n" /* 0x40800000 is 4.0f */
+" mul_ieee r1.x___, r1.x, l15\n"
+" round_nearest r1.x___, r1.x\n" /* r1.x = round(frac(r1.y) * 4.0) */
+" \n"
+" dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0x3F000000 is 0.5f */
+" mul_ieee r1.x___, r1.x, l16\n"
+" frc r1.x___, r1.x\n"
+" add r1.x___, r1.x, r1.x\n"
+" round_nearest r1.x___, r1.x\n" /* r1.x = round(2 * frac(r1.x * 0.5)); presumably the parity of the multiple - TODO confirm */
+" add r0.__z_, r0.w_neg(xyzw), r0.z\n"
+" mul_ieee r0.___w, r0.z, r0.z\n" /* r0.w = r0.z squared */
+" mul_ieee r1._y__, r0.z, r0.w\n" /* r1.y = r0.z cubed */
+" \n"
+" dcl_literal l17, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3\n"
+" \n"
+" dcl_literal l18, 0x3EC54587, 0x3EC54587, 0x3EC54587, 0x3EC54587\n"
+" mad_ieee r1.__z_, r0.w_neg(xyzw), l17, l18\n"
+" mul_ieee r1._y__, r1.y, r1.z\n" /* numerator term */
+" \n"
+" dcl_literal l19, 0x3C971480, 0x3C971480, 0x3C971480, 0x3C971480\n"
+" \n"
+" dcl_literal l20, 0xBF039337, 0xBF039337, 0xBF039337, 0xBF039337\n"
+" mad_ieee r1.__z_, r0.w, l19, l20\n"
+" \n"
+" dcl_literal l21, 0x3F93F425, 0x3F93F425, 0x3F93F425, 0x3F93F425\n"
+" mad_ieee r0.___w, r1.z, r0.w, l21\n" /* denominator term */
+" div_zeroop(infinity) r0.___w, r1.y, r0.w\n"
+" add r0.__z_, r0.z, r0.w\n" /* r0.z = r0.z + numerator / denominator */
+" \n"
+" dcl_literal l22, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 is 1.0f */
+" div_zeroop(infinity) r0.___w, l22, r0.z\n" /* r0.w = 1.0 / r0.z */
+" \n"
+" dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r1.x___, l23, r1.x\n"
+" cmov_logical r0.__z_, r1.x, r0.w_neg(xyzw), r0.z\n" /* r1.x > 0: use the negated reciprocal instead of r0.z */
+"else\n" /* range test above failed */
+" \n"
+" dcl_literal l24, 0x3A000000, 0x3A000000, 0x3A000000, 0x3A000000\n" /* 0x3A000000 is 2^-11 as a float */
+" ige r0.___w, r0.y, l24\n"
+" \n"
+" dcl_literal l25, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB, 0x3F490FDB\n"
+" lt r1.x___, r0.y, l25\n"
+" and r0.___w, r0.w, r1.x\n" /* r0.w = (r0.y bits >= l24) AND (r0.y < l25) */
+" mul_ieee r1.x___, r0.y, r0.y\n"
+" mul_ieee r1._y__, r0.y, r1.x\n"
+" \n"
+" dcl_literal l26, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3\n"
+" \n"
+" dcl_literal l27, 0x3EC54587, 0x3EC54587, 0x3EC54587, 0x3EC54587\n"
+" mad_ieee r1.__z_, r1.x_neg(xyzw), l26, l27\n" /* same constants as l17..l21 in the other branch */
+" mul_ieee r1._y__, r1.y, r1.z\n"
+" \n"
+" dcl_literal l28, 0x3C971480, 0x3C971480, 0x3C971480, 0x3C971480\n"
+" \n"
+" dcl_literal l29, 0xBF039337, 0xBF039337, 0xBF039337, 0xBF039337\n"
+" mad_ieee r1.__z_, r1.x, l28, l29\n"
+" \n"
+" dcl_literal l30, 0x3F93F425, 0x3F93F425, 0x3F93F425, 0x3F93F425\n"
+" mad_ieee r1.x___, r1.z, r1.x, l30\n"
+" div_zeroop(infinity) r1.x___, r1.y, r1.x\n"
+" add r1.x___, r0.y, r1.x\n"
+" cmov_logical r0.__z_, r0.w, r1.x, r0.y\n" /* r0.z = computed value when r0.w is set, otherwise r0.y unchanged */
+"endif\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l31\n" /* signed integer test of the original input bits against zero */
+"cmov_logical r0.x___, r0.x, r0.z_neg(xyzw), r0.z\n" /* negate r0.z when the input had its sign bit set */
+"dcl_literal l32, 0x47800000, 0x47800000, 0x47800000, 0x47800000\n"
+"lt r0.__z_, l32, r0.y\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000 is the +infinity bit pattern */
+"ilt r0.___w, r0.y, l33\n"
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l34, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.z, l34, r0.x\n" /* r0.y above 65536.0 but below the infinity pattern: result bits 0x7F800000 */
+"dcl_literal l35, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.y, l35\n"
+"dcl_literal l36, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r0.z, l36, r0.x\n" /* r0.y equal to the infinity pattern: result bits 0xFFC00000 */
+"dcl_literal l37, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l37, r0.y\n"
+"dcl_literal l38, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l38, r0.x\n" /* r0.y above the infinity pattern (NaN encodings): result bits 0x7FC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__hwfma_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(249)_out(1)_in(3)\n" /* macro id 249, declared with one output and three inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"fma r0.x___, r0.x, r1.x, r2.x\n" /* single fma instruction on the .x components of the three inputs */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* same values as the in(3) and out(1) counts on the mdef line */
+},
+{ "__hypot_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(250)_out(1)_in(2)\n" /* macro id 250, declared with one output and two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack both operands into one register: r0.x = in0.x, r0.y = in1.x */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__zw, r0.yyyx, l0\n" /* r0.z = in1 bits without sign, r0.w = in0 bits without sign */
+"ilt r1.x___, r0.w, r0.z\n" /* magnitudes compared as integers */
+"cmov_logical r1.x___, r1.x, r0.x, r0.y\n" /* r1.x = operand whose magnitude bits are not the larger */
+"ige r1._y__, r0.w, r0.z\n"
+"cmov_logical r0.x___, r1.y, r0.x, r0.y\n" /* r0.x = operand whose magnitude bits are the larger (in0 on ties) */
+"dcl_literal l1, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n"
+"and r1._yz_, r1.x, l1\n" /* r1.y = magnitude bits, r1.z = exponent field of the smaller operand */
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 0x17 = 23, the shift that brings the exponent field down to bit 0 */
+"ishr r0._y__, r1.z, l2\n"
+"dcl_literal l3, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 is -127 as a 32-bit integer */
+"iadd r1.___w, r0.y, l3\n"
+"dcl_literal l4, 0x7FFFFFFF, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r2.xy__, r0.x, l4\n" /* r2.x = magnitude bits, r2.y = exponent field of the larger operand */
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.y, l5\n"
+"dcl_literal l6, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.___w, r2.z, l6\n"
+"dcl_literal l7, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n" /* 0x30 = 48 */
+"ige r3.x___, l7, r0.y\n"
+"dcl_literal l8, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ige r3._y__, l8, r2.z\n"
+"iadd r3.__z_, r2.z, r0.y_neg(xyzw)\n" /* difference of the two shifted exponent fields */
+"dcl_literal l9, 0x0000001C, 0x0000001C, 0x0000001C, 0x0000001C\n" /* 0x1C = 28 */
+"ilt r3.__z_, l9, r3.z\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r3.___w, r1.y, l10\n"
+"ior r3.__z_, r3.z, r3.w\n" /* r3.z = exponent gap above 28 OR smaller operand is zero; later selects r2.x as the result */
+"inot r3.___w, r3.z\n"
+"and r3.x___, r3.x, r3.w\n"
+"dcl_literal l11, 0x42400000, 0xC2400000, 0x00000000, 0x00000000\n" /* 48.0f, -48.0f */
+"and r4.xy__, r3.x, l11\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x42C00000, 0xC2C00000\n" /* 96.0f, -96.0f in .zw */
+"dcl_literal l13, 0x00000000, 0x00000000, 0x42400000, 0xC2400000\n" /* 48.0f, -48.0f in .zw */
+"cmov_logical r4.__zw, r3.x, l12, l13\n"
+"cmov_logical r3.xy__, r3.y, r4.zwzz, r4.xyxx\n" /* r3.x / r3.y: a pair of opposite-signed values; presumably exponent adjustments applied before and after the core computation - TODO confirm */
+"itof r1.___w, r1.w\n"
+"add r1.___w, r3.x, r1.w\n"
+"add r4.x___, r1.w, r1.w\n"
+"dcl_literal l14, 0xC2CC0000, 0xC2CC0000, 0xC2CC0000, 0xC2CC0000\n" /* 0xC2CC0000 is -102.0f */
+"lt r4.x___, r4.x, l14\n"
+"dcl_literal l15, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l16, 0xC2CC0000, 0xC2CC0000, 0xC2CC0000, 0xC2CC0000\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), l15, l16\n"
+"dcl_literal l17, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"dcl_literal l18, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r1.___w, r1.w, l17, l18\n"
+"round_neginf r1.___w, r1.w\n"
+"dcl_literal l19, 0x42800000, 0x42800000, 0x42800000, 0x42800000\n" /* 0x42800000 is 64.0f */
+"min r1.___w, r1.w, l19\n"
+"itof r2.___w, r2.w\n"
+"add r2.___w, r3.x, r2.w\n"
+"add r4._y__, r2.w, r2.w\n"
+"dcl_literal l20, 0x42FA0000, 0x42FA0000, 0x42FA0000, 0x42FA0000\n" /* 0x42FA0000 is 125.0f */
+"lt r4._y__, l20, r4.y\n"
+"dcl_literal l21, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"dcl_literal l22, 0xC2FA0000, 0xC2FA0000, 0xC2FA0000, 0xC2FA0000\n"
+"mad_ieee r2.___w, r2.w, l21, l22\n"
+"dcl_literal l23, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r2.___w, r2.w, l23, l24\n"
+"round_neginf r2.___w, r2.w\n"
+"dcl_literal l25, 0x42800000, 0x42800000, 0x42800000, 0x42800000\n"
+"min r2.___w, r2.w, l25\n"
+"and r4.x___, r3.w, r4.x\n"
+"add r4.__z_, r3.x, r1.w\n"
+"add r4.___w, r3.y, r1.w_neg(xyzw)\n"
+"cmov_logical r3.xy__, r4.x, r4.zwzz, r3.xyxx\n" /* optional further adjustment of the r3.x / r3.y pair by +/- r1.w */
+"dcl_literal l26, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r2.x, l26, l27\n" /* r1.w = all-ones mask when the larger operand is non-zero */
+"and r4.x___, r4.y, r1.w\n"
+"add r4._y__, r2.w_neg(xyzw), r3.x\n"
+"add r4.__z_, r2.w, r3.y\n"
+"cmov_logical r3.xy__, r4.x, r4.yzyy, r3.xyxx\n" /* optional further adjustment of the r3.x / r3.y pair by -/+ r2.w */
+"dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r2.___w, l28, r3.x\n"
+"if_logicalnz r2.w\n" /* only when r3.x is non-zero: integer-arithmetic block that rewrites the smaller operand into r0.y; presumably multiplies it by 2^r3.x - TODO confirm */
+" ftoi r4.x___, r3.x\n"
+" iadd r0._y__, r0.y, r4.x\n"
+" \n"
+" dcl_literal l29, 0x00000000, 0x7FFFFFFF, 0x007FFFFF, 0x00000000\n"
+" and r4._yz_, r1.x, l29\n" /* r4.y = magnitude bits, r4.z = mantissa field */
+" itof r1.x___, r4.z\n"
+" \n"
+" dcl_literal l30, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r1.x, l30\n"
+" \n"
+" dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r5.x, l31\n"
+" iadd r1.x___, r1.x, r4.x\n"
+" \n"
+" dcl_literal l32, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* 0x00800000 is bit 23, just above the mantissa field */
+" ior r4.___w, r5.y, l32\n"
+" \n"
+" dcl_literal l33, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 0x96 = 150 */
+" iadd r1.x___, l33, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.x___, l34, r1.x\n"
+" \n"
+" dcl_literal l35, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r5.x, l35, r1.x\n" /* shift count capped at 24 */
+" \n"
+" dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.x___, l36, r1.x\n"
+" ishr r5._y__, r4.w, r1.x\n"
+" inegate r1.x___, r1.x\n"
+" \n"
+" dcl_literal l37, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.x___, r1.x, l37\n"
+" iadd r1.x___, r4.w, r1.x\n"
+" cmov_logical r1.x___, r5.x, r5.y, r1.x\n"
+" \n"
+" dcl_literal l38, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.x___, r4.x, l38\n"
+" iadd r4.x___, r4.y, r4.x\n" /* magnitude bits plus (integer adjustment << 23) */
+" \n"
+" dcl_literal l39, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r0._y__, r0.y, l39\n"
+" \n"
+" dcl_literal l40, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 0x7F = 127 */
+" ilt r4._y__, l40, r0.y\n"
+" \n"
+" dcl_literal l41, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r4.x___, r4.y, l41, r4.x\n" /* exponent above 127: substitute the infinity bit pattern */
+" \n"
+" dcl_literal l42, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* 0xFFFFFF82 is -126 as a 32-bit integer */
+" iadd r0._y__, l42, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l43, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.z, l43\n"
+" \n"
+" dcl_literal l44, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l44, r0.y\n"
+" \n"
+" dcl_literal l45, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0._y__, r4.z, l45, r0.y\n"
+" ishr r4._y__, r4.y, r0.y\n"
+" \n"
+" dcl_literal l46, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0._y__, l46, r0.y\n"
+" cmov_logical r0._y__, r0.y, r4.y, r4.x\n"
+" cmov_logical r0._y__, r1.z, r0.y, r1.x\n" /* r1.z is the original exponent field: non-zero picks the first path, zero the other */
+" \n"
+" dcl_literal l47, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 0x437E0000 is 254.0f */
+" lt r1.x___, l47, r3.x\n"
+" \n"
+" dcl_literal l48, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r1.x, l48, r0.y\n"
+"else\n"
+" mov r0._y__, r1.y\n" /* no adjustment: r0.y = magnitude bits of the smaller operand */
+"endif\n"
+"if_logicalnz r2.w\n" /* same block again, applied to the larger operand and written to r0.x */
+" ftoi r1.x___, r3.x\n"
+" iadd r1.__z_, r2.z, r1.x\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x007FFFFF\n"
+" and r2.__zw, r0.x, l49\n"
+" itof r0.x___, r2.w\n"
+" \n"
+" dcl_literal l50, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r0.x, l50\n"
+" \n"
+" dcl_literal l51, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.x___, r4.x, l51\n"
+" iadd r0.x___, r0.x, r1.x\n"
+" \n"
+" dcl_literal l52, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.x___, r4.y, l52\n"
+" \n"
+" dcl_literal l53, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.x___, l53, r0.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l54, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4._y__, l54, r0.x\n"
+" \n"
+" dcl_literal l55, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.x___, r4.y, l55, r0.x\n"
+" \n"
+" dcl_literal l56, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, l56, r0.x\n"
+" ishr r4.__z_, r4.x, r0.x\n"
+" inegate r0.x___, r0.x\n"
+" \n"
+" dcl_literal l57, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.x___, r0.x, l57\n"
+" iadd r0.x___, r4.x, r0.x\n"
+" cmov_logical r0.x___, r4.y, r4.z, r0.x\n"
+" \n"
+" dcl_literal l58, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.x___, r1.x, l58\n"
+" iadd r1.x___, r2.z, r1.x\n"
+" \n"
+" dcl_literal l59, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.__z_, r1.z, l59\n"
+" \n"
+" dcl_literal l60, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.__z_, l60, r1.z\n"
+" \n"
+" dcl_literal l61, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r2.z, l61, r1.x\n"
+" \n"
+" dcl_literal l62, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.__z_, l62, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l63, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.__z_, r2.w, l63\n"
+" \n"
+" dcl_literal l64, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.___w, l64, r1.z\n"
+" \n"
+" dcl_literal l65, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.w, l65, r1.z\n"
+" ishr r2.__z_, r2.z, r1.z\n"
+" \n"
+" dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, l66, r1.z\n"
+" cmov_logical r1.x___, r1.z, r2.z, r1.x\n"
+" cmov_logical r0.x___, r2.y, r1.x, r0.x\n" /* r2.y is the original exponent field of the larger operand */
+" \n"
+" dcl_literal l67, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r1.x___, l67, r3.x\n"
+" \n"
+" dcl_literal l68, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.x___, r1.x, l68, r0.x\n"
+"else\n"
+" mov r0.x___, r2.x\n" /* no adjustment: r0.x = magnitude bits of the larger operand */
+"endif\n"
+"dcl_literal l69, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.x___, r0.y, l69\n" /* r1.x = r0.y with its low 12 bits cleared */
+"add r1.__z_, r0.y, r1.x_neg(xyzw)\n" /* r1.z = remainder r0.y - r1.x */
+"dcl_literal l70, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.__z_, r1.z, l70\n"
+"dcl_literal l71, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2._y__, r0.x, l71\n" /* same split applied to r0.x */
+"add r2.__z_, r0.x, r2.y_neg(xyzw)\n"
+"dcl_literal l72, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2.__z_, r2.z, l72\n"
+"mul_ieee r2.___w, r0.y, r0.y\n"
+"mul_ieee r3.x___, r0.x, r0.x\n"
+"mad_ieee r4.x___, r0.y, r0.y, r3.x\n" /* r4.x = r0.y*r0.y + r0.x*r0.x, the directly computed sum of squares */
+"dcl_literal l73, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r4._y__, r4.x, l73\n" /* r4.y = sum of squares reached the infinity bit pattern */
+"mad_ieee r0._y__, r0.y, r0.y, r4.x_neg(xyzw)\n" /* the following lines accumulate small terms that are added to r4.x; presumably a rounding-error correction using the split parts - TODO confirm */
+"mad_ieee r0.x___, r0.x, r0.x, r0.y\n"
+"mad_ieee r0._y__, r1.x, r1.x, r2.w_neg(xyzw)\n"
+"dp2_ieee r1.x___, r1.z, r1.x\n"
+"add r0._y__, r0.y, r1.x\n"
+"add r0.x___, r0.x, r0.y\n"
+"mad_ieee r0.x___, r1.z, r1.z, r0.x\n"
+"mad_ieee r0._y__, r2.y, r2.y, r3.x_neg(xyzw)\n"
+"dp2_ieee r1.x___, r2.z, r2.y\n"
+"add r0._y__, r0.y, r1.x\n"
+"add r0.x___, r0.x, r0.y\n"
+"mad_ieee r0.x___, r2.z, r2.z, r0.x\n"
+"add r0.x___, r4.x, r0.x\n"
+"sqrt_vec r0.x___, r0.x\n" /* square root of the corrected sum of squares */
+"dcl_literal l74, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0._y__, r3.y, l74\n"
+"if_logicalnz r0.y\n" /* only when r3.y is non-zero: same integer-arithmetic block, applied to the square root with r3.y */
+" ftoi r0._y__, r3.y\n"
+" \n"
+" dcl_literal l75, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r5, r0.x, l75\n" /* r5 = (magnitude bits, sign bit, exponent field, mantissa field) of r0.x */
+" \n"
+" dcl_literal l76, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r5.z, l76\n"
+" iadd r1.x___, r1.x, r0.y\n"
+" itof r1.__z_, r5.w\n"
+" \n"
+" dcl_literal l77, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r2._yz_, r1.z, l77\n"
+" \n"
+" dcl_literal l78, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r2.y, l78\n"
+" iadd r1.__z_, r1.z, r0.y\n"
+" \n"
+" dcl_literal l79, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.z, l79\n"
+" \n"
+" dcl_literal l80, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.__z_, l80, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l81, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.__z_, l81, r1.z\n"
+" \n"
+" dcl_literal l82, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.z, l82, r1.z\n"
+" \n"
+" dcl_literal l83, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l83, r1.z\n"
+" ishr r2.___w, r2.y, r1.z\n"
+" inegate r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l84, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l84\n"
+" iadd r1.__z_, r2.y, r1.z\n"
+" cmov_logical r1.__z_, r2.z, r2.w, r1.z\n"
+" \n"
+" dcl_literal l85, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l85\n"
+" iadd r0._y__, r5.x, r0.y\n"
+" \n"
+" dcl_literal l86, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.x___, r1.x, l86\n"
+" \n"
+" dcl_literal l87, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2._y__, l87, r1.x\n"
+" \n"
+" dcl_literal l88, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.y, l88, r0.y\n"
+" \n"
+" dcl_literal l89, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l89, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l90, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r5.w, l90\n"
+" \n"
+" dcl_literal l91, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.__z_, l91, r1.x\n"
+" \n"
+" dcl_literal l92, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r2.z, l92, r1.x\n"
+" ishr r2._y__, r2.y, r1.x\n"
+" \n"
+" dcl_literal l93, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, l93, r1.x\n"
+" cmov_logical r0._y__, r1.x, r2.y, r0.y\n"
+" cmov_logical r0._y__, r5.z, r0.y, r1.z\n"
+" \n"
+" dcl_literal l94, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r1.x___, l94, r3.y\n"
+" \n"
+" dcl_literal l95, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r1.x, l95, r0.y\n"
+" ior r0.x___, r5.y, r0.y\n" /* recombine with the sign bit saved in r5.y */
+"endif\n"
+"cmov_logical r0.x___, r2.x, r0.x, r1.y\n" /* larger operand zero: result is the smaller operand's magnitude bits */
+"cmov_logical r0.x___, r3.z, r2.x, r0.x\n" /* large exponent gap or zero smaller operand: result is the larger operand's magnitude bits */
+"and r0._y__, r1.w, r4.y\n"
+"and r0._y__, r3.w, r0.y\n"
+"dcl_literal l96, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ieq r1.xy__, r0.wzww, l96\n" /* r1.x / r1.y = in0 / in1 magnitude equals the infinity bit pattern */
+"ior r1.x___, r1.y, r1.x\n"
+"ior r0._y__, r0.y, r1.x\n"
+"dcl_literal l97, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.y, l97, r0.x\n" /* overflowed sum or an infinite operand: result bits 0x7F800000 */
+"dcl_literal l98, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ult r1.xy__, l98, r0.wzww\n" /* r1.x / r1.y = in0 / in1 magnitude above the infinity pattern (NaN encodings) */
+"dcl_literal l99, 0x00000000, 0x7F800000, 0x7F800000, 0x00000000\n"
+"ine r0._yz_, r0.zzwz, l99\n" /* r0.y = in1 magnitude is not infinity, r0.z = in0 magnitude is not infinity */
+"and r0._yz_, r1.xxyx, r0.yyzy\n"
+"ior r0._y__, r0.z, r0.y\n"
+"dcl_literal l100, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l100, r0.x\n" /* a NaN operand whose partner is not infinite: result bits 0x7FC00000 */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same values as the in(2) and out(1) counts on the mdef line */
+},
+{ "__ilogb_f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(251)_out(1)_in(1)\n" /* macro id 251, declared with one output and one input */
+"mov r0, in0\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r0.xy__, r0.x, l0\n" /* r0.x = input bits without sign, r0.y = exponent field */
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 is 1.0f */
+"ior r0.__z_, r0.x, l1\n" /* r0.z .. below: alternate path, selected later only for small non-zero magnitudes */
+"dcl_literal l2, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* 0xBF800000 is -1.0f */
+"add r0.__z_, r0.z, l2\n"
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.__z_, r0.z, l3\n"
+"dcl_literal l4, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.__z_, l4, r0.z_neg(xyzw)\n"
+"dcl_literal l5, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.__z_, r0.z, l5\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 0x17 = 23 */
+"ushr r0.__z_, r0.z, l6\n"
+"dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r0.__z_, r0.x, r0.z, l7\n"
+"dcl_literal l8, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* 0xFFFFFF82 is -126 as a 32-bit integer */
+"iadd r0.__z_, r0.z_neg(xyzw), l8\n"
+"dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* 0x00800000: smallest bit pattern with a non-zero exponent field */
+"ilt r0.___w, r0.x, l9\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.x___, r0.x, l10\n"
+"and r0.___w, r0.w, r1.x\n" /* r0.w = magnitude below 0x00800000 AND non-zero */
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0._y__, r0.y, l11\n"
+"dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 is -127 as a 32-bit integer */
+"iadd r0._y__, r0.y, l12\n" /* r0.y = (exponent field >> 23) - 127 */
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n" /* small non-zero magnitude: use the alternate-path value instead */
+"dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l13, r0.x\n" /* r0.z = magnitude above the infinity pattern (NaN encodings) */
+"dcl_literal l14, 0x00000000, 0x00000000, 0x00000000, 0x7F800000\n"
+"ieq r0.x__w, r0.x, l14\n" /* r0.x = magnitude is zero, r0.w = magnitude equals the infinity pattern */
+"ior r0.x___, r0.z, r0.x\n"
+"dcl_literal l15, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.x___, r0.x, l15, r0.y\n" /* zero or NaN input: result 0x80000000, otherwise r0.y */
+"dcl_literal l16, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"cmov_logical r0.x___, r0.w, l16, r0.x\n" /* infinite input: result 0x7FFFFFFF */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__ldexp_f32i32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(252)_out(1)_in(2)\n" /* macro id 252, declared with one output and two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.x = in0.x, r0.y = in1.x */
+"itof r0._y__, r0.y\n" /* second operand converted from integer to float */
+"dcl_literal l0, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n" /* 0x44800000 is 1024.0f */
+"lt r0.__z_, l0, r0.y_abs\n"
+"ftoi r0.___w, r0.y_abs\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0._y__, r0.y, l1\n"
+"dcl_literal l2, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n" /* 0x400 = 1024 */
+"cmov_logical r0.__z_, r0.z, l2, r0.w\n" /* absolute value limited to 1024 */
+"inegate r0.___w, r0.z\n"
+"cmov_logical r0._y__, r0.y, r0.w, r0.z\n" /* r0.y = second operand clamped to [-1024, 1024], as an integer */
+"dcl_literal l3, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* 0xFFFFFF6B is -149 as a 32-bit integer */
+"iadd r0.__z_, r0.y, l3\n"
+"dcl_literal l4, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r1, r0.x, l4\n" /* r1 = (magnitude bits, sign bit, exponent field, mantissa field) of in0.x */
+"cmov_logical r0.__z_, r1.z, r0.y, r0.z\n" /* zero exponent field: use the -149 adjusted count */
+"itof r0._y__, r0.y\n"
+"itof r0.___w, r1.w\n"
+"cmov_logical r0.___w, r1.z, r1.x, r0.w\n" /* zero exponent field: work on the mantissa field converted to float instead */
+"dcl_literal l5, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r0.w, l5\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 0x17 = 23 */
+"ishr r1.__z_, r1.z, l6\n"
+"dcl_literal l7, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, just above the mantissa field */
+"ior r1.___w, r1.w, l7\n"
+"iadd r1.__z_, r1.z, r0.z\n" /* shifted exponent field plus the requested count */
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.__z_, r0.z, l8\n"
+"dcl_literal l9, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+"iadd r1.__z_, r1.z, l9\n"
+"dcl_literal l10, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+"iadd r2.x___, l10, r1.z_neg(xyzw)\n" /* r2.x = -126 - new exponent; positive values are used as a right-shift count below */
+"dcl_literal l11, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r1.__z_, l11, r1.z\n" /* r1.z = new exponent above 127 */
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2._y__, r2.x, l12\n"
+"dcl_literal l13, 0x00000017, 0x00000018, 0x00000000, 0x00000000\n"
+"ilt r3.xyz_, l13, r2.x\n" /* r3.x: 23 < r2.x, r3.y: 24 < r2.x, r3.z: 0 < r2.x */
+"dcl_literal l14, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2.x___, r3.x, l14, r2.x\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.y, l15, r2.x\n" /* shift count clamped to [0, 24] */
+"dcl_literal l16, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r2._y__, l16, r2.x_neg(xyzw)\n"
+"ishr r2.x___, r1.w, r2.x\n" /* r2.x = significand shifted right by the count */
+"ishl r1.___w, r1.w, r2.y\n" /* r1.w = the bits shifted out, moved to the top of the word */
+"dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r3.y, l17, r1.w\n"
+"dcl_literal l18, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ieq r2._y__, r1.w, l18\n" /* shifted-out bits exactly 0x80000000 */
+"dcl_literal l19, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ult r1.___w, l19, r1.w\n" /* shifted-out bits above 0x80000000 */
+"dcl_literal l20, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r2.__z_, r2.x, l20\n"
+"iadd r2.__z_, r2.x, r2.z\n"
+"cmov_logical r2.x___, r2.y, r2.z, r2.x\n" /* exact tie: add the low bit, i.e. round to even */
+"dcl_literal l21, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r2._y__, r2.x, l21\n"
+"cmov_logical r1.___w, r1.w, r2.y, r2.x\n" /* above the tie: round up by one */
+"iadd r0.__z_, r0.w, r0.z\n" /* working value bits plus (count << 23) */
+"dcl_literal l22, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r1.z, l22, r0.z\n" /* exponent above 127: infinity bit pattern */
+"cmov_logical r0.__z_, r3.z, r1.w, r0.z\n" /* positive shift count: use the shifted and rounded significand */
+"dcl_literal l23, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n" /* 0x43960000 is 300.0f */
+"lt r0._y__, l23, r0.y\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l24, r0.z\n" /* clamped count above 300: infinity bit pattern */
+"ior r0.__z_, r1.y, r0.y\n" /* reattach the sign bit saved in r1.y */
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r0.___w, l25, r0.y\n"
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n"
+"cmov_logical r0._y__, r1.x, r0.y, r0.x\n" /* zero magnitude: pass the input through unchanged */
+"dcl_literal l26, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r0.__z_, r1.x, l26\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* infinity or NaN magnitude: pass the input through unchanged */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same values as the in(2) and out(1) counts on the mdef line */
+},
+{ "__length_2f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(253)_out(1)_in(1)\n" /* macro id 253, declared with one output and one input */
+"mov r0, in0\n"
+"dp2_ieee r1._y__, r0.xyxx, r0.xyxx\n" /* two-component dot product of the input's .xy with itself */
+"sqrt_vec r0.x___, r1.y\n" /* result in r0.x; the other components of r0 still hold the input */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__length_4f32", /* table entry: macro name, IL macro text (adjacent string literals), two trailing integers */
+"mdef(254)_out(1)_in(1)\n" /* macro id 254, declared with one output and one input */
+"mov r0, in0\n"
+"dp2_ieee r0.x___, r0.xyxx, r0.xyxx\n" /* two-component dot product of .xy with itself */
+"mad_ieee r0.x___, r0.z, r0.z, r0.x\n" /* add z*z */
+"mad_ieee r0.x___, r0.w, r0.w, r0.x\n" /* add w*w */
+"sqrt_vec r0.x___, r0.x\n" /* square root of the accumulated sum, result in r0.x */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same values as the in(1) and out(1) counts on the mdef line */
+},
+{ "__length_f32", /* table entry: macro name, then the macro text as adjacent string literals */
+"mdef(255)_out(1)_in(1)\n" /* macro header text: id 255, one output, one input */
+"mov r0, in0\n" /* r0 = the single input operand */
+"mov r0.x___, r0.x_abs\n" /* r0.x = r0.x with the _abs source modifier; presumably |x|, the scalar case of length */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; presumably input/output counts mirroring the header -- TODO confirm against the struct declaration */
+},
+{ "__lgamma_f32", /* table entry: macro name, then the macro text as adjacent string literals */
+"mdef(256)_out(1)_in(1)\n" /* macro header text: id 256, one output, one input */
+"mov r0, in0\n" /* r0.x holds the argument for the rest of the macro */
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x007FFFFF, 0x7F800000, 0x00000000\n" /* bit masks: all but bit 31, low 23 bits, bits 23..30 */
+"and r1.xyz_, r0.x, l0\n" /* r1.x/y/z = argument bits under each mask; presumably |x|, mantissa and exponent fields of a float32 */
+"add r0._y__, r1.x, r1.x\n"
+"round_nearest r0._y__, r0.y\n"
+"\n"
+"dcl_literal l1, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r1.x, l1, r0.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x3E800000, 0x3FC90FDB, 0x00000000\n" /* as float32 bit patterns: y = 0.25, z ~= 1.5707964 (pi/2) */
+"mul_ieee r0._yz_, r0.yyzy, l2\n"
+"mul_ieee r0.___w, r0.z, r0.z\n"
+"\n"
+"dcl_literal l3, 0x3636DF25, 0xB492923A, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l4, 0xB95009D4, 0x37D00AE2, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r0.w, l3, l4\n" /* first step of two chained mad sequences in r2.x and r2.y; looks like paired polynomial evaluations -- NOTE(review): confirm */
+"\n"
+"dcl_literal l5, 0x3C088887, 0xBAB60B60, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r0.w, r2.xyxx, l5\n"
+"\n"
+"dcl_literal l6, 0xBE2AAAAB, 0x3D2AAAAB, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r0.w, r2.xyxx, l6\n"
+"mul_ieee r2.__z_, r0.z, r0.w\n"
+"mad_ieee r0.__z_, r2.z, r2.x, r0.z\n"
+"frc r0._y__, r0.y\n"
+"\n"
+"dcl_literal l7, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+"mul_ieee r0._y__, r0.y, l7\n"
+"round_nearest r0._y__, r0.y\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n" /* as float32 bit patterns: 0.0, 1.0, 2.0, 3.0 */
+"eq r3, r0.y, l8\n" /* r3.xyzw = per-component compare of r0.y against those four values; used as selectors below */
+"\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ge r0._y__, r1.x, l9\n"
+"and r3, r3, r0.y\n"
+"and r0._y__, r0.z, r3.x\n"
+"\n"
+"dcl_literal l10, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"\n"
+"dcl_literal l11, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r2.x___, r0.w_neg(xyzw), l10, l11\n"
+"mul_ieee r0.___w, r0.w, r0.w\n"
+"mad_ieee r0.___w, r0.w, r2.y, r2.x\n"
+"cmov_logical r0._y__, r3.y, r0.w, r0.y\n"
+"cmov_logical r0._y__, r3.z, r0.z_neg(xyzw), r0.y\n"
+"cmov_logical r0._y__, r3.w, r0.w_neg(xyzw), r0.y\n"
+"itof r0.__zw, r1.yyyx\n"
+"\n"
+"dcl_literal l12, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n" /* as a float32 bit pattern: ~= 3.1415927 (pi) */
+"mul_ieee r0.___w, r0.w, l12\n"
+"\n"
+"dcl_literal l13, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r2.xy__, r0.z, l13\n"
+"\n"
+"dcl_literal l14, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r0.__zw, r0.w, l14\n"
+"\n"
+"dcl_literal l15, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 0x17 = 23, the shift count used with the 0x7F800000 mask throughout */
+"ishr r0.__z_, r0.z, l15\n"
+"\n"
+"dcl_literal l16, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.___w, r0.w, l16\n"
+"\n"
+"dcl_literal l17, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.__z_, l17, r0.z_neg(xyzw)\n"
+"inegate r1._y__, r0.z\n"
+"\n"
+"dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1._y__, r1.y, l18\n"
+"iadd r1._y__, r0.w, r1.y\n"
+"ishr r0.___w, r0.w, r0.z\n"
+"\n"
+"dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l19, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r0.w, r1.y\n"
+"\n"
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.__zw, r1.xxxz, l20\n"
+"\n"
+"dcl_literal l21, 0x00000000, 0x00000000, 0x7F800000, 0x00000000\n"
+"ieq r3.xyz_, r1.zxxz, l21\n"
+"and r0.___w, r2.z, r3.x\n"
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n"
+"\n"
+"dcl_literal l22, 0x00800000, 0x32000000, 0x1C800000, 0x7F800000\n"
+"ilt r4, r1.x, l22\n" /* r4.xyzw = integer compares of the masked argument against four thresholds; reused as selectors later */
+"and r0.__z_, r2.w, r4.y\n"
+"\n"
+"dcl_literal l23, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r0.___w, r1.x, l23\n"
+"cmov_logical r0._y__, r0.z, r0.w, r0.y\n"
+"frc r0.__z_, r1.x\n"
+"add r0.___w, r1.x, r0.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l24, 0x00000000, 0x00000000, 0x3F000000, 0x00000000\n"
+"eq r1._yz_, r0.z, l24\n"
+"\n"
+"dcl_literal l25, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ge r0.__z_, r0.w, l25\n"
+"\n"
+"dcl_literal l26, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r0.___w, r0.w, l26\n"
+"and r0.__z_, r1.y, r0.z\n"
+"ior r0.__z_, r3.y, r0.z\n"
+"\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l27, r0.y\n"
+"frc r0.__z_, r0.w\n"
+"\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r0.__z_, r0.z, l28\n"
+"\n"
+"dcl_literal l29, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"\n"
+"dcl_literal l30, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r0.__z_, r0.z, l29, l30\n"
+"cmov_logical r0._y__, r1.z, r0.z, r0.y\n"
+"\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r0.x, l31\n" /* integer compare of the raw argument bits against 0; r0.z is later used to pick the negated-argument path */
+"cmov_logical r0._y__, r0.z, r0.y_neg(xyzw), r0.y\n"
+"mul_ieee r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l32, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._y__, r0.y, l32\n"
+"\n"
+"dcl_literal l33, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"div_zeroop(infinity) r5._y__, l33, r0.w_abs\n"
+"\n"
+"dcl_literal l34, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.___w, r5.y, l34\n"
+"itof r0.___w, r0.w\n"
+"\n"
+"dcl_literal l35, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r1._yz_, r0.w, l35\n"
+"\n"
+"dcl_literal l36, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.y, l36\n"
+"\n"
+"dcl_literal l37, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1._y__, r1.z, l37\n"
+"\n"
+"dcl_literal l38, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r0.___w, r0.w, l38\n"
+"\n"
+"dcl_literal l39, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.___w, l39, r0.w_neg(xyzw)\n"
+"inegate r1.__z_, r0.w\n"
+"\n"
+"dcl_literal l40, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1.__z_, r1.z, l40\n"
+"iadd r1.__z_, r1.y, r1.z\n"
+"ishr r1._y__, r1.y, r0.w\n"
+"\n"
+"dcl_literal l41, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l41, r0.w\n"
+"cmov_logical r6.___w, r0.w, r1.y, r1.z\n"
+"\n"
+"dcl_literal l42, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r0.___w, r5.y, l42\n"
+"\n"
+"dcl_literal l43, 0xFFFFFFE7, 0x00000000, 0xFFFFFFE7, 0x00000000\n"
+"mov r6.x_z_, l43\n"
+"\n"
+"dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r5.x__w, l44\n"
+"cmov_logical r1._yz_, r0.w, r6.zzwz, r5.xxyx\n"
+"\n"
+"dcl_literal l45, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r1.z, l45\n"
+"iadd r0.___w, r0.w, r1.y\n"
+"\n"
+"dcl_literal l46, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1._y__, r1.z, l46\n"
+"\n"
+"dcl_literal l47, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, l47, r0.w\n"
+"\n"
+"dcl_literal l48, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.__z_, l48, r1.y\n"
+"\n"
+"dcl_literal l49, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.__z_, r1.z, l49\n"
+"\n"
+"dcl_literal l50, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r1.z, l50\n"
+"\n"
+"dcl_literal l51, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.__z_, r1.z, l51\n"
+"iadd r0.___w, r0.w, r2.z\n"
+"itof r0.___w, r0.w\n"
+"\n"
+"dcl_literal l52, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.__z_, r0.w, l52\n"
+"ior r1._y__, r1.y, r1.z\n"
+"\n"
+"dcl_literal l53, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r1._yz_, r1.y, l53\n"
+"div_zeroop(infinity) r1.__z_, r1.y, r1.z\n"
+"mul_ieee r2.___w, r1.z, r1.z\n"
+"mul_ieee r3.x___, r2.w, r2.w\n"
+"\n"
+"dcl_literal l54, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l55, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r3.x, l54, l55\n"
+"\n"
+"dcl_literal l56, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r3.x, r5.xyxx, l56\n"
+"mul_ieee r3.___w, r3.x, r5.x\n"
+"\n"
+"dcl_literal l57, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r3.x___, r3.x, r5.y, l57\n"
+"mad_ieee r2.___w, r2.w, r3.x, r3.w\n"
+"mul_ieee r3.x___, r1.y, r1.y\n"
+"\n"
+"dcl_literal l58, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.___w, r3.x, l58, r2.w\n"
+"mad_ieee r1.__z_, r1.z, r2.w, r2.z\n"
+"\n"
+"dcl_literal l59, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.__z_, r3.x, l59, r1.z_neg(xyzw)\n"
+"add r1._y__, r1.y_neg(xyzw), r1.z\n"
+"\n"
+"dcl_literal l60, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0.___w, r0.w, l60, r1.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l61, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1._y__, r2.x, l61\n"
+"\n"
+"dcl_literal l62, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.__z_, r2.y, l62\n"
+"\n"
+"dcl_literal l63, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r1._y__, r1.y, l63\n"
+"\n"
+"dcl_literal l64, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r1._y__, l64, r1.y_neg(xyzw)\n"
+"inegate r2.x___, r1.y\n"
+"\n"
+"dcl_literal l65, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l65\n"
+"iadd r2.x___, r1.z, r2.x\n"
+"ishr r1.__z_, r1.z, r1.y\n"
+"\n"
+"dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, l66, r1.y\n"
+"cmov_logical r6._y__, r1.y, r1.z, r2.x\n"
+"\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r1.___w, l67\n"
+"cmov_logical r1._yz_, r4.x, r6.xxyx, r1.wwxw\n"
+"\n"
+"dcl_literal l68, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.___w, r1.z, l68\n"
+"iadd r1._y__, r1.w, r1.y\n"
+"\n"
+"dcl_literal l69, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.z, l69\n"
+"\n"
+"dcl_literal l70, 0x00000000, 0xFFFFFF81, 0x00000000, 0x004AFB20\n"
+"iadd r1._y_w, l70, r1.yyyz\n"
+"\n"
+"dcl_literal l71, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.___w, r1.w, l71\n"
+"\n"
+"dcl_literal l72, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r1.w, l72\n"
+"\n"
+"dcl_literal l73, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1.___w, r1.w, l73\n"
+"iadd r1._y__, r1.y, r2.x\n"
+"itof r1._y__, r1.y\n"
+"\n"
+"dcl_literal l74, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.x___, r1.y, l74\n"
+"ior r1.__z_, r1.z, r1.w\n"
+"\n"
+"dcl_literal l75, 0x00000000, 0x00000000, 0xBF800000, 0x3F800000\n"
+"add r1.__zw, r1.z, l75\n"
+"div_zeroop(infinity) r1.___w, r1.z, r1.w\n"
+"mul_ieee r2._y__, r1.w, r1.w\n"
+"mul_ieee r2.__z_, r2.y, r2.y\n"
+"\n"
+"dcl_literal l76, 0x3E1CD04F, 0x00000000, 0x00000000, 0x3E178897\n"
+"\n"
+"dcl_literal l77, 0x3E638E29, 0x00000000, 0x00000000, 0x3E3A3325\n"
+"mad_ieee r3.x__w, r2.z, l76, l77\n"
+"\n"
+"dcl_literal l78, 0x3ECCCCCD, 0x00000000, 0x00000000, 0x3E924925\n"
+"mad_ieee r3.x__w, r2.z, r3.xxxw, l78\n"
+"mul_ieee r2.___w, r2.z, r3.x\n"
+"\n"
+"dcl_literal l79, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r2.__z_, r2.z, r3.w, l79\n"
+"mad_ieee r2._y__, r2.y, r2.z, r2.w\n"
+"mul_ieee r2.__z_, r1.z, r1.z\n"
+"\n"
+"dcl_literal l80, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2._y__, r2.z, l80, r2.y\n"
+"mad_ieee r1.___w, r1.w, r2.y, r2.x\n"
+"\n"
+"dcl_literal l81, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.___w, r2.z, l81, r1.w_neg(xyzw)\n"
+"add r1.__z_, r1.z_neg(xyzw), r1.w\n"
+"\n"
+"dcl_literal l82, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r1._y__, r1.y, l82, r1.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l83, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"mul_ieee r1.__z_, r1.y, l83\n"
+"cmov_logical r1.___w, r4.y, r1.z, r0.w\n"
+"mov r1.__z_, r0.x_neg(xyzw)\n"
+"mov r5.__z_, r0.x\n"
+"cmov_logical r1.__zw, r0.z, r1.zzzw, r5.zzzw\n" /* r1.z = -x when the r0.z selector is set, else x; all later range tests read r1.z */
+"ftoi r0.___w, r1.z\n"
+"\n"
+"dcl_literal l84, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"ige r2.x___, r0.w, l84\n"
+"\n"
+"dcl_literal l85, 0x00000007, 0x00000006, 0x00000005, 0x00000004\n"
+"ige r5, r0.w, l85\n"
+"round_z r0.___w, r1.z\n"
+"add r0.___w, r1.z, r0.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l86, 0x40C00000, 0x40A00000, 0x40800000, 0x40400000\n"
+"add r6, r0.w, l86\n"
+"\n"
+"dcl_literal l87, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r2._y__, r5.x, r6.x, l87\n"
+"mul_ieee r2.__z_, r6.y, r2.y\n"
+"cmov_logical r2._y__, r5.y, r2.z, r2.y\n"
+"mul_ieee r2.__z_, r6.z, r2.y\n"
+"cmov_logical r2._y__, r5.z, r2.z, r2.y\n"
+"mul_ieee r2.__z_, r6.w, r2.y\n"
+"cmov_logical r2._y__, r5.w, r2.z, r2.y\n"
+"\n"
+"dcl_literal l88, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r2.__z_, r0.w, l88\n"
+"mul_ieee r2.__z_, r2.y, r2.z\n"
+"cmov_logical r2.___w, r2.x, r2.z, r2.y\n"
+"\n"
+"dcl_literal l89, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.x___, r2.w, l89\n"
+"itof r3.x___, r3.x\n"
+"\n"
+"dcl_literal l90, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+"and r3.x__w, r3.x, l90\n"
+"\n"
+"dcl_literal l91, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.___w, r3.w, l91\n"
+"\n"
+"dcl_literal l92, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l92\n"
+"\n"
+"dcl_literal l93, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r3.x___, r3.x, l93\n"
+"\n"
+"dcl_literal l94, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r3.x___, l94, r3.x_neg(xyzw)\n"
+"inegate r4.x___, r3.x\n"
+"\n"
+"dcl_literal l95, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r4.x___, r4.x, l95\n"
+"iadd r4.x___, r3.w, r4.x\n"
+"ishr r3.___w, r3.w, r3.x\n"
+"\n"
+"dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.x___, l96, r3.x\n"
+"cmov_logical r2._y__, r3.x, r3.w, r4.x\n"
+"\n"
+"dcl_literal l97, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r3.x___, r2.w, l97\n"
+"\n"
+"dcl_literal l98, 0xFFFFFFE7, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r2.x_z_, l98\n"
+"cmov_logical r2.xy__, r3.x, r2.xyxx, r2.zwzz\n"
+"\n"
+"dcl_literal l99, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.y, l99\n"
+"iadd r2.x___, r2.z, r2.x\n"
+"\n"
+"dcl_literal l100, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2._y__, r2.y, l100\n"
+"\n"
+"dcl_literal l101, 0xFFFFFF81, 0x00000000, 0x004AFB20, 0x00000000\n"
+"iadd r2.x_z_, l101, r2.xxyx\n"
+"\n"
+"dcl_literal l102, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r2.__z_, r2.z, l102\n"
+"\n"
+"dcl_literal l103, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.___w, r2.z, l103\n"
+"\n"
+"dcl_literal l104, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r2.__z_, r2.z, l104\n"
+"iadd r2.x___, r2.x, r2.w\n"
+"itof r2.x___, r2.x\n"
+"\n"
+"dcl_literal l105, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.___w, r2.x, l105\n"
+"ior r2._y__, r2.y, r2.z\n"
+"\n"
+"dcl_literal l106, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r2._yz_, r2.y, l106\n"
+"div_zeroop(infinity) r2.__z_, r2.y, r2.z\n"
+"mul_ieee r3.x___, r2.z, r2.z\n"
+"mul_ieee r3.___w, r3.x, r3.x\n"
+"\n"
+"dcl_literal l107, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l108, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.w, l107, l108\n"
+"\n"
+"dcl_literal l109, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.w, r4.xyxx, l109\n"
+"mul_ieee r4.x___, r3.w, r4.x\n"
+"\n"
+"dcl_literal l110, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r3.___w, r3.w, r4.y, l110\n"
+"mad_ieee r3.x___, r3.x, r3.w, r4.x\n"
+"mul_ieee r3.___w, r2.y, r2.y\n"
+"\n"
+"dcl_literal l111, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3.x___, r3.w, l111, r3.x\n"
+"mad_ieee r2.__z_, r2.z, r3.x, r2.w\n"
+"\n"
+"dcl_literal l112, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.__z_, r3.w, l112, r2.z_neg(xyzw)\n"
+"add r2._y__, r2.y_neg(xyzw), r2.z\n"
+"\n"
+"dcl_literal l113, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2.x___, r2.x, l113, r2.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l114, 0x00000000, 0x375943FE, 0x3A620FAB, 0x00000000\n"
+"\n"
+"dcl_literal l115, 0x00000000, 0x3B1F8219, 0x3CD01D14, 0x00000000\n"
+"mad_ieee r2._yz_, r0.w, l114, l115\n"
+"\n"
+"dcl_literal l116, 0x00000000, 0x3D11C643, 0x3E53B452, 0x00000000\n"
+"mad_ieee r2._yz_, r2.yyzy, r0.w, l116\n"
+"\n"
+"dcl_literal l117, 0x00000000, 0x3E11BDA2, 0x3F1E0B56, 0x00000000\n"
+"mad_ieee r2._yz_, r2.yyzy, r0.w, l117\n"
+"\n"
+"dcl_literal l118, 0x00000000, 0x3E172A19, 0x3F1B09B3, 0x00000000\n"
+"mad_ieee r2._yz_, r2.yyzy, r0.w, l118\n"
+"\n"
+"dcl_literal l119, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9\n"
+"mad_ieee r2._y__, r2.y, r0.w, l119\n"
+"mul_ieee r2._y__, r0.w, r2.y\n"
+"div_zeroop(infinity) r2._y__, r2.y, r2.z\n"
+"\n"
+"dcl_literal l120, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r0.w, l120, r2.y\n"
+"add r0.___w, r2.x, r0.w\n"
+"\n"
+"dcl_literal l121, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r2.x___, l121, r1.z\n"
+"mul_ieee r2._y__, r2.x, r2.x\n"
+"\n"
+"dcl_literal l122, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8\n"
+"\n"
+"dcl_literal l123, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2\n"
+"mad_ieee r2.__z_, r2.y, l122, l123\n"
+"\n"
+"dcl_literal l124, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C\n"
+"mad_ieee r2.__z_, r2.y, r2.z, l124\n"
+"\n"
+"dcl_literal l125, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD\n"
+"mad_ieee r2.__z_, r2.y, r2.z, l125\n"
+"\n"
+"dcl_literal l126, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2.__z_, r2.y, r2.z, l126\n"
+"\n"
+"dcl_literal l127, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mad_ieee r2._y__, r2.y, r2.z, l127\n"
+"\n"
+"dcl_literal l128, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D\n"
+"mad_ieee r2.x___, r2.x, r2.y, l128\n"
+"\n"
+"dcl_literal l129, 0xBEEC5B0C, 0xBF800000, 0xBFBB16C3, 0xBF000000\n"
+"add r5, r1.z, l129\n"
+"\n"
+"dcl_literal l130, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r2._y__, r1.y, l130\n"
+"mov r1._y__, r1.y_neg(xyzw)\n"
+"mad_ieee r2.x___, r5.w, r2.y, r2.x\n"
+"mul_ieee r2._y__, r1.z, r2.y\n"
+"\n"
+"dcl_literal l131, 0x3FDDA512, 0x41000000, 0x40000000, 0x00000000\n"
+"ge r6.xyz_, r1.z, l131\n"
+"\n"
+"dcl_literal l132, 0x40000000, 0x5C800000, 0x41000000, 0x00000000\n"
+"lt r7.xyz_, r1.z, l132\n"
+"and r6.xyz_, r6.xyzx, r7.xyzx\n" /* r6.xyz = (r1.z >= l131) AND (r1.z < l132) per component; interval selectors for the cmovs that follow */
+"cmov_logical r2.x___, r6.y, r2.x, r2.y\n"
+"cmov_logical r0.___w, r6.z, r0.w, r2.x\n"
+"\n"
+"dcl_literal l133, 0x3F666666, 0x3F666666, 0x3F666666, 0x3F666666\n"
+"lt r2.x___, l133, r1.z\n"
+"\n"
+"dcl_literal l134, 0x3F3B4A23, 0x3E6B851F, 0x3F9D70A4, 0x3FDDA512\n"
+"lt r8, r1.z, l134\n"
+"and r2.x___, r2.x, r8.z\n"
+"\n"
+"dcl_literal l135, 0x3F3B4A23, 0x3E6B851F, 0x00000000, 0x3F9D70A4\n"
+"ge r9, r1.z, l135\n"
+"and r2._yzw, r8.xxyw, r9.yyzw\n"
+"ior r3.x___, r2.x, r2.z\n"
+"\n"
+"dcl_literal l136, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3.___w, r2.y, l136\n"
+"\n"
+"dcl_literal l137, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"cmov_logical r3.x___, r3.x, l137, r3.w\n"
+"\n"
+"dcl_literal l138, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r3.x___, r2.w, l138, r3.x\n"
+"\n"
+"dcl_literal l139, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r6.x, l139, r3.x\n"
+"\n"
+"dcl_literal l140, 0x00000000, 0x00000000, 0x00000001, 0x00000002\n"
+"ieq r6._yzw, r3.x, l140\n" /* r3.x is a small integer code (0, 1 or 2) built above; r6.yzw flag which code was chosen */
+"and r6._yzw, r7.x, r6.yyzw\n"
+"\n"
+"dcl_literal l141, 0x3F666666, 0x3F666666, 0x3F666666, 0x3F666666\n"
+"ge r3.x___, l141, r1.z\n"
+"and r3.___w, r9.x, r3.x\n"
+"and r3.x___, r1.y, r3.x\n"
+"\n"
+"dcl_literal l142, 0x3F800000, 0x40000000, 0x00000000, 0x00000000\n"
+"add r4.xy__, r1.z_neg(xyzw), l142\n"
+"and r3.___w, r3.w, r4.x\n"
+"cmov_logical r2._y__, r2.y, r5.x, r3.w\n"
+"cmov_logical r1.__z_, r2.z, r1.z, r2.y\n"
+"cmov_logical r1.__z_, r2.x, r5.y, r1.z\n"
+"cmov_logical r1.__z_, r2.w, r5.z, r1.z\n"
+"cmov_logical r1.__z_, r6.x, r4.y, r1.z\n"
+"mul_ieee r2.x___, r1.z, r1.z\n"
+"\n"
+"dcl_literal l143, 0x00000000, 0x37D383A2, 0x383C2C75, 0x00000000\n"
+"\n"
+"dcl_literal l144, 0x00000000, 0x39679767, 0x38E28445, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, l143, l144\n"
+"\n"
+"dcl_literal l145, 0x00000000, 0x3A9C54A1, 0x3A05B634, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, r2.yyzy, l145\n"
+"\n"
+"dcl_literal l146, 0x00000000, 0x3BF2027E, 0x3B3D6EC6, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, r2.yyzy, l146\n"
+"\n"
+"dcl_literal l147, 0x00000000, 0x3D89F001, 0x3CA89915, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, r2.yyzy, l147\n"
+"\n"
+"dcl_literal l148, 0x00000000, 0x3D9E233F, 0x3EA51A66, 0x00000000\n"
+"mad_ieee r2._yz_, r2.x, r2.yyzy, l148\n"
+"mul_ieee r2.__z_, r2.x, r2.z\n"
+"mad_ieee r2._y__, r1.z, r2.y, r2.z\n"
+"\n"
+"dcl_literal l149, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2._y__, r1.z_neg(xyzw), l149, r2.y\n"
+"add r2._y__, r3.x, r2.y\n"
+"cmov_logical r0.___w, r6.y, r2.y, r0.w\n"
+"mul_ieee r2._y__, r1.z, r2.x\n"
+"\n"
+"dcl_literal l150, 0x39A57B6B, 0xB9A3F927, 0x39AFE9F7, 0x00000000\n"
+"\n"
+"dcl_literal l151, 0xBAB7F476, 0x3A66F867, 0xBA0D3085, 0x00000000\n"
+"mad_ieee r5.xyz_, r2.y, l150, l151\n"
+"\n"
+"dcl_literal l152, 0x3BC7E707, 0xBB7177FE, 0x3B141699, 0x00000000\n"
+"mad_ieee r5.xyz_, r2.y, r5.xyzx, l152\n"
+"\n"
+"dcl_literal l153, 0xBD064D47, 0x3C93373D, 0xBC28FCFE, 0x00000000\n"
+"mad_ieee r5.xyz_, r2.y, r5.xyzx, l153\n"
+"\n"
+"dcl_literal l154, 0x3EF7B95E, 0xBE17213C, 0x3D845A15, 0x00000000\n"
+"mad_ieee r5.xyz_, r2.y, r5.xyzx, l154\n"
+"mad_ieee r2.__z_, r1.z, r5.z, r5.y\n"
+"\n"
+"dcl_literal l155, 0x31E61C52, 0x31E61C52, 0x31E61C52, 0x31E61C52\n"
+"mad_ieee r2._y__, r2.y_neg(xyzw), r2.z, l155\n"
+"mad_ieee r2.x___, r2.x, r5.x, r2.y_neg(xyzw)\n"
+"add r2.x___, r3.x, r2.x\n"
+"\n"
+"dcl_literal l156, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD\n"
+"add r2.x___, r2.x, l156\n"
+"cmov_logical r0.___w, r6.z, r2.x, r0.w\n"
+"\n"
+"dcl_literal l157, 0x3C5B3C5E, 0x3B52D5DB, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l158, 0x3E6A7578, 0x3DD572AF, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l157, l158\n"
+"\n"
+"dcl_literal l159, 0x3F7A4BB2, 0x3F44EFDF, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l159\n"
+"\n"
+"dcl_literal l160, 0x3FBA3AE7, 0x4008392D, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l160\n"
+"\n"
+"dcl_literal l161, 0x3F2200F4, 0x401D2EBE, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l161\n"
+"\n"
+"dcl_literal l162, 0xBD9E233F, 0x3F800000, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l162\n"
+"mul_ieee r2.x___, r1.z, r2.x\n"
+"div_zeroop(infinity) r2.x___, r2.x, r2.y\n"
+"\n"
+"dcl_literal l163, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"mad_ieee r1.__z_, r1.z, l163, r2.x\n"
+"add r1.__z_, r3.x, r1.z\n"
+"cmov_logical r0.___w, r6.w, r1.z, r0.w\n"
+"cmov_logical r0.___w, r4.z, r1.y, r0.w\n"
+"add r1._y__, r1.w, r0.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l164, 0x00000000, 0x00000000, 0x1C800000, 0x4B000000\n"
+"ige r1.__zw, r1.x, l164\n"
+"\n"
+"dcl_literal l165, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.x___, l165, r1.x\n" /* r1.x = (0x7F800000 < masked argument bits); presumably the NaN test for a float32 input -- confirm */
+"and r1.__zw, r0.z, r1.zzzw\n"
+"cmov_logical r0.___w, r1.z, r1.y, r0.w\n"
+"and r1._y__, r4.w, r1.w\n"
+"\n"
+"dcl_literal l166, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0._y__, r0.y, l166\n"
+"and r0._y__, r0.z, r0.y\n"
+"ior r0._y__, r3.y, r0.y\n"
+"\n"
+"dcl_literal l167, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* as a float32 bit pattern: +infinity */
+"cmov_logical r0._y__, r0.y, l167, r0.w\n"
+"cmov_logical r0._y__, r3.z, r0.x, r0.y\n"
+"\n"
+"dcl_literal l168, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.__z_, r0.x, l168\n" /* argument bits OR 0x7FC00000; selected below when r1.x is set */
+"\n"
+"dcl_literal l169, 0x3F800000, 0x00000000, 0x00000000, 0x40000000\n" /* as float32 bit patterns: x = 1.0, w = 2.0 */
+"ieq r0.x__w, r0.x, l169\n" /* bitwise-equality tests of the argument against those two values */
+"cmov_logical r0._y__, r1.x, r0.z, r0.y\n"
+"\n"
+"dcl_literal l170, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r1.y, l170, r0.y\n"
+"ior r0.x___, r0.w, r0.x\n"
+"\n"
+"dcl_literal l171, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l171, r0.y\n" /* final result in r0.x: zero when the argument matched either value above, otherwise r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* two trailing integer fields; presumably input/output counts mirroring the header -- TODO confirm against the struct declaration */
+},
+{ "__lgamma_r_2f32f32",
+"mdef(257)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r1.x___, r0.x_neg(xyzw)\n"
+"max r1.__z_, r0.x, r1.x\n"
+"\n"
+"dcl_literal l0, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r1.___w, r1.z, l0\n"
+"if_logicalnz r1.w\n"
+" \n"
+" dcl_literal l1, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.___w, r1.z, l1\n"
+" itof r1.___w, r1.w\n"
+" \n"
+" dcl_literal l2, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r1.w, l2\n"
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.___w, r2.x, l3\n"
+" \n"
+" dcl_literal l4, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+" iadd r1.___w, r1.w, l4\n"
+" \n"
+" dcl_literal l5, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r2.y, l5\n"
+" \n"
+" dcl_literal l6, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.___w, l6, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, l7, r1.w\n"
+" ishr r2.__z_, r2.x, r1.w\n"
+" inegate r1.___w, r1.w\n"
+" \n"
+" dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l8\n"
+" iadd r1.___w, r2.x, r1.w\n"
+" cmov_logical r1.__z_, r2.y, r2.z, r1.w\n"
+" \n"
+" dcl_literal l9, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n"
+" mov r1.___w, l9\n"
+"else\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r1.___w, l10\n"
+"endif\n"
+"\n"
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r1.z, l11\n"
+"iadd r1.___w, r2.x, r1.w\n"
+"\n"
+"dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r1.___w, l12, r1.w\n"
+"\n"
+"dcl_literal l13, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.z, l13\n"
+"\n"
+"dcl_literal l14, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r2.x___, l14, r1.z\n"
+"\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r2.x___, r2.x, l15\n"
+"\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r2._y__, r2.x, l16\n"
+"ior r1.__z_, r1.z, r2.y\n"
+"\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.x___, r2.x, l17\n"
+"iadd r1.___w, r1.w, r2.x\n"
+"itof r1.___w, r1.w\n"
+"\n"
+"dcl_literal l18, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r2.xy__, r1.z, l18\n"
+"div_zeroop(infinity) r1.__z_, r2.x, r2.y\n"
+"mul_ieee r2._y__, r1.z, r1.z\n"
+"mul_ieee r2.__z_, r2.y, r2.y\n"
+"\n"
+"dcl_literal l19, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l20, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r2.z, l19, l20\n"
+"\n"
+"dcl_literal l21, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r2.z, r3.xyxx, l21\n"
+"mul_ieee r2.___w, r2.z, r3.x\n"
+"\n"
+"dcl_literal l22, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r2.__z_, r2.z, r3.y, l22\n"
+"mad_ieee r2._y__, r2.y, r2.z, r2.w\n"
+"mul_ieee r2.__z_, r2.x, r2.x\n"
+"\n"
+"dcl_literal l23, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2._y__, r2.z, l23, r2.y\n"
+"\n"
+"dcl_literal l24, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r2.___w, r1.w, l24\n"
+"mad_ieee r1.__z_, r1.z, r2.y, r2.w\n"
+"\n"
+"dcl_literal l25, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.__z_, r2.z, l25, r1.z_neg(xyzw)\n"
+"add r1.__z_, r2.x_neg(xyzw), r1.z\n"
+"\n"
+"dcl_literal l26, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r1.__z_, r1.w, l26, r1.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l27, 0x7F800000, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000000\n"
+"and r2.xyz_, r0.x, l27\n"
+"frc r1.___w, r2.z\n"
+"add r2.___w, r2.z, r1.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l28, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r3.x___, r2.w, l28\n"
+"frc r3.x___, r3.x\n"
+"\n"
+"dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r3.x___, r3.x, l29\n"
+"add r3._y__, r2.z, r2.z\n"
+"round_nearest r3._y__, r3.y\n"
+"\n"
+"dcl_literal l30, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r3.__z_, r2.z, l30, r3.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l31, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mul_ieee r3._y__, r3.y, l31\n"
+"frc r3._y__, r3.y\n"
+"\n"
+"dcl_literal l32, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+"mul_ieee r3._y__, r3.y, l32\n"
+"round_nearest r3._y__, r3.y\n"
+"\n"
+"dcl_literal l33, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"mul_ieee r3.__z_, r3.z, l33\n"
+"mul_ieee r3.___w, r3.z, r3.z\n"
+"mul_ieee r4.x___, r3.z, r3.w\n"
+"\n"
+"dcl_literal l34, 0x00000000, 0x3636DF25, 0xB492923A, 0x00000000\n"
+"\n"
+"dcl_literal l35, 0x00000000, 0xB95009D4, 0x37D00AE2, 0x00000000\n"
+"mad_ieee r4._yz_, r3.w, l34, l35\n"
+"\n"
+"dcl_literal l36, 0x00000000, 0x3C088887, 0xBAB60B60, 0x00000000\n"
+"mad_ieee r4._yz_, r3.w, r4.yyzy, l36\n"
+"\n"
+"dcl_literal l37, 0x00000000, 0xBE2AAAAB, 0x3D2AAAAB, 0x00000000\n"
+"mad_ieee r4._yz_, r3.w, r4.yyzy, l37\n"
+"mad_ieee r3.__z_, r4.x, r4.y, r3.z\n"
+"\n"
+"dcl_literal l38, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"\n"
+"dcl_literal l39, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r4.x___, r3.w_neg(xyzw), l38, l39\n"
+"mul_ieee r3.___w, r3.w, r3.w\n"
+"mad_ieee r3.___w, r3.w, r4.z, r4.x\n"
+"\n"
+"dcl_literal l40, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+"eq r4, r3.y, l40\n"
+"\n"
+"dcl_literal l41, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ge r3._y__, r2.z, l41\n"
+"and r4, r4, r3.y\n"
+"and r3._y__, r3.z, r4.x\n"
+"cmov_logical r3._y__, r4.y, r3.w, r3.y\n"
+"cmov_logical r3._y__, r4.z, r3.z_neg(xyzw), r3.y\n"
+"cmov_logical r3._y__, r4.w, r3.w_neg(xyzw), r3.y\n"
+"\n"
+"dcl_literal l42, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r3.__zw, r2.zzzx, l42\n"
+"\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x7F800000, 0x00000000\n"
+"ieq r4.xyz_, r2.xyzx, l43\n"
+"and r2.x___, r3.z, r4.x\n"
+"itof r2._y__, r2.z\n"
+"\n"
+"dcl_literal l44, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r2._y__, r2.y, l44\n"
+"\n"
+"dcl_literal l45, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+"and r4.x__w, r2.y, l45\n"
+"\n"
+"dcl_literal l46, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2._y__, r4.x, l46\n"
+"\n"
+"dcl_literal l47, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.__z_, r4.w, l47\n"
+"\n"
+"dcl_literal l48, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r2._y__, l48, r2.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l49, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.x___, l49, r2.y\n"
+"ishr r4.___w, r3.z, r2.y\n"
+"inegate r2._y__, r2.y\n"
+"\n"
+"dcl_literal l50, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2._y__, r2.y, l50\n"
+"iadd r2._y__, r3.z, r2.y\n"
+"cmov_logical r2._y__, r4.x, r4.w, r2.y\n"
+"cmov_logical r2.x___, r2.x, r2.y, r3.y\n"
+"\n"
+"dcl_literal l51, 0x32000000, 0x1C800000, 0x7F800000, 0x00000000\n"
+"ilt r5.xyz_, r2.z, l51\n"
+"and r2._y__, r3.w, r5.x\n"
+"\n"
+"dcl_literal l52, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r3._y__, r2.z, l52\n"
+"cmov_logical r2.x___, r2.y, r3.y, r2.x\n"
+"\n"
+"dcl_literal l53, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ge r2._y__, r2.w, l53\n"
+"\n"
+"dcl_literal l54, 0x00000000, 0x00000000, 0x3F000000, 0x00000000\n"
+"eq r3._yz_, r1.w, l54\n"
+"and r1.___w, r2.y, r3.y\n"
+"ior r1.___w, r4.y, r1.w\n"
+"\n"
+"dcl_literal l55, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r1.w, l55, r2.x\n"
+"\n"
+"dcl_literal l56, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"\n"
+"dcl_literal l57, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r2.x___, r3.x, l56, l57\n"
+"cmov_logical r1.___w, r3.z, r2.x, r1.w\n"
+"\n"
+"dcl_literal l58, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r0.x, l58\n"
+"cmov_logical r1.___w, r2.x, r1.w_neg(xyzw), r1.w\n"
+"mul_ieee r2._y__, r0.x, r1.w\n"
+"\n"
+"dcl_literal l59, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"div_zeroop(infinity) r3.___w, l59, r2.y_abs\n"
+"\n"
+"dcl_literal l60, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r2._y__, r3.w, l60\n"
+"\n"
+"dcl_literal l61, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r3.w, l61\n"
+"itof r2.___w, r2.w\n"
+"\n"
+"dcl_literal l62, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+"and r4.x__w, r2.w, l62\n"
+"\n"
+"dcl_literal l63, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.___w, r4.x, l63\n"
+"\n"
+"dcl_literal l64, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r2.___w, r2.w, l64\n"
+"\n"
+"dcl_literal l65, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r4.x___, r4.w, l65\n"
+"\n"
+"dcl_literal l66, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r2.___w, l66, r2.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.___w, l67, r2.w\n"
+"ishr r5.___w, r4.x, r2.w\n"
+"inegate r2.___w, r2.w\n"
+"\n"
+"dcl_literal l68, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.___w, r2.w, l68\n"
+"iadd r2.___w, r4.x, r2.w\n"
+"cmov_logical r3._y__, r4.w, r5.w, r2.w\n"
+"\n"
+"dcl_literal l69, 0xFFFFFFE7, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r3.x_z_, l69\n"
+"cmov_logical r2._y_w, r2.y, r3.xxxy, r3.zzzw\n"
+"\n"
+"dcl_literal l70, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r2.w, l70\n"
+"iadd r2._y__, r3.x, r2.y\n"
+"\n"
+"dcl_literal l71, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2._y__, l71, r2.y\n"
+"\n"
+"dcl_literal l72, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l72\n"
+"\n"
+"dcl_literal l73, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r3.x___, l73, r2.w\n"
+"\n"
+"dcl_literal l74, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r3.x___, r3.x, l74\n"
+"\n"
+"dcl_literal l75, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r3._y__, r3.x, l75\n"
+"ior r2.___w, r2.w, r3.y\n"
+"\n"
+"dcl_literal l76, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l76\n"
+"iadd r2._y__, r2.y, r3.x\n"
+"itof r2._y__, r2.y\n"
+"\n"
+"dcl_literal l77, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r3.xy__, r2.w, l77\n"
+"div_zeroop(infinity) r2.___w, r3.x, r3.y\n"
+"mul_ieee r3._y__, r2.w, r2.w\n"
+"mul_ieee r3.__z_, r3.y, r3.y\n"
+"\n"
+"dcl_literal l78, 0x3E1CD04F, 0x00000000, 0x00000000, 0x3E178897\n"
+"\n"
+"dcl_literal l79, 0x3E638E29, 0x00000000, 0x00000000, 0x3E3A3325\n"
+"mad_ieee r4.x__w, r3.z, l78, l79\n"
+"\n"
+"dcl_literal l80, 0x3ECCCCCD, 0x00000000, 0x00000000, 0x3E924925\n"
+"mad_ieee r4.x__w, r3.z, r4.xxxw, l80\n"
+"mul_ieee r3.___w, r3.z, r4.x\n"
+"\n"
+"dcl_literal l81, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r3.__z_, r3.z, r4.w, l81\n"
+"mad_ieee r3._y__, r3.y, r3.z, r3.w\n"
+"mul_ieee r3.__z_, r3.x, r3.x\n"
+"\n"
+"dcl_literal l82, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r3.z, l82, r3.y\n"
+"\n"
+"dcl_literal l83, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r3.___w, r2.y, l83\n"
+"mad_ieee r2.___w, r2.w, r3.y, r3.w\n"
+"\n"
+"dcl_literal l84, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.___w, r3.z, l84, r2.w_neg(xyzw)\n"
+"add r2.___w, r3.x_neg(xyzw), r2.w\n"
+"\n"
+"dcl_literal l85, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2._y__, r2.y, l85, r2.w_neg(xyzw)\n"
+"mov r2.___w, r1.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l86, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"mul_ieee r3.x___, r1.z, l86\n"
+"cmov_logical r1._y__, r5.x, r3.x, r2.y\n"
+"\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0xFFFFFFE7, 0x00000000\n"
+"mov r0._yz_, l87\n"
+"cmov_logical r1.xy__, r2.x, r1.xyxx, r0.xyxx\n"
+"\n"
+"dcl_literal l88, 0x3F666666, 0x3F666666, 0x3F666666, 0x3F666666\n"
+"ge r0._y__, l88, r1.x\n"
+"and r2._y__, r2.w, r0.y\n"
+"\n"
+"dcl_literal l89, 0x3F3B4A23, 0x3E6B851F, 0x00000000, 0x3F9D70A4\n"
+"ge r3, r1.x, l89\n"
+"and r0._y__, r0.y, r3.x\n"
+"\n"
+"dcl_literal l90, 0x3F800000, 0x00000000, 0x00000000, 0x40000000\n"
+"add r4.x__w, r1.x_neg(xyzw), l90\n"
+"and r0._y__, r0.y, r4.x\n"
+"\n"
+"dcl_literal l91, 0x3F3B4A23, 0x3E6B851F, 0x3F9D70A4, 0x3FDDA512\n"
+"lt r6, r1.x, l91\n"
+"and r3.xyz_, r3.yzwy, r6.xywx\n"
+"\n"
+"dcl_literal l92, 0xBEEC5B0C, 0xBF800000, 0xBFBB16C3, 0xBF000000\n"
+"add r7, r1.x, l92\n"
+"cmov_logical r0._y__, r3.x, r7.x, r0.y\n"
+"\n"
+"dcl_literal l93, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3.x___, r3.x, l93\n"
+"cmov_logical r0._y__, r3.y, r1.x, r0.y\n"
+"\n"
+"dcl_literal l94, 0x3F666666, 0x3F666666, 0x3F666666, 0x3F666666\n"
+"lt r3.___w, l94, r1.x\n"
+"and r3.___w, r6.z, r3.w\n"
+"cmov_logical r0._y__, r3.w, r7.y, r0.y\n"
+"ior r3._y__, r3.y, r3.w\n"
+"\n"
+"dcl_literal l95, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"cmov_logical r3.x___, r3.y, l95, r3.x\n"
+"cmov_logical r0._y__, r3.z, r7.z, r0.y\n"
+"\n"
+"dcl_literal l96, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r3.x___, r3.z, l96, r3.x\n"
+"\n"
+"dcl_literal l97, 0x00000000, 0x3FDDA512, 0x41000000, 0x40000000\n"
+"ge r3._yzw, r1.x, l97\n"
+"\n"
+"dcl_literal l98, 0x40000000, 0x5C800000, 0x41000000, 0x00000000\n"
+"lt r6.xyz_, r1.x, l98\n"
+"and r3._yzw, r3.yyzw, r6.xxyz\n"
+"cmov_logical r0._y__, r3.y, r4.w, r0.y\n"
+"\n"
+"dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r3.y, l99, r3.x\n"
+"mul_ieee r3._y__, r0.y, r0.y\n"
+"\n"
+"dcl_literal l100, 0x37D383A2, 0x00000000, 0x00000000, 0x383C2C75\n"
+"\n"
+"dcl_literal l101, 0x39679767, 0x00000000, 0x00000000, 0x38E28445\n"
+"mad_ieee r4.x__w, r3.y, l100, l101\n"
+"\n"
+"dcl_literal l102, 0x3A9C54A1, 0x00000000, 0x00000000, 0x3A05B634\n"
+"mad_ieee r4.x__w, r3.y, r4.xxxw, l102\n"
+"\n"
+"dcl_literal l103, 0x3BF2027E, 0x00000000, 0x00000000, 0x3B3D6EC6\n"
+"mad_ieee r4.x__w, r3.y, r4.xxxw, l103\n"
+"\n"
+"dcl_literal l104, 0x3D89F001, 0x00000000, 0x00000000, 0x3CA89915\n"
+"mad_ieee r4.x__w, r3.y, r4.xxxw, l104\n"
+"\n"
+"dcl_literal l105, 0x3D9E233F, 0x00000000, 0x00000000, 0x3EA51A66\n"
+"mad_ieee r4.x__w, r3.y, r4.xxxw, l105\n"
+"mul_ieee r4.___w, r3.y, r4.w\n"
+"mad_ieee r4.x___, r0.y, r4.x, r4.w\n"
+"\n"
+"dcl_literal l106, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r4.x___, r0.y_neg(xyzw), l106, r4.x\n"
+"add r4.x___, r2.y, r4.x\n"
+"mul_ieee r4.___w, r0.y, r3.y\n"
+"\n"
+"dcl_literal l107, 0x00000000, 0x39A57B6B, 0xB9A3F927, 0x39AFE9F7\n"
+"\n"
+"dcl_literal l108, 0x00000000, 0xBAB7F476, 0x3A66F867, 0xBA0D3085\n"
+"mad_ieee r6._yzw, r4.w, l107, l108\n"
+"\n"
+"dcl_literal l109, 0x00000000, 0x3BC7E707, 0xBB7177FE, 0x3B141699\n"
+"mad_ieee r6._yzw, r4.w, r6.yyzw, l109\n"
+"\n"
+"dcl_literal l110, 0x00000000, 0xBD064D47, 0x3C93373D, 0xBC28FCFE\n"
+"mad_ieee r6._yzw, r4.w, r6.yyzw, l110\n"
+"\n"
+"dcl_literal l111, 0x00000000, 0x3EF7B95E, 0xBE17213C, 0x3D845A15\n"
+"mad_ieee r6._yzw, r4.w, r6.yyzw, l111\n"
+"mad_ieee r5.x___, r0.y, r6.w, r6.z\n"
+"\n"
+"dcl_literal l112, 0x31E61C52, 0x31E61C52, 0x31E61C52, 0x31E61C52\n"
+"mad_ieee r4.___w, r4.w_neg(xyzw), r5.x, l112\n"
+"mad_ieee r3._y__, r3.y, r6.y, r4.w_neg(xyzw)\n"
+"add r3._y__, r2.y, r3.y\n"
+"\n"
+"dcl_literal l113, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD\n"
+"add r3._y__, r3.y, l113\n"
+"\n"
+"dcl_literal l114, 0x3C5B3C5E, 0x00000000, 0x00000000, 0x3B52D5DB\n"
+"\n"
+"dcl_literal l115, 0x3E6A7578, 0x00000000, 0x00000000, 0x3DD572AF\n"
+"mad_ieee r5.x__w, r0.y, l114, l115\n"
+"\n"
+"dcl_literal l116, 0x3F7A4BB2, 0x00000000, 0x00000000, 0x3F44EFDF\n"
+"mad_ieee r5.x__w, r0.y, r5.xxxw, l116\n"
+"\n"
+"dcl_literal l117, 0x3FBA3AE7, 0x00000000, 0x00000000, 0x4008392D\n"
+"mad_ieee r5.x__w, r0.y, r5.xxxw, l117\n"
+"\n"
+"dcl_literal l118, 0x3F2200F4, 0x00000000, 0x00000000, 0x401D2EBE\n"
+"mad_ieee r5.x__w, r0.y, r5.xxxw, l118\n"
+"\n"
+"dcl_literal l119, 0xBD9E233F, 0x00000000, 0x00000000, 0x3F800000\n"
+"mad_ieee r5.x__w, r0.y, r5.xxxw, l119\n"
+"mul_ieee r4.___w, r0.y, r5.x\n"
+"div_zeroop(infinity) r4.___w, r4.w, r5.w\n"
+"\n"
+"dcl_literal l120, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"mad_ieee r0._y__, r0.y, l120, r4.w\n"
+"add r0._y__, r2.y, r0.y\n"
+"ftoi r2._y__, r1.x\n"
+"round_z r4.___w, r1.x\n"
+"add r4.___w, r1.x, r4.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l121, 0x375943FE, 0x00000000, 0x00000000, 0x3A620FAB\n"
+"\n"
+"dcl_literal l122, 0x3B1F8219, 0x00000000, 0x00000000, 0x3CD01D14\n"
+"mad_ieee r5.x__w, r4.w, l121, l122\n"
+"\n"
+"dcl_literal l123, 0x3D11C643, 0x00000000, 0x00000000, 0x3E53B452\n"
+"mad_ieee r5.x__w, r5.xxxw, r4.w, l123\n"
+"\n"
+"dcl_literal l124, 0x3E11BDA2, 0x00000000, 0x00000000, 0x3F1E0B56\n"
+"mad_ieee r5.x__w, r5.xxxw, r4.w, l124\n"
+"\n"
+"dcl_literal l125, 0x3E172A19, 0x00000000, 0x00000000, 0x3F1B09B3\n"
+"mad_ieee r5.x__w, r5.xxxw, r4.w, l125\n"
+"\n"
+"dcl_literal l126, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9\n"
+"mad_ieee r5.x___, r5.x, r4.w, l126\n"
+"mul_ieee r5.x___, r4.w, r5.x\n"
+"div_zeroop(infinity) r5.x___, r5.x, r5.w\n"
+"\n"
+"dcl_literal l127, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r5.x___, r4.w, l127, r5.x\n"
+"\n"
+"dcl_literal l128, 0x00000007, 0x00000006, 0x00000005, 0x00000004\n"
+"ige r8, r2.y, l128\n"
+"\n"
+"dcl_literal l129, 0x40C00000, 0x40A00000, 0x40800000, 0x40400000\n"
+"add r9, r4.w, l129\n"
+"\n"
+"dcl_literal l130, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r5.___w, r8.x, r9.x, l130\n"
+"mul_ieee r6._y__, r9.y, r5.w\n"
+"cmov_logical r5.___w, r8.y, r6.y, r5.w\n"
+"mul_ieee r6._y__, r9.z, r5.w\n"
+"cmov_logical r5.___w, r8.z, r6.y, r5.w\n"
+"mul_ieee r6._y__, r9.w, r5.w\n"
+"cmov_logical r5.___w, r8.w, r6.y, r5.w\n"
+"\n"
+"dcl_literal l131, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"ige r2._y__, r2.y, l131\n"
+"\n"
+"dcl_literal l132, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r4.___w, r4.w, l132\n"
+"mul_ieee r4.___w, r5.w, r4.w\n"
+"cmov_logical r6.__z_, r2.y, r4.w, r5.w\n"
+"\n"
+"dcl_literal l133, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r2._y__, r6.z, l133\n"
+"\n"
+"dcl_literal l134, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r4.___w, r6.z, l134\n"
+"itof r4.___w, r4.w\n"
+"\n"
+"dcl_literal l135, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r7.xy__, r4.w, l135\n"
+"\n"
+"dcl_literal l136, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r4.___w, r7.x, l136\n"
+"\n"
+"dcl_literal l137, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+"iadd r4.___w, r4.w, l137\n"
+"\n"
+"dcl_literal l138, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r5.___w, r7.y, l138\n"
+"\n"
+"dcl_literal l139, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r4.___w, l139, r4.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l140, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r6.___w, l140, r4.w\n"
+"ishr r7.x___, r5.w, r4.w\n"
+"inegate r4.___w, r4.w\n"
+"\n"
+"dcl_literal l141, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r4.___w, r4.w, l141\n"
+"iadd r4.___w, r5.w, r4.w\n"
+"cmov_logical r0.___w, r6.w, r7.x, r4.w\n"
+"\n"
+"dcl_literal l142, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r6._y__, l142\n"
+"cmov_logical r0.__zw, r2.y, r0.zzzw, r6.yyyz\n"
+"\n"
+"dcl_literal l143, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2._y__, r0.w, l143\n"
+"iadd r0.__z_, r2.y, r0.z\n"
+"\n"
+"dcl_literal l144, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.__z_, l144, r0.z\n"
+"\n"
+"dcl_literal l145, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.___w, r0.w, l145\n"
+"\n"
+"dcl_literal l146, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r2._y__, l146, r0.w\n"
+"\n"
+"dcl_literal l147, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r2._y__, r2.y, l147\n"
+"\n"
+"dcl_literal l148, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r4.___w, r2.y, l148\n"
+"ior r0.___w, r0.w, r4.w\n"
+"\n"
+"dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2._y__, r2.y, l149\n"
+"iadd r0.__z_, r0.z, r2.y\n"
+"itof r0.__z_, r0.z\n"
+"\n"
+"dcl_literal l150, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r6._yz_, r0.w, l150\n"
+"div_zeroop(infinity) r0.___w, r6.y, r6.z\n"
+"mul_ieee r2._y__, r0.w, r0.w\n"
+"mul_ieee r4.___w, r2.y, r2.y\n"
+"\n"
+"dcl_literal l151, 0x00000000, 0x00000000, 0x3E1CD04F, 0x3E178897\n"
+"\n"
+"dcl_literal l152, 0x00000000, 0x00000000, 0x3E638E29, 0x3E3A3325\n"
+"mad_ieee r6.__zw, r4.w, l151, l152\n"
+"\n"
+"dcl_literal l153, 0x00000000, 0x00000000, 0x3ECCCCCD, 0x3E924925\n"
+"mad_ieee r6.__zw, r4.w, r6.zzzw, l153\n"
+"mul_ieee r5.___w, r4.w, r6.z\n"
+"\n"
+"dcl_literal l154, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r4.___w, r4.w, r6.w, l154\n"
+"mad_ieee r2._y__, r2.y, r4.w, r5.w\n"
+"mul_ieee r4.___w, r6.y, r6.y\n"
+"\n"
+"dcl_literal l155, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2._y__, r4.w, l155, r2.y\n"
+"\n"
+"dcl_literal l156, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r5.___w, r0.z, l156\n"
+"mad_ieee r0.___w, r0.w, r2.y, r5.w\n"
+"\n"
+"dcl_literal l157, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r4.w, l157, r0.w_neg(xyzw)\n"
+"add r0.___w, r6.y_neg(xyzw), r0.w\n"
+"\n"
+"dcl_literal l158, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0.__z_, r0.z, l158, r0.w_neg(xyzw)\n"
+"add r0.__z_, r5.x, r0.z\n"
+"\n"
+"dcl_literal l159, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r0.___w, l159, r1.x\n"
+"mul_ieee r2._y__, r0.w, r0.w\n"
+"\n"
+"dcl_literal l160, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8\n"
+"\n"
+"dcl_literal l161, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2\n"
+"mad_ieee r4.___w, r2.y, l160, l161\n"
+"\n"
+"dcl_literal l162, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C\n"
+"mad_ieee r4.___w, r2.y, r4.w, l162\n"
+"\n"
+"dcl_literal l163, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD\n"
+"mad_ieee r4.___w, r2.y, r4.w, l163\n"
+"\n"
+"dcl_literal l164, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r4.___w, r2.y, r4.w, l164\n"
+"\n"
+"dcl_literal l165, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mad_ieee r2._y__, r2.y, r4.w, l165\n"
+"\n"
+"dcl_literal l166, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D\n"
+"mad_ieee r0.___w, r0.w, r2.y, l166\n"
+"\n"
+"dcl_literal l167, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.__z_, r1.z, l167\n"
+"mad_ieee r0.___w, r7.w, r1.z, r0.w\n"
+"mul_ieee r1.x___, r1.x, r1.z\n"
+"cmov_logical r0.___w, r3.z, r0.w, r1.x\n"
+"cmov_logical r0.__z_, r3.w, r0.z, r0.w\n"
+"\n"
+"dcl_literal l168, 0x00000000, 0x00000000, 0x00000001, 0x00000002\n"
+"ieq r3.x_zw, r3.x, l168\n"
+"and r3.x_zw, r6.x, r3.xxzw\n"
+"cmov_logical r0.__z_, r3.x, r4.x, r0.z\n"
+"cmov_logical r0.__z_, r3.z, r3.y, r0.z\n"
+"cmov_logical r0._y__, r3.w, r0.y, r0.z\n"
+"and r0.__z_, r5.y, r2.x\n"
+"\n"
+"dcl_literal l169, 0x1C800000, 0x4B000000, 0x7F800000, 0x00000000\n"
+"ige r3.xyz_, r2.z, l169\n"
+"and r1.x_z_, r2.x, r3.xxyx\n"
+"\n"
+"dcl_literal l170, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r1.w, l170\n"
+"and r0.___w, r1.x, r0.w\n"
+"ior r0.__z_, r0.z, r0.w\n"
+"\n"
+"dcl_literal l171, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"\n"
+"dcl_literal l172, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.__z_, r0.z, l171, l172\n"
+"cmov_logical r0._y__, r5.y, r2.w, r0.y\n"
+"add r0.___w, r1.y, r0.y_neg(xyzw)\n"
+"cmov_logical r0._y__, r1.x, r0.w, r0.y\n"
+"\n"
+"dcl_literal l173, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r1.w, l173\n"
+"\n"
+"dcl_literal l174, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.___w, r0.w, l174\n"
+"and r0.___w, r2.x, r0.w\n"
+"\n"
+"dcl_literal l175, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.w, l175, r0.y\n"
+"cmov_logical r0._y__, r4.z, r0.x, r0.y\n"
+"\n"
+"dcl_literal l176, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.___w, l176, r2.z\n"
+"\n"
+"dcl_literal l177, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r1.x___, r0.x, l177\n"
+"cmov_logical r0._y__, r0.w, r1.x, r0.y\n"
+"and r0.___w, r5.z, r1.z\n"
+"\n"
+"dcl_literal l178, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.w, l178, r0.y\n"
+"\n"
+"dcl_literal l179, 0x3F800000, 0x00000000, 0x00000000, 0x40000000\n"
+"ieq r0.x__w, r0.x, l179\n"
+"ior r0.x___, r0.w, r0.x\n"
+"\n"
+"dcl_literal l180, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r0.x, l180, r0.y\n"
+"ior r0.x___, r4.y, r3.z\n"
+"\n"
+"dcl_literal l181, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.x___, r0.x, l181, r0.z\n"
+"mov r0.x___, r2.x\n"
+"mov r0.y, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_128bit_global", /* macro 258 (1 in, 1 out): full four-component load from g[] */
+"mdef(258)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0, g[r0.x]\n" /* all four components of g[index] replace r0; g[] is presumably global memory, per the macro name */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_128bit_local", /* macro 259 (1 in, 1 out): full four-component load through lds_load_vec_id(0) */
+"mdef(259)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"lds_load_vec_id(0) r0, r0.x\n" /* unmasked destination: whole r0 is overwritten from LDS id 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_128bit_private", /* macro 260 (1 in, 1 out): full four-component load from x0[] */
+"mdef(260)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0, x0[r0.x]\n" /* all four components of x0[index] replace r0; x0[] is presumably the private scratch array, per the macro name */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_128bit_uav", /* macro 261 (1 in, 1 out): full four-component raw load from UAV id 0 */
+"mdef(261)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"uav_raw_load_id(0) r0, r0.x\n" /* unmasked destination: whole r0 is overwritten */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_32bit_local", /* macro 262 (1 in, 1 out): single-component load through lds_load_id(0) */
+"mdef(262)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"lds_load_id(0) r0.x, r0.x\n" /* only r0.x is written; r0.yzw still hold the values copied from in0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_32bit_uav", /* macro 263 (1 in, 1 out): single-component raw load from UAV id 0 */
+"mdef(263)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"uav_raw_load_id(0) r0.x, r0.x\n" /* only r0.x is written; r0.yzw still hold the values copied from in0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_64bit_local", /* macro 264 (1 in, 1 out): two-component load through lds_load_vec_id(0) */
+"mdef(264)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"lds_load_vec_id(0) r0.xy__, r0.x\n" /* write mask xy__: only r0.x and r0.y are written */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_64bit_uav", /* macro 265 (1 in, 1 out): two-component raw load from UAV id 0 */
+"mdef(265)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"uav_raw_load_id(0) r0.xy, r0.x\n" /* NOTE(review): mask spelled "xy" here but "xy__" in the LDS variant; presumably equivalent - confirm */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_96bit_global", /* macro 266 (1 in, 1 out): three-component load from g[] */
+"mdef(266)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xyz_, g[r0.x].xyz\n" /* copies .xyz of g[index] into r0.xyz; r0.w is not written */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_96bit_local", /* macro 267 (1 in, 1 out): three-component load through lds_load_vec_id(0) */
+"mdef(267)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"lds_load_vec_id(0) r0.xyz_, r0.x\n" /* write mask xyz_: r0.w is not written */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_96bit_private", /* macro 268 (1 in, 1 out): three-component load from x0[] */
+"mdef(268)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xyz_, x0[r0.x].xyz\n" /* copies .xyz of x0[index] into r0.xyz; r0.w is not written */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_96bit_uav", /* macro 269 (1 in, 1 out): three-component raw load from UAV id 0 */
+"mdef(269)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the address used below */
+"uav_raw_load_id(0) r0.xyz, r0.x\n" /* NOTE(review): mask spelled "xyz" here but "xyz_" in the LDS variant; presumably equivalent - confirm */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_hi_64bit_global", /* macro 270 (1 in, 1 out): loads the upper two components (.zw) of a g[] element */
+"mdef(270)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xy__, g[r0.x].zw\n" /* .zw of g[index] land in r0.xy */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_hi_64bit_private", /* macro 271 (1 in, 1 out): loads the upper two components (.zw) of an x0[] element */
+"mdef(271)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xy__, x0[r0.x].zw\n" /* .zw of x0[index] land in r0.xy */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_lo_64bit_global", /* macro 272 (1 in, 1 out): loads the lower two components (.xy) of a g[] element */
+"mdef(272)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xy__, g[r0.x].xy\n" /* .xy of g[index] land in r0.xy */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_lo_64bit_private", /* macro 273 (1 in, 1 out): loads the lower two components (.xy) of an x0[] element */
+"mdef(273)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.xy__, x0[r0.x].xy\n" /* .xy of x0[index] land in r0.xy */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_w_32bit_global", /* macro 274 (1 in, 1 out): loads the .w component of a g[] element */
+"mdef(274)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, g[r0.x].w\n" /* result is returned in r0.x */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_w_32bit_private", /* macro 275 (1 in, 1 out): loads the .w component of an x0[] element */
+"mdef(275)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, x0[r0.x].w\n" /* result is returned in r0.x */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_x_32bit_global", /* macro 276 (1 in, 1 out): loads the .x component of a g[] element */
+"mdef(276)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, g[r0.x].x\n" /* result is returned in r0.x */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_x_32bit_private", /* macro 277 (1 in, 1 out): loads the .x component of an x0[] element */
+"mdef(277)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, x0[r0.x].x\n" /* result is returned in r0.x */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_y_32bit_global", /* macro 278 (1 in, 1 out): loads the .y component of a g[] element */
+"mdef(278)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, g[r0.x].y\n" /* result is returned in r0.x, not r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_y_32bit_private", /* macro 279 (1 in, 1 out): loads the .y component of an x0[] element */
+"mdef(279)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, x0[r0.x].y\n" /* result is returned in r0.x, not r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_z_32bit_global", /* macro 280 (1 in, 1 out): loads the .z component of a g[] element */
+"mdef(280)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, g[r0.x].z\n" /* result is returned in r0.x, not r0.z */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__load_z_32bit_private", /* macro 281 (1 in, 1 out): loads the .z component of an x0[] element */
+"mdef(281)_out(1)_in(1)\n"
+"mov r0, in0\n" /* in0.x supplies the index used below */
+"mov r0.x, x0[r0.x].z\n" /* result is returned in r0.x, not r0.z */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__log10_f32", /* macro 282 (1 in, 1 out): single-precision log10 of in0.x, result written to r0.x; the final scaling uses log10(2) and log10(e), and zero, negative, +inf and NaN inputs are patched at the end */
+"mdef(282)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00800000, 0x00000000, 0x00000000\n" /* l0.y = 0x00800000: bit pattern of the smallest normal float; l0.z = 0 */
+"ilt r0._yz_, r0.x, l0\n" /* signed integer compares on the raw bits: r0.y = bits < 0x00800000, r0.z = bits < 0 (sign bit set) */
+"if_logicalnz r0.y\n" /* taken when the exponent field is zero or the sign bit is set */
+" \n"
+" dcl_literal l1, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n" /* mantissa-field mask */
+" and r0._y__, r0.x, l1\n"
+" itof r0._y__, r0.y\n" /* mantissa bits converted from integer to float */
+" \n"
+" dcl_literal l2, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n" /* y: exponent-field mask, w: mantissa-field mask */
+" and r0._y_w, r0.y, l2\n"
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 0x17 = 23, the width of the mantissa field */
+" ishr r0._y__, r0.y, l3\n"
+" \n"
+" dcl_literal l4, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n" /* 0x19 = 25 */
+" iadd r0._y__, r0.y, l4\n"
+" \n"
+" dcl_literal l5, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23, just above the mantissa field */
+" ior r0.___w, r0.w, l5\n"
+" \n"
+" dcl_literal l6, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 0x96 = 150 */
+" iadd r0._y__, l6, r0.y_neg(xyzw)\n" /* r0.y = 150 - r0.y */
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, l7, r0.y\n" /* r1.x = (0 < r0.y): selects the right-shifted value below */
+" ishr r1._y__, r0.w, r0.y\n"
+" inegate r0._y__, r0.y\n"
+" \n"
+" dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l8\n"
+" iadd r0._y__, r0.w, r0.y\n"
+" cmov_logical r0._y__, r1.x, r1.y, r0.y\n"
+" \n"
+" dcl_literal l9, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n" /* 0xFFFFFFE7 = -25 as a signed 32-bit integer */
+" mov r0.___w, l9\n" /* r0.w = -25 on this path; it is added to the exponent after the endif */
+"else\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.___w, l10\n" /* no exponent adjustment on this path */
+" mov r0._y__, r0.x\n" /* work on the input bits unchanged */
+"endif\n"
+"\n"
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r0.y, l11\n" /* shift right by 23: exponent field of r0.y */
+"iadd r0.___w, r1.x, r0.w\n"
+"\n"
+"dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* 0xFFFFFF81 = -127, the single-precision exponent bias negated */
+"iadd r0.___w, l12, r0.w\n"
+"\n"
+"dcl_literal l13, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0._y__, r0.y, l13\n" /* keep only the mantissa field */
+"\n"
+"dcl_literal l14, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.x___, l14, r0.y\n"
+"\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.x___, r1.x, l15\n" /* r1.x = bit 23 of (mantissa + 0x004AFB20), i.e. the carry out of the mantissa field */
+"\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* bit pattern of 1.0f */
+"ixor r1._y__, r1.x, l16\n"
+"ior r0._y__, r0.y, r1.y\n" /* mantissa recombined with an exponent field of 0x3F800000 or 0x3F000000, depending on the carry */
+"\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l17\n" /* carry reduced to 0 or 1 */
+"iadd r0.___w, r0.w, r1.x\n" /* and added to the exponent */
+"itof r0.___w, r0.w\n" /* exponent as a float */
+"\n"
+"dcl_literal l18, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n" /* x: -1.0f, y: +1.0f */
+"add r1.xy__, r0.y, l18\n" /* r1.x = m - 1, r1.y = m + 1, where m is r0.y */
+"div_zeroop(infinity) r0._y__, r1.x, r1.y\n" /* r0.y = (m - 1) / (m + 1) */
+"mul_ieee r1._y__, r0.y, r0.y\n"
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"\n"
+"dcl_literal l19, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n" /* l19..l22: polynomial coefficients, evaluated as two interleaved chains in r2.x and r2.y */
+"\n"
+"dcl_literal l20, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l19, l20\n"
+"\n"
+"dcl_literal l21, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l21\n"
+"mul_ieee r1.___w, r1.z, r2.x\n"
+"\n"
+"dcl_literal l22, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1.__z_, r1.z, r2.y, l22\n"
+"mad_ieee r1._y__, r1.y, r1.z, r1.w\n"
+"mul_ieee r1.__z_, r1.x, r1.x\n" /* r1.z = (m - 1)^2 */
+"\n"
+"dcl_literal l23, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* bit pattern of 0.5f */
+"mul_ieee r1.___w, r1.z, l23\n"
+"\n"
+"dcl_literal l24, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* same value as l23 */
+"mad_ieee r1._y__, r1.z, l24, r1.y\n"
+"mad_ieee r0._y__, r0.y_neg(xyzw), r1.y, r1.w\n"
+"add r0._y__, r1.x_neg(xyzw), r0.y\n"
+"\n"
+"dcl_literal l25, 0x3EDE5BD9, 0x3EDE5BD9, 0x3EDE5BD9, 0x3EDE5BD9\n" /* approx. 0.4342945f = log10(e) */
+"mul_ieee r0._y__, r0.y, l25\n"
+"\n"
+"dcl_literal l26, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* approx. 0.30103f = log10(2) */
+"mad_ieee r0._y__, r0.w, l26, r0.y_neg(xyzw)\n" /* r0.y = exponent * log10(2) - r0.y */
+"\n"
+"dcl_literal l27, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n" /* clears the sign bit */
+"and r0.___w, r0.x, l27\n" /* r0.w = bits of |x| */
+"\n"
+"dcl_literal l28, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n" /* bit pattern of -infinity */
+"cmov_logical r0._y__, r0.w, r0.y, l28\n" /* |x| bits all zero (x is +/-0): result becomes -infinity */
+"\n"
+"dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.x___, r0.w, l29\n"
+"and r0.__z_, r0.z, r1.x\n" /* r0.z = sign bit set AND x is not zero */
+"\n"
+"dcl_literal l30, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* a quiet NaN bit pattern with the sign bit set */
+"cmov_logical r0._y__, r0.z, l30, r0.y\n" /* negative non-zero input: result becomes NaN */
+"\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* bit pattern of +infinity */
+"ieq r0.__z_, r0.x, l31\n"
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* x == +infinity: result is x itself */
+"\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l32, r0.w\n" /* |x| bits above the infinity pattern: x is a NaN */
+"\n"
+"dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* exponent all ones plus the top mantissa bit */
+"ior r0.x___, r0.x, l33\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* NaN input: return x | 0x7FC00000; otherwise return the value computed in r0.y */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__log1p_f32",
+"mdef(283)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x00000000, 0x40000000, 0x3F800000, 0x00000000\n"
+"add r0._yz_, r0.x, l0\n"
+"dcl_literal l1, 0x7F800000, 0x007FFFFF, 0x7FFFFFFF, 0x00000000\n"
+"and r1.xyz_, r0.z, l1\n"
+"dcl_literal l2, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.___w, r1.y, l2\n"
+"dcl_literal l3, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"dcl_literal l4, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1._y__, r0.w, l3, l4\n"
+"round_z r1.___w, r1.y\n"
+"ftoi r1._y__, r1.y\n"
+"dcl_literal l5, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mul_ieee r2.x___, r1.w, l5\n"
+"dcl_literal l6, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2._y__, r2.x, l6\n"
+"dcl_literal l7, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r2.__z_, r1.w, l7, r2.y_neg(xyzw)\n"
+"dcl_literal l8, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r0.___w, r1.w_neg(xyzw), l8, r0.w\n"
+"add r1.___w, r0.w, cb0[1].x\n"
+"div_zeroop(infinity) r2.___w, r0.w, r2.x\n"
+"add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+"dcl_literal l9, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r2.w, l9\n"
+"mad_ieee r1.___w, r3.x, r2.y, r1.w_neg(xyzw)\n"
+"mad_ieee r1.___w, r3.x, r2.z, r1.w\n"
+"add r3.x___, r2.w, r3.x_neg(xyzw)\n"
+"mad_ieee r1.___w, r3.x, r2.y, r1.w\n"
+"mad_ieee r1.___w, r3.x, r2.z, r1.w\n"
+"add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+"div_zeroop(infinity) r0.___w, r0.w, r2.x\n"
+"add r1.___w, r2.w, r0.w\n"
+"dcl_literal l10, 0x3E000000, 0x3E000000, 0x3E000000, 0x3E000000\n"
+"dcl_literal l11, 0x3E124925, 0x3E124925, 0x3E124925, 0x3E124925\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), l10, l11\n"
+"dcl_literal l12, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), r2.x, l12\n"
+"dcl_literal l13, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), r2.x, l13\n"
+"dcl_literal l14, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), r2.x, l14\n"
+"dcl_literal l15, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), r2.x, l15\n"
+"dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.x___, r2.w_neg(xyzw), r2.x, l16\n"
+"mul_ieee r2._y__, r2.w_neg(xyzw), r2.w\n"
+"add r2.__z_, r2.w, r1.w_neg(xyzw)\n"
+"mad_ieee r2.___w, r2.x, r2.y, r1.w\n"
+"add r1.___w, r1.w, r2.w_neg(xyzw)\n"
+"mad_ieee r1.___w, r2.x, r2.y, r1.w\n"
+"add r0.___w, r0.w, r2.z\n"
+"add r0.___w, r1.w, r0.w\n"
+"add r1.___w, r2.w, r0.w\n"
+"add r2.x___, r2.w, r1.w_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.x\n"
+"dcl_literal l17, 0xFFFFFFBF, 0xFFFFFFBF, 0xFFFFFFBF, 0xFFFFFFBF\n"
+"iadd r1._y__, r1.y, l17\n"
+"dcl_literal l18, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r2.x___, r1.y, l18\n"
+"dcl_literal l19, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"and r1._y__, r1.y, l19\n"
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l21, 0x31B0FC04, 0x33439E0D, 0x32F632DD, 0x32C01163\n"
+"cmov_logical r3, r2.x, l20, l21\n"
+"dcl_literal l22, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r4, r2.x, l22\n"
+"dcl_literal l23, 0x33161BD2, 0x330AE56B, 0x32692B56, 0x32DC55E6\n"
+"cmov_logical r3, r4.x, l23, r3\n"
+"dcl_literal l24, 0x3379A11D, 0x33703FEA, 0x335996FA, 0x33614F28\n"
+"cmov_logical r3, r4.y, l24, r3\n"
+"dcl_literal l25, 0x30843642, 0x3267A2B6, 0x331D0180, 0x330F3534\n"
+"cmov_logical r3, r4.z, l25, r3\n"
+"dcl_literal l26, 0x335C55E6, 0x335ABC7C, 0x3309CE44, 0x332EAE98\n"
+"cmov_logical r3, r4.w, l26, r3\n"
+"dcl_literal l27, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+"ieq r5, r2.x, l27\n"
+"dcl_literal l28, 0x336F3DE6, 0x336B8475, 0x32AB4A2F, 0x310717B1\n"
+"cmov_logical r3, r5.x, l28, r3\n"
+"dcl_literal l29, 0x31E5BF06, 0x337D6027, 0x32F4D8A5, 0x335AEF40\n"
+"cmov_logical r3, r5.y, l29, r3\n"
+"dcl_literal l30, 0x336E0ABF, 0x31D8284B, 0x32E108B8, 0x3332FE61\n"
+"cmov_logical r3, r5.z, l30, r3\n"
+"dcl_literal l31, 0x32D4D042, 0x336F323E, 0x32ED38DC, 0x31F35344\n"
+"cmov_logical r3, r5.w, l31, r3\n"
+"dcl_literal l32, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+"ieq r6, r2.x, l32\n"
+"dcl_literal l33, 0x337803AF, 0x31983894, 0x33075A1E, 0x32FD7837\n"
+"cmov_logical r3, r6.x, l33, r3\n"
+"dcl_literal l34, 0x32778F32, 0x3234BA20, 0x3360E6FA, 0x3204A550\n"
+"cmov_logical r3, r6.y, l34, r3\n"
+"dcl_literal l35, 0x33716A65, 0x3317A6AF, 0x33233E4B, 0x33738207\n"
+"cmov_logical r3, r6.z, l35, r3\n"
+"dcl_literal l36, 0x333DF5FA, 0x33144D89, 0x32D448E9, 0x331A7886\n"
+"cmov_logical r3, r6.w, l36, r3\n"
+"dcl_literal l37, 0x00000000, 0x0000000D, 0x0000000E, 0x0000000F\n"
+"ieq r2._yzw, r2.x, l37\n"
+"dcl_literal l38, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l39, 0x3C7E0540, 0x3CFC14C0, 0x3D3BA2C0, 0x3D785180\n"
+"cmov_logical r7, r2.x, l38, l39\n"
+"dcl_literal l40, 0x336CE70F, 0x331AB151, 0x32A71570, 0x3284672B\n"
+"cmov_logical r3, r2.y, l40, r3\n"
+"dcl_literal l41, 0x318717B1, 0x336D81F6, 0x330DBADE, 0x324B78B5\n"
+"cmov_logical r3, r2.z, l41, r3\n"
+"dcl_literal l42, 0x332D9F9D, 0x32C359F3, 0x32924167, 0x3377D1CF\n"
+"cmov_logical r3, r2.w, l42, r3\n"
+"dcl_literal l43, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r8.xyz_, r1.y, l43\n"
+"cmov_logical r1._y__, r8.x, r3.y, r3.x\n"
+"cmov_logical r1._y__, r8.y, r3.z, r1.y\n"
+"cmov_logical r1._y__, r8.z, r3.w, r1.y\n"
+"add r2.x___, r1.w, r1.y\n"
+"add r1.___w, r1.w, r2.x_neg(xyzw)\n"
+"add r1._y__, r1.y, r1.w\n"
+"add r0.___w, r0.w, r1.y\n"
+"add r1._y__, r2.x, r0.w\n"
+"add r1.___w, r2.x, r1.y_neg(xyzw)\n"
+"dcl_literal l44, 0x3D9A0EB8, 0x3DB78690, 0x3DD49368, 0x3DF13838\n"
+"cmov_logical r3, r4.x, l44, r7\n"
+"dcl_literal l45, 0x3E06BBF0, 0x3E14AA94, 0x3E226958, 0x3E2FF980\n"
+"cmov_logical r3, r4.y, l45, r3\n"
+"dcl_literal l46, 0x3E3D5C48, 0x3E4A92D4, 0x3E579E48, 0x3E647FBC\n"
+"cmov_logical r3, r4.z, l46, r3\n"
+"dcl_literal l47, 0x3E713838, 0x3E7DC8C0, 0x3E851926, 0x3E8B3AE4\n"
+"cmov_logical r3, r4.w, l47, r3\n"
+"dcl_literal l48, 0x3E914A0E, 0x3E974714, 0x3E9D3262, 0x3EA30C5E\n"
+"cmov_logical r3, r5.x, l48, r3\n"
+"dcl_literal l49, 0x3EA8D56C, 0x3EAE8DEC, 0x3EB43640, 0x3EB9CEBE\n"
+"cmov_logical r3, r5.y, l49, r3\n"
+"dcl_literal l50, 0x3EBF57C0, 0x3EC4D19C, 0x3ECA3CA0, 0x3ECF991E\n"
+"cmov_logical r3, r5.z, l50, r3\n"
+"dcl_literal l51, 0x3ED4E764, 0x3EDA27BA, 0x3EDF5A6C, 0x3EE47FBE\n"
+"cmov_logical r3, r5.w, l51, r3\n"
+"dcl_literal l52, 0x3EE997F2, 0x3EEEA350, 0x3EF3A212, 0x3EF8947A\n"
+"cmov_logical r3, r6.x, l52, r3\n"
+"dcl_literal l53, 0x3EFD7AC4, 0x3F012A95, 0x3F0391F2, 0x3F05F397\n"
+"cmov_logical r3, r6.y, l53, r3\n"
+"dcl_literal l54, 0x3F084F9C, 0x3F0AA61E, 0x3F0CF735, 0x3F0F42FA\n"
+"cmov_logical r3, r6.z, l54, r3\n"
+"dcl_literal l55, 0x3F118986, 0x3F13CAF0, 0x3F16074F, 0x3F183EB9\n"
+"cmov_logical r3, r6.w, l55, r3\n"
+"dcl_literal l56, 0x3F1A7144, 0x3F1C9F06, 0x3F1EC813, 0x3F20EC7F\n"
+"cmov_logical r3, r2.y, l56, r3\n"
+"dcl_literal l57, 0x3F230C5E, 0x3F2527C2, 0x3F273EC0, 0x3F295169\n"
+"cmov_logical r3, r2.z, l57, r3\n"
+"dcl_literal l58, 0x3F2B5FCE, 0x3F2D6A02, 0x3F2F7015, 0x3F317217\n"
+"cmov_logical r2, r2.w, l58, r3\n"
+"cmov_logical r2.x___, r8.x, r2.y, r2.x\n"
+"cmov_logical r2.x___, r8.y, r2.z, r2.x\n"
+"cmov_logical r2.x___, r8.z, r2.w, r2.x\n"
+"add r2._y__, r1.y, r2.x\n"
+"add r2.x___, r2.x, r2.y_neg(xyzw)\n"
+"add r1._y__, r1.y, r2.x\n"
+"add r0.___w, r0.w, r1.w\n"
+"add r0.___w, r1.y, r0.w\n"
+"add r1._y__, r2.y, r0.w\n"
+"add r1.___w, r2.y, r1.y_neg(xyzw)\n"
+"add r0.___w, r0.w, r1.w\n"
+"dcl_literal l59, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l59\n"
+"dcl_literal l60, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.__z_, l60, r1.z\n"
+"dcl_literal l61, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r1.x___, r1.x, l61\n"
+"itof r1.x___, r1.x\n"
+"dcl_literal l62, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r1.___w, r1.x, l62\n"
+"dcl_literal l63, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mul_ieee r2.x___, r1.x, l63\n"
+"dcl_literal l64, 0x3F317000, 0x3F317000, 0x3F317000, 0x3F317000\n"
+"mad_ieee r2._y__, r1.w, l64, r2.x_neg(xyzw)\n"
+"add r2.__z_, r1.x, r1.w_neg(xyzw)\n"
+"dcl_literal l65, 0x3F317000, 0x3F317000, 0x3F317000, 0x3F317000\n"
+"mad_ieee r2._y__, r2.z, l65, r2.y\n"
+"dcl_literal l66, 0x37C00001, 0x37C00001, 0x37C00001, 0x37C00001\n"
+"mad_ieee r1.___w, r1.w, l66, r2.y\n"
+"dcl_literal l67, 0x37C00001, 0x37C00001, 0x37C00001, 0x37C00001\n"
+"mad_ieee r1.___w, r2.z, l67, r1.w\n"
+"dcl_literal l68, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1.x___, r1.x, l68, r1.w\n"
+"add r1.___w, r2.x, r1.x\n"
+"add r2.x___, r2.x, r1.w_neg(xyzw)\n"
+"add r1.x___, r1.x, r2.x\n"
+"add r2.x___, r1.y, r1.x\n"
+"add r1._y__, r1.y, r2.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r1.y\n"
+"add r0.___w, r1.x, r0.w\n"
+"add r1.x___, r2.x, r0.w\n"
+"add r1._y__, r2.x, r1.x_neg(xyzw)\n"
+"add r2.x___, r1.w, r1.x\n"
+"add r2._y__, r1.x, r2.x_neg(xyzw)\n"
+"add r2._y__, r1.w, r2.y\n"
+"add r2.__z_, r1.w, r2.x_neg(xyzw)\n"
+"lt r1.___w, r1.w_abs, r1.x_abs\n"
+"add r1.x___, r1.x, r2.z\n"
+"cmov_logical r1.x___, r1.w, r2.y, r1.x\n"
+"add r0.___w, r0.w, r1.y\n"
+"add r0.___w, r1.x, r0.w\n"
+"add r0.___w, r2.x, r0.w\n"
+"div_zeroop(infinity) r0._y__, r0.x, r0.y\n"
+"mul_ieee r1.x___, r0.y, r0.y\n"
+"dcl_literal l69, 0x3E178897, 0x3E178897, 0x3E178897, 0x3E178897\n"
+"dcl_literal l70, 0x3E1CD04F, 0x3E1CD04F, 0x3E1CD04F, 0x3E1CD04F\n"
+"mad_ieee r1._y__, r1.x, l69, l70\n"
+"dcl_literal l71, 0x3E3A3325, 0x3E3A3325, 0x3E3A3325, 0x3E3A3325\n"
+"mad_ieee r1._y__, r1.x, r1.y, l71\n"
+"dcl_literal l72, 0x3E638E29, 0x3E638E29, 0x3E638E29, 0x3E638E29\n"
+"mad_ieee r1._y__, r1.x, r1.y, l72\n"
+"dcl_literal l73, 0x3E924925, 0x3E924925, 0x3E924925, 0x3E924925\n"
+"mad_ieee r1._y__, r1.x, r1.y, l73\n"
+"dcl_literal l74, 0x3ECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD\n"
+"mad_ieee r1._y__, r1.x, r1.y, l74\n"
+"dcl_literal l75, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1._y__, r1.x, r1.y, l75\n"
+"mul_ieee r1.___w, r0.x, r0.x\n"
+"dcl_literal l76, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r2.x___, r1.w, l76\n"
+"dcl_literal l77, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), l77, r0.x\n"
+"mad_ieee r1.x___, r1.x, r1.y, r2.x\n"
+"mad_ieee r0._y__, r0.y_neg(xyzw), r1.x, r2.x\n"
+"add r0._y__, r0.x, r0.y_neg(xyzw)\n"
+"dcl_literal l78, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, l78, r0.x\n"
+"dcl_literal l79, 0x3EB00000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.xy__, r0.x, l79\n"
+"and r1.x___, r1.x, r2.x\n"
+"dcl_literal l80, 0xBECA0000, 0xBECA0000, 0xBECA0000, 0xBECA0000\n"
+"ige r1._y__, l80, r0.x\n"
+"and r1._y__, r2.y, r1.y\n"
+"ior r1.x___, r1.x, r1.y\n"
+"cmov_logical r0._y__, r1.x, r0.y, r0.w\n"
+"dcl_literal l81, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l81\n"
+"dcl_literal l82, 0x24800000, 0x31000000, 0x00000000, 0x00000000\n"
+"ilt r1.xy__, r0.w, l82\n"
+"cmov_logical r0._y__, r1.x, r0.x, r0.y\n"
+"dcl_literal l83, 0x24800000, 0x24800000, 0x24800000, 0x24800000\n"
+"ige r1.x___, r0.w, l83\n"
+"itof r0.___w, r0.w\n"
+"and r1.x___, r1.y, r1.x\n"
+"cmov_logical r0._y__, r1.x, r1.w, r0.y\n"
+"dcl_literal l84, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r0.___w, r0.w, l84\n"
+"cmov_logical r0._y__, r0.w, r0.x, r0.y\n"
+"dcl_literal l85, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"eq r0.___w, r0.x, l85\n"
+"dcl_literal l86, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"lt r0.x___, r0.x, l86\n"
+"dcl_literal l87, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0._y__, r0.w, l87, r0.y\n"
+"dcl_literal l88, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r0.x, l88, r0.y\n"
+"dcl_literal l89, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0._y__, r0.z, l89\n"
+"cmov_logical r0.x___, r0.y, r0.z, r0.x\n"
+"dcl_literal l90, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0._y__, r0.z, l90\n"
+"cmov_logical r0.x___, r1.z, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__log2_f32", /* Base-2 logarithm of in0.x; small, zero, negative, infinite and NaN inputs are patched in explicitly at the end. */
+"mdef(284)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00800000, 0x00000000, 0x00000000\n"
+"ilt r0._yz_, r0.x, l0\n" /* signed integer compares on the raw bits: r0.y = x < 0x00800000, r0.z = x < 0 */
+"if_logicalnz r0.y\n" /* input is below the smallest normal bit pattern: rescale it first */
+" \n"
+" dcl_literal l1, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0._y__, r0.x, l1\n"
+" itof r0._y__, r0.y\n" /* mantissa field converted to float so its exponent can be read back */
+" \n"
+" dcl_literal l2, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r0._y_w, r0.y, l2\n" /* r0.y = exponent field, r0.w = mantissa field of that float */
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0._y__, r0.y, l3\n"
+" \n"
+" dcl_literal l4, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+" iadd r0._y__, r0.y, l4\n"
+" \n"
+" dcl_literal l5, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r0.___w, r0.w, l5\n"
+" \n"
+" dcl_literal l6, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0._y__, l6, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, l7, r0.y\n"
+" ishr r1._y__, r0.w, r0.y\n"
+" inegate r0._y__, r0.y\n"
+" \n"
+" dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l8\n"
+" iadd r0._y__, r0.w, r0.y\n"
+" cmov_logical r0._y__, r1.x, r1.y, r0.y\n"
+" \n"
+" dcl_literal l9, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n"
+" mov r0.___w, l9\n" /* exponent correction of -25 for the rescaled value */
+"else\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.___w, l10\n" /* normal input: no exponent correction, work on x directly */
+" mov r0._y__, r0.x\n"
+"endif\n"
+"\n"
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r0.y, l11\n"
+"iadd r0.___w, r1.x, r0.w\n"
+"\n"
+"dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, l12, r0.w\n" /* subtract the bias 127: r0.w is now the integer exponent */
+"\n"
+"dcl_literal l13, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0._y__, r0.y, l13\n"
+"\n"
+"dcl_literal l14, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.x___, l14, r0.y\n"
+"\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.x___, r1.x, l15\n" /* bit 23 set when the mantissa is large enough to be halved */
+"\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1._y__, r1.x, l16\n"
+"ior r0._y__, r0.y, r1.y\n" /* rebuild the mantissa as a float with exponent field 0x3F800000 or 0x3F000000 */
+"\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l17\n"
+"iadd r0.___w, r0.w, r1.x\n"
+"itof r0.___w, r0.w\n"
+"\n"
+"dcl_literal l18, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r1.xy__, r0.y, l18\n" /* r1.x = m - 1.0, r1.y = m + 1.0 */
+"div_zeroop(infinity) r0._y__, r1.x, r1.y\n"
+"mul_ieee r1._y__, r0.y, r0.y\n"
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"\n"
+"dcl_literal l19, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l20, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l19, l20\n" /* two polynomial chains evaluated side by side in .x and .y */
+"\n"
+"dcl_literal l21, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l21\n"
+"mul_ieee r1.___w, r1.z, r2.x\n"
+"\n"
+"dcl_literal l22, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1.__z_, r1.z, r2.y, l22\n"
+"mad_ieee r1._y__, r1.y, r1.z, r1.w\n"
+"mul_ieee r1.__z_, r1.x, r1.x\n"
+"\n"
+"dcl_literal l23, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r1.___w, r1.z, l23\n"
+"\n"
+"dcl_literal l24, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1._y__, r1.z, l24, r1.y\n"
+"mad_ieee r0._y__, r0.y_neg(xyzw), r1.y, r1.w\n"
+"add r0._y__, r1.x_neg(xyzw), r0.y\n"
+"\n"
+"dcl_literal l25, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r0._y__, r0.y_neg(xyzw), l25, r0.w\n" /* 0x3FB8AA3B is ~1.442695 (log2(e)); the integer exponent in r0.w is added here */
+"\n"
+"dcl_literal l26, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l26\n"
+"\n"
+"dcl_literal l27, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0._y__, r0.w, r0.y, l27\n" /* |x| == 0 selects 0xFF800000 (-infinity) */
+"\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.x___, r0.w, l28\n"
+"and r0.__z_, r0.z, r1.x\n"
+"\n"
+"dcl_literal l29, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.z, l29, r0.y\n" /* sign bit set and non-zero selects 0xFFC00000 (a NaN pattern) */
+"\n"
+"dcl_literal l30, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.x, l30\n"
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* +infinity is passed through unchanged */
+"\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l31, r0.w\n"
+"\n"
+"dcl_literal l32, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l32\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* |x| above 0x7F800000 (NaN input) returns x | 0x7FC00000, otherwise the computed value */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__logb_f32", /* Returns the exponent of in0.x as a float in out0.x, with separate paths for subnormal, zero, infinite and NaN inputs. */
+"mdef(285)_out(2)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n"
+"and r0._yz_, r0.x, l0\n" /* r0.y = |x| bits, r0.z = exponent field */
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r0.___w, r0.y, l1\n"
+"dcl_literal l2, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l2\n" /* float subtract of 1.0 normalises the value so its exponent locates the leading set bit */
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l3\n"
+"dcl_literal l4, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l4, r0.w_neg(xyzw)\n"
+"dcl_literal l5, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l5\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r0.___w, r0.w, l6\n"
+"dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r0.___w, r0.y, r0.w, l7\n"
+"dcl_literal l8, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.___w, r0.w_neg(xyzw), l8\n" /* -126 minus the shift count: exponent used for subnormal inputs */
+"itof r0.___w, r0.w\n"
+"dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r1.x___, r0.y, l9\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1._y__, r0.y, l10\n"
+"and r1.x___, r1.x, r1.y\n" /* r1.x = input is non-zero and below the smallest normal */
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l11\n"
+"itof r0.__z_, r0.z\n"
+"dcl_literal l12, 0xC2FE0000, 0xC2FE0000, 0xC2FE0000, 0xC2FE0000\n"
+"add r0.__z_, r0.z, l12\n" /* 0xC2FE0000 is -127.0f: removes the exponent bias */
+"cmov_logical r0.__z_, r1.x, r0.w, r0.z\n"
+"dcl_literal l13, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0.__z_, r0.y, r0.z, l13\n" /* zero input selects 0xFF800000 (-infinity) */
+"dcl_literal l14, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l14\n"
+"cmov_logical r0.__z_, r0.w, r0.y, r0.z\n" /* infinite input selects +infinity */
+"dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l15, r0.y\n"
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* NaN input is returned unchanged */
+"mov out0, r0\n"
+"mov out1, r1\n" /* NOTE(review): r1 only holds the comparison masks written above; confirm a second output is intended */
+"mend\n"
+,1,2 /* 1 input, 2 outputs (matches the mdef header) */
+},
+{ "__log_f32", /* Natural logarithm of in0.x; small, zero, negative, infinite and NaN inputs are patched in explicitly at the end. */
+"mdef(286)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00800000, 0x00000000, 0x00000000\n"
+"ilt r0._yz_, r0.x, l0\n" /* signed integer compares on the raw bits: r0.y = x < 0x00800000, r0.z = x < 0 */
+"if_logicalnz r0.y\n" /* input is below the smallest normal bit pattern: rescale it first */
+" \n"
+" dcl_literal l1, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0._y__, r0.x, l1\n"
+" itof r0._y__, r0.y\n" /* mantissa field converted to float so its exponent can be read back */
+" \n"
+" dcl_literal l2, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+" and r0._y_w, r0.y, l2\n" /* r0.y = exponent field, r0.w = mantissa field of that float */
+" \n"
+" dcl_literal l3, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0._y__, r0.y, l3\n"
+" \n"
+" dcl_literal l4, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+" iadd r0._y__, r0.y, l4\n"
+" \n"
+" dcl_literal l5, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r0.___w, r0.w, l5\n"
+" \n"
+" dcl_literal l6, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0._y__, l6, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, l7, r0.y\n"
+" ishr r1._y__, r0.w, r0.y\n"
+" inegate r0._y__, r0.y\n"
+" \n"
+" dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l8\n"
+" iadd r0._y__, r0.w, r0.y\n"
+" cmov_logical r0._y__, r1.x, r1.y, r0.y\n"
+" \n"
+" dcl_literal l9, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n"
+" mov r0.___w, l9\n" /* exponent correction of -25 for the rescaled value */
+"else\n"
+" \n"
+" dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.___w, l10\n" /* normal input: no exponent correction, work on x directly */
+" mov r0._y__, r0.x\n"
+"endif\n"
+"\n"
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r0.y, l11\n"
+"iadd r0.___w, r1.x, r0.w\n"
+"\n"
+"dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, l12, r0.w\n" /* subtract the bias 127: r0.w is now the integer exponent */
+"\n"
+"dcl_literal l13, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0._y__, r0.y, l13\n"
+"\n"
+"dcl_literal l14, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r1.x___, l14, r0.y\n"
+"\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r1.x___, r1.x, l15\n" /* bit 23 set when the mantissa is large enough to be halved */
+"\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r1._y__, r1.x, l16\n"
+"ior r0._y__, r0.y, r1.y\n" /* rebuild the mantissa as a float with exponent field 0x3F800000 or 0x3F000000 */
+"\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.x___, r1.x, l17\n"
+"iadd r0.___w, r0.w, r1.x\n"
+"itof r0.___w, r0.w\n"
+"\n"
+"dcl_literal l18, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r1.xy__, r0.y, l18\n" /* r1.x = m - 1.0, r1.y = m + 1.0 */
+"div_zeroop(infinity) r0._y__, r1.x, r1.y\n"
+"mul_ieee r1._y__, r0.y, r0.y\n"
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"\n"
+"dcl_literal l19, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l20, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l19, l20\n" /* two polynomial chains evaluated side by side in .x and .y */
+"\n"
+"dcl_literal l21, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l21\n"
+"mul_ieee r1.___w, r1.z, r2.x\n"
+"\n"
+"dcl_literal l22, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r1.__z_, r1.z, r2.y, l22\n"
+"mad_ieee r1._y__, r1.y, r1.z, r1.w\n"
+"mul_ieee r1.__z_, r1.x, r1.x\n"
+"\n"
+"dcl_literal l23, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r1._y__, r1.z, l23, r1.y\n"
+"\n"
+"dcl_literal l24, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r1.___w, r0.w, l24\n" /* exponent times ~9.058e-6; presumably the low-order part of ln(2) */
+"mad_ieee r0._y__, r0.y, r1.y, r1.w\n"
+"\n"
+"dcl_literal l25, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0._y__, r1.z, l25, r0.y_neg(xyzw)\n"
+"add r0._y__, r1.x_neg(xyzw), r0.y\n"
+"\n"
+"dcl_literal l26, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r0._y__, r0.w, l26, r0.y_neg(xyzw)\n" /* exponent times ~0.693138; presumably the high-order part of ln(2) */
+"\n"
+"dcl_literal l27, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l27\n"
+"\n"
+"dcl_literal l28, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0._y__, r0.w, r0.y, l28\n" /* |x| == 0 selects 0xFF800000 (-infinity) */
+"\n"
+"dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.x___, r0.w, l29\n"
+"and r0.__z_, r0.z, r1.x\n"
+"\n"
+"dcl_literal l30, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.z, l30, r0.y\n" /* sign bit set and non-zero selects 0xFFC00000 (a NaN pattern) */
+"\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.__z_, r0.x, l31\n"
+"cmov_logical r0._y__, r0.z, r0.x, r0.y\n" /* +infinity is passed through unchanged */
+"\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l32, r0.w\n"
+"\n"
+"dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l33\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* |x| above 0x7F800000 (NaN input) returns x | 0x7FC00000, otherwise the computed value */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__ltof_f32", /* Appears to convert a signed 64-bit integer held in in0.xy (low word in .x, high word in .y) to a 32-bit float in out0.x - TODO confirm against the caller. */
+"mdef(287)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"inot r0.__zw, r0.xxxy\n" /* r0.z = ~low, r0.w = ~high */
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r1.x___, r0.z, l1\n"
+"ult r0.__z_, r1.x, r0.z\n" /* unsigned wrap-around of ~low + 1 gives the carry into the high word */
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.__z_, r0.z, l2\n"
+"iadd r0.__z_, r0.w, r0.z\n"
+"dcl_literal l3, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r0.___w, r0.y, l3\n" /* r0.w = sign bit of the high word; OR-ed into the result at the end */
+"cmov_logical r0._y__, r0.w, r0.z, r0.y\n" /* negative input: continue with the two's-complement negation */
+"cmov_logical r0.x___, r0.w, r1.x, r0.x\n"
+"dcl_literal l4, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r0.__z_, r0.y, l4\n"
+"dcl_literal l5, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r1.x___, r0.x, l5\n"
+"ior r0.__z_, r0.z, r1.x\n"
+"dcl_literal l6, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.__z_, r0.z, l6\n" /* middle 23-bit slice of the 64-bit magnitude (bits 18..40) */
+"dcl_literal l7, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1.x___, r0.z, l7\n"
+"dcl_literal l8, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.x___, r1.x, l8\n" /* float subtract of 1.0 normalises the slice so the exponent encodes its leading-zero count */
+"dcl_literal l9, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l9\n"
+"dcl_literal l10, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1.x___, l10, r1.x_neg(xyzw)\n"
+"dcl_literal l11, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l11\n"
+"dcl_literal l12, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1.x___, r1.x, l12\n"
+"dcl_literal l13, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r1.x___, r1.x, l13\n"
+"dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r0.__z_, r0.z, r1.x, l14\n" /* an all-zero slice counts as 23 leading zeros */
+"dcl_literal l15, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r1.x___, r0.y, l15\n" /* top 23-bit slice (high word >> 9); same leading-zero trick follows */
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1._y__, r1.x, l16\n"
+"dcl_literal l17, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1._y__, r1.y, l17\n"
+"dcl_literal l18, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1._y__, r1.y, l18\n"
+"dcl_literal l19, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1._y__, l19, r1.y_neg(xyzw)\n"
+"dcl_literal l20, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1._y__, r1.y, l20\n"
+"dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1._y__, r1.y, l21\n"
+"dcl_literal l22, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r1._y__, r1.y, l22\n"
+"dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r1.x___, r1.x, r1.y, l23\n"
+"iadd r0.__z_, r1.x, r0.z\n"
+"dcl_literal l24, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r1._y__, r0.x, l24\n" /* bottom 18-bit slice of the low word; same leading-zero trick follows */
+"dcl_literal l25, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1.__z_, r1.y, l25\n"
+"dcl_literal l26, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.__z_, r1.z, l26\n"
+"dcl_literal l27, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.__z_, r1.z, l27\n"
+"dcl_literal l28, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1.__z_, l28, r1.z_neg(xyzw)\n"
+"dcl_literal l29, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.__z_, r1.z, l29\n"
+"dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1.__z_, r1.z, l30\n"
+"dcl_literal l31, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r1.__z_, r1.z, l31\n"
+"dcl_literal l32, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r1._y__, r1.y, r1.z, l32\n"
+"iadd r1._y__, r0.z, r1.y\n"
+"dcl_literal l33, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r1.__z_, r1.x, l33\n"
+"cmov_logical r1.x___, r1.z, r0.z, r1.x\n" /* top slice empty: extend the count with the middle slice */
+"dcl_literal l34, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r0.__z_, r0.z, l34\n"
+"cmov_logical r0.__z_, r0.z, r1.y, r1.x\n" /* top and middle both empty (46): extend with the bottom slice; r0.z = total shift */
+"dcl_literal l35, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"imin r1.x___, r0.z, l35\n"
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r1.x___, r1.x, l36\n" /* shift amount clamped to 0..64 */
+"dcl_literal l37, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"imin r1._y__, r1.x, l37\n"
+"ishl r1._y__, r0.y, r1.y\n"
+"dcl_literal l38, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r1.__z_, r1.x, l38\n"
+"dcl_literal l39, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.___w, r1.x, l39\n" /* r1.w = shift is 32 or more, so the low word moves into the high word */
+"cmov_logical r1.__z_, r1.w, r1.z, r1.x\n"
+"dcl_literal l40, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r2.x___, l40, r1.z_neg(xyzw)\n"
+"ishl r1.__z_, r0.x, r1.z\n"
+"ushr r2.x___, r0.x, r2.x\n"
+"ior r1._y__, r1.y, r2.x\n"
+"cmov_logical r1._y__, r1.w, r1.z, r1.y\n"
+"dcl_literal l41, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.w, l41, r1.z\n"
+"cmov_logical r0.xy__, r1.x, r1.zyzz, r0.xyxx\n" /* take the shifted pair only when the shift is non-zero */
+"dcl_literal l42, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r1.x___, r0.y, l42\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0._y__, r0.y, l43\n"
+"dcl_literal l44, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishl r1._y__, r1.x, l44\n"
+"dcl_literal l45, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r1.x___, r1.x, l45\n" /* r1.x = 23 mantissa bits; the discarded bits are collected in r1.y for rounding */
+"dcl_literal l46, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r1.__z_, r0.x, l46\n"
+"ior r1._y__, r1.y, r1.z\n"
+"dcl_literal l47, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishl r1.__z_, r0.x, l47\n"
+"dcl_literal l48, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.x___, r0.x, l48\n"
+"dcl_literal l49, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r1.__z_, l49, r1.z\n"
+"dcl_literal l50, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.__z_, r1.z, l50\n"
+"ior r1._y__, r1.y, r1.z\n" /* lowest bit records whether any remaining low bits were set */
+"dcl_literal l51, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ieq r1.__z_, r1.y, l51\n" /* r1.z = discarded part is exactly one half */
+"dcl_literal l52, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ult r1._y__, l52, r1.y\n" /* r1.y = discarded part is more than one half */
+"dcl_literal l53, 0x000000BE, 0x000000BE, 0x000000BE, 0x000000BE\n"
+"iadd r1.___w, r0.z_neg(xyzw), l53\n" /* biased exponent = 0xBE (127 + 63) minus the shift */
+"dcl_literal l54, 0x0000003F, 0x0000003F, 0x0000003F, 0x0000003F\n"
+"iadd r0.__z_, l54, r0.z_neg(xyzw)\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1.___w, r1.w, l55\n"
+"ior r1.x___, r1.x, r1.w\n"
+"dcl_literal l56, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.___w, r1.x, l56\n"
+"iadd r1.___w, r1.x, r1.w\n"
+"cmov_logical r1.x___, r1.z, r1.w, r1.x\n" /* exact half: add the lowest mantissa bit (round to even) */
+"dcl_literal l57, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r1.__z_, r1.x, l57\n"
+"cmov_logical r1.x___, r1.y, r1.z, r1.x\n" /* more than half: round up by one */
+"and r0.x___, r0.y, r0.x\n"
+"dcl_literal l58, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"ilt r0._y__, r0.z, l58\n"
+"ior r0.x___, r0.x, r0.y\n"
+"dcl_literal l59, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l59, r1.x\n" /* both words zero (or exponent below -126) yields 0 */
+"ior r0.x___, r0.w, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__mad_f32", /* Multiply-add on the .x components: out0.x = in0.x * in1.x + in2.x. */
+"mdef(288)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mad_ieee r0.x___, r0.x, r1.x, r2.x\n"
+"mov out0, r0\n" /* only .x is computed; .yzw still hold in0's components */
+"mend\n"
+,3,1 /* 3 inputs, 1 output (matches the mdef header) */
+},
+{ "__mad_f64", /* Multiply-add via dmad on the .xy register pair of each input (presumably one 64-bit double per pair). */
+"mdef(289)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"dmad r0.xy__, r0.xy, r1.xy, r2.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* 3 inputs, 1 output (matches the mdef header) */
+},
+{ "__max_f32", /* out0.x = max(in1.x, in0.x). */
+"mdef(290)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* stage the second operand in r0.y so both operands live in r0 */
+"max r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__min_f32", /* out0.x = min(in1.x, in0.x). */
+"mdef(291)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* stage the second operand in r0.y so both operands live in r0 */
+"min r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__mix_f32", /* Linear blend: out0.x = in0.x + (in1.x - in0.x) * in2.x. */
+"mdef(292)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"add r0.___w, r0.x_neg(xyzw), r1.x\n" /* r0.w = in1.x - in0.x */
+"mad_ieee r0.x___, r0.w, r2.x, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* 3 inputs, 1 output (matches the mdef header) */
+},
+{ "__modf_2f32f32", /* Splits in0.x into a fractional part (out0.x) and a truncated integral part (out0.y), both carrying the input's sign bit. */
+"mdef(293)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r1.___w, r0.x\n" /* keep the untouched input in r1.w for the special cases below */
+"round_z r0._y__, r0.x\n"
+"add r0.__z_, r0.x, r0.y_neg(xyzw)\n" /* r0.z = x - round_z(x) */
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x00000000\n"
+"and r1.xyz_, r0.x, l0\n" /* r1.x = |x| bits, r1.y = sign bit, r1.z = exponent field */
+"ior r0.x___, r0.z, r1.y\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r1.x, l1\n"
+"dcl_literal l2, 0x00000000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ieq r2.xy__, r1.zxzz, l2\n" /* r2.x = exponent field is zero, r2.y = |x| is infinity */
+"and r0.___w, r0.w, r2.x\n" /* r0.w = non-zero value with a zero exponent field */
+"ior r0.__z_, r0.y, r1.y\n"
+"cmov_logical r0.xy__, r0.w, r1.wyww, r0.xzxx\n" /* that case returns (x, signed zero) instead of the computed pair */
+"cmov_logical r0.xy__, r2.y, r1.ywyy, r0.xyxx\n" /* infinity returns (signed zero, x) */
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l3, r1.x\n"
+"dcl_literal l4, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r0.z, l4, r0.xyxx\n" /* NaN input returns 0x7FC00000 in both parts */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__mul_i64", /* Low 64 bits of a 64x64-bit integer product. Operands are in0.xy and in1.xy (low word in .x, high word in .y); the result is out0.xy. */
+"mdef(294)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul r2.xy, r0.yx, r1.xy\n" /* r2.x = a.hi * b.lo, r2.y = a.lo * b.hi */
+"iadd r4.x, r2.x, r2.y\n" /* sum of the two cross products; both live in r2 */
+"umul_high r5.x, r0.x, r1.x\n" /* carry out of a.lo * b.lo; must be the unsigned high half because the low words are unsigned */
+"iadd r6.x, r4.x, r5.x\n" /* high word = cross products + carry (a sum, not a product) */
+"umul r7.x, r0.x, r1.x\n" /* low word = low half of a.lo * b.lo */
+"iadd r0.xy, r7.x0, r6.0x\n" /* pack (low, 0) + (0, high) into r0.xy */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__mul_v2i64", /* Two independent 64x64-bit integer products (low 64 bits each). Lane 0 uses .xy and lane 1 uses .zw, low word first. */
+"mdef(295)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"umul r2, r0.yxwz, r1.xyzw\n" /* per lane: r2.x/.z = a.hi * b.lo, r2.y/.w = a.lo * b.hi */
+"iadd r4.x_z, r2.x0z, r2.y0w\n" /* sum of the two cross products of each lane; both live in r2 */
+"umul_high r5.x_z, r0.x0z, r1.x0z\n" /* carry out of a.lo * b.lo; must be the unsigned high half because the low words are unsigned */
+"iadd r6.x_z, r4.x0z, r5.x0z\n" /* high word = cross products + carry (a sum, not a product) */
+"umul r7.x_z, r0.x0z, r1.x0z\n" /* low word = low half of a.lo * b.lo */
+"iadd r0, r7.x0z0, r6.0x0z\n" /* interleave (low0, 0, low1, 0) + (0, high0, 0, high1) */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__nan_u32", /* Builds a NaN bit pattern from in0.x: keeps its low 23 bits and ORs in 0x7FC00000. */
+"mdef(296)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l0, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.x___, r0.x, l0\n" /* keep only the mantissa-field bits of the input */
+"dcl_literal l1, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l1\n" /* all-ones exponent plus the top mantissa bit */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_cos_f32", /* Thin wrapper: applies cos_vec to in0.x and returns it in out0.x. */
+"mdef(297)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"cos_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_divide_2f32", /* Component-wise in0.xy / in1.xy using the div instruction directly. */
+"mdef(298)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"div_zeroop(infinity) r0.xy__, r0.xy, r1.xy\n" /* zeroop(infinity) presumably selects an infinite result for a zero divisor - confirm in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_divide_4f32", /* Component-wise in0 / in1 over all four components using the div instruction directly. */
+"mdef(299)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"div_zeroop(infinity) r0, r0, r1\n" /* zeroop(infinity) presumably selects an infinite result for a zero divisor - confirm in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_divide_f32", /* out0.x = in0.x / in1.x using the div instruction directly. */
+"mdef(300)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"div_zeroop(infinity) r0.x___, r0.x, r1.x\n" /* zeroop(infinity) presumably selects an infinite result for a zero divisor - confirm in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_exp10_2f32", /* Fast 10^x on in0.xy: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(301)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x40549A78, 0x40549A78, 0x40549A78, 0x40549A78\n" /* float bit pattern of ~3.321928, i.e. log2(10) */
+"mul_ieee r1.xy__, r0.xy, l0\n"
+"exp_vec r0.xy__, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp10_4f32", /* Fast 10^x on all four components: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(302)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x40549A78, 0x40549A78, 0x40549A78, 0x40549A78\n" /* float bit pattern of ~3.321928, i.e. log2(10) */
+"mul_ieee r1, r0, l0\n"
+"exp_vec r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp10_f32", /* Fast 10^x on in0.x: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(303)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x40549A78, 0x40549A78, 0x40549A78, 0x40549A78\n" /* float bit pattern of ~3.321928, i.e. log2(10) */
+"mul_ieee r1.x___, r0.x, l0\n"
+"exp_vec r0.x___, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp2_2f32", /* Thin wrapper: applies exp_vec to in0.xy with no scaling. */
+"mdef(304)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"exp_vec r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp2_4f32", /* Thin wrapper: applies exp_vec to all four components of in0 with no scaling. */
+"mdef(305)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"exp_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp2_f32", /* Thin wrapper: applies exp_vec to in0.x with no scaling. */
+"mdef(306)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"exp_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp_2f32", /* Fast e^x on in0.xy: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(307)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* float bit pattern of ~1.442695, i.e. log2(e) */
+"mul_ieee r1.xy__, r0.xy, l0\n"
+"exp_vec r0.xy__, r1.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp_4f32", /* Fast e^x on all four components: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(308)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* float bit pattern of ~1.442695, i.e. log2(e) */
+"mul_ieee r1, r0, l0\n"
+"exp_vec r0, r1\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_exp_f32", /* Fast e^x on in0.x: scales the input, then applies exp_vec (presumably a base-2 exponential). */
+"mdef(309)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* float bit pattern of ~1.442695, i.e. log2(e) */
+"mul_ieee r0._y__, r0.x, l0\n" /* the scaled value is staged in r0.y rather than a second register */
+"exp_vec r0.x___, r0.y\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log10_2f32", /* Fast log10 on in0.xy: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(310)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1.xy__, r0.xy\n" /* source must be .xy: a lone .x would feed the x input to both lanes */
+"\n"
+"dcl_literal l0, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* float bit pattern of ~0.30103, i.e. log10(2) */
+"mul_ieee r0.xy__, r1.xy, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log10_4f32", /* Fast log10 on all four components: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(311)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1, r0\n"
+"\n"
+"dcl_literal l0, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* float bit pattern of ~0.30103, i.e. log10(2) */
+"mul_ieee r0, r1, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log10_f32", /* Fast log10 on in0.x: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(312)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1.x___, r0.x\n"
+"\n"
+"dcl_literal l0, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B, 0x3E9A209B\n" /* float bit pattern of ~0.30103, i.e. log10(2) */
+"mul_ieee r0.x___, r1.x, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log2_2f32", /* Thin wrapper: applies log_vec to in0.xy with no scaling. */
+"mdef(313)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log2_4f32", /* Thin wrapper: applies log_vec to all four components of in0 with no scaling. */
+"mdef(314)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log2_f32", /* Thin wrapper: applies log_vec to in0.x with no scaling. */
+"mdef(315)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log_2f32", /* Fast natural log on in0.xy: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(316)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1.xy__, r0.xy\n"
+"\n"
+"dcl_literal l0, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* float bit pattern of ~0.693147, i.e. ln(2) */
+"mul_ieee r0.xy__, r1.xy, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log_4f32", /* Fast natural log on all four components: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(317)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1, r0\n"
+"\n"
+"dcl_literal l0, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* float bit pattern of ~0.693147, i.e. ln(2) */
+"mul_ieee r0, r1, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_log_f32", /* Fast natural log on in0.x: applies log_vec (presumably a base-2 logarithm), then scales the result. */
+"mdef(318)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"log_vec r1.x___, r0.x\n"
+"\n"
+"dcl_literal l0, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n" /* float bit pattern of ~0.693147, i.e. ln(2) */
+"mul_ieee r0.x___, r1.x, l0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_powr_2f32", /* Fast power on .xy: exp_vec(in1 * log_vec(in0)), with the result forced to 0 where |in0| compares equal to 0. */
+"mdef(319)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"log_vec r2.xy, r0.xy\n"
+"mul_ieee r3.xy, r1.xy, r2.xy\n"
+"exp_vec r3.xy, r3.xy\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r4.xy, r0.xy_abs, l0\n" /* float compare of |base| against 0.0 */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy, r4.xy, l1, r3.xy\n" /* zero base selects 0 regardless of the exponent */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_powr_4f32", /* Fast power on all four components: exp_vec(in1 * log_vec(in0)), with the result forced to 0 where |in0| compares equal to 0. */
+"mdef(320)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"log_vec r2, r0\n"
+"mul_ieee r3, r1, r2\n"
+"exp_vec r3, r3\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r4, r0_abs, l0\n" /* float compare of |base| against 0.0 */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0, r4, l1, r3\n" /* zero base selects 0 regardless of the exponent */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_powr_f32", /* Fast power on .x: exp_vec(in1.x * log_vec(in0.x)), with the result forced to 0 when |in0.x| compares equal to 0. */
+"mdef(321)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"log_vec r2.x, r0.x\n"
+"mul_ieee r3.x, r1.x, r2.x\n"
+"exp_vec r3.x, r3.x\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r4.x, r0.x_abs, l0\n" /* float compare of |base| against 0.0 */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x, r4.x, l1, r3.x\n" /* zero base selects 0 regardless of the exponent */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* 2 inputs, 1 output (matches the mdef header) */
+},
+{ "__native_recip_2f32", /* Fast reciprocal on in0.xy: divides the constant 1.0 by the input. */
+"mdef(322)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* float bit pattern of 1.0 */
+"div_zeroop(infinity) r0.xy__, l0, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_recip_4f32", /* Fast reciprocal on all four components: divides the constant 1.0 by the input. */
+"mdef(323)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* float bit pattern of 1.0 */
+"div_zeroop(infinity) r0, l0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_recip_f32", /* Fast reciprocal on in0.x: divides the constant 1.0 by the input. */
+"mdef(324)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* float bit pattern of 1.0 */
+"div_zeroop(infinity) r0.x___, l0, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_rsqrt_2f32", /* Thin wrapper: applies rsq_vec to in0.xy. */
+"mdef(325)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0.xy__, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_rsqrt_4f32", /* Thin wrapper: applies rsq_vec to all four components of in0. */
+"mdef(326)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_rsqrt_f32", /* Thin wrapper: applies rsq_vec to in0.x. */
+"mdef(327)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"rsq_vec r0.x___, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_sin_2f32", /* Thin wrapper: applies sin_vec to in0.xy. */
+"mdef(328)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r0.xy, r0.xy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* 1 input, 1 output (matches the mdef header) */
+},
+{ "__native_sin_4f32", /* AMDIL macro 329 (1 in, 1 out): sin_vec applied to all four lanes of in0 */
+"mdef(329)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_sin_f32", /* AMDIL macro 330 (1 in, 1 out): sin_vec applied to the .x lane of in0 */
+"mdef(330)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r0.x___, r0.x\n" /* only the .x lane is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_sqrt_2f32", /* AMDIL macro 331 (1 in, 1 out): sqrt_vec applied to the .xy lanes of in0 */
+"mdef(331)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0.xy__, r0.xy\n" /* only the .xy lanes are written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_sqrt_4f32", /* AMDIL macro 332 (1 in, 1 out): sqrt_vec applied to all four lanes of in0 */
+"mdef(332)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0, r0\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_sqrt_f32", /* AMDIL macro 333 (1 in, 1 out): sqrt_vec applied to the .x lane of in0 */
+"mdef(333)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sqrt_vec r0.x___, r0.x\n" /* only the .x lane is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_tan_2f32", /* AMDIL macro 334 (1 in, 1 out): tan of the .xy lanes of in0, computed as sin_vec / cos_vec */
+"mdef(334)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r1.xy__, r0.xy\n"
+"cos_vec r2.xy__, r0.xy\n"
+"div_zeroop(infinity) r0.xy__, r1.xy, r2.xy\n" /* r0.xy = sin / cos; only the .xy lanes are written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_tan_4f32", /* AMDIL macro 335 (1 in, 1 out): tan of all four lanes of in0, computed as sin_vec / cos_vec */
+"mdef(335)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r1, r0\n"
+"cos_vec r2, r0\n"
+"div_zeroop(infinity) r0, r1, r2\n" /* r0 = sin / cos on every lane */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__native_tan_f32", /* AMDIL macro 336 (1 in, 1 out): tan of the .x lane of in0, computed as sin_vec / cos_vec */
+"mdef(336)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"sin_vec r1.x___, r0.x\n"
+"cos_vec r2.x___, r0.x\n"
+"div_zeroop(infinity) r0.x___, r1.x, r2.x\n" /* r0.x = sin / cos; only the .x lane is written */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__nextafter_f32", /* AMDIL macro 337 (2 in, 1 out): scalar nextafter done with integer ops on the float bit patterns */
+"mdef(337)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack both operands into one register: r0.x = in0.x, r0.y = in1.x */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__zw, r0.xxxy, l0\n" /* r0.z = (x < 0), r0.w = (y < 0) as signed-integer compares */
+"and r0.__z_, r0.w, r0.z\n"
+"ilt r0.___w, r0.x, r0.y\n"
+"and r0.__z_, r0.z, r0.w\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, l1, r0.x\n"
+"and r0.___w, r0.w, r1.x\n"
+"ior r0.__z_, r0.z, r0.w\n"
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"dcl_literal l3, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r0.__z_, r0.z, l2, l3\n" /* pick an integer step of +1 or -1 */
+"iadd r0.__z_, r0.x, r0.z\n" /* r0.z = bits of x plus the step */
+"dcl_literal l4, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x00000000\n"
+"and r1.xyz_, r0.xyyx, l4\n" /* r1.x = x without sign bit, r1.y = y without sign bit, r1.z = sign bit of y */
+"ine r0.___w, r1.x, r1.y\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r1.___w, r1.x, l5\n" /* r1.w = (magnitude bits of x are zero) */
+"and r0.___w, r0.w, r1.w\n"
+"dcl_literal l6, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ior r1.__z_, r1.z, l6\n" /* sign bit of y with the lowest mantissa bit set */
+"cmov_logical r0.__z_, r0.w, r1.z, r0.z\n" /* x is zero and magnitudes differ: use that value instead of the stepped bits */
+"ieq r0.x___, r0.x, r0.y\n"
+"ieq r0.___w, r1.x, r1.y\n"
+"dcl_literal l7, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ult r1.xy__, l7, r1.xyxx\n" /* r1.x / r1.y = magnitude bits above 0x7F800000 (NaN encodings in IEEE-754 single precision) */
+"and r0.___w, r1.w, r0.w\n"
+"ior r0.x___, r0.x, r0.w\n"
+"cmov_logical r0.x___, r0.x, r0.y, r0.z\n" /* identical bits, or both magnitudes zero: return y; otherwise the stepped value */
+"ior r0._y__, r1.y, r1.x\n"
+"dcl_literal l8, 0x7FC00001, 0x7FC00001, 0x7FC00001, 0x7FC00001\n"
+"cmov_logical r0.x___, r0.y, l8, r0.x\n" /* either operand flagged above: result is the literal 0x7FC00001 */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* mirrors in(2)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__normalize_2f32", /* AMDIL macro 338 (1 in, 1 out): scales the .xy lanes of in0 by rsq_vec of their dot product */
+"mdef(338)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r2.x___, r0.xyxx, r0.xyxx\n" /* r2.x = x*x + y*y */
+"rsq_vec r2._y__, r2.x\n"
+"mov r2.__zw, r0.xxxy_abs\n" /* r2.z = |x|, r2.w = |y| */
+"\n"
+"dcl_literal l7, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ilt r2.__zw, l7, r2.zzzw\n" /* flag lanes whose magnitude bits exceed 0x7F800000 (NaN encodings) */
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+"and r2.__zw, r2.zzzw, l8\n"
+"iadd r2.__z_, r2.z, r2.w\n" /* r2.z = number of flagged lanes */
+"mul_ieee r2._y_w, r0.xxxy, r2.y\n" /* scaled x in r2.y, scaled y in r2.w */
+"\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.x___, r2.x, l9\n"
+"\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.xy__, r2.x, l10, r2.ywyy\n" /* zero dot product: result is the zero literal */
+"\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.__z_, l11, r2.z\n"
+"\n"
+"dcl_literal l12, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r2.z, l12, r2.xyxx\n" /* any flagged lane: result is the literal 0x7FC00000 */
+"\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n" /* FIX: l13 was read below without a declaration; zero is an assumed value that only feeds the .zw lanes -- TODO confirm against the macro generator */
+"mov r0.__zw, l13\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__normalize_4f32", /* AMDIL macro 339 (1 in, 1 out): scales all four lanes of in0 by rsq_vec of the sum of their squares */
+"mdef(339)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dp2_ieee r2.x___, r0.xyxx, r0.xyxx\n" /* r2.x = x*x + y*y */
+"mad_ieee r2.x___, r0.z, r0.z, r2.x\n" /* + z*z */
+"mad_ieee r2.x___, r0.w, r0.w, r2.x\n" /* + w*w */
+"rsq_vec r2._y__, r2.x\n"
+"mov r3, r0_abs\n"
+"\n"
+"dcl_literal l14, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r3, l14, r3\n" /* flag lanes whose magnitude bits exceed 0x7F800000 (NaN encodings) */
+"\n"
+"dcl_literal l15, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3, r3, l15\n"
+"iadd r2.__z_, r3.x, r3.y\n"
+"iadd r2.__z_, r2.z, r3.z\n"
+"iadd r2.__z_, r2.z, r3.w\n" /* r2.z = number of flagged lanes */
+"mul_ieee r0, r0, r2.y\n"
+"\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.x___, r2.x, l16\n"
+"\n"
+"dcl_literal l17, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0, r2.x, l17, r0\n" /* zero sum of squares: result is the zero literal */
+"\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, l18, r2.z\n"
+"\n"
+"dcl_literal l19, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0, r2.x, l19, r0\n" /* any flagged lane: result is the literal 0x7FC00000 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__normalize_f32", /* AMDIL macro 340 (1 in, 1 out): scalar normalize; maps the .x lane to the -1.0 / 1.0 literals by sign, otherwise passes it through */
+"mdef(340)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r2.x___, r0.x_abs\n" /* r2.x = |x| */
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2._y__, r0.x, l0\n" /* r2.y = (x < 0) */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r2.__z_, l1, r2.x\n" /* r2.z = (magnitude bits are non-zero) */
+"and r2._y__, r2.y, r2.z\n"
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"uge r2.x___, l2, r2.x\n" /* r2.x = (magnitude bits do not exceed 0x7F800000) */
+"and r2._y__, r2.y, r2.x\n"
+"\n"
+"dcl_literal l3, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r2._y__, r2.y, l3, r0.x\n" /* negative case: literal 0xBF800000 (-1.0), else keep x */
+"\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, l4, r0.x\n" /* r2.w = (0 < x) */
+"and r2.__z_, r2.z, r2.w\n"
+"and r2.x___, r2.x, r2.z\n"
+"\n"
+"dcl_literal l5, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l5, r2.y\n" /* positive case: literal 0x3F800000 (1.0), else the value chosen above */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n" /* FIX: l6 was read below without a declaration; zero is an assumed value that only feeds the .yzw lanes -- TODO confirm against the macro generator */
+"mov r0._yzw, l6\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* mirrors in(1)/out(1) from the mdef line; presumably the input and output counts */
+},
+{ "__pown_f32i32",
+"mdef(341)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x80000000, 0x00000001\n"
+"and r1, r0.xxyy, l0\n"
+"itof r0.__z_, r1.x\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r0.__zw, r0.z, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.___w, r0.w, l3\n"
+"dcl_literal l4, 0x00000024, 0x00000024, 0x00000024, 0x00000024\n"
+"iadd r0.__z_, r0.z, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.__z_, l5, r0.z_neg(xyzw)\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r2.x___, l6, r0.z\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.__z_, r2.x, l7, r0.z\n"
+"inegate r2.x___, r0.z\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l8\n"
+"iadd r2.x___, r0.w, r2.x\n"
+"ishr r0.___w, r0.w, r0.z\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l9, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r0.w, r2.x\n"
+"dcl_literal l10, 0x00800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"ilt r2.xy__, r1.x, l10\n"
+"cmov_logical r0.__z_, r2.x, r0.z, r1.x\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r0.__zw, r0.z, l11\n"
+"dcl_literal l12, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.___w, r0.w, l12\n"
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l13\n"
+"dcl_literal l14, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.__z_, r0.w, l14, l15\n"
+"round_z r2.___w, r2.z\n"
+"ftoi r2.__z_, r2.z\n"
+"dcl_literal l16, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r0.___w, r2.w_neg(xyzw), l16, r0.w\n"
+"add r3.x___, r0.w, cb0[1].x\n"
+"add r3._y__, r0.w, r3.x_neg(xyzw)\n"
+"dcl_literal l17, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mul_ieee r3.__z_, r2.w, l17\n"
+"dcl_literal l18, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.___w, r3.z, l18\n"
+"dcl_literal l19, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r2.___w, r2.w, l19, r3.w_neg(xyzw)\n"
+"div_zeroop(infinity) r0.___w, r0.w, r3.z\n"
+"dcl_literal l20, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r4.x___, r0.w, l20\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r2.w, r3.x\n"
+"add r4.x___, r0.w, r4.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x\n"
+"mad_ieee r2.___w, r4.x, r2.w, r3.x\n"
+"add r2.___w, r3.y, r2.w_neg(xyzw)\n"
+"div_zeroop(infinity) r2.___w, r2.w, r3.z\n"
+"add r3.x___, r0.w, r2.w\n"
+"dcl_literal l21, 0x3E000000, 0x3E000000, 0x3E000000, 0x3E000000\n"
+"dcl_literal l22, 0x3E124925, 0x3E124925, 0x3E124925, 0x3E124925\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), l21, l22\n"
+"dcl_literal l23, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), r3.y, l23\n"
+"dcl_literal l24, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), r3.y, l24\n"
+"dcl_literal l25, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), r3.y, l25\n"
+"dcl_literal l26, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), r3.y, l26\n"
+"dcl_literal l27, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r0.w_neg(xyzw), r3.y, l27\n"
+"mul_ieee r3.__z_, r0.w_neg(xyzw), r0.w\n"
+"add r0.___w, r0.w, r3.x_neg(xyzw)\n"
+"mad_ieee r3.___w, r3.y, r3.z, r3.x\n"
+"add r3.x___, r3.x, r3.w_neg(xyzw)\n"
+"mad_ieee r3.x___, r3.y, r3.z, r3.x\n"
+"add r0.___w, r2.w, r0.w\n"
+"add r0.___w, r3.x, r0.w\n"
+"add r2.___w, r3.w, r0.w\n"
+"add r3.x___, r3.w, r2.w_neg(xyzw)\n"
+"add r0.___w, r0.w, r3.x\n"
+"dcl_literal l28, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r2.w, l28\n"
+"add r3._y__, r2.w, r3.x_neg(xyzw)\n"
+"dcl_literal l29, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mul_ieee r2.___w, r2.w, l29\n"
+"dcl_literal l30, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.x, l30, r2.w_neg(xyzw)\n"
+"dcl_literal l31, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.y, l31, r3.z\n"
+"dcl_literal l32, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.x, l32, r3.z\n"
+"dcl_literal l33, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.y, l33, r3.x\n"
+"dcl_literal l34, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r0.___w, r0.w, l34, r3.x\n"
+"add r3.x___, r2.w, r0.w\n"
+"add r2.___w, r2.w, r3.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.w\n"
+"dcl_literal l35, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0\n"
+"iadd r2.__z_, r2.z, l35\n"
+"dcl_literal l36, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r2.___w, r2.z, l36\n"
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l38, 0x00000000, 0x2FB85A45, 0x312C77EC, 0x31FD14FD\n"
+"cmov_logical r4, r2.w, l37, l38\n"
+"dcl_literal l39, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r5, r2.w, l39\n"
+"dcl_literal l40, 0x31D64899, 0x2F2DABBA, 0x31CDA79E, 0x309E2B87\n"
+"cmov_logical r4, r5.x, l40, r4\n"
+"dcl_literal l41, 0x324FDEB4, 0x3201781E, 0x31DB4EC9, 0x31C32597\n"
+"cmov_logical r4, r5.y, l41, r4\n"
+"dcl_literal l42, 0x32481340, 0x32D6985C, 0x3230E074, 0x326A4CDF\n"
+"cmov_logical r4, r5.z, l42, r4\n"
+"dcl_literal l43, 0x323CD1B9, 0x32CFDEB4, 0x3259D0CE, 0x31458715\n"
+"cmov_logical r4, r5.w, l43, r4\n"
+"dcl_literal l44, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+"ieq r6, r2.w, l44\n"
+"dcl_literal l45, 0x3211F171, 0x3227F605, 0x32B9C415, 0x325295B5\n"
+"cmov_logical r4, r6.x, l45, r4\n"
+"dcl_literal l46, 0x2F4BA83C, 0x30F3AA69, 0x320CB06D, 0x32B97998\n"
+"cmov_logical r4, r6.y, l46, r4\n"
+"dcl_literal l47, 0x30D63AA6, 0x33550F2A, 0x3374AE80, 0x3321393E\n"
+"cmov_logical r4, r6.z, l47, r4\n"
+"dcl_literal l48, 0x3267EF5A, 0x32AB49CA, 0x3355010B, 0x3374C355\n"
+"cmov_logical r4, r6.w, l48, r4\n"
+"dcl_literal l49, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+"ieq r7, r2.w, l49\n"
+"dcl_literal l50, 0x32BCD1B9, 0x3261151F, 0x325313A6, 0x3320F04D\n"
+"cmov_logical r4, r7.x, l50, r4\n"
+"dcl_literal l51, 0x32E4788D, 0x32A76195, 0x3332703B, 0x3349A817\n"
+"cmov_logical r4, r7.y, l51, r4\n"
+"dcl_literal l52, 0x332DF384, 0x32B06EF9, 0x33300016, 0x331565B0\n"
+"cmov_logical r4, r7.z, l52, r4\n"
+"dcl_literal l53, 0x336A8086, 0x331C70C1, 0x332C00A7, 0x3257990D\n"
+"cmov_logical r4, r7.w, l53, r4\n"
+"dcl_literal l54, 0x00000000, 0x0000000D, 0x0000000E, 0x0000000F\n"
+"ieq r3._yzw, r2.w, l54\n"
+"dcl_literal l55, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l56, 0x00000000, 0x3CB73CB4, 0x3D35D69B, 0x3D8759C4\n"
+"cmov_logical r8, r2.w, l55, l56\n"
+"dcl_literal l57, 0x32F5532E, 0x336633F3, 0x31B86815, 0x33654999\n"
+"cmov_logical r4, r3.y, l57, r4\n"
+"dcl_literal l58, 0x32D26089, 0x2FCBA83C, 0x32B3FF57, 0x32E0E014\n"
+"cmov_logical r4, r3.z, l58, r4\n"
+"dcl_literal l59, 0x331B1354, 0x330D9D4B, 0x32BCF065, 0x330BBE12\n"
+"cmov_logical r4, r3.w, l59, r4\n"
+"dcl_literal l60, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"and r2.___w, r2.z, l60\n"
+"dcl_literal l61, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r2.__z_, r2.z, l61\n"
+"dcl_literal l62, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r9.xyz_, r2.w, l62\n"
+"cmov_logical r2.___w, r9.x, r4.y, r4.x\n"
+"cmov_logical r2.___w, r9.y, r4.z, r2.w\n"
+"cmov_logical r2.___w, r9.z, r4.w, r2.w\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r2.z, l63, r2.w\n"
+"add r4.x___, r3.x, r2.w\n"
+"add r3.x___, r3.x, r4.x_neg(xyzw)\n"
+"add r2.___w, r2.w, r3.x\n"
+"add r0.___w, r0.w, r2.w\n"
+"add r2.___w, r4.x, r0.w\n"
+"add r3.x___, r4.x, r2.w_neg(xyzw)\n"
+"dcl_literal l64, 0x3DB31FB7, 0x3DDE4212, 0x3E0462C4, 0x3E19574F\n"
+"cmov_logical r4, r5.x, l64, r8\n"
+"dcl_literal l65, 0x3E2E00D1, 0x3E42615E, 0x3E567AF1, 0x3E6A4F72\n"
+"cmov_logical r4, r5.y, l65, r4\n"
+"dcl_literal l66, 0x3E7DE0B5, 0x3E88983E, 0x3E92203D, 0x3E9B8926\n"
+"cmov_logical r4, r5.z, l66, r4\n"
+"dcl_literal l67, 0x3EA4D3C2, 0x3EAE00D1, 0x3EB7110E, 0x3EC0052B\n"
+"cmov_logical r4, r5.w, l67, r4\n"
+"dcl_literal l68, 0x3EC8DDD4, 0x3ED19BB0, 0x3EDA3F5F, 0x3EE2C97D\n"
+"cmov_logical r4, r6.x, l68, r4\n"
+"dcl_literal l69, 0x3EEB3A9F, 0x3EF39355, 0x3EFBD42B, 0x3F01FED4\n"
+"cmov_logical r4, r6.y, l69, r4\n"
+"dcl_literal l70, 0x3F060828, 0x3F0A064F, 0x3F0DF988, 0x3F11E20E\n"
+"cmov_logical r4, r6.z, l70, r4\n"
+"dcl_literal l71, 0x3F15C01A, 0x3F1993E3, 0x3F1D5D9F, 0x3F211D83\n"
+"cmov_logical r4, r6.w, l71, r4\n"
+"dcl_literal l72, 0x3F24D3C2, 0x3F28808C, 0x3F2C2411, 0x3F2FBE7F\n"
+"cmov_logical r4, r7.x, l72, r4\n"
+"dcl_literal l73, 0x3F335004, 0x3F36D8CB, 0x3F3A58FE, 0x3F3DD0C7\n"
+"cmov_logical r4, r7.y, l73, r4\n"
+"dcl_literal l74, 0x3F41404E, 0x3F44A7BA, 0x3F480730, 0x3F4B5ED6\n"
+"cmov_logical r4, r7.z, l74, r4\n"
+"dcl_literal l75, 0x3F4EAECF, 0x3F51F73F, 0x3F553847, 0x3F587209\n"
+"cmov_logical r4, r7.w, l75, r4\n"
+"dcl_literal l76, 0x3F5BA4A4, 0x3F5ED038, 0x3F61F4E5, 0x3F6512C6\n"
+"cmov_logical r4, r3.y, l76, r4\n"
+"dcl_literal l77, 0x3F6829FB, 0x3F6B3A9F, 0x3F6E44CD, 0x3F7148A1\n"
+"cmov_logical r4, r3.z, l77, r4\n"
+"dcl_literal l78, 0x3F744635, 0x3F773DA3, 0x3F7A2F04, 0x3F7D1A70\n"
+"cmov_logical r4, r3.w, l78, r4\n"
+"cmov_logical r3._y__, r9.x, r4.y, r4.x\n"
+"cmov_logical r3._y__, r9.y, r4.z, r3.y\n"
+"cmov_logical r3._y__, r9.z, r4.w, r3.y\n"
+"dcl_literal l79, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r2.__z_, r2.z, l79, r3.y\n"
+"add r3._y__, r2.w, r2.z\n"
+"add r2.__z_, r2.z, r3.y_neg(xyzw)\n"
+"add r2.__z_, r2.w, r2.z\n"
+"add r0.___w, r0.w, r3.x\n"
+"add r0.___w, r2.z, r0.w\n"
+"add r2.__z_, r3.y, r0.w\n"
+"add r2.___w, r3.y, r2.z_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.w\n"
+"dcl_literal l80, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.__z_, r0.z, l80\n"
+"dcl_literal l81, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC\n"
+"and r2.x___, r2.x, l81\n"
+"iadd r0.__z_, r0.z, r2.x\n"
+"itof r0.__z_, r0.z\n"
+"add r2.x___, r2.z, r0.z\n"
+"add r0.__z_, r0.z, r2.x_neg(xyzw)\n"
+"add r0.__z_, r2.z, r0.z\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r2.x, r0.z\n"
+"add r2.x___, r2.x, r0.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r2.x\n"
+"dcl_literal l82, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r0.y, l82\n"
+"inegate r2.__z_, r0.y\n"
+"cmov_logical r2.x___, r2.x, r2.z, r0.y\n"
+"dcl_literal l83, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishr r2.__z_, r2.x, l83\n"
+"itof r2.__z_, r2.z\n"
+"dcl_literal l84, 0x43800000, 0x43800000, 0x43800000, 0x43800000\n"
+"mul_ieee r2.__z_, r2.z, l84\n"
+"itof r2.___w, r2.x\n"
+"dcl_literal l85, 0x01000000, 0x01000000, 0x01000000, 0x01000000\n"
+"ilt r3.x___, l85, r2.x\n"
+"dcl_literal l86, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r2.x___, r2.x, l86\n"
+"cmov_logical r2.__z_, r3.x, r2.z, r2.w\n"
+"ior r2.__z_, r1.z, r2.z\n"
+"mul_ieee r2.___w, r0.z, r2.z\n"
+"add r0.__z_, r0.w, r0.z\n"
+"itof r2.x___, r2.x\n"
+"and r2.x___, r3.x, r2.x\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r3.x___, r2.x, l87\n"
+"ior r1.__z_, r1.z, r2.x\n"
+"cmov_logical r1.__z_, r3.x, r1.z, r2.x\n"
+"mad_ieee r2.x___, r0.w, r1.z, r2.w\n"
+"add r1.__z_, r2.z, r1.z\n"
+"dcl_literal l88, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2.___w, r0.w, l88\n"
+"mul_ieee r3.x___, r0.w, r2.z\n"
+"add r0.___w, r0.w, r2.w_neg(xyzw)\n"
+"dcl_literal l89, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3._y__, r2.z, l89\n"
+"add r2.__z_, r2.z, r3.y_neg(xyzw)\n"
+"mad_ieee r3.__z_, r2.w, r3.y, r3.x_neg(xyzw)\n"
+"mad_ieee r2.___w, r2.w, r2.z, r3.z\n"
+"mad_ieee r2.___w, r0.w, r3.y, r2.w\n"
+"mad_ieee r0.___w, r0.w, r2.z, r2.w\n"
+"add r0.___w, r2.x, r0.w\n"
+"add r2.x___, r3.x, r0.w\n"
+"add r2.__z_, r3.x, r2.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.z\n"
+"add r2.__z_, r2.x, r0.w\n"
+"round_z r2.___w, r2.z\n"
+"ftoi r2.__z_, r2.z\n"
+"dcl_literal l90, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n"
+"lt r3.x___, l90, r2.w_abs\n"
+"ftoi r3._y__, r2.w_abs\n"
+"dcl_literal l91, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n"
+"cmov_logical r3.x___, r3.x, l91, r3.y\n"
+"inegate r3._y__, r3.x\n"
+"dcl_literal l92, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.__z_, r2.w, l92\n"
+"cmov_logical r3.x___, r3.z, r3.y, r3.x\n"
+"dcl_literal l93, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r3._y__, r3.x, l93\n"
+"add r3.__z_, r2.x, r2.w_neg(xyzw)\n"
+"add r2.x___, r2.x, r3.z_neg(xyzw)\n"
+"add r2.x___, r2.w_neg(xyzw), r2.x\n"
+"add r0.___w, r0.w, r2.x\n"
+"add r2.x___, r3.z, r0.w\n"
+"add r2.___w, r3.z, r2.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.w\n"
+"add r0.___w, r2.x, r0.w\n"
+"dcl_literal l94, 0x3F317218, 0x00000000, 0x00000000, 0x42000000\n"
+"mul_ieee r2.x__w, r0.w, l94\n"
+"dcl_literal l95, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x0FFFF000\n"
+"and r3.__zw, r0.w, l95\n"
+"dcl_literal l96, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r4.x___, r3.w, l96, r2.x_neg(xyzw)\n"
+"dcl_literal l97, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r4.x___, r3.w, l97, r4.x\n"
+"add r3.___w, r0.w, r3.w_neg(xyzw)\n"
+"dcl_literal l98, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B\n"
+"lt r3.__z_, r3.z, l98\n"
+"dcl_literal l99, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r4.x___, r3.w, l99, r4.x\n"
+"dcl_literal l100, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r3.___w, r3.w, l100, r4.x\n"
+"dcl_literal l101, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r4.x___, r0.w, l101, r3.w\n"
+"dcl_literal l102, 0x00000000, 0x34904540, 0x3A1BECC5, 0x00000000\n"
+"dcl_literal l103, 0x00000000, 0x3CC2F57D, 0xBC42EEB9, 0x00000000\n"
+"mad_ieee r4._yz_, r4.x, l102, l103\n"
+"dcl_literal l104, 0x00000000, 0x37874471, 0x3DDB50CA, 0x00000000\n"
+"mad_ieee r4._yz_, r4.yyzy, r4.x, l104\n"
+"dcl_literal l105, 0x00000000, 0x3F7FE378, 0xBEFFE15B, 0x00000000\n"
+"mad_ieee r4._yz_, r4.yyzy, r4.x, l105\n"
+"dcl_literal l106, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378\n"
+"mad_ieee r4.x___, r4.z, r4.x, l106\n"
+"div_zeroop(infinity) r4.x___, r4.y, r4.x\n"
+"mul_ieee r3.___w, r3.w, r4.x\n"
+"mad_ieee r2.x___, r4.x, r2.x, r3.w\n"
+"dcl_literal l107, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, l107, r2.w\n"
+"dcl_literal l108, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.x___, r2.x, l108\n"
+"dcl_literal l109, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r2.x, l109\n"
+"cmov_logical r2.x___, r4.z, r3.x, r3.y\n"
+"itof r3.x___, r3.x\n"
+"dcl_literal l110, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3._y__, r2.x, l110\n"
+"itof r3.___w, r4.w\n"
+"cmov_logical r3.___w, r4.z, r4.x, r3.w\n"
+"iadd r3._y__, r3.w, r3.y\n"
+"dcl_literal l111, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r4.x_z_, r3.w, l111\n"
+"dcl_literal l112, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.___w, r4.x, l112\n"
+"dcl_literal l113, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r4.x___, r4.z, l113\n"
+"iadd r2.x___, r3.w, r2.x\n"
+"dcl_literal l114, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l114\n"
+"dcl_literal l115, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3.___w, l115, r2.x\n"
+"dcl_literal l116, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.x___, l116, r2.x_neg(xyzw)\n"
+"dcl_literal l117, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r3._y__, r3.w, l117, r3.y\n"
+"dcl_literal l118, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+"ilt r4.__zw, l118, r2.x\n"
+"dcl_literal l119, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r3.___w, r4.z, l119, r2.x\n"
+"dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, r2.x, l120\n"
+"dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.x, l121, r3.w\n"
+"ishr r2.x___, r4.x, r2.x\n"
+"cmov_logical r2.x___, r4.w, r2.x, r3.y\n"
+"dcl_literal l122, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n"
+"lt r3.x___, l122, r3.x\n"
+"dcl_literal l123, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.x___, r3.x, l123, r2.x\n"
+"ior r2.x___, r4.y, r2.x\n"
+"dcl_literal l124, 0x42000000, 0x42000000, 0x00000000, 0x00000000\n"
+"dcl_literal l125, 0x3F000000, 0xBF000000, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r0.w, l124, l125\n"
+"ftoi r3.xy__, r3.xyxx\n"
+"cmov_logical r2.___w, r2.w, r3.x, r3.y\n"
+"itof r3.x___, r2.w\n"
+"dcl_literal l126, 0x3D000000, 0x3D000000, 0x3D000000, 0x3D000000\n"
+"mad_ieee r0.___w, r3.x_neg(xyzw), l126, r0.w\n"
+"dcl_literal l127, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820\n"
+"dcl_literal l128, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3.x___, r0.w, l127, l128\n"
+"dcl_literal l129, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mul_ieee r0.___w, r0.w, l129\n"
+"mul_ieee r3._y__, r0.w, r0.w\n"
+"mad_ieee r0.___w, r3.y, r3.x, r0.w\n"
+"dcl_literal l130, 0x0000001F, 0x00000003, 0x00000000, 0x00000000\n"
+"and r3.xy__, r2.w, l130\n"
+"iadd r2.___w, r2.w, r3.x_neg(xyzw)\n"
+"dcl_literal l131, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r4.xyz_, r3.y, l131\n"
+"dcl_literal l132, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r3.x___, r3.x, l132\n"
+"dcl_literal l133, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l134, 0x3F800000, 0x3F82C000, 0x3F858000, 0x3F888000\n"
+"cmov_logical r5, r3.x, l133, l134\n"
+"dcl_literal l135, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r6, r3.x, l135\n"
+"dcl_literal l136, 0x3F8B8000, 0x3F8E8000, 0x3F91C000, 0x3F94C000\n"
+"cmov_logical r5, r6.x, l136, r5\n"
+"dcl_literal l137, 0x3F980000, 0x3F9B8000, 0x3F9EC000, 0x3FA24000\n"
+"cmov_logical r5, r6.y, l137, r5\n"
+"dcl_literal l138, 0x3FA5C000, 0x3FA98000, 0x3FAD4000, 0x3FB10000\n"
+"cmov_logical r5, r6.z, l138, r5\n"
+"dcl_literal l139, 0x3FB50000, 0x3FB8C000, 0x3FBD0000, 0x3FC10000\n"
+"cmov_logical r5, r6.w, l139, r5\n"
+"dcl_literal l140, 0x00000005, 0x00000006, 0x00000007, 0x00000000\n"
+"ieq r7.xyz_, r3.x, l140\n"
+"dcl_literal l141, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l142, 0x00000000, 0x39D86988, 0x3AAB0D9F, 0x3A407404\n"
+"cmov_logical r8, r3.x, l141, l142\n"
+"dcl_literal l143, 0x3FC54000, 0x3FC98000, 0x3FCE0000, 0x3FD28000\n"
+"cmov_logical r5, r7.x, l143, r5\n"
+"dcl_literal l144, 0x3FD74000, 0x3FDBC000, 0x3FE0C000, 0x3FE58000\n"
+"cmov_logical r5, r7.y, l144, r5\n"
+"dcl_literal l145, 0x3FEAC000, 0x3FEFC000, 0x3FF50000, 0x3FFA8000\n"
+"cmov_logical r5, r7.z, l145, r5\n"
+"cmov_logical r3.x___, r4.x, r5.y, r5.x\n"
+"cmov_logical r3.x___, r4.y, r5.z, r3.x\n"
+"cmov_logical r3.x___, r4.z, r5.w, r3.x\n"
+"dcl_literal l146, 0x3A2E0F1E, 0x3A90E62D, 0x38F4DCE0, 0x3AD3BEA3\n"
+"cmov_logical r5, r6.x, l146, r8\n"
+"dcl_literal l147, 0x3ADFC146, 0x39D39B9C, 0x3AD4C982, 0x3AC10C0C\n"
+"cmov_logical r5, r6.y, l147, r5\n"
+"dcl_literal l148, 0x3AFB5AA6, 0x3A856AD3, 0x3A41F752, 0x3A8FD607\n"
+"cmov_logical r5, r6.z, l148, r5\n"
+"dcl_literal l149, 0x391E6678, 0x3AEEBD1D, 0x398A39F4, 0x3AB13329\n"
+"cmov_logical r5, r6.w, l149, r5\n"
+"dcl_literal l150, 0x3A9CA845, 0x3AE6F619, 0x3A923054, 0x3AA07647\n"
+"cmov_logical r5, r7.x, l150, r5\n"
+"dcl_literal l151, 0x391F9958, 0x3AEEDE5F, 0x39CDEEC0, 0x3AE41B9D\n"
+"cmov_logical r5, r7.y, l151, r5\n"
+"dcl_literal l152, 0x37C6E7C0, 0x3A92E66F, 0x3A95F454, 0x38ECB6D0\n"
+"cmov_logical r5, r7.z, l152, r5\n"
+"cmov_logical r3._y__, r4.x, r5.y, r5.x\n"
+"cmov_logical r3._y__, r4.y, r5.z, r3.y\n"
+"cmov_logical r3._y__, r4.z, r5.w, r3.y\n"
+"add r3.___w, r3.x, r3.y\n"
+"mad_ieee r0.___w, r3.w, r0.w, r3.y\n"
+"add r0.___w, r3.x, r0.w\n"
+"dcl_literal l153, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.w, l153\n"
+"dcl_literal l154, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ixor r0.___w, r2.w, l154\n"
+"imax r2.___w, r2.w, r2.w_neg(xyzw)\n"
+"dcl_literal l155, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r0.___w, r0.w, l155\n"
+"dcl_literal l156, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"udiv r2.___w, r2.w, l156\n"
+"inegate r3.x___, r2.w\n"
+"cmov_logical r0.___w, r0.w, r3.x, r2.w\n"
+"iadd r0.___w, r0.w, r2.z\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l157, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n"
+"lt r2.__z_, l157, r0.w_abs\n"
+"ftoi r2.___w, r0.w_abs\n"
+"dcl_literal l158, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r0.w, l158\n"
+"dcl_literal l159, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n"
+"cmov_logical r2.__z_, r2.z, l159, r2.w\n"
+"inegate r2.___w, r2.z\n"
+"cmov_logical r0.___w, r0.w, r2.w, r2.z\n"
+"dcl_literal l160, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.__z_, r0.w, l160\n"
+"cmov_logical r2.__z_, r4.z, r0.w, r2.z\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l161, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.___w, r2.z, l161\n"
+"itof r3.x___, r4.w\n"
+"cmov_logical r3.x___, r4.z, r4.x, r3.x\n"
+"iadd r2.___w, r3.x, r2.w\n"
+"dcl_literal l162, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r3.xy__, r3.x, l162\n"
+"dcl_literal l163, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l163\n"
+"dcl_literal l164, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3._y__, r3.y, l164\n"
+"iadd r2.__z_, r3.x, r2.z\n"
+"dcl_literal l165, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.__z_, r2.z, l165\n"
+"dcl_literal l166, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3.x___, l166, r2.z\n"
+"dcl_literal l167, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.__z_, l167, r2.z_neg(xyzw)\n"
+"dcl_literal l168, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.___w, r3.x, l168, r2.w\n"
+"dcl_literal l169, 0x00000017, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.x__w, l169, r2.z\n"
+"dcl_literal l170, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r3.x___, r3.x, l170, r2.z\n"
+"dcl_literal l171, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.__z_, r2.z, l171\n"
+"dcl_literal l172, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r2.z, l172, r3.x\n"
+"ishr r2.__z_, r3.y, r2.z\n"
+"cmov_logical r2.__z_, r3.w, r2.z, r2.w\n"
+"dcl_literal l173, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n"
+"lt r0.___w, l173, r0.w\n"
+"dcl_literal l174, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r0.w, l174, r2.z\n"
+"ior r0.___w, r4.y, r0.w\n"
+"cmov_logical r0.___w, r3.z, r2.x, r0.w\n"
+"dcl_literal l175, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r2.x___, r1.z, l175\n"
+"dcl_literal l176, 0x2E800000, 0x2E800000, 0x2E800000, 0x2E800000\n"
+"ilt r2.x___, r2.x, l176\n"
+"dcl_literal l177, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.__z_, r1.z, l177\n"
+"mul_ieee r0.__z_, r0.z, r1.z\n"
+"cmov_logical r0.___w, r2.x, r2.z, r0.w\n"
+"itof r0._y__, r0.y\n"
+"dcl_literal l178, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r1.__z_, r0.y, l178\n"
+"dcl_literal l179, 0x7FFFFFFF, 0x00000000, 0x80000000, 0x00000000\n"
+"and r2.x_z_, r0.y, l179\n"
+"cmov_logical r0._y__, r1.z, r1.x, r0.w\n"
+"dcl_literal l180, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"lt r0.___w, l180, r0.z\n"
+"dcl_literal l181, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r1.__z_, r0.z, l181\n"
+"dcl_literal l182, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n"
+"lt r0.__z_, r0.z, l182\n"
+"dcl_literal l183, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1.__z_, l183, r1.z\n"
+"and r0.__zw, r0.zzzw, r1.z\n"
+"dcl_literal l184, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.w, l184, r0.y\n"
+"dcl_literal l185, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.z, l185, r0.y\n"
+"dcl_literal l186, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l186, r2.x\n"
+"inot r0.___w, r0.z\n"
+"dcl_literal l187, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r1.__z_, r2.x, l187\n"
+"inot r2.___w, r1.z\n"
+"and r0.___w, r0.w, r2.w\n"
+"dcl_literal l188, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l189, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r3.xy__, r1.x, l188, l189\n"
+"dcl_literal l190, 0x00000000, 0x80000000, 0x00000000, 0x00000000\n"
+"ine r1._y_w, r1.yyyw, l190\n"
+"inot r2.___w, r1.y\n"
+"and r3.__zw, r3.xxxy, r2.w\n"
+"and r3.__z_, r0.w, r3.z\n"
+"and r1.___w, r0.w, r1.w\n"
+"and r0.___w, r0.w, r3.y\n"
+"and r3.__z_, r3.z, r1.w\n"
+"cmov_logical r0._y__, r3.z, r0.y_neg(xyzw), r0.y\n"
+"and r3.__z_, r3.y, r1.y\n"
+"dcl_literal l191, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"dcl_literal l192, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.xy__, r2.x, l191, l192\n"
+"dcl_literal l193, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ine r2.x___, r2.z, l193\n"
+"and r2.__z_, r3.z, r4.y\n"
+"and r2.__z_, r1.w, r2.z\n"
+"inot r3.__z_, r2.x\n"
+"and r2.__z_, r2.z, r3.z\n"
+"dcl_literal l194, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r2.z, l194, r0.y\n"
+"and r2.__z_, r3.w, r4.y\n"
+"and r2.__z_, r1.w, r2.z\n"
+"and r2.__z_, r3.z, r2.z\n"
+"dcl_literal l195, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0._y__, r2.z, l195, r0.y\n"
+"inot r2.__z_, r1.w\n"
+"and r3._y__, r3.y, r4.y\n"
+"and r2._y__, r2.y, r3.x\n"
+"and r3.x___, r2.z, r3.y\n"
+"and r3._y__, r3.z, r3.x\n"
+"and r3.x___, r2.x, r3.x\n"
+"dcl_literal l196, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r3.y, l196, r0.y\n"
+"and r0.___w, r0.w, r4.y\n"
+"and r0.___w, r1.w, r0.w\n"
+"and r0.___w, r2.x, r0.w\n"
+"cmov_logical r0.x___, r0.w, r0.x, r0.y\n"
+"dcl_literal l197, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r3.x, l197, r0.x\n"
+"dcl_literal l198, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0._y__, r1.x, l198\n"
+"and r0.___w, r1.y, r0.y\n"
+"and r0._y__, r2.w, r0.y\n"
+"ior r3.x___, r4.x, r0.w\n"
+"inot r0.___w, r0.w\n"
+"dcl_literal l199, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r3.x, l199, r0.x\n"
+"dcl_literal l200, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r3.x___, r1.x, l200\n"
+"dcl_literal l201, 0x00000000, 0x7F800000, 0x00000000, 0x3F800000\n"
+"ilt r3._y_w, l201, r1.x\n"
+"and r1.x___, r2.w, r3.x\n"
+"and r1.x___, r4.y, r1.x\n"
+"and r1.___w, r1.w, r1.x\n"
+"and r1.x___, r2.z, r1.x\n"
+"and r2.__z_, r3.z, r1.w\n"
+"and r1.___w, r2.x, r1.w\n"
+"dcl_literal l202, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.x___, r2.z, l202, r0.x\n"
+"dcl_literal l203, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0.x___, r1.w, l203, r0.x\n"
+"and r1.___w, r3.z, r1.x\n"
+"and r1.x___, r2.x, r1.x\n"
+"dcl_literal l204, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r1.w, l204, r0.x\n"
+"dcl_literal l205, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.x, l205, r0.x\n"
+"and r1.x___, r1.y, r3.x\n"
+"inot r1._y__, r3.x\n"
+"and r1.x___, r4.y, r1.x\n"
+"and r1.___w, r4.y, r3.y\n"
+"and r2.__z_, r3.z, r1.x\n"
+"and r1.x___, r2.x, r1.x\n"
+"dcl_literal l206, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r2.z, l206, r0.x\n"
+"dcl_literal l207, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.x, l207, r0.x\n"
+"and r0._y__, r1.z, r0.y\n"
+"dcl_literal l208, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r0.y, l208, r0.x\n"
+"and r0._y__, r1.z, r2.y\n"
+"and r1.x___, r3.z, r0.y\n"
+"and r0._y__, r2.x, r0.y\n"
+"dcl_literal l209, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.x, l209, r0.x\n"
+"inot r1.x___, r3.y\n"
+"and r1.x___, r1.y, r1.x\n"
+"and r1.x___, r3.w, r1.x\n"
+"and r1.x___, r1.z, r1.x\n"
+"and r1._y__, r3.z, r1.x\n"
+"and r1.x___, r2.x, r1.x\n"
+"ior r0._y__, r0.y, r1.y\n"
+"dcl_literal l210, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.y, l210, r0.x\n"
+"and r0._y__, r0.z, r0.w\n"
+"ior r0._y__, r1.w, r0.y\n"
+"dcl_literal l211, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.x, l211, r0.x\n"
+"dcl_literal l212, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l212, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__powr_f32",
+"mdef(342)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x7FFFFFFF, 0x80000000\n"
+"and r1, r0.xxyy, l0\n"
+"itof r0.x___, r1.x\n"
+"dcl_literal l1, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r0.x_z_, r0.x, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.__z_, r0.z, l3\n"
+"dcl_literal l4, 0x00000024, 0x00000024, 0x00000024, 0x00000024\n"
+"iadd r0.x___, r0.x, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.x___, l5, r0.x_neg(xyzw)\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r0.___w, l6, r0.x\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.x___, r0.w, l7, r0.x\n"
+"inegate r0.___w, r0.x\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r0.___w, r0.w, l8\n"
+"iadd r0.___w, r0.z, r0.w\n"
+"ishr r0.__z_, r0.z, r0.x\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, l9, r0.x\n"
+"cmov_logical r0.x___, r0.x, r0.z, r0.w\n"
+"dcl_literal l10, 0x00800000, 0x2E800000, 0x3F800000, 0x00000000\n"
+"ilt r2.xyz_, r1.xzxx, l10\n"
+"cmov_logical r0.x___, r2.x, r0.x, r1.x\n"
+"dcl_literal l11, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r0.x_z_, r0.x, l11\n"
+"dcl_literal l12, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.__z_, r0.z, l12\n"
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.x___, r0.x, l13\n"
+"dcl_literal l14, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r0.z, l14, l15\n"
+"round_z r2.___w, r0.w\n"
+"ftoi r0.___w, r0.w\n"
+"dcl_literal l16, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r0.__z_, r2.w_neg(xyzw), l16, r0.z\n"
+"add r3.x___, r0.z, cb0[1].x\n"
+"add r3._y__, r0.z, r3.x_neg(xyzw)\n"
+"dcl_literal l17, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mul_ieee r3.__z_, r2.w, l17\n"
+"dcl_literal l18, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.___w, r3.z, l18\n"
+"dcl_literal l19, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r2.___w, r2.w, l19, r3.w_neg(xyzw)\n"
+"div_zeroop(infinity) r0.__z_, r0.z, r3.z\n"
+"dcl_literal l20, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r4.x___, r0.z, l20\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r2.w, r3.x\n"
+"add r4.x___, r0.z, r4.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.x, r3.w, r3.x\n"
+"mad_ieee r2.___w, r4.x, r2.w, r3.x\n"
+"add r2.___w, r3.y, r2.w_neg(xyzw)\n"
+"div_zeroop(infinity) r2.___w, r2.w, r3.z\n"
+"add r3.x___, r0.z, r2.w\n"
+"dcl_literal l21, 0x3E000000, 0x3E000000, 0x3E000000, 0x3E000000\n"
+"dcl_literal l22, 0x3E124925, 0x3E124925, 0x3E124925, 0x3E124925\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), l21, l22\n"
+"dcl_literal l23, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l23\n"
+"dcl_literal l24, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l24\n"
+"dcl_literal l25, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l25\n"
+"dcl_literal l26, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l26\n"
+"dcl_literal l27, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r0.z_neg(xyzw), r3.y, l27\n"
+"mul_ieee r3.__z_, r0.z_neg(xyzw), r0.z\n"
+"add r0.__z_, r0.z, r3.x_neg(xyzw)\n"
+"mad_ieee r3.___w, r3.y, r3.z, r3.x\n"
+"add r3.x___, r3.x, r3.w_neg(xyzw)\n"
+"mad_ieee r3.x___, r3.y, r3.z, r3.x\n"
+"add r0.__z_, r2.w, r0.z\n"
+"add r0.__z_, r3.x, r0.z\n"
+"add r2.___w, r3.w, r0.z\n"
+"add r3.x___, r3.w, r2.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x\n"
+"dcl_literal l28, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r2.w, l28\n"
+"add r3._y__, r2.w, r3.x_neg(xyzw)\n"
+"dcl_literal l29, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mul_ieee r2.___w, r2.w, l29\n"
+"dcl_literal l30, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.x, l30, r2.w_neg(xyzw)\n"
+"dcl_literal l31, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r3.__z_, r3.y, l31, r3.z\n"
+"dcl_literal l32, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.x, l32, r3.z\n"
+"dcl_literal l33, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.x___, r3.y, l33, r3.x\n"
+"dcl_literal l34, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r0.__z_, r0.z, l34, r3.x\n"
+"add r3.x___, r2.w, r0.z\n"
+"add r2.___w, r2.w, r3.x_neg(xyzw)\n"
+"add r0.__z_, r0.z, r2.w\n"
+"dcl_literal l35, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0\n"
+"iadd r0.___w, r0.w, l35\n"
+"dcl_literal l36, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r2.___w, r0.w, l36\n"
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l38, 0x00000000, 0x2FB85A45, 0x312C77EC, 0x31FD14FD\n"
+"cmov_logical r4, r2.w, l37, l38\n"
+"dcl_literal l39, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r5, r2.w, l39\n"
+"dcl_literal l40, 0x31D64899, 0x2F2DABBA, 0x31CDA79E, 0x309E2B87\n"
+"cmov_logical r4, r5.x, l40, r4\n"
+"dcl_literal l41, 0x324FDEB4, 0x3201781E, 0x31DB4EC9, 0x31C32597\n"
+"cmov_logical r4, r5.y, l41, r4\n"
+"dcl_literal l42, 0x32481340, 0x32D6985C, 0x3230E074, 0x326A4CDF\n"
+"cmov_logical r4, r5.z, l42, r4\n"
+"dcl_literal l43, 0x323CD1B9, 0x32CFDEB4, 0x3259D0CE, 0x31458715\n"
+"cmov_logical r4, r5.w, l43, r4\n"
+"dcl_literal l44, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+"ieq r6, r2.w, l44\n"
+"dcl_literal l45, 0x3211F171, 0x3227F605, 0x32B9C415, 0x325295B5\n"
+"cmov_logical r4, r6.x, l45, r4\n"
+"dcl_literal l46, 0x2F4BA83C, 0x30F3AA69, 0x320CB06D, 0x32B97998\n"
+"cmov_logical r4, r6.y, l46, r4\n"
+"dcl_literal l47, 0x30D63AA6, 0x33550F2A, 0x3374AE80, 0x3321393E\n"
+"cmov_logical r4, r6.z, l47, r4\n"
+"dcl_literal l48, 0x3267EF5A, 0x32AB49CA, 0x3355010B, 0x3374C355\n"
+"cmov_logical r4, r6.w, l48, r4\n"
+"dcl_literal l49, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+"ieq r7, r2.w, l49\n"
+"dcl_literal l50, 0x32BCD1B9, 0x3261151F, 0x325313A6, 0x3320F04D\n"
+"cmov_logical r4, r7.x, l50, r4\n"
+"dcl_literal l51, 0x32E4788D, 0x32A76195, 0x3332703B, 0x3349A817\n"
+"cmov_logical r4, r7.y, l51, r4\n"
+"dcl_literal l52, 0x332DF384, 0x32B06EF9, 0x33300016, 0x331565B0\n"
+"cmov_logical r4, r7.z, l52, r4\n"
+"dcl_literal l53, 0x336A8086, 0x331C70C1, 0x332C00A7, 0x3257990D\n"
+"cmov_logical r4, r7.w, l53, r4\n"
+"dcl_literal l54, 0x00000000, 0x0000000D, 0x0000000E, 0x0000000F\n"
+"ieq r3._yzw, r2.w, l54\n"
+"dcl_literal l55, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l56, 0x00000000, 0x3CB73CB4, 0x3D35D69B, 0x3D8759C4\n"
+"cmov_logical r8, r2.w, l55, l56\n"
+"dcl_literal l57, 0x32F5532E, 0x336633F3, 0x31B86815, 0x33654999\n"
+"cmov_logical r4, r3.y, l57, r4\n"
+"dcl_literal l58, 0x32D26089, 0x2FCBA83C, 0x32B3FF57, 0x32E0E014\n"
+"cmov_logical r4, r3.z, l58, r4\n"
+"dcl_literal l59, 0x331B1354, 0x330D9D4B, 0x32BCF065, 0x330BBE12\n"
+"cmov_logical r4, r3.w, l59, r4\n"
+"dcl_literal l60, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"and r2.___w, r0.w, l60\n"
+"dcl_literal l61, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r0.___w, r0.w, l61\n"
+"dcl_literal l62, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r9.xyz_, r2.w, l62\n"
+"cmov_logical r2.___w, r9.x, r4.y, r4.x\n"
+"cmov_logical r2.___w, r9.y, r4.z, r2.w\n"
+"cmov_logical r2.___w, r9.z, r4.w, r2.w\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r0.w, l63, r2.w\n"
+"add r4.x___, r3.x, r2.w\n"
+"add r3.x___, r3.x, r4.x_neg(xyzw)\n"
+"add r2.___w, r2.w, r3.x\n"
+"add r0.__z_, r0.z, r2.w\n"
+"add r2.___w, r4.x, r0.z\n"
+"add r3.x___, r4.x, r2.w_neg(xyzw)\n"
+"dcl_literal l64, 0x3DB31FB7, 0x3DDE4212, 0x3E0462C4, 0x3E19574F\n"
+"cmov_logical r4, r5.x, l64, r8\n"
+"dcl_literal l65, 0x3E2E00D1, 0x3E42615E, 0x3E567AF1, 0x3E6A4F72\n"
+"cmov_logical r4, r5.y, l65, r4\n"
+"dcl_literal l66, 0x3E7DE0B5, 0x3E88983E, 0x3E92203D, 0x3E9B8926\n"
+"cmov_logical r4, r5.z, l66, r4\n"
+"dcl_literal l67, 0x3EA4D3C2, 0x3EAE00D1, 0x3EB7110E, 0x3EC0052B\n"
+"cmov_logical r4, r5.w, l67, r4\n"
+"dcl_literal l68, 0x3EC8DDD4, 0x3ED19BB0, 0x3EDA3F5F, 0x3EE2C97D\n"
+"cmov_logical r4, r6.x, l68, r4\n"
+"dcl_literal l69, 0x3EEB3A9F, 0x3EF39355, 0x3EFBD42B, 0x3F01FED4\n"
+"cmov_logical r4, r6.y, l69, r4\n"
+"dcl_literal l70, 0x3F060828, 0x3F0A064F, 0x3F0DF988, 0x3F11E20E\n"
+"cmov_logical r4, r6.z, l70, r4\n"
+"dcl_literal l71, 0x3F15C01A, 0x3F1993E3, 0x3F1D5D9F, 0x3F211D83\n"
+"cmov_logical r4, r6.w, l71, r4\n"
+"dcl_literal l72, 0x3F24D3C2, 0x3F28808C, 0x3F2C2411, 0x3F2FBE7F\n"
+"cmov_logical r4, r7.x, l72, r4\n"
+"dcl_literal l73, 0x3F335004, 0x3F36D8CB, 0x3F3A58FE, 0x3F3DD0C7\n"
+"cmov_logical r4, r7.y, l73, r4\n"
+"dcl_literal l74, 0x3F41404E, 0x3F44A7BA, 0x3F480730, 0x3F4B5ED6\n"
+"cmov_logical r4, r7.z, l74, r4\n"
+"dcl_literal l75, 0x3F4EAECF, 0x3F51F73F, 0x3F553847, 0x3F587209\n"
+"cmov_logical r4, r7.w, l75, r4\n"
+"dcl_literal l76, 0x3F5BA4A4, 0x3F5ED038, 0x3F61F4E5, 0x3F6512C6\n"
+"cmov_logical r4, r3.y, l76, r4\n"
+"dcl_literal l77, 0x3F6829FB, 0x3F6B3A9F, 0x3F6E44CD, 0x3F7148A1\n"
+"cmov_logical r4, r3.z, l77, r4\n"
+"dcl_literal l78, 0x3F744635, 0x3F773DA3, 0x3F7A2F04, 0x3F7D1A70\n"
+"cmov_logical r4, r3.w, l78, r4\n"
+"cmov_logical r3._y__, r9.x, r4.y, r4.x\n"
+"cmov_logical r3._y__, r9.y, r4.z, r3.y\n"
+"cmov_logical r3._y__, r9.z, r4.w, r3.y\n"
+"dcl_literal l79, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.___w, r0.w, l79, r3.y\n"
+"add r3._y__, r2.w, r0.w\n"
+"add r0.___w, r0.w, r3.y_neg(xyzw)\n"
+"add r0.___w, r2.w, r0.w\n"
+"add r0.__z_, r0.z, r3.x\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r3.y, r0.z\n"
+"add r2.___w, r3.y, r0.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r2.w\n"
+"dcl_literal l80, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.x___, r0.x, l80\n"
+"dcl_literal l81, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC\n"
+"and r2.x___, r2.x, l81\n"
+"iadd r0.x___, r0.x, r2.x\n"
+"itof r0.x___, r0.x\n"
+"add r2.x___, r0.w, r0.x\n"
+"add r0.x___, r0.x, r2.x_neg(xyzw)\n"
+"add r0.x___, r0.w, r0.x\n"
+"add r0.x___, r0.z, r0.x\n"
+"add r0.__z_, r2.x, r0.x\n"
+"add r0.___w, r2.x, r0.z_neg(xyzw)\n"
+"dcl_literal l82, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2.x___, r0.z, l82\n"
+"mul_ieee r2.___w, r0.z, r0.y\n"
+"dcl_literal l83, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r0.y, l83\n"
+"mad_ieee r3._y__, r2.x, r3.x, r2.w_neg(xyzw)\n"
+"add r3.__z_, r0.y, r3.x_neg(xyzw)\n"
+"mad_ieee r3._y__, r2.x, r3.z, r3.y\n"
+"add r2.x___, r0.z, r2.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r2.x, r3.x, r3.y\n"
+"mad_ieee r2.x___, r2.x, r3.z, r3.x\n"
+"add r0.x___, r0.x, r0.w\n"
+"mad_ieee r0.___w, r0.x, r0.y, r2.x\n"
+"add r0.x___, r0.z, r0.x\n"
+"add r0.__z_, r2.w, r0.w\n"
+"add r2.x___, r2.w, r0.z_neg(xyzw)\n"
+"add r0.___w, r0.w, r2.x\n"
+"add r2.x___, r0.z, r0.w\n"
+"round_z r2.___w, r2.x\n"
+"ftoi r2.x___, r2.x\n"
+"add r3.x___, r0.z, r2.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x_neg(xyzw)\n"
+"add r0.__z_, r2.w_neg(xyzw), r0.z\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r3.x, r0.z\n"
+"add r3.x___, r3.x, r0.w_neg(xyzw)\n"
+"add r0.__z_, r0.z, r3.x\n"
+"add r0.__z_, r0.w, r0.z\n"
+"dcl_literal l84, 0x42000000, 0x42000000, 0x00000000, 0x00000000\n"
+"dcl_literal l85, 0x3F000000, 0xBF000000, 0x00000000, 0x00000000\n"
+"mad_ieee r3.xy__, r0.z, l84, l85\n"
+"ftoi r3.xy__, r3.xyxx\n"
+"dcl_literal l86, 0x00000000, 0x00000000, 0x3F317218, 0x42000000\n"
+"mul_ieee r3.__zw, r0.z, l86\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, l87, r3.w\n"
+"cmov_logical r0.___w, r0.w, r3.x, r3.y\n"
+"itof r3.x___, r0.w\n"
+"dcl_literal l88, 0x3D000000, 0x3D000000, 0x3D000000, 0x3D000000\n"
+"mad_ieee r3.x___, r3.x_neg(xyzw), l88, r0.z\n"
+"dcl_literal l89, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820\n"
+"dcl_literal l90, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r3.x, l89, l90\n"
+"dcl_literal l91, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mul_ieee r3.x___, r3.x, l91\n"
+"mul_ieee r3.___w, r3.x, r3.x\n"
+"mad_ieee r3.x___, r3.w, r3.y, r3.x\n"
+"dcl_literal l92, 0x00000000, 0x0000001F, 0x00000000, 0x00000003\n"
+"and r3._y_w, r0.w, l92\n"
+"iadd r0.___w, r0.w, r3.y_neg(xyzw)\n"
+"dcl_literal l93, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r4.xyz_, r3.w, l93\n"
+"dcl_literal l94, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r3._y__, r3.y, l94\n"
+"dcl_literal l95, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l96, 0x3F800000, 0x3F82C000, 0x3F858000, 0x3F888000\n"
+"cmov_logical r5, r3.y, l95, l96\n"
+"dcl_literal l97, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r6, r3.y, l97\n"
+"dcl_literal l98, 0x3F8B8000, 0x3F8E8000, 0x3F91C000, 0x3F94C000\n"
+"cmov_logical r5, r6.x, l98, r5\n"
+"dcl_literal l99, 0x3F980000, 0x3F9B8000, 0x3F9EC000, 0x3FA24000\n"
+"cmov_logical r5, r6.y, l99, r5\n"
+"dcl_literal l100, 0x3FA5C000, 0x3FA98000, 0x3FAD4000, 0x3FB10000\n"
+"cmov_logical r5, r6.z, l100, r5\n"
+"dcl_literal l101, 0x3FB50000, 0x3FB8C000, 0x3FBD0000, 0x3FC10000\n"
+"cmov_logical r5, r6.w, l101, r5\n"
+"dcl_literal l102, 0x00000005, 0x00000006, 0x00000007, 0x00000000\n"
+"ieq r7.xyz_, r3.y, l102\n"
+"dcl_literal l103, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l104, 0x00000000, 0x39D86988, 0x3AAB0D9F, 0x3A407404\n"
+"cmov_logical r8, r3.y, l103, l104\n"
+"dcl_literal l105, 0x3FC54000, 0x3FC98000, 0x3FCE0000, 0x3FD28000\n"
+"cmov_logical r5, r7.x, l105, r5\n"
+"dcl_literal l106, 0x3FD74000, 0x3FDBC000, 0x3FE0C000, 0x3FE58000\n"
+"cmov_logical r5, r7.y, l106, r5\n"
+"dcl_literal l107, 0x3FEAC000, 0x3FEFC000, 0x3FF50000, 0x3FFA8000\n"
+"cmov_logical r5, r7.z, l107, r5\n"
+"cmov_logical r3._y__, r4.x, r5.y, r5.x\n"
+"cmov_logical r3._y__, r4.y, r5.z, r3.y\n"
+"cmov_logical r3._y__, r4.z, r5.w, r3.y\n"
+"dcl_literal l108, 0x3A2E0F1E, 0x3A90E62D, 0x38F4DCE0, 0x3AD3BEA3\n"
+"cmov_logical r5, r6.x, l108, r8\n"
+"dcl_literal l109, 0x3ADFC146, 0x39D39B9C, 0x3AD4C982, 0x3AC10C0C\n"
+"cmov_logical r5, r6.y, l109, r5\n"
+"dcl_literal l110, 0x3AFB5AA6, 0x3A856AD3, 0x3A41F752, 0x3A8FD607\n"
+"cmov_logical r5, r6.z, l110, r5\n"
+"dcl_literal l111, 0x391E6678, 0x3AEEBD1D, 0x398A39F4, 0x3AB13329\n"
+"cmov_logical r5, r6.w, l111, r5\n"
+"dcl_literal l112, 0x3A9CA845, 0x3AE6F619, 0x3A923054, 0x3AA07647\n"
+"cmov_logical r5, r7.x, l112, r5\n"
+"dcl_literal l113, 0x391F9958, 0x3AEEDE5F, 0x39CDEEC0, 0x3AE41B9D\n"
+"cmov_logical r5, r7.y, l113, r5\n"
+"dcl_literal l114, 0x37C6E7C0, 0x3A92E66F, 0x3A95F454, 0x38ECB6D0\n"
+"cmov_logical r5, r7.z, l114, r5\n"
+"cmov_logical r3.___w, r4.x, r5.y, r5.x\n"
+"cmov_logical r3.___w, r4.y, r5.z, r3.w\n"
+"cmov_logical r3.___w, r4.z, r5.w, r3.w\n"
+"add r4.x___, r3.y, r3.w\n"
+"mad_ieee r3.x___, r4.x, r3.x, r3.w\n"
+"add r3.x___, r3.y, r3.x\n"
+"dcl_literal l115, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r3.x, l115\n"
+"dcl_literal l116, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ixor r3.x___, r0.w, l116\n"
+"imax r0.___w, r0.w, r0.w_neg(xyzw)\n"
+"dcl_literal l117, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r3.x___, r3.x, l117\n"
+"dcl_literal l118, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"udiv r0.___w, r0.w, l118\n"
+"inegate r3._y__, r0.w\n"
+"cmov_logical r0.___w, r3.x, r3.y, r0.w\n"
+"iadd r0.___w, r0.w, r2.x\n"
+"dcl_literal l119, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.x___, r0.w, l119\n"
+"cmov_logical r2.x___, r4.z, r0.w, r2.x\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l120, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.x___, r2.x, l120\n"
+"itof r3._y__, r4.w\n"
+"cmov_logical r3._y__, r4.z, r4.x, r3.y\n"
+"iadd r3.x___, r3.y, r3.x\n"
+"dcl_literal l121, 0x00000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r3._y_w, r3.y, l121\n"
+"dcl_literal l122, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3._y__, r3.y, l122\n"
+"dcl_literal l123, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.___w, r3.w, l123\n"
+"iadd r2.x___, r3.y, r2.x\n"
+"dcl_literal l124, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.x___, r2.x, l124\n"
+"dcl_literal l125, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3._y__, l125, r2.x\n"
+"dcl_literal l126, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.x___, l126, r2.x_neg(xyzw)\n"
+"dcl_literal l127, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r3.x___, r3.y, l127, r3.x\n"
+"dcl_literal l128, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r3._y__, l128, r2.x\n"
+"dcl_literal l129, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2.x___, r3.y, l129, r2.x\n"
+"dcl_literal l130, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r2.x___, r2.x, l130\n"
+"ishr r3._y__, r3.w, r2.x\n"
+"dcl_literal l131, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.x___, l131, r2.x\n"
+"cmov_logical r2.x___, r2.x, r3.y, r3.x\n"
+"dcl_literal l132, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r0.___w, l132, r0.w\n"
+"dcl_literal l133, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r0.w, l133, r2.x\n"
+"ior r0.___w, r4.y, r0.w\n"
+"dcl_literal l134, 0x7FFFFFFF, 0x0FFFF000, 0x00000000, 0x00000000\n"
+"and r3.xy__, r0.z, l134\n"
+"dcl_literal l135, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r2.x___, r3.y, l135, r3.z_neg(xyzw)\n"
+"dcl_literal l136, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r2.x___, r3.y, l136, r2.x\n"
+"add r3._y__, r0.z, r3.y_neg(xyzw)\n"
+"dcl_literal l137, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B\n"
+"lt r3.x___, r3.x, l137\n"
+"dcl_literal l138, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r2.x___, r3.y, l138, r2.x\n"
+"dcl_literal l139, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r2.x___, r3.y, l139, r2.x\n"
+"dcl_literal l140, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r0.__z_, r0.z, l140, r2.x\n"
+"dcl_literal l141, 0x00000000, 0x34904540, 0x00000000, 0x3A1BECC5\n"
+"dcl_literal l142, 0x00000000, 0x3CC2F57D, 0x00000000, 0xBC42EEB9\n"
+"mad_ieee r3._y_w, r0.z, l141, l142\n"
+"dcl_literal l143, 0x00000000, 0x37874471, 0x00000000, 0x3DDB50CA\n"
+"mad_ieee r3._y_w, r3.yyyw, r0.z, l143\n"
+"dcl_literal l144, 0x00000000, 0x3F7FE378, 0x00000000, 0xBEFFE15B\n"
+"mad_ieee r3._y_w, r3.yyyw, r0.z, l144\n"
+"dcl_literal l145, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378\n"
+"mad_ieee r0.__z_, r3.w, r0.z, l145\n"
+"div_zeroop(infinity) r0.__z_, r3.y, r0.z\n"
+"mul_ieee r2.x___, r2.x, r0.z\n"
+"mad_ieee r0.__z_, r0.z, r3.z, r2.x\n"
+"dcl_literal l146, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.__z_, r0.z, l146\n"
+"dcl_literal l147, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.z, l147\n"
+"ftoi r0.__z_, r2.w\n"
+"dcl_literal l148, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2.x___, l148, r2.w\n"
+"dcl_literal l149, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.___w, r0.z, l149\n"
+"cmov_logical r0.__z_, r4.z, r0.z, r2.w\n"
+"dcl_literal l150, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.___w, r0.z, l150\n"
+"itof r3._y__, r4.w\n"
+"cmov_logical r3._y__, r4.z, r4.x, r3.y\n"
+"iadd r2.___w, r3.y, r2.w\n"
+"dcl_literal l151, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r3._yz_, r3.y, l151\n"
+"dcl_literal l152, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3._y__, r3.y, l152\n"
+"dcl_literal l153, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.__z_, r3.z, l153\n"
+"iadd r0.__z_, r3.y, r0.z\n"
+"dcl_literal l154, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.__z_, r0.z, l154\n"
+"dcl_literal l155, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3._y__, l155, r0.z\n"
+"dcl_literal l156, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.__z_, l156, r0.z_neg(xyzw)\n"
+"dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.___w, r3.y, l157, r2.w\n"
+"dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r3._y__, l158, r0.z\n"
+"dcl_literal l159, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.__z_, r3.y, l159, r0.z\n"
+"dcl_literal l160, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.__z_, r0.z, l160\n"
+"ishr r3._y__, r3.z, r0.z\n"
+"dcl_literal l161, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l161, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r3.y, r2.w\n"
+"dcl_literal l162, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r2.x, l162, r0.z\n"
+"ior r0.__z_, r4.y, r0.z\n"
+"cmov_logical r0.__z_, r3.x, r0.z, r0.w\n"
+"dcl_literal l163, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.y, l163\n"
+"cmov_logical r0.__z_, r2.y, r0.w, r0.z\n"
+"dcl_literal l164, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r0.___w, r0.y, l164\n"
+"mul_ieee r0.x___, r0.y, r0.x\n"
+"cmov_logical r0._y__, r0.w, r1.x, r0.z\n"
+"dcl_literal l165, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"lt r0.__z_, l165, r0.x\n"
+"dcl_literal l166, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.x, l166\n"
+"dcl_literal l167, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n"
+"lt r0.x___, r0.x, l167\n"
+"dcl_literal l168, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r0.___w, l168, r0.w\n"
+"and r0.x_z_, r0.xxzx, r0.w\n"
+"dcl_literal l169, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.z, l169, r0.y\n"
+"dcl_literal l170, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l170, r0.y\n"
+"dcl_literal l171, 0x00000000, 0x80000000, 0x80000000, 0x00000000\n"
+"ine r0._yz_, r1.yywy, l171\n"
+"inot r1._y_w, r0.zzzy\n"
+"dcl_literal l172, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l173, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r3, r1.xzzx, l172, l173\n"
+"and r2.xy__, r1.ywyy, r3.xwxx\n"
+"and r0.___w, r3.z, r2.x\n"
+"dcl_literal l174, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.w, l174, r0.x\n"
+"and r2.x__w, r0.zzzy, r3.xxxw\n"
+"and r0.___w, r3.z, r2.x\n"
+"dcl_literal l175, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.w, l175, r0.x\n"
+"dcl_literal l176, 0x7F800000, 0x7F800000, 0x3F800000, 0x00000000\n"
+"ilt r4.xyz_, l176, r1.xzxx\n"
+"inot r0.___w, r4.x\n"
+"dcl_literal l177, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ieq r1.__zw, r1.xxxz, l177\n"
+"dcl_literal l178, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r1.x___, r1.x, l178\n"
+"inot r5.xy__, r1.zwzz\n"
+"and r0.___w, r0.w, r5.x\n"
+"and r2.x___, r2.w, r0.w\n"
+"and r0.___w, r4.z, r0.w\n"
+"ior r2.___w, r4.y, r4.x\n"
+"and r2.x___, r3.y, r2.x\n"
+"dcl_literal l179, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l179, r0.x\n"
+"ior r2.x___, r3.x, r1.z\n"
+"and r2.x___, r3.y, r2.x\n"
+"dcl_literal l180, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r2.x, l180, r0.x\n"
+"and r1.x___, r0.y, r1.x\n"
+"and r2.x___, r5.y, r1.x\n"
+"and r1.x___, r1.w, r1.x\n"
+"and r2.x___, r3.z, r2.x\n"
+"dcl_literal l181, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l181, r0.x\n"
+"dcl_literal l182, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r1.x, l182, r0.x\n"
+"and r1.x_z_, r0.yyzy, r1.z\n"
+"and r0._y__, r1.y, r1.x\n"
+"and r1.x___, r3.z, r1.z\n"
+"and r0._y__, r3.z, r0.y\n"
+"and r1.__z_, r2.z, r3.w\n"
+"dcl_literal l183, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.y, l183, r0.x\n"
+"and r0._y__, r1.w, r1.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"and r1.__z_, r1.y, r0.y\n"
+"and r0._yz_, r0.z, r0.yywy\n"
+"and r0.___w, r1.y, r0.w\n"
+"dcl_literal l184, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.z, l184, r0.x\n"
+"ior r0._y__, r0.y, r0.w\n"
+"dcl_literal l185, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.y, l185, r0.x\n"
+"ior r0._y__, r1.x, r0.z\n"
+"dcl_literal l186, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.y, l186, r0.x\n"
+"ior r0._y__, r2.y, r2.w\n"
+"dcl_literal l187, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l187, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__pow_f32",
+"mdef(343)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x80000000, 0x7FFFFFFF, 0x80000000\n"
+"and r1, r0.xxyy, l0\n"
+"itof r0.__z_, r1.x\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r0.__zw, r0.z, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l2\n"
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r0.___w, r0.w, l3\n"
+"dcl_literal l4, 0x00000024, 0x00000024, 0x00000024, 0x00000024\n"
+"iadd r0.__z_, r0.z, l4\n"
+"dcl_literal l5, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.__z_, l5, r0.z_neg(xyzw)\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r2.x___, l6, r0.z\n"
+"dcl_literal l7, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.__z_, r2.x, l7, r0.z\n"
+"inegate r2.x___, r0.z\n"
+"dcl_literal l8, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l8\n"
+"iadd r2.x___, r0.w, r2.x\n"
+"ishr r0.___w, r0.w, r0.z\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, l9, r0.z\n"
+"cmov_logical r0.__z_, r0.z, r0.w, r2.x\n"
+"dcl_literal l10, 0x00800000, 0x2E800000, 0x3F800000, 0x00000000\n"
+"ilt r2.xyz_, r1.xzxx, l10\n"
+"cmov_logical r0.__z_, r2.x, r0.z, r1.x\n"
+"dcl_literal l11, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r0.__zw, r0.z, l11\n"
+"dcl_literal l12, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"ior r0.___w, r0.w, l12\n"
+"dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r0.z, l13\n"
+"dcl_literal l14, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"dcl_literal l15, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.___w, r0.w, l14, l15\n"
+"round_z r3.x___, r2.w\n"
+"ftoi r2.___w, r2.w\n"
+"dcl_literal l16, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mul_ieee r3._y__, r3.x, l16\n"
+"dcl_literal l17, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.__z_, r3.y, l17\n"
+"dcl_literal l18, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r3.___w, r3.x, l18, r3.z_neg(xyzw)\n"
+"dcl_literal l19, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+"mad_ieee r0.___w, r3.x_neg(xyzw), l19, r0.w\n"
+"add r3.x___, r0.w, cb0[1].x\n"
+"div_zeroop(infinity) r4.x___, r0.w, r3.y\n"
+"add r0.___w, r0.w, r3.x_neg(xyzw)\n"
+"dcl_literal l20, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r4._y__, r4.x, l20\n"
+"mad_ieee r3.x___, r4.y, r3.z, r3.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.y, r3.w, r3.x\n"
+"add r4._y__, r4.x, r4.y_neg(xyzw)\n"
+"mad_ieee r3.x___, r4.y, r3.z, r3.x\n"
+"mad_ieee r3.x___, r4.y, r3.w, r3.x\n"
+"add r0.___w, r0.w, r3.x_neg(xyzw)\n"
+"div_zeroop(infinity) r0.___w, r0.w, r3.y\n"
+"add r3.x___, r4.x, r0.w\n"
+"dcl_literal l21, 0x3E000000, 0x3E000000, 0x3E000000, 0x3E000000\n"
+"dcl_literal l22, 0x3E124925, 0x3E124925, 0x3E124925, 0x3E124925\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), l21, l22\n"
+"dcl_literal l23, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), r3.y, l23\n"
+"dcl_literal l24, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD, 0x3E4CCCCD\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), r3.y, l24\n"
+"dcl_literal l25, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), r3.y, l25\n"
+"dcl_literal l26, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), r3.y, l26\n"
+"dcl_literal l27, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r4.x_neg(xyzw), r3.y, l27\n"
+"mul_ieee r3.__z_, r4.x_neg(xyzw), r4.x\n"
+"add r3.___w, r4.x, r3.x_neg(xyzw)\n"
+"mad_ieee r4.x___, r3.y, r3.z, r3.x\n"
+"add r3.x___, r3.x, r4.x_neg(xyzw)\n"
+"mad_ieee r3.x___, r3.y, r3.z, r3.x\n"
+"add r0.___w, r0.w, r3.w\n"
+"add r0.___w, r3.x, r0.w\n"
+"add r3.x___, r4.x, r0.w\n"
+"add r3._y__, r4.x, r3.x_neg(xyzw)\n"
+"dcl_literal l28, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.__z_, r3.x, l28\n"
+"add r3.___w, r3.x, r3.z_neg(xyzw)\n"
+"dcl_literal l29, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mul_ieee r3.x___, r3.x, l29\n"
+"dcl_literal l30, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r4.x___, r3.z, l30, r3.x_neg(xyzw)\n"
+"dcl_literal l31, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000, 0x3FB8A000\n"
+"mad_ieee r4.x___, r3.w, l31, r4.x\n"
+"dcl_literal l32, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.__z_, r3.z, l32, r4.x\n"
+"dcl_literal l33, 0x39A3B296, 0x39A3B296, 0x39A3B296, 0x39A3B296\n"
+"mad_ieee r3.__z_, r3.w, l33, r3.z\n"
+"add r0.___w, r0.w, r3.y\n"
+"dcl_literal l34, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r0.___w, r0.w, l34, r3.z\n"
+"add r3._y__, r3.x, r0.w\n"
+"add r3.x___, r3.x, r3.y_neg(xyzw)\n"
+"add r0.___w, r0.w, r3.x\n"
+"dcl_literal l35, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0\n"
+"iadd r2.___w, r2.w, l35\n"
+"dcl_literal l36, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r3.x___, r2.w, l36\n"
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l38, 0x00000000, 0x2FB85A45, 0x312C77EC, 0x31FD14FD\n"
+"cmov_logical r4, r3.x, l37, l38\n"
+"dcl_literal l39, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r5, r3.x, l39\n"
+"dcl_literal l40, 0x31D64899, 0x2F2DABBA, 0x31CDA79E, 0x309E2B87\n"
+"cmov_logical r4, r5.x, l40, r4\n"
+"dcl_literal l41, 0x324FDEB4, 0x3201781E, 0x31DB4EC9, 0x31C32597\n"
+"cmov_logical r4, r5.y, l41, r4\n"
+"dcl_literal l42, 0x32481340, 0x32D6985C, 0x3230E074, 0x326A4CDF\n"
+"cmov_logical r4, r5.z, l42, r4\n"
+"dcl_literal l43, 0x323CD1B9, 0x32CFDEB4, 0x3259D0CE, 0x31458715\n"
+"cmov_logical r4, r5.w, l43, r4\n"
+"dcl_literal l44, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+"ieq r6, r3.x, l44\n"
+"dcl_literal l45, 0x3211F171, 0x3227F605, 0x32B9C415, 0x325295B5\n"
+"cmov_logical r4, r6.x, l45, r4\n"
+"dcl_literal l46, 0x2F4BA83C, 0x30F3AA69, 0x320CB06D, 0x32B97998\n"
+"cmov_logical r4, r6.y, l46, r4\n"
+"dcl_literal l47, 0x30D63AA6, 0x33550F2A, 0x3374AE80, 0x3321393E\n"
+"cmov_logical r4, r6.z, l47, r4\n"
+"dcl_literal l48, 0x3267EF5A, 0x32AB49CA, 0x3355010B, 0x3374C355\n"
+"cmov_logical r4, r6.w, l48, r4\n"
+"dcl_literal l49, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+"ieq r7, r3.x, l49\n"
+"dcl_literal l50, 0x32BCD1B9, 0x3261151F, 0x325313A6, 0x3320F04D\n"
+"cmov_logical r4, r7.x, l50, r4\n"
+"dcl_literal l51, 0x32E4788D, 0x32A76195, 0x3332703B, 0x3349A817\n"
+"cmov_logical r4, r7.y, l51, r4\n"
+"dcl_literal l52, 0x332DF384, 0x32B06EF9, 0x33300016, 0x331565B0\n"
+"cmov_logical r4, r7.z, l52, r4\n"
+"dcl_literal l53, 0x336A8086, 0x331C70C1, 0x332C00A7, 0x3257990D\n"
+"cmov_logical r4, r7.w, l53, r4\n"
+"dcl_literal l54, 0x0000000D, 0x0000000E, 0x0000000F, 0x00000000\n"
+"ieq r8.xyz_, r3.x, l54\n"
+"dcl_literal l55, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l56, 0x00000000, 0x3CB73CB4, 0x3D35D69B, 0x3D8759C4\n"
+"cmov_logical r9, r3.x, l55, l56\n"
+"dcl_literal l57, 0x32F5532E, 0x336633F3, 0x31B86815, 0x33654999\n"
+"cmov_logical r4, r8.x, l57, r4\n"
+"dcl_literal l58, 0x32D26089, 0x2FCBA83C, 0x32B3FF57, 0x32E0E014\n"
+"cmov_logical r4, r8.y, l58, r4\n"
+"dcl_literal l59, 0x331B1354, 0x330D9D4B, 0x32BCF065, 0x330BBE12\n"
+"cmov_logical r4, r8.z, l59, r4\n"
+"dcl_literal l60, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"and r3.x___, r2.w, l60\n"
+"dcl_literal l61, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"ieq r2.___w, r2.w, l61\n"
+"dcl_literal l62, 0x00000001, 0x00000000, 0x00000002, 0x00000003\n"
+"ieq r3.x_zw, r3.x, l62\n"
+"cmov_logical r4.x___, r3.x, r4.y, r4.x\n"
+"cmov_logical r4.x___, r3.z, r4.z, r4.x\n"
+"cmov_logical r4.x___, r3.w, r4.w, r4.x\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.x___, r2.w, l63, r4.x\n"
+"add r4._y__, r3.y, r4.x\n"
+"add r3._y__, r3.y, r4.y_neg(xyzw)\n"
+"add r3._y__, r4.x, r3.y\n"
+"add r0.___w, r0.w, r3.y\n"
+"add r3._y__, r4.y, r0.w\n"
+"add r4.x___, r4.y, r3.y_neg(xyzw)\n"
+"dcl_literal l64, 0x3DB31FB7, 0x3DDE4212, 0x3E0462C4, 0x3E19574F\n"
+"cmov_logical r9, r5.x, l64, r9\n"
+"dcl_literal l65, 0x3E2E00D1, 0x3E42615E, 0x3E567AF1, 0x3E6A4F72\n"
+"cmov_logical r9, r5.y, l65, r9\n"
+"dcl_literal l66, 0x3E7DE0B5, 0x3E88983E, 0x3E92203D, 0x3E9B8926\n"
+"cmov_logical r9, r5.z, l66, r9\n"
+"dcl_literal l67, 0x3EA4D3C2, 0x3EAE00D1, 0x3EB7110E, 0x3EC0052B\n"
+"cmov_logical r5, r5.w, l67, r9\n"
+"dcl_literal l68, 0x3EC8DDD4, 0x3ED19BB0, 0x3EDA3F5F, 0x3EE2C97D\n"
+"cmov_logical r5, r6.x, l68, r5\n"
+"dcl_literal l69, 0x3EEB3A9F, 0x3EF39355, 0x3EFBD42B, 0x3F01FED4\n"
+"cmov_logical r5, r6.y, l69, r5\n"
+"dcl_literal l70, 0x3F060828, 0x3F0A064F, 0x3F0DF988, 0x3F11E20E\n"
+"cmov_logical r5, r6.z, l70, r5\n"
+"dcl_literal l71, 0x3F15C01A, 0x3F1993E3, 0x3F1D5D9F, 0x3F211D83\n"
+"cmov_logical r5, r6.w, l71, r5\n"
+"dcl_literal l72, 0x3F24D3C2, 0x3F28808C, 0x3F2C2411, 0x3F2FBE7F\n"
+"cmov_logical r5, r7.x, l72, r5\n"
+"dcl_literal l73, 0x3F335004, 0x3F36D8CB, 0x3F3A58FE, 0x3F3DD0C7\n"
+"cmov_logical r5, r7.y, l73, r5\n"
+"dcl_literal l74, 0x3F41404E, 0x3F44A7BA, 0x3F480730, 0x3F4B5ED6\n"
+"cmov_logical r5, r7.z, l74, r5\n"
+"dcl_literal l75, 0x3F4EAECF, 0x3F51F73F, 0x3F553847, 0x3F587209\n"
+"cmov_logical r5, r7.w, l75, r5\n"
+"dcl_literal l76, 0x3F5BA4A4, 0x3F5ED038, 0x3F61F4E5, 0x3F6512C6\n"
+"cmov_logical r5, r8.x, l76, r5\n"
+"dcl_literal l77, 0x3F6829FB, 0x3F6B3A9F, 0x3F6E44CD, 0x3F7148A1\n"
+"cmov_logical r5, r8.y, l77, r5\n"
+"dcl_literal l78, 0x3F744635, 0x3F773DA3, 0x3F7A2F04, 0x3F7D1A70\n"
+"cmov_logical r5, r8.z, l78, r5\n"
+"cmov_logical r3.x___, r3.x, r5.y, r5.x\n"
+"cmov_logical r3.x___, r3.z, r5.z, r3.x\n"
+"cmov_logical r3.x___, r3.w, r5.w, r3.x\n"
+"dcl_literal l79, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r2.___w, r2.w, l79, r3.x\n"
+"add r3.x___, r3.y, r2.w\n"
+"add r2.___w, r2.w, r3.x_neg(xyzw)\n"
+"add r2.___w, r3.y, r2.w\n"
+"add r0.___w, r0.w, r4.x\n"
+"add r0.___w, r2.w, r0.w\n"
+"add r2.___w, r3.x, r0.w\n"
+"add r3.x___, r3.x, r2.w_neg(xyzw)\n"
+"dcl_literal l80, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.__z_, r0.z, l80\n"
+"dcl_literal l81, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC, 0xFFFFFFDC\n"
+"and r2.x___, r2.x, l81\n"
+"iadd r0.__z_, r0.z, r2.x\n"
+"itof r0.__z_, r0.z\n"
+"add r2.x___, r2.w, r0.z\n"
+"add r0.__z_, r0.z, r2.x_neg(xyzw)\n"
+"add r0.__z_, r2.w, r0.z\n"
+"add r0.___w, r0.w, r3.x\n"
+"add r0.__z_, r0.z, r0.w\n"
+"add r0.___w, r2.x, r0.z\n"
+"add r2.x___, r2.x, r0.w_neg(xyzw)\n"
+"dcl_literal l82, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r2.___w, r0.w, l82\n"
+"mul_ieee r3.x___, r0.w, r0.y\n"
+"dcl_literal l83, 0x00000000, 0xFFFFF000, 0x7F800000, 0x00000000\n"
+"and r3._yz_, r0.y, l83\n"
+"mad_ieee r3.___w, r2.w, r3.y, r3.x_neg(xyzw)\n"
+"add r4.x___, r0.y, r3.y_neg(xyzw)\n"
+"mad_ieee r3.___w, r2.w, r4.x, r3.w\n"
+"add r2.___w, r0.w, r2.w_neg(xyzw)\n"
+"mad_ieee r3._y__, r2.w, r3.y, r3.w\n"
+"mad_ieee r2.___w, r2.w, r4.x, r3.y\n"
+"dcl_literal l84, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3._y__, r3.z, l84\n"
+"add r0.__z_, r0.z, r2.x\n"
+"mad_ieee r2.x___, r0.z, r0.y, r2.w\n"
+"add r0.__z_, r0.w, r0.z\n"
+"add r0.___w, r3.x, r2.x\n"
+"add r2.___w, r3.x, r0.w_neg(xyzw)\n"
+"add r2.x___, r2.x, r2.w\n"
+"add r2.___w, r0.w, r2.x\n"
+"round_z r3.x___, r2.w\n"
+"ftoi r2.___w, r2.w\n"
+"add r3.__z_, r0.w, r3.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r3.z_neg(xyzw)\n"
+"add r0.___w, r3.x_neg(xyzw), r0.w\n"
+"add r0.___w, r2.x, r0.w\n"
+"add r2.x___, r3.z, r0.w\n"
+"add r3.__z_, r3.z, r2.x_neg(xyzw)\n"
+"add r0.___w, r0.w, r3.z\n"
+"add r0.___w, r2.x, r0.w\n"
+"dcl_literal l85, 0x00000000, 0x00000000, 0x42000000, 0x42000000\n"
+"dcl_literal l86, 0x00000000, 0x00000000, 0x3F000000, 0xBF000000\n"
+"mad_ieee r3.__zw, r0.w, l85, l86\n"
+"ftoi r3.__zw, r3.zzzw\n"
+"dcl_literal l87, 0x3F317218, 0x42000000, 0x00000000, 0x00000000\n"
+"mul_ieee r4.xy__, r0.w, l87\n"
+"dcl_literal l88, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.x___, l88, r4.y\n"
+"cmov_logical r2.x___, r2.x, r3.z, r3.w\n"
+"itof r3.__z_, r2.x\n"
+"dcl_literal l89, 0x3D000000, 0x3D000000, 0x3D000000, 0x3D000000\n"
+"mad_ieee r3.__z_, r3.z_neg(xyzw), l89, r0.w\n"
+"dcl_literal l90, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820, 0x3DEC9820\n"
+"dcl_literal l91, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3.___w, r3.z, l90, l91\n"
+"dcl_literal l92, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mul_ieee r3.__z_, r3.z, l92\n"
+"mul_ieee r4._y__, r3.z, r3.z\n"
+"mad_ieee r3.__z_, r4.y, r3.w, r3.z\n"
+"dcl_literal l93, 0x00000000, 0x0000001F, 0x00000003, 0x00000000\n"
+"and r4._yz_, r2.x, l93\n"
+"iadd r2.x___, r2.x, r4.y_neg(xyzw)\n"
+"dcl_literal l94, 0x00000001, 0x00000002, 0x00000003, 0x00000000\n"
+"ieq r5.xyz_, r4.z, l94\n"
+"dcl_literal l95, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"ishr r3.___w, r4.y, l95\n"
+"dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l97, 0x3F800000, 0x3F82C000, 0x3F858000, 0x3F888000\n"
+"cmov_logical r6, r3.w, l96, l97\n"
+"dcl_literal l98, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+"ieq r7, r3.w, l98\n"
+"dcl_literal l99, 0x3F8B8000, 0x3F8E8000, 0x3F91C000, 0x3F94C000\n"
+"cmov_logical r6, r7.x, l99, r6\n"
+"dcl_literal l100, 0x3F980000, 0x3F9B8000, 0x3F9EC000, 0x3FA24000\n"
+"cmov_logical r6, r7.y, l100, r6\n"
+"dcl_literal l101, 0x3FA5C000, 0x3FA98000, 0x3FAD4000, 0x3FB10000\n"
+"cmov_logical r6, r7.z, l101, r6\n"
+"dcl_literal l102, 0x3FB50000, 0x3FB8C000, 0x3FBD0000, 0x3FC10000\n"
+"cmov_logical r6, r7.w, l102, r6\n"
+"dcl_literal l103, 0x00000000, 0x00000005, 0x00000006, 0x00000007\n"
+"ieq r4._yzw, r3.w, l103\n"
+"dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l105, 0x00000000, 0x39D86988, 0x3AAB0D9F, 0x3A407404\n"
+"cmov_logical r8, r3.w, l104, l105\n"
+"dcl_literal l106, 0x3FC54000, 0x3FC98000, 0x3FCE0000, 0x3FD28000\n"
+"cmov_logical r6, r4.y, l106, r6\n"
+"dcl_literal l107, 0x3FD74000, 0x3FDBC000, 0x3FE0C000, 0x3FE58000\n"
+"cmov_logical r6, r4.z, l107, r6\n"
+"dcl_literal l108, 0x3FEAC000, 0x3FEFC000, 0x3FF50000, 0x3FFA8000\n"
+"cmov_logical r6, r4.w, l108, r6\n"
+"cmov_logical r3.___w, r5.x, r6.y, r6.x\n"
+"cmov_logical r3.___w, r5.y, r6.z, r3.w\n"
+"cmov_logical r3.___w, r5.z, r6.w, r3.w\n"
+"dcl_literal l109, 0x3A2E0F1E, 0x3A90E62D, 0x38F4DCE0, 0x3AD3BEA3\n"
+"cmov_logical r6, r7.x, l109, r8\n"
+"dcl_literal l110, 0x3ADFC146, 0x39D39B9C, 0x3AD4C982, 0x3AC10C0C\n"
+"cmov_logical r6, r7.y, l110, r6\n"
+"dcl_literal l111, 0x3AFB5AA6, 0x3A856AD3, 0x3A41F752, 0x3A8FD607\n"
+"cmov_logical r6, r7.z, l111, r6\n"
+"dcl_literal l112, 0x391E6678, 0x3AEEBD1D, 0x398A39F4, 0x3AB13329\n"
+"cmov_logical r6, r7.w, l112, r6\n"
+"dcl_literal l113, 0x3A9CA845, 0x3AE6F619, 0x3A923054, 0x3AA07647\n"
+"cmov_logical r6, r4.y, l113, r6\n"
+"dcl_literal l114, 0x391F9958, 0x3AEEDE5F, 0x39CDEEC0, 0x3AE41B9D\n"
+"cmov_logical r6, r4.z, l114, r6\n"
+"dcl_literal l115, 0x37C6E7C0, 0x3A92E66F, 0x3A95F454, 0x38ECB6D0\n"
+"cmov_logical r6, r4.w, l115, r6\n"
+"cmov_logical r4._y__, r5.x, r6.y, r6.x\n"
+"cmov_logical r4._y__, r5.y, r6.z, r4.y\n"
+"cmov_logical r4._y__, r5.z, r6.w, r4.y\n"
+"add r4.__z_, r3.w, r4.y\n"
+"mad_ieee r3.__z_, r4.z, r3.z, r4.y\n"
+"add r3.__z_, r3.w, r3.z\n"
+"dcl_literal l116, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r5, r3.z, l116\n"
+"dcl_literal l117, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ixor r3.__z_, r2.x, l117\n"
+"imax r2.x___, r2.x, r2.x_neg(xyzw)\n"
+"dcl_literal l118, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r3.__z_, r3.z, l118\n"
+"dcl_literal l119, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"udiv r2.x___, r2.x, l119\n"
+"inegate r3.___w, r2.x\n"
+"cmov_logical r2.x___, r3.z, r3.w, r2.x\n"
+"iadd r2.x___, r2.x, r2.w\n"
+"dcl_literal l120, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r2.___w, r2.x, l120\n"
+"cmov_logical r2.___w, r5.z, r2.x, r2.w\n"
+"itof r2.x___, r2.x\n"
+"dcl_literal l121, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.__z_, r2.w, l121\n"
+"itof r3.___w, r5.w\n"
+"cmov_logical r3.___w, r5.z, r5.x, r3.w\n"
+"iadd r3.__z_, r3.w, r3.z\n"
+"dcl_literal l122, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r4._yz_, r3.w, l122\n"
+"dcl_literal l123, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.___w, r4.y, l123\n"
+"dcl_literal l124, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r4._y__, r4.z, l124\n"
+"iadd r2.___w, r3.w, r2.w\n"
+"dcl_literal l125, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.___w, r2.w, l125\n"
+"dcl_literal l126, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3.___w, l126, r2.w\n"
+"dcl_literal l127, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.___w, l127, r2.w_neg(xyzw)\n"
+"dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r3.__z_, r3.w, l128, r3.z\n"
+"dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r3.___w, l129, r2.w\n"
+"dcl_literal l130, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2.___w, r3.w, l130, r2.w\n"
+"dcl_literal l131, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r2.___w, r2.w, l131\n"
+"ishr r3.___w, r4.y, r2.w\n"
+"dcl_literal l132, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.___w, l132, r2.w\n"
+"cmov_logical r2.___w, r2.w, r3.w, r3.z\n"
+"dcl_literal l133, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2.x___, l133, r2.x\n"
+"dcl_literal l134, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.x___, r2.x, l134, r2.w\n"
+"ior r2.x___, r5.y, r2.x\n"
+"dcl_literal l135, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x0FFFF000\n"
+"and r3.__zw, r0.w, l135\n"
+"dcl_literal l136, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r2.___w, r3.w, l136, r4.x_neg(xyzw)\n"
+"dcl_literal l137, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r2.___w, r3.w, l137, r2.w\n"
+"add r3.___w, r0.w, r3.w_neg(xyzw)\n"
+"dcl_literal l138, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B, 0x3DB8AA3B\n"
+"lt r3.__z_, r3.z, l138\n"
+"dcl_literal l139, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r2.___w, r3.w, l139, r2.w\n"
+"dcl_literal l140, 0x325F473E, 0x325F473E, 0x325F473E, 0x325F473E\n"
+"mad_ieee r2.___w, r3.w, l140, r2.w\n"
+"dcl_literal l141, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+"mad_ieee r0.___w, r0.w, l141, r2.w\n"
+"dcl_literal l142, 0x00000000, 0x34904540, 0x3A1BECC5, 0x00000000\n"
+"dcl_literal l143, 0x00000000, 0x3CC2F57D, 0xBC42EEB9, 0x00000000\n"
+"mad_ieee r4._yz_, r0.w, l142, l143\n"
+"dcl_literal l144, 0x00000000, 0x37874471, 0x3DDB50CA, 0x00000000\n"
+"mad_ieee r4._yz_, r4.yyzy, r0.w, l144\n"
+"dcl_literal l145, 0x00000000, 0x3F7FE378, 0xBEFFE15B, 0x00000000\n"
+"mad_ieee r4._yz_, r4.yyzy, r0.w, l145\n"
+"dcl_literal l146, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378, 0x3F7FE378\n"
+"mad_ieee r0.___w, r4.z, r0.w, l146\n"
+"div_zeroop(infinity) r0.___w, r4.y, r0.w\n"
+"mul_ieee r2.___w, r2.w, r0.w\n"
+"mad_ieee r0.___w, r0.w, r4.x, r2.w\n"
+"dcl_literal l147, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.w, l147\n"
+"dcl_literal l148, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r4, r0.w, l148\n"
+"ftoi r0.___w, r3.x\n"
+"dcl_literal l149, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2.___w, l149, r3.x\n"
+"dcl_literal l150, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+"iadd r3.x___, r0.w, l150\n"
+"cmov_logical r0.___w, r4.z, r0.w, r3.x\n"
+"dcl_literal l151, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.x___, r0.w, l151\n"
+"itof r3.___w, r4.w\n"
+"cmov_logical r3.___w, r4.z, r4.x, r3.w\n"
+"iadd r3.x___, r3.w, r3.x\n"
+"dcl_literal l152, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r4.x_z_, r3.w, l152\n"
+"dcl_literal l153, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.___w, r4.x, l153\n"
+"dcl_literal l154, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r4.x___, r4.z, l154\n"
+"iadd r0.___w, r3.w, r0.w\n"
+"dcl_literal l155, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, r0.w, l155\n"
+"dcl_literal l156, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ilt r3.___w, l156, r0.w\n"
+"dcl_literal l157, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r0.___w, l157, r0.w_neg(xyzw)\n"
+"dcl_literal l158, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r3.x___, r3.w, l158, r3.x\n"
+"dcl_literal l159, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r3.___w, l159, r0.w\n"
+"dcl_literal l160, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r0.___w, r3.w, l160, r0.w\n"
+"dcl_literal l161, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.___w, r0.w, l161\n"
+"ishr r3.___w, r4.x, r0.w\n"
+"dcl_literal l162, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l162, r0.w\n"
+"cmov_logical r0.___w, r0.w, r3.w, r3.x\n"
+"dcl_literal l163, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.___w, r2.w, l163, r0.w\n"
+"ior r0.___w, r4.y, r0.w\n"
+"cmov_logical r0.___w, r3.z, r0.w, r2.x\n"
+"dcl_literal l164, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.x___, r0.y, l164\n"
+"cmov_logical r0.___w, r2.y, r2.x, r0.w\n"
+"dcl_literal l165, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r2.x___, r0.y, l165\n"
+"cmov_logical r0.___w, r2.x, r1.x, r0.w\n"
+"mul_ieee r0._y__, r0.y, r0.z\n"
+"dcl_literal l166, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+"lt r0.__z_, l166, r0.y\n"
+"dcl_literal l167, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r2.x___, r0.y, l167\n"
+"dcl_literal l168, 0xC3150000, 0xC3150000, 0xC3150000, 0xC3150000\n"
+"lt r0._y__, r0.y, l168\n"
+"dcl_literal l169, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r2.x___, l169, r2.x\n"
+"and r0._yz_, r0.yyzy, r2.x\n"
+"dcl_literal l170, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r0.z, l170, r0.w\n"
+"dcl_literal l171, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.y, l171, r0.z\n"
+"round_neginf r0.__z_, r1.z\n"
+"ne r0.___w, r0.z, r1.z\n"
+"eq r0.__z_, r0.z, r1.z\n"
+"dcl_literal l172, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"lt r2.x___, r1.z, l172\n"
+"ior r0.___w, r0.w, r2.x\n"
+"dcl_literal l173, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"iadd r2.x___, r3.y, l173\n"
+"dcl_literal l174, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ige r2.x___, l174, r2.x\n"
+"dcl_literal l175, 0x7F800000, 0x7F800000, 0x3F800000, 0x00000000\n"
+"ilt r3.xyz_, l175, r1.xzxx\n"
+"inot r2._y_w, r3.xxxy\n"
+"dcl_literal l176, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ieq r4.xy__, r1.xzxx, l176\n"
+"inot r4.__zw, r4.xxxy\n"
+"and r2._y_w, r2.yyyw, r4.zzzw\n"
+"and r2.x___, r2.x, r2.w\n"
+"and r0.__zw, r0.zzzw, r2.x\n"
+"dcl_literal l177, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"dcl_literal l178, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"cmov_logical r0.___w, r0.w, l177, l178\n"
+"dcl_literal l179, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r2.x___, r1.z, l179\n"
+"frc r2.x___, r2.x\n"
+"dcl_literal l180, 0x3DCCCCCD, 0x3DCCCCCD, 0x3DCCCCCD, 0x3DCCCCCD\n"
+"lt r3.___w, l180, r2.x\n"
+"dcl_literal l181, 0x3A83126F, 0x3A83126F, 0x3A83126F, 0x3A83126F\n"
+"lt r2.x___, r2.x, l181\n"
+"and r3.___w, r0.z, r3.w\n"
+"dcl_literal l182, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r3.w, l182, r0.w\n"
+"dcl_literal l183, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ge r3.___w, r1.z, l183\n"
+"and r0.__z_, r0.z, r3.w\n"
+"and r0.__z_, r2.x, r0.z\n"
+"dcl_literal l184, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"cmov_logical r0.__z_, r0.z, l184, r0.w\n"
+"dcl_literal l185, 0x00000001, 0x00000000, 0x00000002, 0x00000000\n"
+"ieq r5.xyz_, r0.z, l185\n"
+"dcl_literal l186, 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF\n"
+"dcl_literal l187, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000\n"
+"cmov_logical r6, r1.xxzz, l186, l187\n"
+"dcl_literal l188, 0x00000000, 0x00000000, 0x80000000, 0x80000000\n"
+"ine r0.__zw, r1.yyyw, l188\n"
+"dcl_literal l189, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"eq r1.x___, r1.x, l189\n"
+"inot r1._yz_, r0.zzwz\n"
+"and r4.__zw, r6.yyyx, r1.y\n"
+"and r1.___w, r2.w, r4.z\n"
+"and r2.x___, r6.w, r4.w\n"
+"and r1.___w, r5.x, r1.w\n"
+"cmov_logical r0._y__, r1.w, r0.y_neg(xyzw), r0.y\n"
+"and r1.___w, r6.x, r0.z\n"
+"and r1.___w, r6.w, r1.w\n"
+"and r1.___w, r5.x, r1.w\n"
+"and r1.___w, r1.z, r1.w\n"
+"dcl_literal l190, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r1.w, l190, r0.y\n"
+"and r1.___w, r5.x, r2.x\n"
+"and r1.___w, r1.z, r1.w\n"
+"dcl_literal l191, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0._y__, r1.w, l191, r0.y\n"
+"ior r1.___w, r5.z, r5.y\n"
+"and r2.x___, r6.w, r6.x\n"
+"and r2.x___, r1.w, r2.x\n"
+"and r3.___w, r1.z, r2.x\n"
+"and r2.x___, r0.w, r2.x\n"
+"dcl_literal l192, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r3.w, l192, r0.y\n"
+"and r3.___w, r2.w, r6.x\n"
+"and r3.___w, r6.w, r3.w\n"
+"and r3.___w, r5.x, r3.w\n"
+"and r3.___w, r0.w, r3.w\n"
+"cmov_logical r0.x___, r3.w, r0.x, r0.y\n"
+"dcl_literal l193, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r2.x, l193, r0.x\n"
+"and r0._y__, r0.z, r1.x\n"
+"and r1.x___, r1.x, r1.y\n"
+"ior r2.x___, r6.z, r0.y\n"
+"inot r0._y__, r0.y\n"
+"dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r2.x, l194, r0.x\n"
+"and r2.x___, r4.x, r1.y\n"
+"and r2.x___, r6.w, r2.x\n"
+"and r3.___w, r5.x, r2.x\n"
+"and r1.___w, r1.w, r2.x\n"
+"and r2.x___, r1.z, r3.w\n"
+"and r3.___w, r0.w, r3.w\n"
+"dcl_literal l195, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.x___, r2.x, l195, r0.x\n"
+"dcl_literal l196, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"cmov_logical r0.x___, r3.w, l196, r0.x\n"
+"and r2.x___, r1.z, r1.w\n"
+"and r1.___w, r0.w, r1.w\n"
+"dcl_literal l197, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r2.x, l197, r0.x\n"
+"dcl_literal l198, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.w, l198, r0.x\n"
+"and r0.__z_, r4.x, r0.z\n"
+"and r0.__z_, r6.w, r0.z\n"
+"and r1.___w, r1.z, r0.z\n"
+"and r0.__z_, r0.w, r0.z\n"
+"dcl_literal l199, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r1.w, l199, r0.x\n"
+"dcl_literal l200, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.z, l200, r0.x\n"
+"and r0.__z_, r4.y, r1.x\n"
+"dcl_literal l201, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r0.z, l201, r0.x\n"
+"and r0.__z_, r2.z, r6.y\n"
+"and r0.__z_, r4.y, r0.z\n"
+"and r1.x___, r1.z, r0.z\n"
+"and r0.__z_, r0.w, r0.z\n"
+"dcl_literal l202, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r1.x, l202, r0.x\n"
+"and r1.x___, r3.z, r2.y\n"
+"and r1.x___, r4.y, r1.x\n"
+"and r1.__z_, r1.z, r1.x\n"
+"and r0.___w, r0.w, r1.x\n"
+"and r1.x___, r2.y, r1.y\n"
+"ior r0.__z_, r0.z, r1.z\n"
+"dcl_literal l203, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l203, r0.x\n"
+"and r0._y__, r3.y, r0.y\n"
+"and r0.__z_, r3.x, r6.w\n"
+"ior r0._y__, r0.y, r0.z\n"
+"and r0.__z_, r6.y, r1.x\n"
+"and r0.__z_, r6.w, r0.z\n"
+"and r0.__z_, r2.w, r0.z\n"
+"and r0.__z_, r5.y, r0.z\n"
+"ior r0._y__, r0.y, r0.z\n"
+"dcl_literal l204, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.x___, r0.w, l204, r0.x\n"
+"dcl_literal l205, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l205, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__radians_f32", /* Table entry: AMDIL macro text that multiplies the x component of its input by the constant in l0. */
+"mdef(344)_out(1)_in(1)\n" /* macro number 344, declared with one output and one input */
+"mov r0, in0\n" /* copy the whole input vector into r0 */
+"\n"
+"dcl_literal l0, 0x3C8EFA35, 0x3F800000, 0x00000000, 0x00000000\n" /* l0.x = 0x3C8EFA35 (~0.0174533f, i.e. pi/180); l0.y = 1.0f; z/w = 0 */
+"mul_ieee r0.x___, r0.x, l0\n" /* only r0.x is written; presumably l0.x is the multiplier for the x lane -- confirm AMDIL default swizzle */
+"mov out0, r0\n" /* y/z/w of the input pass through unchanged */
+"mend\n"
+,1,1 /* two integer fields after the macro text; presumably input/output counts (they match _in(1)/_out(1)) -- confirm against the table's struct */
+},
+{ "__reciprocal_f32", /* Table entry: AMDIL macro text computing 1.0f / in0.x. The operand is first scaled by a power of two s (s = 28 for nonzero denormals, -14 for exponents in (105,127], else 0), divided, and the quotient is rescaled by the same s using integer exponent arithmetic. NOTE(review): comments below read ilt/ige as "dst = (src0 < / >= src1)" and cmov_logical as "dst = cond ? a : b" -- confirm against the AMDIL spec. */
+"mdef(345)_out(1)_in(1)\n" /* macro number 345, declared with one output and one input */
+"mov r0, in0\n"
+"mov r1.x___, r0.x\n" /* r1.x keeps the original operand x */
+"dcl_literal l0, 0x00000000, 0x00000000, 0x41E00000, 0x00000000\n" /* l0.z = 28.0f, l0.w = 0 */
+"mov r0.__zw, l0\n" /* r0.z = 28.0f, r0.w = 0: candidate scale values selected below */
+"dcl_literal l1, 0x7FFFFFFF, 0x7F800000, 0x80000000, 0x007FFFFF\n" /* masks: abs value, exponent field, sign bit, mantissa field */
+"and r2, r1.x, l1\n" /* r2 = { |x| bits, exponent bits, sign bit, mantissa bits } */
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1._y__, r2.x, l2\n" /* r1.y = (|x| bits != 0) */
+"dcl_literal l3, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit pattern of the smallest normal float */
+"ilt r1.__z_, r2.x, l3\n" /* r1.z = (|x| bits < 0x00800000) */
+"and r1._y__, r1.y, r1.z\n" /* r1.y = x is a nonzero denormal */
+"dcl_literal l4, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishr r1.__z_, r2.y, l4\n" /* r1.z = biased exponent (exponent field >> 23) */
+"dcl_literal l5, 0xFFFFFF81, 0xFFFFFF9D, 0x00000000, 0x00000000\n" /* -127, -99 */
+"iadd r0.xy__, r1.z, l5\n" /* r0.x = exponent - 127, r0.y = exponent - 99 */
+"cmov_logical r0.xy__, r1.y, r0.yzyy, r0.xwxx\n" /* denormal: r0.x = exponent - 99, r0.y = 28.0f; otherwise: r0.x = exponent - 127, r0.y = 0 */
+"dcl_literal l6, 0x41600000, 0x41600000, 0x41600000, 0x41600000\n" /* 14.0f */
+"dcl_literal l7, 0xC1600000, 0xC1600000, 0xC1600000, 0xC1600000\n" /* -14.0f */
+"cmov_logical r0.__z_, r1.y, l6, l7\n" /* r0.z = denormal ? 14.0f : -14.0f */
+"dcl_literal l8, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n" /* 105 */
+"ilt r0.___w, l8, r0.x\n" /* r0.w = (105 < r0.x) */
+"dcl_literal l9, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ige r0.x___, l9, r0.x\n" /* r0.x = (127 >= r0.x) */
+"and r0.x___, r0.w, r0.x\n" /* r0.x = exponent value lies in (105, 127] */
+"cmov_logical r0.x___, r0.x, r0.z, r0.y\n" /* r0.x = scale exponent s as a float: r0.z when in that range, else r0.y */
+"ftoi r0._y__, r0.x\n" /* r0.y = s as an integer */
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.x___, r0.x, l10\n" /* r0.x = (s != 0): scaling is applied */
+"dcl_literal l11, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n" /* -149 */
+"iadd r0.__z_, r0.y, l11\n" /* r0.z = s - 149 */
+"cmov_logical r0.___w, r2.y, r0.y, r0.z\n" /* r0.w = s if x has a nonzero exponent field, else s - 149 */
+"dcl_literal l12, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishl r1._y__, r0.w, l12\n" /* r1.y = r0.w moved into exponent-field position */
+"itof r1.__z_, r2.w\n" /* r1.z = mantissa bits converted to float (used when the exponent field is zero) */
+"cmov_logical r1.__z_, r2.y, r2.x, r1.z\n" /* r1.z = |x| bits for a nonzero exponent field, else the converted mantissa */
+"iadd r1._y__, r1.z, r1.y\n" /* r1.y = that value with the scale added into its exponent field */
+"dcl_literal l13, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r1.z, l13\n" /* r1.z = exponent bits, r1.w = mantissa bits of the selected value */
+"dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishr r1.__z_, r1.z, l14\n" /* r1.z = biased exponent */
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.___w, r1.w, l15\n" /* r1.w = mantissa with bit 23 set */
+"iadd r0.___w, r1.z, r0.w\n" /* r0.w = biased exponent + scale */
+"dcl_literal l16, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+"iadd r0.___w, r0.w, l16\n" /* r0.w = exponent of the scaled value with the 127 bias removed */
+"dcl_literal l17, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.__z_, l17, r0.w\n" /* r1.z = (127 < r0.w): scaled exponent is out of range on the high side */
+"dcl_literal l18, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 */
+"iadd r0.___w, l18, r0.w_neg(xyzw)\n" /* r0.w = -126 - r0.w: right-shift count used if the scaled value is denormal */
+"dcl_literal l19, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r1._y__, r1.z, l19, r1.y\n" /* replace the scaled value with +inf bits on overflow */
+"dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ilt r1.__z_, l20, r0.w\n" /* r1.z = (23 < shift count) */
+"dcl_literal l21, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24 */
+"cmov_logical r0.___w, r1.z, l21, r0.w\n" /* cap the shift count at 24 */
+"dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.___w, r0.w, l22\n" /* floor the shift count at 0 */
+"ishr r1.__z_, r1.w, r0.w\n" /* r1.z = mantissa (with bit 23) shifted right: denormal bit pattern */
+"dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l23, r0.w\n" /* r0.w = (0 < shift count) */
+"cmov_logical r0.___w, r0.w, r1.z, r1.y\n" /* r0.w = shifted mantissa when a shift is needed, else the exponent-adjusted value */
+"ior r0.___w, r2.z, r0.w\n" /* put the original sign bit back */
+"cmov_logical r0.___w, r0.x, r0.w, r1.x\n" /* divisor = scaled value when s != 0, else the original x */
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"div_zeroop(infinity) r0.___w, l24, r0.w\n" /* r0.w = 1.0f / divisor; the zeroop(infinity) modifier is kept as generated */
+"dcl_literal l25, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n" /* masks: abs value, sign bit, exponent field, mantissa field (note: order differs from l1) */
+"and r1, r0.w, l25\n" /* r1 = { |q| bits, sign bit, exponent bits, mantissa bits } of the quotient q */
+"cmov_logical r0._y__, r1.z, r0.y, r0.z\n" /* r0.y = s if q has a nonzero exponent field, else s - 149 */
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishl r0.__z_, r0.y, l26\n" /* r0.z = r0.y moved into exponent-field position */
+"itof r1.___w, r1.w\n" /* r1.w = q's mantissa bits converted to float */
+"cmov_logical r1.__z_, r1.z, r1.x, r1.w\n" /* r1.z = |q| bits for a nonzero exponent field, else the converted mantissa */
+"iadd r0.__z_, r1.z, r0.z\n" /* r0.z = that value with the scale added into its exponent field */
+"dcl_literal l27, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r1.__zw, r1.z, l27\n" /* r1.z = exponent bits, r1.w = mantissa bits of the selected value */
+"dcl_literal l28, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishr r1.__z_, r1.z, l28\n" /* r1.z = biased exponent */
+"dcl_literal l29, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.___w, r1.w, l29\n" /* r1.w = mantissa with bit 23 set */
+"iadd r0._y__, r1.z, r0.y\n" /* r0.y = biased exponent + scale */
+"dcl_literal l30, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 */
+"iadd r0._y__, r0.y, l30\n" /* r0.y = rescaled exponent with the 127 bias removed */
+"dcl_literal l31, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"ilt r1.__z_, l31, r0.y\n" /* r1.z = (127 < r0.y): rescaled exponent is out of range on the high side */
+"dcl_literal l32, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 */
+"iadd r0._y__, l32, r0.y_neg(xyzw)\n" /* r0.y = -126 - r0.y: right-shift count used if the result is denormal */
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"cmov_logical r0.__z_, r1.z, l33, r0.z\n" /* replace the rescaled value with +inf bits on overflow */
+"dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ilt r1.__z_, l34, r0.y\n" /* r1.z = (23 < shift count) */
+"dcl_literal l35, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24 */
+"cmov_logical r0._y__, r1.z, l35, r0.y\n" /* cap the shift count at 24 */
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0._y__, r0.y, l36\n" /* floor the shift count at 0 */
+"ishr r1.__z_, r1.w, r0.y\n" /* r1.z = mantissa (with bit 23) shifted right: denormal bit pattern */
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, l37, r0.y\n" /* r0.y = (0 < shift count) */
+"cmov_logical r0._y__, r0.y, r1.z, r0.z\n" /* r0.y = shifted mantissa when a shift is needed, else the exponent-adjusted value */
+"ior r0._y__, r1.y, r0.y\n" /* put the quotient's sign bit back */
+"dcl_literal l38, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +infinity bit pattern */
+"ilt r0.__z_, r1.x, l38\n" /* r0.z = (|q| bits < 0x7F800000), i.e. q is neither inf nor NaN */
+"and r0.x___, r0.x, r0.z\n" /* use the rescaled value only if scaling was applied and q is finite */
+"cmov_logical r0.x___, r0.x, r0.y, r0.w\n" /* r0.x = rescaled quotient, or the raw quotient from the div otherwise */
+"mov out0, r0\n" /* the result is in out0.x; y/z/w carry leftover temporaries */
+"mend\n"
+,1,1 /* two integer fields after the macro text; presumably input/output counts (they match _in(1)/_out(1)) -- confirm against the table's struct */
+},
+{ "__remainder_f32",
+"mdef(346)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000, 0x7F800000\n"
+"and r1, r0.xyxy, l0\n"
+"dcl_literal l1, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__z_, r1.w, l1\n"
+"dcl_literal l2, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ilt r0.___w, r0.z, l2\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.___w, r1.y, l3\n"
+"and r0.___w, r0.w, r1.w\n"
+"dcl_literal l4, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"iadd r0.__z_, l4, r0.z_neg(xyzw)\n"
+"and r0.__z_, r0.w, r0.z\n"
+"itof r0.___w, r0.z\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r2.x___, r0.w, l5\n"
+"if_logicalnz r2.x\n"
+" \n"
+" dcl_literal l6, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x007FFFFF\n"
+" and r2._yzw, r0.y, l6\n"
+" itof r2.___w, r2.w\n"
+" cmov_logical r2._y__, r2.z, r2.y, r2.w\n"
+" \n"
+" dcl_literal l7, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r0.z, l7\n"
+" cmov_logical r2.__z_, r2.z, r0.z, r2.w\n"
+" \n"
+" dcl_literal l8, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.y, l8\n"
+" \n"
+" dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r3.x, l9\n"
+" iadd r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.__z_, r2.z, l10\n"
+" iadd r2._y__, r2.y, r2.z\n"
+" \n"
+" dcl_literal l11, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.__z_, r2.w, l11\n"
+" \n"
+" dcl_literal l12, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l12, r2.z\n"
+" \n"
+" dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2._y__, r2.w, l13, r2.y\n"
+" \n"
+" dcl_literal l14, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.__z_, l14, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r3.y, l15\n"
+" \n"
+" dcl_literal l16, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l16, r2.z\n"
+" \n"
+" dcl_literal l17, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.__z_, r3.x, l17, r2.z\n"
+" \n"
+" dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.__z_, r2.z, l18\n"
+" ishr r2.___w, r2.w, r2.z\n"
+" \n"
+" dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l19, r2.z\n"
+" cmov_logical r2._y__, r2.z, r2.w, r2.y\n"
+"else\n"
+" mov r2._y__, r1.y\n"
+"endif\n"
+"dcl_literal l20, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.__z_, r2.y, l20\n"
+"dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.z, l21\n"
+"dcl_literal l22, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r2.___w, r0.x, l22\n"
+"dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.___w, r2.w, l23\n"
+"ige r3.x___, r2.z, r2.w\n"
+"iadd r2.__z_, r2.w, r2.z_neg(xyzw)\n"
+"itof r2.__z_, r2.z\n"
+"dcl_literal l24, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mul_ieee r2.__z_, r2.z, l24\n"
+"round_neginf r2.__z_, r2.z\n"
+"dcl_literal l25, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r3.x, l25, r2.z\n"
+"ftoi r2.___w, r2.z\n"
+"dcl_literal l26, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imul r3.x___, r2.w, l26\n"
+"dcl_literal l27, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r3.x___, r3.x, l27\n"
+"dcl_literal l28, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imad r3._y__, r2.w, l28, r3.x_neg(xyzw)\n"
+"dcl_literal l29, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r3.x___, r3.x, l29\n"
+"dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.x___, r3.x, l30\n"
+"mul_ieee r3.x___, r2.y, r3.x\n"
+"dcl_literal l31, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r3._y__, r3.y, l31\n"
+"dcl_literal l32, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3._y__, r3.y, l32\n"
+"mul_ieee r3.x___, r3.x, r3.y\n"
+"dcl_literal l33, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r2.__z_, r2.z, l33\n"
+"cmov_logical r2._y__, r2.z, r2.y, r3.x\n"
+"dcl_literal l34, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"dcl_literal l35, 0x39800000, 0x39800000, 0x39800000, 0x39800000\n"
+"cmov_logical r2.__z_, r2.z, l34, l35\n"
+"dcl_literal l36, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r2.y, l36\n"
+"dcl_literal l37, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l37\n"
+"dcl_literal l38, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r3.x___, r3.x, l38\n"
+"dcl_literal l39, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3._y__, r2.y, l39\n"
+"dcl_literal l40, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r3.__z_, l40, r3.x\n"
+"dcl_literal l41, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r3.x___, l41, r3.x\n"
+"and r3.x___, r3.z, r3.x\n"
+"dcl_literal l42, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3.__z_, r3.x, l42\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.___w, l43, r2.w\n"
+"and r3.x___, r3.x, r3.w\n"
+"cmov_logical r3.x___, r3.x, r3.y, r2.y\n"
+"mov r3._y__, r1.x\n"
+"mov r3.___w, r3.x\n"
+"dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r4.x___, l44\n"
+"mov r4._y__, r2.y\n"
+"mov r4.__z_, r3.z\n"
+"whileloop\n"
+" ige r4.___w, r4.x, r2.w\n"
+" break_logicalnz r4.w\n"
+" div_zeroop(infinity) r4.___w, r3.y, r3.w\n"
+" \n"
+" dcl_literal l45, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+" mul_ieee r5.x___, r4.w, l45\n"
+" cmov_logical r4.___w, r4.z, r5.x, r4.w\n"
+" round_neginf r4.___w, r4.w\n"
+" \n"
+" dcl_literal l46, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5.x___, r4.y, l46\n"
+" add r5._y__, r4.y, r5.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l47, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5.__z_, r4.w, l47\n"
+" add r5.___w, r4.w, r5.z_neg(xyzw)\n"
+" mul_ieee r4.___w, r4.y, r4.w\n"
+" mad_ieee r6.x___, r5.x, r5.z, r4.w_neg(xyzw)\n"
+" mad_ieee r5.x___, r5.x, r5.w, r6.x\n"
+" mad_ieee r5.x___, r5.y, r5.z, r5.x\n"
+" mad_ieee r5.x___, r5.y, r5.w, r5.x\n"
+" add r5._y__, r3.y, r4.w_neg(xyzw)\n"
+" add r5.__z_, r3.y, r5.y_neg(xyzw)\n"
+" add r4.___w, r4.w_neg(xyzw), r5.z\n"
+" add r4.___w, r5.x_neg(xyzw), r4.w\n"
+" add r4.___w, r5.y, r4.w\n"
+" \n"
+" dcl_literal l48, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5.x___, r4.w, l48\n"
+" add r5._y__, r4.y, r4.w\n"
+" cmov_logical r4.___w, r5.x, r5.y, r4.w\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5.x___, r4.w, l49\n"
+" add r5._y__, r4.y, r4.w\n"
+" cmov_logical r4.___w, r5.x, r5.y, r4.w\n"
+" ge r5.x___, r4.w, r4.y\n"
+" add r5._y__, r4.y_neg(xyzw), r4.w\n"
+" cmov_logical r3._y__, r5.x, r5.y, r4.w\n"
+" mul_ieee r4._y__, r2.z, r4.y\n"
+" \n"
+" dcl_literal l50, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r4.x___, r4.x, l50\n"
+" mov r3.___w, r4.y\n"
+" \n"
+" dcl_literal l51, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.__z_, l51\n"
+"endloop\n"
+"mov r2._yzw, r3.wwyw\n"
+"dcl_literal l52, 0x7F800000, 0x7F800000, 0xFFFFF000, 0x00000000\n"
+"and r3.xyz_, r2.zwyz, l52\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.xy__, r3.xyxx, l53\n"
+"dcl_literal l54, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2.___w, r3.y, l54\n"
+"dcl_literal l55, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3._y__, r2.y, l55\n"
+"dcl_literal l56, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r3.___w, l56, r2.w\n"
+"dcl_literal l57, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r2.___w, l57, r2.w\n"
+"and r2.___w, r3.w, r2.w\n"
+"cmov_logical r3._y__, r2.w, r3.y, r2.y\n"
+"div_zeroop(infinity) r3._y__, r2.z, r3.y\n"
+"dcl_literal l58, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3.___w, r3.y, l58\n"
+"cmov_logical r2.___w, r2.w, r3.w, r3.y\n"
+"round_neginf r2.___w, r2.w\n"
+"add r3._y__, r2.y, r3.z_neg(xyzw)\n"
+"dcl_literal l59, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.___w, r2.w, l59\n"
+"add r4.x___, r2.w, r3.w_neg(xyzw)\n"
+"mul_ieee r4._y__, r2.y, r2.w\n"
+"mad_ieee r4.__z_, r3.z, r3.w, r4.y_neg(xyzw)\n"
+"mad_ieee r3.__z_, r3.z, r4.x, r4.z\n"
+"mad_ieee r3.__z_, r3.y, r3.w, r3.z\n"
+"mad_ieee r3._y__, r3.y, r4.x, r3.z\n"
+"add r3.__z_, r2.z, r4.y_neg(xyzw)\n"
+"add r3.___w, r2.z, r3.z_neg(xyzw)\n"
+"add r3.___w, r4.y_neg(xyzw), r3.w\n"
+"add r3._y__, r3.y_neg(xyzw), r3.w\n"
+"add r3._y__, r3.z, r3.y\n"
+"ftoi r3.__z_, r2.w\n"
+"dcl_literal l60, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, r3.y, l60\n"
+"add r4.x___, r2.y, r3.y\n"
+"dcl_literal l61, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4._y__, r3.z, l61\n"
+"cmov_logical r3._yz_, r2.w, r4.xxyx, r3.yyzy\n"
+"dcl_literal l62, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, r3.y, l62\n"
+"add r4.x___, r2.y, r3.y\n"
+"dcl_literal l63, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4._y__, r3.z, l63\n"
+"cmov_logical r3._yz_, r2.w, r4.xxyx, r3.yyzy\n"
+"ge r2.___w, r3.y, r2.y\n"
+"add r4.x___, r2.y_neg(xyzw), r3.y\n"
+"dcl_literal l64, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r4._y__, r3.z, l64\n"
+"cmov_logical r3._yz_, r2.w, r4.xxyx, r3.yyzy\n"
+"cmov_logical r2.__z_, r3.x, r3.y, r2.z\n"
+"dcl_literal l65, 0x41F00000, 0x41F00000, 0x41F00000, 0x41F00000\n"
+"lt r0.___w, l65, r0.w\n"
+"ishl r0.__z_, r3.z, r0.z\n"
+"dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r0.w, l66, r0.z\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r2.z, l67\n"
+"and r0.___w, r2.x, r0.w\n"
+"if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l68, 0x7FFFFFFF, 0x7F800000, 0x80000000, 0x007FFFFF\n"
+" and r3, r2.z, l68\n"
+" \n"
+" dcl_literal l69, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r0.___w, r3.x, l69\n"
+" \n"
+" dcl_literal l70, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r0.___w, r0.w, l70\n"
+" \n"
+" dcl_literal l71, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.___w, r0.w, l71\n"
+" \n"
+" dcl_literal l72, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r0.___w, l72, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l73, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.___w, r0.w, l73\n"
+" \n"
+" dcl_literal l74, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r0.___w, r0.w, l74\n"
+" \n"
+" dcl_literal l75, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r0.___w, r3.x, r0.w, l75\n"
+" \n"
+" dcl_literal l76, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, r0.w_neg(xyzw), l76\n"
+" \n"
+" dcl_literal l77, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r2.x___, r3.x, l77\n"
+" \n"
+" dcl_literal l78, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r2.___w, r3.x, l78\n"
+" and r2.x___, r2.x, r2.w\n"
+" \n"
+" dcl_literal l79, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r3.y, l79\n"
+" \n"
+" dcl_literal l80, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.___w, r2.w, l80\n"
+" cmov_logical r0.___w, r2.x, r0.w, r2.w\n"
+" \n"
+" dcl_literal l81, 0x7FFFFFFF, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r4.xyz_, r0.y, l81\n"
+" \n"
+" dcl_literal l82, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r0._y__, r4.x, l82\n"
+" \n"
+" dcl_literal l83, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r0._y__, r0.y, l83\n"
+" \n"
+" dcl_literal l84, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0._y__, r0.y, l84\n"
+" \n"
+" dcl_literal l85, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r0._y__, l85, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l86, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0._y__, r0.y, l86\n"
+" \n"
+" dcl_literal l87, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r0._y__, r0.y, l87\n"
+" \n"
+" dcl_literal l88, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r0._y__, r4.x, r0.y, l88\n"
+" \n"
+" dcl_literal l89, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0._y__, r0.y_neg(xyzw), l89\n"
+" \n"
+" dcl_literal l90, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r2.x___, r4.x, l90\n"
+" \n"
+" dcl_literal l91, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r2.___w, r4.x, l91\n"
+" and r2.x___, r2.x, r2.w\n"
+" \n"
+" dcl_literal l92, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r4.y, l92\n"
+" \n"
+" dcl_literal l93, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.___w, r2.w, l93\n"
+" cmov_logical r0._y__, r2.x, r0.y, r2.w\n"
+" inegate r2.x___, r0.y\n"
+" itof r2.___w, r2.x\n"
+" itof r4.__z_, r4.z\n"
+" cmov_logical r4.x___, r4.y, r4.x, r4.z\n"
+" \n"
+" dcl_literal l94, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4.__z_, r2.x, l94\n"
+" cmov_logical r4.__z_, r4.y, r2.x, r4.z\n"
+" \n"
+" dcl_literal l95, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r4.x, l95\n"
+" \n"
+" dcl_literal l96, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.___w, r5.x, l96\n"
+" \n"
+" dcl_literal l97, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4.___w, r4.w, l97\n"
+" \n"
+" dcl_literal l98, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r5.x___, r4.z, l98\n"
+" iadd r5.x___, r4.x, r5.x\n"
+" iadd r4.__z_, r4.w, r4.z\n"
+" \n"
+" dcl_literal l99, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r5.__z_, l99, r4.z\n"
+" \n"
+" dcl_literal l100, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r5.x___, r5.z, l100, r5.x\n"
+" \n"
+" dcl_literal l101, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r4.__z_, l101, r4.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l102, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r5._y__, r5.y, l102\n"
+" \n"
+" dcl_literal l103, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.__z_, l103, r4.z\n"
+" \n"
+" dcl_literal l104, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4.__z_, r5.z, l104, r4.z\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r4.__z_, r4.z, l105\n"
+" ishr r5.__z_, r5.y, r4.z\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.__z_, l106, r4.z\n"
+" cmov_logical r4.__z_, r4.z, r5.z, r5.x\n"
+" \n"
+" dcl_literal l107, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2.___w, l107, r2.w\n"
+" \n"
+" dcl_literal l108, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.___w, r2.w, l108, r4.z\n"
+" inegate r4.__z_, r0.w\n"
+" itof r5.x___, r4.z\n"
+" itof r3.___w, r3.w\n"
+" cmov_logical r3.x___, r3.y, r3.x, r3.w\n"
+" \n"
+" dcl_literal l109, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3.___w, r4.z, l109\n"
+" cmov_logical r3._y__, r3.y, r4.z, r3.w\n"
+" \n"
+" dcl_literal l110, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r5.__zw, r3.x, l110\n"
+" \n"
+" dcl_literal l111, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.___w, r5.z, l111\n"
+" iadd r3.___w, r3.w, r3.y\n"
+" \n"
+" dcl_literal l112, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3._y__, r3.y, l112\n"
+" iadd r3.x___, r3.x, r3.y\n"
+" \n"
+" dcl_literal l113, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3._y__, r3.w, l113\n"
+" \n"
+" dcl_literal l114, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.___w, l114, r3.y\n"
+" \n"
+" dcl_literal l115, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r3.x___, r3.w, l115, r3.x\n"
+" \n"
+" dcl_literal l116, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3._y__, l116, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l117, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r5.w, l117\n"
+" \n"
+" dcl_literal l118, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l118, r3.y\n"
+" \n"
+" dcl_literal l119, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r4.z, l119, r3.y\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r3._y__, r3.y, l120\n"
+" ishr r3.___w, r3.w, r3.y\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l121, r3.y\n"
+" cmov_logical r3.x___, r3.y, r3.w, r3.x\n"
+" \n"
+" dcl_literal l122, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r3._y__, l122, r5.x\n"
+" \n"
+" dcl_literal l123, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r3.x___, r3.y, l123, r3.x\n"
+" ior r3.x___, r3.z, r3.x\n"
+" iadd r0.___w, r0.w, r2.x\n"
+" mov r3._y__, r3.x\n"
+" \n"
+" dcl_literal l124, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r3.__z_, l124\n"
+" mov r2.x___, r0.w\n"
+" whileloop\n"
+" \n"
+" dcl_literal l125, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r3.___w, l125, r2.x\n"
+" break_logicalnz r3.w\n"
+" \n"
+" dcl_literal l126, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r3.___w, r3.z, l126\n"
+" ge r4.__z_, r3.y, r2.w\n"
+" add r5.x___, r2.w_neg(xyzw), r3.y\n"
+" cmov_logical r5.x___, r4.z, r5.x, r3.y\n"
+" \n"
+" dcl_literal l127, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r5.__z_, r3.w, l127\n"
+" cmov_logical r3.__z_, r4.z, r5.z, r3.w\n"
+" add r3._y__, r5.x, r5.x\n"
+" \n"
+" dcl_literal l128, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r2.x___, r2.x, l128\n"
+" endloop\n"
+" \n"
+" dcl_literal l129, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, r3.z, l129\n"
+" ge r3.x___, r3.y, r2.w\n"
+" add r2.___w, r2.w_neg(xyzw), r3.y\n"
+" cmov_logical r2.___w, r3.x, r2.w, r3.y\n"
+" \n"
+" dcl_literal l130, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r3._y__, r2.x, l130\n"
+" cmov_logical r2.x___, r3.x, r3.y, r2.x\n"
+" itof r3.x___, r0.y\n"
+" \n"
+" dcl_literal l131, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r6, r2.w, l131\n"
+" itof r2.___w, r6.w\n"
+" cmov_logical r2.___w, r6.z, r6.x, r2.w\n"
+" \n"
+" dcl_literal l132, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3._y__, r0.y, l132\n"
+" cmov_logical r0._y__, r6.z, r0.y, r3.y\n"
+" \n"
+" dcl_literal l133, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3._yz_, r2.w, l133\n"
+" \n"
+" dcl_literal l134, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l134\n"
+" iadd r3._y__, r3.y, r0.y\n"
+" \n"
+" dcl_literal l135, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l135\n"
+" iadd r0._y__, r2.w, r0.y\n"
+" \n"
+" dcl_literal l136, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.___w, r3.y, l136\n"
+" \n"
+" dcl_literal l137, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l137, r2.w\n"
+" \n"
+" dcl_literal l138, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r3.y, l138, r0.y\n"
+" \n"
+" dcl_literal l139, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.___w, l139, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l140, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.z, l140\n"
+" \n"
+" dcl_literal l141, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.__z_, l141, r2.w\n"
+" \n"
+" dcl_literal l142, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r3.z, l142, r2.w\n"
+" \n"
+" dcl_literal l143, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.___w, r2.w, l143\n"
+" ishr r3._y__, r3.y, r2.w\n"
+" \n"
+" dcl_literal l144, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, l144, r2.w\n"
+" cmov_logical r0._y__, r2.w, r3.y, r0.y\n"
+" \n"
+" dcl_literal l145, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2.___w, l145, r3.x\n"
+" \n"
+" dcl_literal l146, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.w, l146, r0.y\n"
+" ior r0._y__, r6.y, r0.y\n"
+" \n"
+" dcl_literal l147, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r0.___w, r0.w, l147\n"
+" cmov_logical r2.__z_, r0.w, r0.y, r2.z\n"
+" and r0._y__, r2.x, r0.w\n"
+" iadd r0.__z_, r0.z, r0.y\n"
+" \n"
+" dcl_literal l148, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0._y__, r2.z, l148\n"
+" \n"
+" dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0._y__, r0.y, l149\n"
+" \n"
+" dcl_literal l150, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" ilt r0._y__, r0.y, l150\n"
+" if_logicalnz r0.y\n"
+" \n"
+" dcl_literal l151, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r3, r2.z, l151\n"
+" itof r0._y__, r3.w\n"
+" cmov_logical r0._y__, r3.z, r3.x, r0.y\n"
+" \n"
+" dcl_literal l152, 0x00000030, 0x00000000, 0x00000000, 0x18000000\n"
+" \n"
+" dcl_literal l153, 0xFFFFFF9B, 0x00000000, 0x00000000, 0xCD800000\n"
+" cmov_logical r2.x__w, r3.z, l152, l153\n"
+" \n"
+" dcl_literal l154, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r3.x_z_, r0.y, l154\n"
+" \n"
+" dcl_literal l155, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r3.x, l155\n"
+" iadd r0._y_w, r0.yyyw, r2.wwwx\n"
+" \n"
+" dcl_literal l156, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r0.___w, r0.w, l156\n"
+" \n"
+" dcl_literal l157, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.x___, l157, r0.w\n"
+" \n"
+" dcl_literal l158, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.x, l158, r0.y\n"
+" \n"
+" dcl_literal l159, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, l159, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l160, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r3.z, l160\n"
+" \n"
+" dcl_literal l161, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.___w, l161, r0.w\n"
+" \n"
+" dcl_literal l162, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r2.w, l162, r0.w\n"
+" \n"
+" dcl_literal l163, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0.___w, r0.w, l163\n"
+" ishr r2.x___, r2.x, r0.w\n"
+" \n"
+" dcl_literal l164, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0.___w, l164, r0.w\n"
+" cmov_logical r0._y__, r0.w, r2.x, r0.y\n"
+" ior r2.__z_, r3.y, r0.y\n"
+" \n"
+" dcl_literal l165, 0x00000000, 0x00000030, 0x00000000, 0x18000000\n"
+" \n"
+" dcl_literal l166, 0x00000000, 0xFFFFFF9B, 0x00000000, 0xCD800000\n"
+" cmov_logical r0._y_w, r4.y, l165, l166\n"
+" iadd r0._y_w, r4.wwwx, r0.yyyw\n"
+" \n"
+" dcl_literal l167, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.x___, l167, r0.y\n"
+" \n"
+" dcl_literal l168, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r2.x, l168, r0.w\n"
+" \n"
+" dcl_literal l169, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0._y__, l169, r0.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l170, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.x___, l170, r0.y\n"
+" \n"
+" dcl_literal l171, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0._y__, r2.x, l171, r0.y\n"
+" \n"
+" dcl_literal l172, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0._y__, r0.y, l172\n"
+" ishr r2.x___, r5.y, r0.y\n"
+" \n"
+" dcl_literal l173, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0._y__, l173, r0.y\n"
+" cmov_logical r2._y__, r0.y, r2.x, r0.w\n"
+" \n"
+" dcl_literal l174, 0xC2400000, 0xC2400000, 0xC2400000, 0xC2400000\n"
+" mov r0._y__, l174\n"
+" else\n"
+" mov r2._y__, r1.y\n"
+" \n"
+" dcl_literal l175, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0._y__, l175\n"
+" endif\n"
+"else\n"
+" \n"
+" dcl_literal l176, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0._y__, l176\n"
+"endif\n"
+"add r0.___w, r2.z, r2.z\n"
+"lt r2.x___, r2.y, r0.w\n"
+"eq r0.___w, r0.w, r2.y\n"
+"dcl_literal l177, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.__z_, r0.z, l177\n"
+"dcl_literal l178, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r0.z, l178\n"
+"and r0.__z_, r0.w, r0.z\n"
+"ior r0.__z_, r2.x, r0.z\n"
+"add r0.___w, r2.y_neg(xyzw), r2.z\n"
+"cmov_logical r0.__z_, r0.z, r0.w, r2.z\n"
+"dcl_literal l179, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.___w, r0.y, l179\n"
+"if_logicalnz r0.w\n"
+" ftoi r0._y__, r0.y\n"
+" \n"
+" dcl_literal l180, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r2, r0.z, l180\n"
+" itof r0.___w, r2.w\n"
+" cmov_logical r0.___w, r2.z, r2.x, r0.w\n"
+" \n"
+" dcl_literal l181, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.x___, r0.y, l181\n"
+" cmov_logical r0._y__, r2.z, r0.y, r2.x\n"
+" \n"
+" dcl_literal l182, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r2.x_z_, r0.w, l182\n"
+" \n"
+" dcl_literal l183, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.x___, r2.x, l183\n"
+" iadd r2.x___, r2.x, r0.y\n"
+" \n"
+" dcl_literal l184, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0._y__, r0.y, l184\n"
+" iadd r0._y__, r0.w, r0.y\n"
+" \n"
+" dcl_literal l185, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r0.___w, r2.x, l185\n"
+" \n"
+" dcl_literal l186, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.x___, l186, r0.w\n"
+" \n"
+" dcl_literal l187, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0._y__, r2.x, l187, r0.y\n"
+" \n"
+" dcl_literal l188, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, l188, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l189, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.x___, r2.z, l189\n"
+" \n"
+" dcl_literal l190, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.__z_, l190, r0.w\n"
+" \n"
+" dcl_literal l191, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r2.z, l191, r0.w\n"
+" \n"
+" dcl_literal l192, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0.___w, r0.w, l192\n"
+" ishr r2.x___, r2.x, r0.w\n"
+" \n"
+" dcl_literal l193, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0.___w, l193, r0.w\n"
+" cmov_logical r0._y__, r0.w, r2.x, r0.y\n"
+" ior r0.__z_, r2.y, r0.y\n"
+"endif\n"
+"dcl_literal l194, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ixor r0._y__, r0.z, l194\n"
+"cmov_logical r0._y__, r1.z, r0.y, r0.z\n"
+"dcl_literal l195, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__zw, l195, r1.xxxy\n"
+"ior r2.x___, r0.w, r0.z\n"
+"dcl_literal l196, 0x7FC00001, 0x7FC00001, 0x7FC00001, 0x7FC00001\n"
+"cmov_logical r0._y__, r2.x, l196, r0.y\n"
+"dcl_literal l197, 0x00000000, 0x7F800000, 0x7F800000, 0x00000000\n"
+"ieq r2, r1.yxyx, l197\n"
+"ior r2.x___, r2.y, r2.x\n"
+"dcl_literal l198, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r2.x, l198, r0.y\n"
+"inot r2.xy__, r2.yzyy\n"
+"and r2.x___, r2.z, r2.x\n"
+"inot r0.__zw, r0.zzzw\n"
+"and r0.__z_, r2.x, r0.z\n"
+"and r0.___w, r1.w, r0.w\n"
+"and r1.___w, r2.w, r0.w\n"
+"ior r0.__z_, r0.z, r1.w\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n"
+"ieq r0._y__, r1.x, r1.y\n"
+"and r0.__z_, r2.y, r0.w\n"
+"and r0._y__, r0.y, r0.z\n"
+"cmov_logical r0.x___, r0.y, r1.z, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__remquo_2f32f32",
+"mdef(347)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"dcl_literal l0, 0x80000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x80000000\n"
+"and r1, r0.xxyy, l0\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"and r0.__zw, r0.yyyx, l1\n"
+"dcl_literal l2, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.__zw, r0.zzzw, l2\n"
+"dcl_literal l3, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ilt r2.x___, r0.z, l3\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2._y__, r1.z, l4\n"
+"and r2.x___, r2.x, r2.y\n"
+"dcl_literal l5, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"iadd r0.__z_, l5, r0.z_neg(xyzw)\n"
+"and r0.__z_, r2.x, r0.z\n"
+"itof r2.x___, r0.z\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r2.__z_, r2.x, l6\n"
+"if_logicalnz r2.z\n"
+" \n"
+" dcl_literal l7, 0x7FFFFFFF, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3.xyz_, r0.y, l7\n"
+" itof r2.___w, r3.z\n"
+" cmov_logical r2.___w, r3.y, r3.x, r2.w\n"
+" \n"
+" dcl_literal l8, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r3.x___, r0.z, l8\n"
+" cmov_logical r3.x___, r3.y, r0.z, r3.x\n"
+" \n"
+" dcl_literal l9, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3._yz_, r2.w, l9\n"
+" \n"
+" dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l10\n"
+" iadd r3._y__, r3.y, r3.x\n"
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.x___, r3.x, l11\n"
+" iadd r2.___w, r2.w, r3.x\n"
+" \n"
+" dcl_literal l12, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.y, l12\n"
+" \n"
+" dcl_literal l13, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l13, r3.x\n"
+" \n"
+" dcl_literal l14, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.___w, r3.y, l14, r2.w\n"
+" \n"
+" dcl_literal l15, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l15, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l16, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.z, l16\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.__z_, l17, r3.x\n"
+" \n"
+" dcl_literal l18, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r3.z, l18, r3.x\n"
+" \n"
+" dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r3.x___, r3.x, l19\n"
+" ishr r3._y__, r3.y, r3.x\n"
+" \n"
+" dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.x___, l20, r3.x\n"
+" cmov_logical r2.___w, r3.x, r3.y, r2.w\n"
+"else\n"
+" mov r2.___w, r1.z\n"
+"endif\n"
+"dcl_literal l21, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r2.w, l21\n"
+"dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l22\n"
+"ige r3._y__, r3.x, r0.w\n"
+"iadd r0.___w, r0.w, r3.x_neg(xyzw)\n"
+"itof r0.___w, r0.w\n"
+"dcl_literal l23, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mul_ieee r0.___w, r0.w, l23\n"
+"round_neginf r0.___w, r0.w\n"
+"dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.___w, r3.y, l24, r0.w\n"
+"ftoi r3.x___, r0.w\n"
+"dcl_literal l25, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imul r3._y__, r3.x, l25\n"
+"dcl_literal l26, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r3._y__, r3.y, l26\n"
+"dcl_literal l27, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n"
+"imad r3.__z_, r3.x, l27, r3.y_neg(xyzw)\n"
+"dcl_literal l28, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r3._y__, r3.y, l28\n"
+"dcl_literal l29, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3._y__, r3.y, l29\n"
+"mul_ieee r3._y__, r2.w, r3.y\n"
+"dcl_literal l30, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"iadd r3.__z_, r3.z, l30\n"
+"dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r3.__z_, r3.z, l31\n"
+"mul_ieee r3._y__, r3.y, r3.z\n"
+"dcl_literal l32, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r3.__z_, r0.w, l32\n"
+"cmov_logical r2.___w, r3.z, r2.w, r3.y\n"
+"dcl_literal l33, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"dcl_literal l34, 0x39800000, 0x39800000, 0x39800000, 0x39800000\n"
+"cmov_logical r3._y__, r3.z, l33, l34\n"
+"dcl_literal l35, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r2.w, l35\n"
+"dcl_literal l36, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.__z_, r3.z, l36\n"
+"dcl_literal l37, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r3.__z_, r3.z, l37\n"
+"dcl_literal l38, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3.___w, r2.w, l38\n"
+"dcl_literal l39, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r4.x___, l39, r3.z\n"
+"dcl_literal l40, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r3.__z_, l40, r3.z\n"
+"and r3.__z_, r4.x, r3.z\n"
+"dcl_literal l41, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r4.x___, r3.z, l41\n"
+"dcl_literal l42, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, l42, r0.w\n"
+"and r0.___w, r3.z, r0.w\n"
+"cmov_logical r0.___w, r0.w, r3.w, r2.w\n"
+"mov r3.__z_, r1.y\n"
+"mov r3.___w, r0.w\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r4._y__, l43\n"
+"mov r4.__z_, r2.w\n"
+"mov r4.___w, r4.x\n"
+"whileloop\n"
+" ige r5.x___, r4.y, r3.x\n"
+" break_logicalnz r5.x\n"
+" div_zeroop(infinity) r5.x___, r3.z, r3.w\n"
+" \n"
+" dcl_literal l44, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+" mul_ieee r5._y__, r5.x, l44\n"
+" cmov_logical r5.x___, r4.w, r5.y, r5.x\n"
+" round_neginf r5.x___, r5.x\n"
+" \n"
+" dcl_literal l45, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5._y__, r4.z, l45\n"
+" add r5.__z_, r4.z, r5.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l46, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5.___w, r5.x, l46\n"
+" add r6.x___, r5.x, r5.w_neg(xyzw)\n"
+" mul_ieee r5.x___, r4.z, r5.x\n"
+" mad_ieee r6._y__, r5.y, r5.w, r5.x_neg(xyzw)\n"
+" mad_ieee r5._y__, r5.y, r6.x, r6.y\n"
+" mad_ieee r5._y__, r5.z, r5.w, r5.y\n"
+" mad_ieee r5._y__, r5.z, r6.x, r5.y\n"
+" add r5.__z_, r3.z, r5.x_neg(xyzw)\n"
+" add r5.___w, r3.z, r5.z_neg(xyzw)\n"
+" add r5.x___, r5.x_neg(xyzw), r5.w\n"
+" add r5.x___, r5.y_neg(xyzw), r5.x\n"
+" add r5.x___, r5.z, r5.x\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5._y__, r5.x, l47\n"
+" add r5.__z_, r4.z, r5.x\n"
+" cmov_logical r5.x___, r5.y, r5.z, r5.x\n"
+" \n"
+" dcl_literal l48, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5._y__, r5.x, l48\n"
+" add r5.__z_, r4.z, r5.x\n"
+" cmov_logical r5.x___, r5.y, r5.z, r5.x\n"
+" ge r5._y__, r5.x, r4.z\n"
+" add r5.__z_, r4.z_neg(xyzw), r5.x\n"
+" cmov_logical r3.__z_, r5.y, r5.z, r5.x\n"
+" mul_ieee r4.__z_, r3.y, r4.z\n"
+" \n"
+" dcl_literal l49, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r4._y__, r4.y, l49\n"
+" mov r3.___w, r4.z\n"
+" \n"
+" dcl_literal l50, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.___w, l50\n"
+"endloop\n"
+"mov r3.xyz_, r3.wzww\n"
+"dcl_literal l51, 0x7F800000, 0x7F800000, 0xFFFFF000, 0x00000000\n"
+"and r4.xyz_, r3.yzxy, l51\n"
+"dcl_literal l52, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.__zw, r4.xxxy, l52\n"
+"dcl_literal l53, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r0.___w, r3.w, l53\n"
+"dcl_literal l54, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r2.___w, r3.x, l54\n"
+"dcl_literal l55, 0x00000069, 0x00000069, 0x00000069, 0x00000069\n"
+"ilt r3.___w, l55, r0.w\n"
+"dcl_literal l56, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"ige r0.___w, l56, r0.w\n"
+"and r0.___w, r3.w, r0.w\n"
+"cmov_logical r2.___w, r0.w, r2.w, r3.x\n"
+"div_zeroop(infinity) r2.___w, r3.y, r2.w\n"
+"dcl_literal l57, 0x38800000, 0x38800000, 0x38800000, 0x38800000\n"
+"mul_ieee r3.___w, r2.w, l57\n"
+"cmov_logical r0.___w, r0.w, r3.w, r2.w\n"
+"round_neginf r0.___w, r0.w\n"
+"add r2.___w, r3.x, r4.z_neg(xyzw)\n"
+"dcl_literal l58, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.___w, r0.w, l58\n"
+"add r4.x___, r0.w, r3.w_neg(xyzw)\n"
+"mul_ieee r4._y__, r3.x, r0.w\n"
+"mad_ieee r4.___w, r4.z, r3.w, r4.y_neg(xyzw)\n"
+"mad_ieee r4.__z_, r4.z, r4.x, r4.w\n"
+"mad_ieee r3.___w, r2.w, r3.w, r4.z\n"
+"mad_ieee r2.___w, r2.w, r4.x, r3.w\n"
+"add r3.___w, r3.y, r4.y_neg(xyzw)\n"
+"add r4.x___, r3.y, r3.w_neg(xyzw)\n"
+"add r4.x___, r4.y_neg(xyzw), r4.x\n"
+"add r2.___w, r2.w_neg(xyzw), r4.x\n"
+"add r4.x___, r3.w, r2.w\n"
+"ftoi r4._y__, r0.w\n"
+"dcl_literal l59, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r4.x, l59\n"
+"add r4.__z_, r3.x, r4.x\n"
+"dcl_literal l60, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.___w, r4.y, l60\n"
+"cmov_logical r4.xy__, r0.w, r4.zwzz, r4.xyxx\n"
+"dcl_literal l61, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r4.x, l61\n"
+"add r4.__z_, r3.x, r4.x\n"
+"dcl_literal l62, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.___w, r4.y, l62\n"
+"cmov_logical r4.xy__, r0.w, r4.zwzz, r4.xyxx\n"
+"ge r0.___w, r4.x, r3.x\n"
+"add r4.__z_, r3.x_neg(xyzw), r4.x\n"
+"dcl_literal l63, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r4.___w, r4.y, l63\n"
+"cmov_logical r4.xy__, r0.w, r4.zwzz, r4.xyxx\n"
+"cmov_logical r3._y__, r3.z, r4.x, r3.y\n"
+"dcl_literal l64, 0x41F00000, 0x41F00000, 0x41F00000, 0x41F00000\n"
+"lt r0.___w, l64, r2.x\n"
+"ishl r0.__z_, r4.y, r0.z\n"
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.__z_, r0.w, l65, r0.z\n"
+"dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.__z_, r3.y, l66\n"
+"and r0.__z_, r2.z, r0.z\n"
+"if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l67, 0x7FFFFFFF, 0x7F800000, 0x80000000, 0x007FFFFF\n"
+" and r4, r3.y, l67\n"
+" \n"
+" dcl_literal l68, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r0.__z_, r4.x, l68\n"
+" \n"
+" dcl_literal l69, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r0.__z_, r0.z, l69\n"
+" \n"
+" dcl_literal l70, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.__z_, r0.z, l70\n"
+" \n"
+" dcl_literal l71, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r0.__z_, l71, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l72, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.__z_, r0.z, l72\n"
+" \n"
+" dcl_literal l73, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r0.__z_, r0.z, l73\n"
+" \n"
+" dcl_literal l74, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r0.__z_, r4.x, r0.z, l74\n"
+" \n"
+" dcl_literal l75, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.__z_, r0.z_neg(xyzw), l75\n"
+" \n"
+" dcl_literal l76, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r0.___w, r4.x, l76\n"
+" \n"
+" dcl_literal l77, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r2.x___, r4.x, l77\n"
+" and r0.___w, r0.w, r2.x\n"
+" \n"
+" dcl_literal l78, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.x___, r4.y, l78\n"
+" \n"
+" dcl_literal l79, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.x___, r2.x, l79\n"
+" cmov_logical r0.__z_, r0.w, r0.z, r2.x\n"
+" \n"
+" dcl_literal l80, 0x7FFFFFFF, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r2.x_zw, r0.y, l80\n"
+" \n"
+" dcl_literal l81, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r0.___w, r2.x, l81\n"
+" \n"
+" dcl_literal l82, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r0.___w, r0.w, l82\n"
+" \n"
+" dcl_literal l83, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.___w, r0.w, l83\n"
+" \n"
+" dcl_literal l84, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r0.___w, l84, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l85, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.___w, r0.w, l85\n"
+" \n"
+" dcl_literal l86, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r0.___w, r0.w, l86\n"
+" \n"
+" dcl_literal l87, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" cmov_logical r0.___w, r2.x, r0.w, l87\n"
+" \n"
+" dcl_literal l88, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, r0.w_neg(xyzw), l88\n"
+" \n"
+" dcl_literal l89, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r3.___w, r2.x, l89\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ine r5.x___, r2.x, l90\n"
+" and r3.___w, r3.w, r5.x\n"
+" \n"
+" dcl_literal l91, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r5.x___, r2.z, l91\n"
+" \n"
+" dcl_literal l92, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r5.x___, r5.x, l92\n"
+" cmov_logical r0.___w, r3.w, r0.w, r5.x\n"
+" inegate r3.___w, r0.w\n"
+" itof r5.x___, r3.w\n"
+" itof r2.___w, r2.w\n"
+" cmov_logical r2.x___, r2.z, r2.x, r2.w\n"
+" \n"
+" dcl_literal l93, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r3.w, l93\n"
+" cmov_logical r2.___w, r2.z, r3.w, r2.w\n"
+" \n"
+" dcl_literal l94, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r5._yz_, r2.x, l94\n"
+" \n"
+" dcl_literal l95, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r5._y__, r5.y, l95\n"
+" \n"
+" dcl_literal l96, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r5._y__, r5.y, l96\n"
+" \n"
+" dcl_literal l97, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r5.___w, r2.w, l97\n"
+" iadd r5.___w, r2.x, r5.w\n"
+" iadd r2.___w, r5.y, r2.w\n"
+" \n"
+" dcl_literal l98, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r6.x___, l98, r2.w\n"
+" \n"
+" dcl_literal l99, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r5.___w, r6.x, l99, r5.w\n"
+" \n"
+" dcl_literal l100, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.___w, l100, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l101, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r5.__z_, r5.z, l101\n"
+" \n"
+" dcl_literal l102, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r6.x___, l102, r2.w\n"
+" \n"
+" dcl_literal l103, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r6.x, l103, r2.w\n"
+" \n"
+" dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.___w, r2.w, l104\n"
+" ishr r6.x___, r5.z, r2.w\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, l105, r2.w\n"
+" cmov_logical r2.___w, r2.w, r6.x, r5.w\n"
+" \n"
+" dcl_literal l106, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r5.x___, l106, r5.x\n"
+" \n"
+" dcl_literal l107, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.___w, r5.x, l107, r2.w\n"
+" inegate r5.x___, r0.z\n"
+" itof r5.___w, r5.x\n"
+" itof r4.___w, r4.w\n"
+" cmov_logical r4.x___, r4.y, r4.x, r4.w\n"
+" \n"
+" dcl_literal l108, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4.___w, r5.x, l108\n"
+" cmov_logical r4._y__, r4.y, r5.x, r4.w\n"
+" \n"
+" dcl_literal l109, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r6.xy__, r4.x, l109\n"
+" \n"
+" dcl_literal l110, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.___w, r6.x, l110\n"
+" iadd r4.___w, r4.w, r4.y\n"
+" \n"
+" dcl_literal l111, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4._y__, r4.y, l111\n"
+" iadd r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l112, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4._y__, r4.w, l112\n"
+" \n"
+" dcl_literal l113, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r4.___w, l113, r4.y\n"
+" \n"
+" dcl_literal l114, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r4.x___, r4.w, l114, r4.x\n"
+" \n"
+" dcl_literal l115, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r4._y__, l115, r4.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l116, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.___w, r6.y, l116\n"
+" \n"
+" dcl_literal l117, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.x___, l117, r4.y\n"
+" \n"
+" dcl_literal l118, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4._y__, r5.x, l118, r4.y\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r4._y__, r4.y, l119\n"
+" ishr r4.___w, r4.w, r4.y\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, l120, r4.y\n"
+" cmov_logical r4.x___, r4.y, r4.w, r4.x\n"
+" \n"
+" dcl_literal l121, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r4._y__, l121, r5.w\n"
+" \n"
+" dcl_literal l122, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r4.x___, r4.y, l122, r4.x\n"
+" ior r4.x___, r4.z, r4.x\n"
+" iadd r0.__z_, r0.z, r3.w\n"
+" mov r4._y__, r4.x\n"
+" \n"
+" dcl_literal l123, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.__z_, l123\n"
+" mov r3.___w, r0.z\n"
+" whileloop\n"
+" \n"
+" dcl_literal l124, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r4.___w, l124, r3.w\n"
+" break_logicalnz r4.w\n"
+" \n"
+" dcl_literal l125, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r4.___w, r4.z, l125\n"
+" ge r5.x___, r4.y, r2.w\n"
+" add r5.___w, r2.w_neg(xyzw), r4.y\n"
+" cmov_logical r5.___w, r5.x, r5.w, r4.y\n"
+" \n"
+" dcl_literal l126, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r6.x___, r4.w, l126\n"
+" cmov_logical r4.__z_, r5.x, r6.x, r4.w\n"
+" add r4._y__, r5.w, r5.w\n"
+" \n"
+" dcl_literal l127, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r3.___w, r3.w, l127\n"
+" endloop\n"
+" \n"
+" dcl_literal l128, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r3.___w, r4.z, l128\n"
+" ge r4.x___, r4.y, r2.w\n"
+" add r2.___w, r2.w_neg(xyzw), r4.y\n"
+" cmov_logical r2.___w, r4.x, r2.w, r4.y\n"
+" \n"
+" dcl_literal l129, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r4._y__, r3.w, l129\n"
+" cmov_logical r3.___w, r4.x, r4.y, r3.w\n"
+" itof r4.x___, r0.w\n"
+" \n"
+" dcl_literal l130, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r6, r2.w, l130\n"
+" itof r2.___w, r6.w\n"
+" cmov_logical r2.___w, r6.z, r6.x, r2.w\n"
+" \n"
+" dcl_literal l131, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4._y__, r0.w, l131\n"
+" cmov_logical r0.___w, r6.z, r0.w, r4.y\n"
+" \n"
+" dcl_literal l132, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r4._yz_, r2.w, l132\n"
+" \n"
+" dcl_literal l133, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4._y__, r4.y, l133\n"
+" iadd r4._y__, r4.y, r0.w\n"
+" \n"
+" dcl_literal l134, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l134\n"
+" iadd r0.___w, r2.w, r0.w\n"
+" \n"
+" dcl_literal l135, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.___w, r4.y, l135\n"
+" \n"
+" dcl_literal l136, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r4._y__, l136, r2.w\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r4.y, l137, r0.w\n"
+" \n"
+" dcl_literal l138, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.___w, l138, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l139, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.z, l139\n"
+" \n"
+" dcl_literal l140, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l140, r2.w\n"
+" \n"
+" dcl_literal l141, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r4.z, l141, r2.w\n"
+" \n"
+" dcl_literal l142, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r2.___w, r2.w, l142\n"
+" ishr r4._y__, r4.y, r2.w\n"
+" \n"
+" dcl_literal l143, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, l143, r2.w\n"
+" cmov_logical r0.___w, r2.w, r4.y, r0.w\n"
+" \n"
+" dcl_literal l144, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r2.___w, l144, r4.x\n"
+" \n"
+" dcl_literal l145, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r2.w, l145, r0.w\n"
+" ior r0.___w, r6.y, r0.w\n"
+" \n"
+" dcl_literal l146, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ige r0.__z_, r0.z, l146\n"
+" cmov_logical r3._y__, r0.z, r0.w, r3.y\n"
+" and r0.__z_, r3.w, r0.z\n"
+" iadd r3.__z_, r3.z, r0.z\n"
+" \n"
+" dcl_literal l147, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r0.__z_, r3.y, l147\n"
+" \n"
+" dcl_literal l148, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r0.z, l148\n"
+" \n"
+" dcl_literal l149, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" ilt r0.__z_, r0.z, l149\n"
+" if_logicalnz r0.z\n"
+" \n"
+" dcl_literal l150, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r4, r3.y, l150\n"
+" itof r0.__z_, r4.w\n"
+" cmov_logical r0.__z_, r4.z, r4.x, r0.z\n"
+" \n"
+" dcl_literal l151, 0x00000030, 0x00000000, 0x18000000, 0x00000000\n"
+" \n"
+" dcl_literal l152, 0xFFFFFF9B, 0x00000000, 0xCD800000, 0x00000000\n"
+" cmov_logical r4.x_z_, r4.z, l151, l152\n"
+" \n"
+" dcl_literal l153, 0x7F800000, 0x00000000, 0x00000000, 0x007FFFFF\n"
+" and r5.x__w, r0.z, l153\n"
+" \n"
+" dcl_literal l154, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r5.x, l154\n"
+" iadd r0.__zw, r0.zzzw, r4.zzzx\n"
+" \n"
+" dcl_literal l155, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r0.___w, r0.w, l155\n"
+" \n"
+" dcl_literal l156, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l156, r0.w\n"
+" \n"
+" dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.__z_, r2.w, l157, r0.z\n"
+" \n"
+" dcl_literal l158, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, l158, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l159, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r5.w, l159\n"
+" \n"
+" dcl_literal l160, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.___w, l160, r0.w\n"
+" \n"
+" dcl_literal l161, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r3.w, l161, r0.w\n"
+" \n"
+" dcl_literal l162, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0.___w, r0.w, l162\n"
+" ishr r2.___w, r2.w, r0.w\n"
+" \n"
+" dcl_literal l163, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0.___w, l163, r0.w\n"
+" cmov_logical r0.__z_, r0.w, r2.w, r0.z\n"
+" ior r3._y__, r4.y, r0.z\n"
+" \n"
+" dcl_literal l164, 0x00000000, 0x00000000, 0x00000030, 0x18000000\n"
+" \n"
+" dcl_literal l165, 0x00000000, 0x00000000, 0xFFFFFF9B, 0xCD800000\n"
+" cmov_logical r0.__zw, r2.z, l164, l165\n"
+" iadd r0.___w, r2.x, r0.w\n"
+" iadd r0.__z_, r5.y, r0.z\n"
+" \n"
+" dcl_literal l166, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.x___, l166, r0.z\n"
+" \n"
+" dcl_literal l167, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r2.x, l167, r0.w\n"
+" \n"
+" dcl_literal l168, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.__z_, l168, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l169, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.x___, l169, r0.z\n"
+" \n"
+" dcl_literal l170, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.__z_, r2.x, l170, r0.z\n"
+" \n"
+" dcl_literal l171, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0.__z_, r0.z, l171\n"
+" ishr r2.x___, r5.z, r0.z\n"
+" \n"
+" dcl_literal l172, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0.__z_, l172, r0.z\n"
+" cmov_logical r3.x___, r0.z, r2.x, r0.w\n"
+" \n"
+" dcl_literal l173, 0xC2400000, 0xC2400000, 0xC2400000, 0xC2400000\n"
+" mov r0.__z_, l173\n"
+" else\n"
+" mov r3.x___, r1.z\n"
+" \n"
+" dcl_literal l174, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.__z_, l174\n"
+" endif\n"
+"else\n"
+" \n"
+" dcl_literal l175, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.__z_, l175\n"
+"endif\n"
+"add r0.___w, r3.y, r3.y\n"
+"lt r2.x___, r3.x, r0.w\n"
+"eq r0.___w, r0.w, r3.x\n"
+"dcl_literal l176, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r2.__z_, r3.z, l176\n"
+"dcl_literal l177, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.__z_, r2.z, l177\n"
+"and r0.___w, r0.w, r2.z\n"
+"ior r0.___w, r2.x, r0.w\n"
+"add r2.x___, r3.x_neg(xyzw), r3.y\n"
+"dcl_literal l178, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r2.__z_, r3.z, l178\n"
+"cmov_logical r2.x_z_, r0.w, r2.xxzx, r3.yyzy\n"
+"dcl_literal l179, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r0.___w, r0.z, l179\n"
+"if_logicalnz r0.w\n"
+" ftoi r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l180, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r3, r2.x, l180\n"
+" itof r0.___w, r3.w\n"
+" cmov_logical r0.___w, r3.z, r3.x, r0.w\n"
+" \n"
+" dcl_literal l181, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r2.___w, r0.z, l181\n"
+" cmov_logical r0.__z_, r3.z, r0.z, r2.w\n"
+" \n"
+" dcl_literal l182, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r3.x_z_, r0.w, l182\n"
+" \n"
+" dcl_literal l183, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r3.x, l183\n"
+" iadd r2.___w, r2.w, r0.z\n"
+" \n"
+" dcl_literal l184, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.__z_, r0.z, l184\n"
+" iadd r0.__z_, r0.w, r0.z\n"
+" \n"
+" dcl_literal l185, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r0.___w, r2.w, l185\n"
+" \n"
+" dcl_literal l186, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2.___w, l186, r0.w\n"
+" \n"
+" dcl_literal l187, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.__z_, r2.w, l187, r0.z\n"
+" \n"
+" dcl_literal l188, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r0.___w, l188, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l189, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.___w, r3.z, l189\n"
+" \n"
+" dcl_literal l190, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l190, r0.w\n"
+" \n"
+" dcl_literal l191, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r3.x, l191, r0.w\n"
+" \n"
+" dcl_literal l192, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" imax r0.___w, r0.w, l192\n"
+" ishr r2.___w, r2.w, r0.w\n"
+" \n"
+" dcl_literal l193, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r0.___w, l193, r0.w\n"
+" cmov_logical r0.__z_, r0.w, r2.w, r0.z\n"
+" ior r2.x___, r3.y, r0.z\n"
+"endif\n"
+"dcl_literal l194, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"and r0.__z_, r2.z, l194\n"
+"dcl_literal l195, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ixor r0.___w, r2.x, l195\n"
+"cmov_logical r2.x___, r1.x, r0.w, r2.x\n"
+"ixor r0.___w, r1.w, r1.x\n"
+"inegate r1.___w, r0.z\n"
+"cmov_logical r2.__z_, r0.w, r1.w, r0.z\n"
+"dcl_literal l196, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__zw, l196, r1.yyyz\n"
+"ior r1.___w, r0.w, r0.z\n"
+"dcl_literal l197, 0x7FC00001, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x_z_, r1.w, l197, r2.xxzx\n"
+"dcl_literal l198, 0x00000000, 0x7F800000, 0x7F800000, 0x00000000\n"
+"ieq r3, r1.zyzy, l198\n"
+"ior r1.___w, r3.y, r3.x\n"
+"dcl_literal l199, 0xFFC00000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x_z_, r1.w, l199, r2.xxzx\n"
+"inot r3.xy__, r3.yzyy\n"
+"and r1.___w, r3.z, r3.x\n"
+"inot r0.__zw, r0.zzzw\n"
+"and r0.__z_, r1.w, r0.z\n"
+"and r0.___w, r2.y, r0.w\n"
+"and r1.___w, r3.w, r0.w\n"
+"ior r0.__z_, r0.z, r1.w\n"
+"mov r2._y__, r0.x\n"
+"dcl_literal l200, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r2.___w, l200\n"
+"cmov_logical r0.x_z_, r0.z, r2.yywy, r2.xxzx\n"
+"ieq r1.__z_, r1.y, r1.z\n"
+"and r0.___w, r3.y, r0.w\n"
+"and r0.___w, r1.z, r0.w\n"
+"ieq r0._y__, r2.y, r0.y\n"
+"dcl_literal l201, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"dcl_literal l202, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1._y__, r0.y, l201, l202\n"
+"cmov_logical r0.xy__, r0.w, r1.xyxx, r0.xzxx\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__rint_f32", /* IL macro: round in0.x with round_nearest; if the input's bit pattern looks like a NaN, return the input with 0x7FC00000 OR'ed in instead */
+"mdef(348)_out(1)_in(1)\n" /* macro id 348, declared with one output and one input */
+"mov r0, in0\n" /* r0.x holds the argument for the rest of the macro */
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n" /* mask with every bit set except bit 31 */
+"and r0._y__, r0.x, l0\n" /* r0.y = bits of x with bit 31 (the float32 sign bit) cleared */
+"\n"
+"dcl_literal l1, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000: float32 +infinity bit pattern */
+"ilt r0._y__, l1, r0.y\n" /* r0.y = (0x7F800000 < r0.y) as an integer compare, i.e. presumably "x is NaN" -- NOTE(review): confirm ilt operand order against the IL spec */
+"round_nearest r0.__z_, r0.x\n" /* r0.z = rounded value of the original x (r0.x is not modified until the ior below) */
+"\n"
+"dcl_literal l2, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* 0x7FC00000: exponent all ones plus top mantissa bit (float32 quiet-NaN pattern) */
+"ior r0.x___, r0.x, l2\n" /* r0.x = x | 0x7FC00000; only meaningful when the NaN test above was true */
+"cmov_logical r0.x___, r0.y, r0.x, r0.z\n" /* presumably selects r0.x (quieted NaN) when r0.y is non-zero, otherwise r0.z (rounded value) -- confirm cmov_logical semantics */
+"mov out0, r0\n" /* whole r0 is written out; the result is in .x, the other components hold temporaries */
+"mend\n"
+,1,1 /* NOTE(review): presumably the input/output counts matching _in(1)/_out(1) above (the preceding entry pairs _in(2)/_out(1) with ",2,1") -- confirm against the table's struct definition */
+},
+{ "__rootn_f32i32",
+"mdef(349)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n"
+"itof r0.__z_, r0.y\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.y, l0\n"
+"inegate r1.x___, r0.y\n"
+"cmov_logical r1.x___, r0.w, r1.x, r0.y\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r0.__z_, l1, r0.z\n"
+"dcl_literal l2, 0x7FFFFFFF, 0x80000000, 0x80000001, 0x00000001\n"
+"and r2, r0.xxyy, l2\n"
+"dcl_literal l3, 0x00000000, 0x00000001, 0x7F800000, 0x80000000\n"
+"ieq r3, r2.zzxy, l3\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1._y__, l4, r2.x\n"
+"inot r1.__z_, r3.z\n"
+"inot r1.___w, r1.y\n"
+"and r1.__z_, r1.z, r1.w\n"
+"dcl_literal l5, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r1.___w, r0.z, l5\n"
+"dcl_literal l6, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.___w, l6, r1.w\n"
+"dcl_literal l7, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"ieq r4.x___, r1.x, l7\n"
+"if_logicalnz r4.x\n"
+" \n"
+" dcl_literal l8, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l9, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4.x___, r0.w, l8, l9\n"
+" \n"
+" dcl_literal l10, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r4._y__, r0.x, l10\n"
+" if_logicalz r4.y\n"
+" \n"
+" dcl_literal l11, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r4.__z_, r0.x, l11\n"
+" itof r4.__z_, r4.z\n"
+" \n"
+" dcl_literal l12, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r4.__zw, r4.z, l12\n"
+" \n"
+" dcl_literal l13, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.__z_, r4.z, l13\n"
+" \n"
+" dcl_literal l14, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+" iadd r4.__z_, r4.z, l14\n"
+" \n"
+" dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.___w, r4.w, l15\n"
+" \n"
+" dcl_literal l16, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r4.__z_, l16, r4.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.x___, l17, r4.z\n"
+" \n"
+" dcl_literal l18, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4.__z_, r5.x, l18, r4.z\n"
+" \n"
+" dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.x___, l19, r4.z\n"
+" ishr r5._y__, r4.w, r4.z\n"
+" inegate r4.__z_, r4.z\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.__z_, r4.z, l20\n"
+" iadd r4.__z_, r4.w, r4.z\n"
+" cmov_logical r4.__z_, r5.x, r5.y, r4.z\n"
+" \n"
+" dcl_literal l21, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r4._y__, r4.z, l21\n"
+" \n"
+" dcl_literal l22, 0xFFFFFFF0, 0xFFFFFFF0, 0xFFFFFFF0, 0xFFFFFFF0\n"
+" mov r4.___w, l22\n"
+" else\n"
+" \n"
+" dcl_literal l23, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r4.__z_, r0.x, l23\n"
+" \n"
+" dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.___w, l24\n"
+" endif\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4._y__, r4.y, l25\n"
+" \n"
+" dcl_literal l26, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4._y__, r4.y, l26\n"
+" \n"
+" dcl_literal l27, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r4.__z_, r4.z, l27\n"
+" itof r4._y__, r4.y\n"
+" \n"
+" dcl_literal l28, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB, 0x3EAAAAAB\n"
+" mul_ieee r5.x___, r4.y, l28\n"
+" round_nearest r5.x___, r5.x\n"
+" \n"
+" dcl_literal l29, 0x40400000, 0x40400000, 0x40400000, 0x40400000\n"
+" mad_ieee r4._y__, r5.x_neg(xyzw), l29, r4.y\n"
+" ftoi r4._y__, r4.y\n"
+" ftoi r5.x___, r5.x\n"
+" iadd r4.___w, r5.x, r4.w\n"
+" \n"
+" dcl_literal l30, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r4._y__, r4.y, l30\n"
+" \n"
+" dcl_literal l31, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4._y__, r4.y, l31\n"
+" ior r4._y__, r4.z, r4.y\n"
+" \n"
+" dcl_literal l32, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.__z_, r4.y, l32\n"
+" \n"
+" dcl_literal l33, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4.__z_, r4.z, l33\n"
+" \n"
+" dcl_literal l34, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r4._y__, r4.y, l34\n"
+" \n"
+" dcl_literal l35, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+" iadd r5.x___, l35, r4.y\n"
+" \n"
+" dcl_literal l36, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" and r5.x___, r5.x, l36\n"
+" \n"
+" dcl_literal l37, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ixor r5._y__, r5.x, l37\n"
+" ior r4._y__, r4.y, r5.y\n"
+" \n"
+" dcl_literal l38, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r5.x___, r5.x, l38\n"
+" iadd r4.__z_, r4.z, r5.x\n"
+" itof r4.__z_, r4.z\n"
+" \n"
+" dcl_literal l39, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+" add r5.xy__, r4.y, l39\n"
+" div_zeroop(infinity) r4._y__, r5.x, r5.y\n"
+" mul_ieee r5._y__, r4.y, r4.y\n"
+" mul_ieee r5.__z_, r5.y, r5.y\n"
+" \n"
+" dcl_literal l40, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l41, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+" mad_ieee r6.xy__, r5.z, l40, l41\n"
+" \n"
+" dcl_literal l42, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+" mad_ieee r6.xy__, r5.z, r6.xyxx, l42\n"
+" mul_ieee r5.___w, r5.z, r6.x\n"
+" \n"
+" dcl_literal l43, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+" mad_ieee r5.__z_, r5.z, r6.y, l43\n"
+" mad_ieee r5._y__, r5.y, r5.z, r5.w\n"
+" mul_ieee r5.__z_, r5.x, r5.x\n"
+" \n"
+" dcl_literal l44, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mul_ieee r5.___w, r5.z, l44\n"
+" \n"
+" dcl_literal l45, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r5._y__, r5.z, l45, r5.y\n"
+" mad_ieee r4._y__, r4.y_neg(xyzw), r5.y, r5.w\n"
+" add r4._y__, r5.x_neg(xyzw), r4.y\n"
+" \n"
+" dcl_literal l46, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820\n"
+" mul_ieee r5.x___, r4.z, l46\n"
+" mul_ieee r5._y__, r5.x, r5.x\n"
+" \n"
+" dcl_literal l47, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+" \n"
+" dcl_literal l48, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+" mad_ieee r5.__z_, r5.y, l47, l48\n"
+" \n"
+" dcl_literal l49, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l49\n"
+" \n"
+" dcl_literal l50, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l50\n"
+" \n"
+" dcl_literal l51, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l51\n"
+" mad_ieee r5._y__, r5.y_neg(xyzw), r5.z, r5.x\n"
+" mul_ieee r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l52, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+" add r5._y__, r5.y, l52\n"
+" div_zeroop(infinity) r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l53, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820, 0x3E6C9820\n"
+" mad_ieee r4.__z_, r4.z_neg(xyzw), l53, r5.x\n"
+" \n"
+" dcl_literal l54, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r4.__z_, r4.z_neg(xyzw), l54\n"
+" \n"
+" dcl_literal l55, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB\n"
+" mul_ieee r5.x___, r4.y, l55\n"
+" mul_ieee r5._y__, r5.x, r5.x\n"
+" \n"
+" dcl_literal l56, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+" \n"
+" dcl_literal l57, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+" mad_ieee r5.__z_, r5.y, l56, l57\n"
+" \n"
+" dcl_literal l58, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l58\n"
+" \n"
+" dcl_literal l59, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l59\n"
+" \n"
+" dcl_literal l60, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+" mad_ieee r5.__z_, r5.y, r5.z, l60\n"
+" mad_ieee r5._y__, r5.y_neg(xyzw), r5.z, r5.x\n"
+" mul_ieee r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l61, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+" add r5._y__, r5.y, l61\n"
+" div_zeroop(infinity) r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l62, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB, 0xBEAAAAAB\n"
+" mad_ieee r4._y__, r4.y_neg(xyzw), l62, r5.x\n"
+" \n"
+" dcl_literal l63, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r4._y__, r4.y_neg(xyzw), l63\n"
+" mul_ieee r4._y__, r4.z, r4.y\n"
+" \n"
+" dcl_literal l64, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" div_zeroop(infinity) r4.__z_, l64, r4.y\n"
+" cmov_logical r4._y__, r0.w, r4.z, r4.y\n"
+" imul r4.x___, r4.w, r4.x\n"
+" itof r4.x___, r4.x\n"
+" ftoi r4.__z_, r4.x_abs\n"
+" \n"
+" dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r4.x___, r4.x, l65\n"
+" inegate r4.___w, r4.z\n"
+" cmov_logical r4.x___, r4.x, r4.w, r4.z\n"
+" \n"
+" dcl_literal l66, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r5, r4.y, l66\n"
+" itof r4._y__, r5.w\n"
+" cmov_logical r4._y__, r5.z, r5.x, r4.y\n"
+" \n"
+" dcl_literal l67, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r4.__z_, r4.x, l67\n"
+" cmov_logical r4.x___, r5.z, r4.x, r4.z\n"
+" \n"
+" dcl_literal l68, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r4.__zw, r4.y, l68\n"
+" \n"
+" dcl_literal l69, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4.__z_, r4.z, l69\n"
+" iadd r4.__z_, r4.z, r4.x\n"
+" \n"
+" dcl_literal l70, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.x___, r4.x, l70\n"
+" iadd r4.x___, r4.y, r4.x\n"
+" \n"
+" dcl_literal l71, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4._y__, r4.z, l71\n"
+" \n"
+" dcl_literal l72, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r4.__z_, l72, r4.y\n"
+" \n"
+" dcl_literal l73, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r4.x___, r4.z, l73, r4.x\n"
+" \n"
+" dcl_literal l74, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r4._y__, l74, r4.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l75, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.__z_, r4.w, l75\n"
+" \n"
+" dcl_literal l76, 0x00000017, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.x_z_, l76, r4.y\n"
+" \n"
+" dcl_literal l77, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r4.___w, r5.x, l77, r4.y\n"
+" \n"
+" dcl_literal l78, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, r4.y, l78\n"
+" \n"
+" dcl_literal l79, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4._y__, r4.y, l79, r4.w\n"
+" ishr r4._y__, r4.z, r4.y\n"
+" cmov_logical r4.x___, r5.z, r4.y, r4.x\n"
+" ior r4.x___, r5.y, r4.x\n"
+"endif\n"
+"dcl_literal l80, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+"ine r1.x___, r1.x, l80\n"
+"if_logicalnz r1.x\n"
+" itof r1.x___, r2.x\n"
+" \n"
+" dcl_literal l81, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r4._yz_, r1.x, l81\n"
+" \n"
+" dcl_literal l82, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r4.y, l82\n"
+" \n"
+" dcl_literal l83, 0x00000019, 0x00000019, 0x00000019, 0x00000019\n"
+" iadd r1.x___, r1.x, l83\n"
+" \n"
+" dcl_literal l84, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.z, l84\n"
+" \n"
+" dcl_literal l85, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.x___, l85, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l86, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4.__z_, l86, r1.x\n"
+" \n"
+" dcl_literal l87, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.x___, r4.z, l87, r1.x\n"
+" \n"
+" dcl_literal l88, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.__z_, l88, r1.x\n"
+" ishr r4.___w, r4.y, r1.x\n"
+" inegate r1.x___, r1.x\n"
+" \n"
+" dcl_literal l89, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.x___, r1.x, l89\n"
+" iadd r1.x___, r4.y, r1.x\n"
+" cmov_logical r1.x___, r4.z, r4.w, r1.x\n"
+" \n"
+" dcl_literal l90, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ilt r4._y__, r2.x, l90\n"
+" cmov_logical r1.x___, r4.y, r1.x, r2.x\n"
+" \n"
+" dcl_literal l91, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7, 0xFFFFFFE7\n"
+" and r4._y__, r4.y, l91\n"
+" \n"
+" dcl_literal l92, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r4.__zw, r1.x, l92\n"
+" \n"
+" dcl_literal l93, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r4.z, l93\n"
+" iadd r1.x___, r1.x, r4.y\n"
+" \n"
+" dcl_literal l94, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" ior r4._y__, r4.w, l94\n"
+" \n"
+" dcl_literal l95, 0x43000000, 0x43000000, 0x43000000, 0x43000000\n"
+" \n"
+" dcl_literal l96, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r4.__z_, r4.y, l95, l96\n"
+" ftoi r4.___w, r4.z\n"
+" \n"
+" dcl_literal l97, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0, 0xFFFFFFC0\n"
+" iadd r4.___w, r4.w, l97\n"
+" \n"
+" dcl_literal l98, 0x00000001, 0x00000002, 0x00000003, 0x00000004\n"
+" ieq r5, r4.w, l98\n"
+" \n"
+" dcl_literal l99, 0x31B0FC04, 0x3C7E0540, 0x00000000, 0x00000000\n"
+" and r6.xy__, r5.x, l99\n"
+" \n"
+" dcl_literal l100, 0x33439E0D, 0x3CFC14C0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r5.y, l100, r6.xyxx\n"
+" \n"
+" dcl_literal l101, 0x32F632DD, 0x3D3BA2C0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r5.z, l101, r5.xyxx\n"
+" \n"
+" dcl_literal l102, 0x32C01163, 0x3D785180, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r5.w, l102, r5.xyxx\n"
+" \n"
+" dcl_literal l103, 0x00000005, 0x00000006, 0x00000007, 0x00000008\n"
+" ieq r6, r4.w, l103\n"
+" \n"
+" dcl_literal l104, 0x33161BD2, 0x3D9A0EB8, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l104, r5.xyxx\n"
+" \n"
+" dcl_literal l105, 0x330AE56B, 0x3DB78690, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l105, r5.xyxx\n"
+" \n"
+" dcl_literal l106, 0x32692B56, 0x3DD49368, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l106, r5.xyxx\n"
+" \n"
+" dcl_literal l107, 0x32DC55E6, 0x3DF13838, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l107, r5.xyxx\n"
+" \n"
+" dcl_literal l108, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C\n"
+" ieq r6, r4.w, l108\n"
+" \n"
+" dcl_literal l109, 0x3379A11D, 0x3E06BBF0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l109, r5.xyxx\n"
+" \n"
+" dcl_literal l110, 0x33703FEA, 0x3E14AA94, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l110, r5.xyxx\n"
+" \n"
+" dcl_literal l111, 0x335996FA, 0x3E226958, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l111, r5.xyxx\n"
+" \n"
+" dcl_literal l112, 0x33614F28, 0x3E2FF980, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l112, r5.xyxx\n"
+" \n"
+" dcl_literal l113, 0x0000000D, 0x0000000E, 0x0000000F, 0x00000010\n"
+" ieq r6, r4.w, l113\n"
+" \n"
+" dcl_literal l114, 0x30843642, 0x3E3D5C48, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l114, r5.xyxx\n"
+" \n"
+" dcl_literal l115, 0x3267A2B6, 0x3E4A92D4, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l115, r5.xyxx\n"
+" \n"
+" dcl_literal l116, 0x331D0180, 0x3E579E48, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l116, r5.xyxx\n"
+" \n"
+" dcl_literal l117, 0x330F3534, 0x3E647FBC, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l117, r5.xyxx\n"
+" \n"
+" dcl_literal l118, 0x00000011, 0x00000012, 0x00000013, 0x00000014\n"
+" ieq r6, r4.w, l118\n"
+" \n"
+" dcl_literal l119, 0x335C55E6, 0x3E713838, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l119, r5.xyxx\n"
+" \n"
+" dcl_literal l120, 0x335ABC7C, 0x3E7DC8C0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l120, r5.xyxx\n"
+" \n"
+" dcl_literal l121, 0x3309CE44, 0x3E851926, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l121, r5.xyxx\n"
+" \n"
+" dcl_literal l122, 0x332EAE98, 0x3E8B3AE4, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l122, r5.xyxx\n"
+" \n"
+" dcl_literal l123, 0x00000015, 0x00000016, 0x00000017, 0x00000018\n"
+" ieq r6, r4.w, l123\n"
+" \n"
+" dcl_literal l124, 0x336F3DE6, 0x3E914A0E, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l124, r5.xyxx\n"
+" \n"
+" dcl_literal l125, 0x336B8475, 0x3E974714, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l125, r5.xyxx\n"
+" \n"
+" dcl_literal l126, 0x32AB4A2F, 0x3E9D3262, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l126, r5.xyxx\n"
+" \n"
+" dcl_literal l127, 0x310717B1, 0x3EA30C5E, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l127, r5.xyxx\n"
+" \n"
+" dcl_literal l128, 0x00000019, 0x0000001A, 0x0000001B, 0x0000001C\n"
+" ieq r6, r4.w, l128\n"
+" \n"
+" dcl_literal l129, 0x31E5BF06, 0x3EA8D56C, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l129, r5.xyxx\n"
+" \n"
+" dcl_literal l130, 0x337D6027, 0x3EAE8DEC, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l130, r5.xyxx\n"
+" \n"
+" dcl_literal l131, 0x32F4D8A5, 0x3EB43640, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l131, r5.xyxx\n"
+" \n"
+" dcl_literal l132, 0x335AEF40, 0x3EB9CEBE, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l132, r5.xyxx\n"
+" \n"
+" dcl_literal l133, 0x0000001D, 0x0000001E, 0x0000001F, 0x00000020\n"
+" ieq r6, r4.w, l133\n"
+" \n"
+" dcl_literal l134, 0x336E0ABF, 0x3EBF57C0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l134, r5.xyxx\n"
+" \n"
+" dcl_literal l135, 0x31D8284B, 0x3EC4D19C, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l135, r5.xyxx\n"
+" \n"
+" dcl_literal l136, 0x32E108B8, 0x3ECA3CA0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l136, r5.xyxx\n"
+" \n"
+" dcl_literal l137, 0x3332FE61, 0x3ECF991E, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l137, r5.xyxx\n"
+" \n"
+" dcl_literal l138, 0x00000021, 0x00000022, 0x00000023, 0x00000024\n"
+" ieq r6, r4.w, l138\n"
+" \n"
+" dcl_literal l139, 0x32D4D042, 0x3ED4E764, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l139, r5.xyxx\n"
+" \n"
+" dcl_literal l140, 0x336F323E, 0x3EDA27BA, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l140, r5.xyxx\n"
+" \n"
+" dcl_literal l141, 0x32ED38DC, 0x3EDF5A6C, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l141, r5.xyxx\n"
+" \n"
+" dcl_literal l142, 0x31F35344, 0x3EE47FBE, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l142, r5.xyxx\n"
+" \n"
+" dcl_literal l143, 0x00000025, 0x00000026, 0x00000027, 0x00000028\n"
+" ieq r6, r4.w, l143\n"
+" \n"
+" dcl_literal l144, 0x337803AF, 0x3EE997F2, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l144, r5.xyxx\n"
+" \n"
+" dcl_literal l145, 0x31983894, 0x3EEEA350, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l145, r5.xyxx\n"
+" \n"
+" dcl_literal l146, 0x33075A1E, 0x3EF3A212, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l146, r5.xyxx\n"
+" \n"
+" dcl_literal l147, 0x32FD7837, 0x3EF8947A, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l147, r5.xyxx\n"
+" \n"
+" dcl_literal l148, 0x00000029, 0x0000002A, 0x0000002B, 0x0000002C\n"
+" ieq r6, r4.w, l148\n"
+" \n"
+" dcl_literal l149, 0x32778F32, 0x3EFD7AC4, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l149, r5.xyxx\n"
+" \n"
+" dcl_literal l150, 0x3234BA20, 0x3F012A95, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l150, r5.xyxx\n"
+" \n"
+" dcl_literal l151, 0x3360E6FA, 0x3F0391F2, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l151, r5.xyxx\n"
+" \n"
+" dcl_literal l152, 0x3204A550, 0x3F05F397, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l152, r5.xyxx\n"
+" \n"
+" dcl_literal l153, 0x0000002D, 0x0000002E, 0x0000002F, 0x00000030\n"
+" ieq r6, r4.w, l153\n"
+" \n"
+" dcl_literal l154, 0x33716A65, 0x3F084F9C, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l154, r5.xyxx\n"
+" \n"
+" dcl_literal l155, 0x3317A6AF, 0x3F0AA61E, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l155, r5.xyxx\n"
+" \n"
+" dcl_literal l156, 0x33233E4B, 0x3F0CF735, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l156, r5.xyxx\n"
+" \n"
+" dcl_literal l157, 0x33738207, 0x3F0F42FA, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l157, r5.xyxx\n"
+" \n"
+" dcl_literal l158, 0x00000031, 0x00000032, 0x00000033, 0x00000034\n"
+" ieq r6, r4.w, l158\n"
+" \n"
+" dcl_literal l159, 0x333DF5FA, 0x3F118986, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l159, r5.xyxx\n"
+" \n"
+" dcl_literal l160, 0x33144D89, 0x3F13CAF0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l160, r5.xyxx\n"
+" \n"
+" dcl_literal l161, 0x32D448E9, 0x3F16074F, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l161, r5.xyxx\n"
+" \n"
+" dcl_literal l162, 0x331A7886, 0x3F183EB9, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l162, r5.xyxx\n"
+" \n"
+" dcl_literal l163, 0x00000035, 0x00000036, 0x00000037, 0x00000038\n"
+" ieq r6, r4.w, l163\n"
+" \n"
+" dcl_literal l164, 0x336CE70F, 0x3F1A7144, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l164, r5.xyxx\n"
+" \n"
+" dcl_literal l165, 0x331AB151, 0x3F1C9F06, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l165, r5.xyxx\n"
+" \n"
+" dcl_literal l166, 0x32A71570, 0x3F1EC813, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l166, r5.xyxx\n"
+" \n"
+" dcl_literal l167, 0x3284672B, 0x3F20EC7F, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l167, r5.xyxx\n"
+" \n"
+" dcl_literal l168, 0x00000039, 0x0000003A, 0x0000003B, 0x0000003C\n"
+" ieq r6, r4.w, l168\n"
+" \n"
+" dcl_literal l169, 0x318717B1, 0x3F230C5E, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l169, r5.xyxx\n"
+" \n"
+" dcl_literal l170, 0x336D81F6, 0x3F2527C2, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l170, r5.xyxx\n"
+" \n"
+" dcl_literal l171, 0x330DBADE, 0x3F273EC0, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l171, r5.xyxx\n"
+" \n"
+" dcl_literal l172, 0x324B78B5, 0x3F295169, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l172, r5.xyxx\n"
+" \n"
+" dcl_literal l173, 0x0000003D, 0x0000003E, 0x0000003F, 0x00000040\n"
+" ieq r6, r4.w, l173\n"
+" \n"
+" dcl_literal l174, 0x332D9F9D, 0x3F2B5FCE, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.x, l174, r5.xyxx\n"
+" \n"
+" dcl_literal l175, 0x32C359F3, 0x3F2D6A02, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.y, l175, r5.xyxx\n"
+" \n"
+" dcl_literal l176, 0x32924167, 0x3F2F7015, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.z, l176, r5.xyxx\n"
+" \n"
+" dcl_literal l177, 0x3377D1CF, 0x3F317217, 0x00000000, 0x00000000\n"
+" cmov_logical r5.xy__, r6.w, l177, r5.xyxx\n"
+" round_z r4.__z_, r4.z\n"
+" \n"
+" dcl_literal l178, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+" mul_ieee r4.___w, r4.z, l178\n"
+" \n"
+" dcl_literal l179, 0x3C000000, 0x3C000000, 0x3C000000, 0x3C000000\n"
+" mad_ieee r4._y__, r4.z_neg(xyzw), l179, r4.y\n"
+" \n"
+" dcl_literal l180, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r4.__z_, r4.y, l180, r4.w\n"
+" div_zeroop(infinity) r4._y__, r4.y, r4.z\n"
+" mul_ieee r4.__z_, r4.y, r4.y\n"
+" \n"
+" dcl_literal l181, 0x3C4CCCCD, 0x3C4CCCCD, 0x3C4CCCCD, 0x3C4CCCCD\n"
+" \n"
+" dcl_literal l182, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+" mad_ieee r4.___w, r4.z, l181, l182\n"
+" mul_ieee r4.__z_, r4.z, r4.w\n"
+" mad_ieee r4._y__, r4.y, r4.z, r4.y\n"
+" \n"
+" dcl_literal l183, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.x___, r1.x, l183\n"
+" itof r1.x___, r1.x\n"
+" \n"
+" dcl_literal l184, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r4.__z_, r1.x, l184\n"
+" add r4.___w, r1.x, r4.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l185, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r5.__z_, r0.z, l185\n"
+" add r5.___w, r0.z, r5.z_neg(xyzw)\n"
+" mul_ieee r6.x___, r0.z, r1.x\n"
+" mul_ieee r1.x___, r1.x, r0.z\n"
+" mad_ieee r6._y__, r4.z, r5.z, r1.x_neg(xyzw)\n"
+" mad_ieee r4.__z_, r4.z, r5.w, r6.y\n"
+" mad_ieee r4.__z_, r4.w, r5.z, r4.z\n"
+" mad_ieee r4.__z_, r4.w, r5.w, r4.z\n"
+" \n"
+" dcl_literal l186, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r4.___w, r5.y, l186\n"
+" add r6._y__, r5.y, r4.w_neg(xyzw)\n"
+" mul_ieee r6.__z_, r5.y, r0.z\n"
+" mad_ieee r6.___w, r4.w, r5.z, r6.z_neg(xyzw)\n"
+" mad_ieee r4.___w, r4.w, r5.w, r6.w\n"
+" mad_ieee r4.___w, r6.y, r5.z, r4.w\n"
+" mad_ieee r4.___w, r6.y, r5.w, r4.w\n"
+" \n"
+" dcl_literal l187, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5.__z_, r1.x, l187\n"
+" \n"
+" dcl_literal l188, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+" \n"
+" dcl_literal l189, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" cmov_logical r5.__z_, r5.z, l188, l189\n"
+" add r5.__z_, r1.x, r5.z\n"
+" round_z r5.__z_, r5.z\n"
+" add r1.x___, r1.x, r5.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l190, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+" mul_ieee r5.___w, r1.x, l190\n"
+" mul_ieee r6._y__, r5.w, r5.w\n"
+" \n"
+" dcl_literal l191, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+" \n"
+" dcl_literal l192, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+" mad_ieee r6.___w, r6.y, l191, l192\n"
+" \n"
+" dcl_literal l193, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+" mad_ieee r6.___w, r6.y, r6.w, l193\n"
+" \n"
+" dcl_literal l194, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+" mad_ieee r6.___w, r6.y, r6.w, l194\n"
+" \n"
+" dcl_literal l195, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+" mad_ieee r6.___w, r6.y, r6.w, l195\n"
+" mad_ieee r6._y__, r6.y_neg(xyzw), r6.w, r5.w\n"
+" mul_ieee r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l196, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" add r6._y__, r6.y_neg(xyzw), l196\n"
+" div_zeroop(infinity) r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l197, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+" mad_ieee r5.___w, r1.x_neg(xyzw), l197, r5.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l198, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+" mad_ieee r1.x___, r1.x_neg(xyzw), l198, r5.w\n"
+" \n"
+" dcl_literal l199, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1.x___, r1.x_neg(xyzw), l199\n"
+" \n"
+" dcl_literal l200, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r5.___w, r6.z, l200\n"
+" \n"
+" dcl_literal l201, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+" \n"
+" dcl_literal l202, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" cmov_logical r5.___w, r5.w, l201, l202\n"
+" \n"
+" dcl_literal l203, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+" mad_ieee r5.___w, r6.z, l203, r5.w\n"
+" round_z r5.___w, r5.w\n"
+" \n"
+" dcl_literal l204, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+" mad_ieee r6._y__, r5.w_neg(xyzw), l204, r6.z\n"
+" \n"
+" dcl_literal l205, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+" mad_ieee r6.__z_, r5.w_neg(xyzw), l205, r6.y\n"
+" mul_ieee r6.___w, r6.z, r6.z\n"
+" \n"
+" dcl_literal l206, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+" \n"
+" dcl_literal l207, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+" mad_ieee r7.x___, r6.w, l206, l207\n"
+" \n"
+" dcl_literal l208, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+" mad_ieee r7.x___, r6.w, r7.x, l208\n"
+" \n"
+" dcl_literal l209, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+" mad_ieee r7.x___, r6.w, r7.x, l209\n"
+" \n"
+" dcl_literal l210, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+" mad_ieee r7.x___, r6.w, r7.x, l210\n"
+" mad_ieee r6.___w, r6.w_neg(xyzw), r7.x, r6.z\n"
+" mul_ieee r6.__z_, r6.z, r6.w\n"
+" \n"
+" dcl_literal l211, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" add r6.___w, r6.w_neg(xyzw), l211\n"
+" div_zeroop(infinity) r6.__z_, r6.z, r6.w\n"
+" \n"
+" dcl_literal l212, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+" mad_ieee r6.__z_, r5.w, l212, r6.z_neg(xyzw)\n"
+" add r6._y__, r6.y_neg(xyzw), r6.z\n"
+" \n"
+" dcl_literal l213, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r6._y__, r6.y_neg(xyzw), l213\n"
+" \n"
+" dcl_literal l214, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+" mad_ieee r4.__z_, r4.z, l214, r4.w\n"
+" add r4._y__, r5.x, r4.y\n"
+" mad_ieee r4.__z_, r0.z, r4.y, r4.z\n"
+" \n"
+" dcl_literal l215, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r4.___w, r4.z, l215\n"
+" \n"
+" dcl_literal l216, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+" \n"
+" dcl_literal l217, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" cmov_logical r4.___w, r4.w, l216, l217\n"
+" \n"
+" dcl_literal l218, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+" mad_ieee r4.___w, r4.z, l218, r4.w\n"
+" round_z r4.___w, r4.w\n"
+" \n"
+" dcl_literal l219, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+" mad_ieee r4.__z_, r4.w_neg(xyzw), l219, r4.z\n"
+" \n"
+" dcl_literal l220, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+" mad_ieee r5.x___, r4.w_neg(xyzw), l220, r4.z\n"
+" mul_ieee r6.__z_, r5.x, r5.x\n"
+" \n"
+" dcl_literal l221, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+" \n"
+" dcl_literal l222, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+" mad_ieee r6.___w, r6.z, l221, l222\n"
+" \n"
+" dcl_literal l223, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+" mad_ieee r6.___w, r6.z, r6.w, l223\n"
+" \n"
+" dcl_literal l224, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+" mad_ieee r6.___w, r6.z, r6.w, l224\n"
+" \n"
+" dcl_literal l225, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+" mad_ieee r6.___w, r6.z, r6.w, l225\n"
+" mad_ieee r6.__z_, r6.z_neg(xyzw), r6.w, r5.x\n"
+" mul_ieee r5.x___, r5.x, r6.z\n"
+" \n"
+" dcl_literal l226, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+" add r6.__z_, r6.z_neg(xyzw), l226\n"
+" div_zeroop(infinity) r5.x___, r5.x, r6.z\n"
+" \n"
+" dcl_literal l227, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+" mad_ieee r5.x___, r4.w, l227, r5.x_neg(xyzw)\n"
+" add r4.__z_, r4.z_neg(xyzw), r5.x\n"
+" \n"
+" dcl_literal l228, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r4.__z_, r4.z_neg(xyzw), l228\n"
+" mul_ieee r1.x___, r1.x, r6.y\n"
+" mul_ieee r1.x___, r4.z, r1.x\n"
+" add r4.__z_, r5.z, r5.w\n"
+" add r4.__z_, r4.w, r4.z\n"
+" \n"
+" dcl_literal l229, 0x44800000, 0x44800000, 0x44800000, 0x44800000\n"
+" lt r4.___w, l229, r4.z_abs\n"
+" ftoi r5.x___, r4.z_abs\n"
+" \n"
+" dcl_literal l230, 0x00000400, 0x00000400, 0x00000400, 0x00000400\n"
+" cmov_logical r4.___w, r4.w, l230, r5.x\n"
+" \n"
+" dcl_literal l231, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r4.__z_, r4.z, l231\n"
+" inegate r5.x___, r4.w\n"
+" cmov_logical r4.__z_, r4.z, r5.x, r4.w\n"
+" itof r4.___w, r4.z\n"
+" \n"
+" dcl_literal l232, 0x7FFFFFFF, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+" and r7, r1.x, l232\n"
+" itof r1.x___, r7.w\n"
+" cmov_logical r1.x___, r7.z, r7.x, r1.x\n"
+" \n"
+" dcl_literal l233, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B, 0xFFFFFF6B\n"
+" iadd r5.x___, r4.z, l233\n"
+" cmov_logical r4.__z_, r7.z, r4.z, r5.x\n"
+" \n"
+" dcl_literal l234, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+" and r5.x_z_, r1.x, l234\n"
+" \n"
+" dcl_literal l235, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r5.x___, r5.x, l235\n"
+" iadd r5.x___, r5.x, r4.z\n"
+" \n"
+" dcl_literal l236, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r4.__z_, r4.z, l236\n"
+" iadd r1.x___, r1.x, r4.z\n"
+" \n"
+" dcl_literal l237, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r4.__z_, r5.x, l237\n"
+" \n"
+" dcl_literal l238, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r5.x___, l238, r4.z\n"
+" \n"
+" dcl_literal l239, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r5.x, l239, r1.x\n"
+" \n"
+" dcl_literal l240, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r4.__z_, l240, r4.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l241, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r5.x___, r5.z, l241\n"
+" \n"
+" dcl_literal l242, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r5.__zw, l242, r4.z\n"
+" \n"
+" dcl_literal l243, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r5.__z_, r5.z, l243, r4.z\n"
+" \n"
+" dcl_literal l244, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.__z_, r4.z, l244\n"
+" \n"
+" dcl_literal l245, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.__z_, r4.z, l245, r5.z\n"
+" ishr r4.__z_, r5.x, r4.z\n"
+" cmov_logical r1.x___, r5.w, r4.z, r1.x\n"
+" \n"
+" dcl_literal l246, 0x43960000, 0x43960000, 0x43960000, 0x43960000\n"
+" lt r4.__z_, l246, r4.w\n"
+" \n"
+" dcl_literal l247, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r4.z, l247, r1.x\n"
+" ior r1.x___, r7.y, r1.x\n"
+" \n"
+" dcl_literal l248, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" eq r4.__z_, r0.z, l248\n"
+" cmov_logical r1.x___, r4.z, r2.x, r1.x\n"
+" add r4._y__, r5.y, r4.y\n"
+" mul_ieee r0.__z_, r0.z, r4.y\n"
+" \n"
+" dcl_literal l249, 0x3F317218, 0x3F317218, 0x3F317218, 0x3F317218\n"
+" mad_ieee r0.__z_, r6.x, l249, r0.z\n"
+" \n"
+" dcl_literal l250, 0x42B17218, 0x42B17218, 0x42B17218, 0x42B17218\n"
+" lt r4._y__, l250, r0.z\n"
+" \n"
+" dcl_literal l251, 0x00000000, 0x00000000, 0x7F800000, 0xFF800000\n"
+" ieq r4.__zw, r0.z, l251\n"
+" ior r4._y__, r4.y, r4.z\n"
+" \n"
+" dcl_literal l252, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.x___, r4.y, l252, r1.x\n"
+" \n"
+" dcl_literal l253, 0xC2CFF1B5, 0xC2CFF1B5, 0xC2CFF1B5, 0xC2CFF1B5\n"
+" ge r0.__z_, l253, r0.z\n"
+" ior r0.__z_, r4.w, r0.z\n"
+" \n"
+" dcl_literal l254, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.x___, r0.z, l254, r1.x\n"
+"endif\n"
+"and r0.__z_, r3.w, r1.z\n"
+"dcl_literal l255, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l256, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"cmov_logical r5, r2.x, l255, l256\n"
+"and r0.__z_, r0.z, r5.w\n"
+"cmov_logical r0.__z_, r0.z, r4.x_neg(xyzw), r4.x\n"
+"and r3, r3, r5\n"
+"dcl_literal l257, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r3.x, l257, r0.z\n"
+"cmov_logical r0.__z_, r3.y, r0.x, r0.z\n"
+"dcl_literal l258, 0x80000000, 0x00000000, 0x80000001, 0x00000000\n"
+"ieq r1.x_z_, r2.z, l258\n"
+"and r1.x_z_, r5.y, r1.xxzx\n"
+"dcl_literal l259, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r1.x, l259, r0.z\n"
+"dcl_literal l260, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ior r1.x___, r2.y, l260\n"
+"cmov_logical r0.__z_, r1.z, r1.x, r0.z\n"
+"and r0.___w, r0.w, r3.z\n"
+"cmov_logical r0.__z_, r0.w, r2.y, r0.z\n"
+"dcl_literal l261, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r0.___w, r0.y, l261\n"
+"and r0.___w, r3.z, r0.w\n"
+"cmov_logical r0.x___, r0.w, r0.x, r0.z\n"
+"dcl_literal l262, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.__z_, l262, r2.w\n"
+"and r0.__z_, r3.w, r0.z\n"
+"dcl_literal l263, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0._y__, l263, r0.y\n"
+"ior r0._y__, r0.z, r0.y\n"
+"ior r0.__z_, r1.y, r1.w\n"
+"ior r0._y__, r0.y, r0.z\n"
+"dcl_literal l264, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r0.y, l264, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__round_f32", /* Rounds |x| to a whole value (fraction >= 0.5 goes up in magnitude), then ORs the sign bit back in; Inf and NaN inputs are special-cased. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(350)_out(1)_in(1)\n" /* macro id 350, one output, one input */
+"mov r0, in0\n"
+"frc r0._y__, r0.x_abs\n" /* r0.y = fractional part of |x| */
+"add r0.__z_, r0.x_abs, r0.y_neg(xyzw)\n" /* r0.z = |x| - frac, i.e. the whole part of |x| */
+"\n"
+"dcl_literal l0, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* bit pattern of 0.5f */
+"lt r0._y__, r0.y, l0\n" /* r0.y = (frac < 0.5) */
+"\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* bit pattern of 1.0f */
+"add r0.___w, r0.z, l1\n" /* r0.w = whole part + 1.0 */
+"cmov_logical r0._y__, r0.y, r0.z, r0.w\n" /* r0.y = (frac < 0.5) ? whole : whole + 1.0 */
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x7FFFFFFF, 0x80000000\n" /* z lane: magnitude mask, w lane: sign-bit mask */
+"and r0.__zw, r0.x, l2\n" /* r0.z = magnitude bits of x, r0.w = sign bit of x */
+"\n"
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* bit pattern of +Inf */
+"ieq r1.x___, r0.z, l3\n" /* r1.x = (|x| is Inf) */
+"cmov_logical r0._y__, r1.x, r0.x, r0.y\n" /* Inf input passes through unchanged */
+"\n"
+"dcl_literal l4, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+"ior r0.x___, r0.x, l4\n" /* r0.x = x with the quiet-NaN bits forced on (only used if x is NaN) */
+"\n"
+"dcl_literal l5, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l5, r0.z\n" /* r0.z = (magnitude bits > Inf bits), i.e. x is NaN */
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* NaN ? quieted x : rounded value */
+"ior r0.x___, r0.w, r0.x\n" /* OR the original sign bit into the result */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__rsqrt_f32", /* Reciprocal square root. Ordinary inputs go straight to rsq_vec (final else branch); denormal, negative-nonzero and NaN inputs take the software path inside the first if. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(351)_out(1)_in(1)\n" /* macro id 351, one output, one input */
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n" /* y lane: magnitude mask, z lane: exponent-field mask */
+"and r0._yz_, r0.x, l0\n" /* r0.y = magnitude bits of x, r0.z = exponent field of x */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.__z_, r0.z, l1\n" /* r0.z = (exponent field == 0) */
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r0.y, l2\n" /* r0.w = (magnitude != 0) */
+"and r0.__z_, r0.z, r0.w\n" /* r0.z = zero exponent with nonzero magnitude, i.e. denormal input */
+"\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.x, l3\n" /* r1.x = (x as signed int < 0), i.e. sign bit set */
+"and r0.___w, r0.w, r1.x\n" /* r0.w = negative and nonzero; kept until the NaN select below */
+"ior r0.__z_, r0.z, r0.w\n"
+"\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* bit pattern of +Inf */
+"ilt r0._y__, l4, r0.y\n" /* r0.y = (magnitude bits > Inf bits), i.e. x is NaN */
+"ior r0.__z_, r0.z, r0.y\n" /* r0.z = denormal OR negative-nonzero OR NaN */
+"if_logicalnz r0.z\n" /* special-case path */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n" /* mantissa mask */
+" and r0.__z_, r0.x, l5\n" /* r0.z = mantissa bits of x */
+" itof r0.__z_, r0.z\n" /* convert the mantissa integer to a float */
+" \n"
+" dcl_literal l6, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.z, l6\n" /* r1.x = exponent field, r1.y = mantissa of that float */
+" \n"
+" dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+" ishr r0.__z_, r1.x, l7\n" /* r0.z = biased exponent as an integer */
+" \n"
+" dcl_literal l8, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24 */
+" iadd r0.__z_, r0.z, l8\n"
+" \n"
+" dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23 */
+" ior r1.x___, r1.y, l9\n" /* r1.x = mantissa with bit 23 set */
+" \n"
+" dcl_literal l10, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r0.__z_, l10, r0.z_neg(xyzw)\n" /* r0.z = 150 - r0.z */
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+" ilt r1._y__, l11, r0.z\n"
+" \n"
+" dcl_literal l12, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n" /* 24 */
+" cmov_logical r0.__z_, r1.y, l12, r0.z\n" /* values above 23 are replaced by 24 */
+" \n"
+" dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1._y__, l13, r0.z\n" /* r1.y = (r0.z > 0) */
+" ishr r1.__z_, r1.x, r0.z\n" /* candidate A: mantissa shifted right by r0.z */
+" inegate r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+" ishl r0.__z_, r0.z, l14\n"
+" iadd r0.__z_, r1.x, r0.z\n" /* candidate B: mantissa + ((-r0.z) << 23) */
+" cmov_logical r0.__z_, r1.y, r1.z, r0.z\n" /* pick A when r0.z was positive, else B */
+" rsq_vec r0.__z_, r0.z\n" /* hardware reciprocal square root of the rebuilt value */
+" \n"
+" dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r1.x___, r0.z, l15\n" /* r1.x = exponent field of the rsq result */
+" if_logicalz r1.x\n" /* rsq result has a zero exponent field */
+" \n"
+" dcl_literal l16, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1._y__, r0.z, l16\n"
+" itof r1._y__, r1.y\n" /* same mantissa-to-float step as above, now on the rsq result */
+" \n"
+" dcl_literal l17, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r1._yz_, r1.y, l17\n" /* r1.y = exponent field, r1.z = mantissa */
+" \n"
+" dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+" ishr r1._y__, r1.y, l18\n"
+" \n"
+" dcl_literal l19, 0x0000000C, 0x0000000C, 0x0000000C, 0x0000000C\n" /* 12 */
+" iadd r1._y__, r1.y, l19\n"
+" \n"
+" dcl_literal l20, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.__z_, r1.z, l20\n"
+" \n"
+" dcl_literal l21, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r1._y__, l21, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l22, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1.___w, l22, r1.y\n"
+" \n"
+" dcl_literal l23, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1._y__, r1.w, l23, r1.y\n"
+" \n"
+" dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.___w, l24, r1.y\n"
+" ishr r2.x___, r1.z, r1.y\n"
+" inegate r1._y__, r1.y\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1._y__, r1.y, l25\n"
+" iadd r1._y__, r1.z, r1.y\n"
+" cmov_logical r1._y__, r1.w, r2.x, r1.y\n" /* r1.y = result of this branch */
+" else\n"
+" \n"
+" dcl_literal l26, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.__z_, r0.z, l26\n" /* clear the sign bit of the rsq result */
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l27\n" /* r1.x = biased exponent of the rsq result */
+" \n"
+" dcl_literal l28, 0x06000000, 0x06000000, 0x06000000, 0x06000000\n" /* 12 << 23 */
+" iadd r0.__z_, r0.z, l28\n" /* add 12 to the exponent field */
+" \n"
+" dcl_literal l29, 0xFFFFFF8D, 0xFFFFFF8D, 0xFFFFFF8D, 0xFFFFFF8D\n" /* -115 */
+" iadd r1.x___, r1.x, l29\n"
+" \n"
+" dcl_literal l30, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+" ilt r1.x___, l30, r1.x\n" /* r1.x = (exponent - 115 > 127) */
+" \n"
+" dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* +Inf */
+" cmov_logical r1._y__, r1.x, l31, r0.z\n" /* out of range ? +Inf : rescaled value */
+" endif\n"
+" \n"
+" dcl_literal l32, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n" /* quiet-NaN pattern with the sign bit set */
+" cmov_logical r0.__z_, r0.w, l32, r1.y\n" /* negative nonzero input ? NaN : computed value */
+" \n"
+" dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n" /* quiet-NaN bit pattern */
+" ior r0.___w, r0.x, l33\n"
+" cmov_logical r0.x___, r0.y, r0.w, r0.z\n" /* NaN input ? quieted x : value selected above */
+"else\n"
+" rsq_vec r0.x___, r0.x\n" /* ordinary input: single hardware instruction */
+"endif\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_diff_i16", /* |in0.x - in1.x| masked to the low 16 bits. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(352)_out(1)_in(2)\n" /* macro id 352, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* r0.w = a - b */
+"imax r0.___w, r0.w_neg(xyzw), r0.w\n" /* r0.w = max(-(a - b), a - b), the absolute difference */
+"\n"
+"dcl_literal l1, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n" /* low-16-bit mask */
+"and r0.x___, r0.w, l1\n" /* keep only the low 16 bits of the result */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_diff_i32", /* Signed absolute difference computed as max(a, b) - min(a, b). Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(353)_out(1)_in(2)\n" /* macro id 353, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"imin r0.___w, r0.y, r0.x\n" /* r0.w = min(b, a) */
+"imax r1.x___, r0.y, r0.x\n" /* r1.x = max(b, a) */
+"iadd r0.x___, r1.x, r0.w_neg(xyzw)\n" /* r0.x = max - min */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_diff_i8", /* |in0.x - in1.x| masked to the low 8 bits. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(354)_out(1)_in(2)\n" /* macro id 354, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* r0.w = a - b */
+"imax r0.___w, r0.w_neg(xyzw), r0.w\n" /* r0.w = max(-(a - b), a - b), the absolute difference */
+"\n"
+"dcl_literal l0, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n" /* low-8-bit mask */
+"and r0.x___, r0.w, l0\n" /* keep only the low 8 bits of the result */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_i16", /* Signed absolute value as max(-x, x); the body is identical to the i32 and i8 variants, with no 16-bit masking. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(355)_out(1)_in(1)\n" /* macro id 355, one output, one input */
+"mov r0, in0\n"
+"imax r0.x___, r0.x_neg(xyzw), r0.x\n" /* r0.x = max(-x, x) */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_i32", /* Signed absolute value as max(-x, x). Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(356)_out(1)_in(1)\n" /* macro id 356, one output, one input */
+"mov r0, in0\n"
+"imax r0.x___, r0.x_neg(xyzw), r0.x\n" /* r0.x = max(-x, x) */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sabs_i8", /* Signed absolute value as max(-x, x); the body is identical to the i32 and i16 variants, with no 8-bit masking. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(357)_out(1)_in(1)\n" /* macro id 357, one output, one input */
+"mov r0, in0\n"
+"imax r0.x___, r0.x_neg(xyzw), r0.x\n" /* r0.x = max(-x, x) */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sadd_sat_i16", /* Saturating signed add: the 32-bit sum is clamped to [-32768, 32767]. Presumably the operands arrive sign-extended to 32 bits -- verify against the caller. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(358)_out(1)_in(2)\n" /* macro id 358, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"iadd r0.___w, r0.x, r0.y\n" /* r0.w = a + b */
+"\n"
+"dcl_literal l3, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000\n" /* -32768 */
+"imax r0.___w, r0.w, l3\n" /* clamp from below */
+"\n"
+"dcl_literal l4, 0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF\n" /* 32767 */
+"imin r0.x___, r0.w, l4\n" /* clamp from above */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sadd_sat_i8", /* Saturating signed add: the 32-bit sum is clamped to [-128, 127]. Presumably the operands arrive sign-extended to 32 bits -- verify against the caller. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(359)_out(1)_in(2)\n" /* macro id 359, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"iadd r0.___w, r0.x, r0.y\n" /* r0.w = a + b */
+"\n"
+"dcl_literal l0, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80\n" /* -128 */
+"imax r0.___w, r0.w, l0\n" /* clamp from below */
+"\n"
+"dcl_literal l1, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+"imin r1.x___, r0.w, l1\n" /* clamp from above, into a temporary */
+"mov r0.x___, r1.x\n" /* copy the clamped value into the output lane */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sadd_sat_u32", /* Saturating 32-bit add with overflow detected by sign tests. NOTE(review): despite the _u32 suffix, every compare is the signed ilt and the clamps are 0x7FFFFFFF / 0x80000000, so this is signed saturation -- confirm the name is intentional. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(360)_out(1)_in(2)\n" /* macro id 360, one output, two inputs */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands in r0: a in .x, b in .y */
+"iadd r0.___w, r0.x, r0.y\n" /* r0.w = a + b (wrapping) */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, l6, r0.y\n" /* r1.x = (b > 0) */
+"ilt r1._y__, r0.w, r0.x\n" /* r1.y = (sum < a) */
+"and r1.x___, r1.x, r1.y\n" /* positive b but the sum went down: overflow upward */
+"\n"
+"dcl_literal l7, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n" /* largest signed 32-bit value */
+"cmov_logical r1.x___, r1.x, l7, r0.w\n" /* r1.x = overflow ? 0x7FFFFFFF : sum */
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, r0.y, l8\n" /* r1.y = (b < 0) */
+"ilt r0.___w, r0.x, r0.w\n" /* r0.w = (a < sum) */
+"and r0.___w, r1.y, r0.w\n" /* negative b but the sum went up: overflow downward */
+"\n"
+"dcl_literal l9, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n" /* smallest signed 32-bit value */
+"cmov_logical r0.x___, r0.w, l9, r1.x\n" /* r0.x = underflow ? 0x80000000 : value selected above */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the _in(2)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sall_i16", /* Returns 1 when bit 15 of the input is set, else 0. The body is identical to __sany_i16. Opcode meanings are read from the AMD IL mnemonics -- confirm against the IL spec. */
+"mdef(361)_out(1)_in(1)\n" /* macro id 361, one output, one input */
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l3, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n" /* bit 15: the sign bit of a 16-bit value */
+"and r0.__z_, r0.x, l3\n" /* isolate bit 15 */
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l4, l5\n" /* r0.x = bit set ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the _in(1)/_out(1) counts above; field meaning is declared outside this view -- presumably (inputs, outputs) */
+},
+{ "__sall_i32", /* mdef 362: returns 1 if bit 31 of in0.x is set, else 0 */
+"mdef(362)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n" /* bit 31 mask */
+"and r0.x___, r0.x, l6\n" /* r0.x = x & 0x80000000 (done in place) */
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l7, l8\n" /* r0.x = (r0.x != 0) ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sall_i8", /* mdef 363: returns 1 if bit 7 of in0.x is set, else 0 */
+"mdef(363)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n" /* bit 7 mask */
+"and r0.__z_, r0.x, l0\n" /* r0.z = x & 0x80 */
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l1, l2\n" /* r0.x = (r0.z != 0) ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sany_i16", /* mdef 364: returns 1 if bit 15 of in0.x is set, else 0 (same instruction sequence as __sall_i16) */
+"mdef(364)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l3, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n" /* bit 15 mask */
+"and r0.__z_, r0.x, l3\n" /* r0.z = x & 0x8000 */
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l4, l5\n" /* r0.x = (r0.z != 0) ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sany_i32", /* mdef 365: returns 1 if bit 31 of in0.x is set, else 0 (same instruction sequence as __sall_i32) */
+"mdef(365)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n" /* bit 31 mask */
+"and r0.x___, r0.x, l6\n" /* r0.x = x & 0x80000000 (done in place) */
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l7, l8\n" /* r0.x = (r0.x != 0) ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sany_i8", /* mdef 366: returns 1 if bit 7 of in0.x is set, else 0 (same instruction sequence as __sall_i8) */
+"mdef(366)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n" /* bit 7 mask */
+"and r0.__z_, r0.x, l0\n" /* r0.z = x & 0x80 */
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l1, l2\n" /* r0.x = (r0.z != 0) ? 1 : 0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sclz_i16", /* mdef 367: count of leading zeros in the low 16 bits of in0.x, obtained by reading the exponent of a float built from the value; yields 16 for zero */
+"mdef(367)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l10, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.x, l10\n" /* r0.z = v = x & 0xFFFF */
+"\n"
+"dcl_literal l11, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* bit pattern of 1.0f */
+"ior r0.___w, r0.z, l11\n" /* place v in the mantissa of a float in [1, 2) */
+"\n"
+"dcl_literal l12, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* bit pattern of -1.0f */
+"add r0.___w, r0.w, l12\n" /* float subtract of 1.0: the renormalised exponent now encodes the highest set bit of v */
+"\n"
+"dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* exponent field mask */
+"and r0.___w, r0.w, l13\n" /* keep only the exponent bits */
+"\n"
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l14, r0.w_neg(xyzw)\n" /* integer 0x3F800000 - exponent bits = (127 - biased exponent) << 23 */
+"\n"
+"dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l15\n"
+"\n"
+"dcl_literal l16, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = width of the mantissa field */
+"ushr r0.___w, r0.w, l16\n" /* r0.w = 23 - index of the highest set bit of v */
+"\n"
+"dcl_literal l17, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n" /* -8 */
+"iadd r0.___w, r0.w, l17\n" /* r0.w = 15 - index of the highest set bit = leading zeros within 16 bits */
+"\n"
+"dcl_literal l18, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n" /* 16 */
+"cmov_logical r0.x___, r0.z, r0.w, l18\n" /* v != 0 ? computed count : 16 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sclz_i32", /* mdef 368: count of leading zeros of in0.x, built from two 16-bit counts (high half, then low half) using the float-exponent trick; yields 32 for zero */
+"mdef(368)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l19, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r0.__z_, r0.x, l19\n" /* r0.z = x >> 16 */
+"\n"
+"dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.z, l20\n" /* r0.z = hi = high 16 bits of x */
+"\n"
+"dcl_literal l21, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* bit pattern of 1.0f */
+"ior r0.___w, r0.z, l21\n" /* place hi in the mantissa of a float in [1, 2) */
+"\n"
+"dcl_literal l22, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* bit pattern of -1.0f */
+"add r0.___w, r0.w, l22\n" /* float subtract of 1.0: exponent now encodes the highest set bit of hi */
+"\n"
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* exponent field mask */
+"and r0.___w, r0.w, l23\n"
+"\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l24, r0.w_neg(xyzw)\n" /* (127 - biased exponent) << 23 */
+"\n"
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l25\n"
+"\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ushr r0.___w, r0.w, l26\n"
+"\n"
+"dcl_literal l27, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n" /* -8 */
+"iadd r0.___w, r0.w, l27\n" /* leading zeros of hi within 16 bits */
+"\n"
+"dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n" /* 16 */
+"cmov_logical r0.__z_, r0.z, r0.w, l28\n" /* r0.z = hi != 0 ? count(hi) : 16 */
+"\n"
+"dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.___w, r0.x, l29\n" /* r0.w = lo = low 16 bits of x */
+"\n"
+"dcl_literal l30, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1.x___, r0.w, l30\n" /* same exponent trick, now applied to lo in r1.x */
+"\n"
+"dcl_literal l31, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.x___, r1.x, l31\n"
+"\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l32\n"
+"\n"
+"dcl_literal l33, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1.x___, l33, r1.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l34, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l34\n"
+"\n"
+"dcl_literal l35, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1.x___, r1.x, l35\n"
+"\n"
+"dcl_literal l36, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r1.x___, r1.x, l36\n" /* leading zeros of lo within 16 bits */
+"\n"
+"dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"cmov_logical r0.___w, r0.w, r1.x, l37\n" /* r0.w = lo != 0 ? count(lo) : 16 */
+"\n"
+"dcl_literal l38, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ieq r1.x___, r0.z, l38\n" /* r1.x = (count(hi) == 16), i.e. the high half was all zero */
+"iadd r0.___w, r0.z, r0.w\n" /* r0.w = count(hi) + count(lo) */
+"cmov_logical r0.x___, r1.x, r0.w, r0.z\n" /* high half empty ? combined count : count(hi) */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sclz_i8", /* mdef 369: leading-zero count for an 8-bit value: computes the 16-bit count of in0.x via the float-exponent trick, then subtracts 8 */
+"mdef(369)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.x, l0\n" /* r0.z = v = x & 0xFFFF. NOTE(review): the mask is 16 bits wide, not 0xFF; if bits 8..15 of the input can be set (for example a sign-extended negative value) the final result goes negative -- confirm the input is zero-extended */
+"\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* bit pattern of 1.0f */
+"ior r0.___w, r0.z, l1\n" /* place v in the mantissa of a float in [1, 2) */
+"\n"
+"dcl_literal l2, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* bit pattern of -1.0f */
+"add r0.___w, r0.w, l2\n" /* float subtract of 1.0: exponent now encodes the highest set bit of v */
+"\n"
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* exponent field mask */
+"and r0.___w, r0.w, l3\n"
+"\n"
+"dcl_literal l4, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l4, r0.w_neg(xyzw)\n" /* (127 - biased exponent) << 23 */
+"\n"
+"dcl_literal l5, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l5\n"
+"\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ushr r0.___w, r0.w, l6\n"
+"\n"
+"dcl_literal l7, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n" /* -8 */
+"iadd r0.___w, r0.w, l7\n" /* leading zeros of v within 16 bits */
+"\n"
+"dcl_literal l8, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n" /* 16 */
+"cmov_logical r0.__z_, r0.z, r0.w, l8\n" /* r0.z = v != 0 ? 16-bit count : 16 */
+"\n"
+"dcl_literal l9, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n" /* -8 */
+"iadd r0.x___, r0.z, l9\n" /* rebase the 16-bit count to 8 bits: result = count - 8 (8 for zero input) */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* presumably the in/out operand counts of the mdef line -- confirm against the table struct */
+},
+{ "__sdiv_i16", /* mdef 370: signed 16-bit division in0.x / in1.x via an unsigned divide of the magnitudes; the quotient is masked to 16 bits and a zero divisor yields 0 */
+"mdef(370)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.x = dividend a, r0.y = divisor b */
+"dcl_literal l13, 0x0000FFFF, 0x0000FFFF, 0x00008000, 0x00000000\n" /* per-component masks: low 16 bits, low 16 bits, bit 15 */
+"and r1.xyz_, r0.xyxx, l13\n" /* r1.x = a & 0xFFFF, r1.y = b & 0xFFFF, r1.z = a & 0x8000 (sign bit of a) */
+"dcl_literal l14, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l14\n" /* r0.w = working divisor: b, or 1 when b is zero, so udiv never sees zero */
+"dcl_literal l15, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"ior r1.___w, r1.x, l15\n"
+"cmov_logical r1.x___, r1.z, r1.w, r1.x\n" /* sign-extend a from 16 to 32 bits */
+"dcl_literal l16, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n"
+"and r1.__z_, r0.w, l16\n" /* sign bit of the working divisor */
+"dcl_literal l17, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"ior r1.___w, r0.w, l17\n"
+"cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* sign-extend the working divisor from 16 to 32 bits */
+"ixor r1.__z_, r1.x, r0.w\n" /* bit 31 of r1.z is set when the operand signs differ */
+"imax r1.x___, r1.x, r1.x_neg(xyzw)\n" /* r1.x = magnitude of a */
+"imax r0.___w, r0.w, r0.w_neg(xyzw)\n" /* r0.w = magnitude of the divisor */
+"udiv r0.___w, r1.x, r0.w\n" /* unsigned quotient q of the magnitudes */
+"inegate r1.x___, r0.w\n" /* r1.x = -q */
+"dcl_literal l18, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r1.__z_, r1.z, l18\n" /* isolate the sign-difference bit */
+"cmov_logical r0.___w, r1.z, r1.x, r0.w\n" /* signs differ ? -q : q */
+"dcl_literal l19, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.___w, r0.w, l19\n" /* truncate the quotient to 16 bits */
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r1.y, r0.w, l20\n" /* original divisor non-zero ? quotient : 0 */
+"mov r0.x___, r0.z\n" /* move the result into the output component */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2) / out(1) above; field meaning presumed -- confirm against the table struct */
+},
+{ "__sdiv_i32", /* mdef 371: signed 32-bit division in0.x / in1.x via an unsigned divide of the magnitudes. NOTE(review): unlike __sdiv_i16 there is no zero-divisor guard here; the result for b == 0 is whatever udiv produces */
+"mdef(371)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l25, 0, 0, 0, 0\n"
+"mov r0._y__, r1.x\n" /* r0.x = dividend a, r0.y = divisor b */
+"ilt r1.xy, r0, l25\n" /* r1.x = (a < 0), r1.y = (b < 0); the steps below assume a true compare yields all ones -- confirm against the IL spec */
+"iadd r0.xy, r0, r1\n" /* with mask m in {0, -1}: (v + m) ^ m gives the magnitude of v (this line adds m) */
+"ixor r0.xy, r0, r1\n" /* ... and this line applies the xor, leaving the magnitudes of a and b in r0.xy */
+"udiv r0.x, r0.x, r0.y\n" /* unsigned quotient of the magnitudes */
+"ixor r1.x, r1.x, r1.y\n" /* r1.x = mask that is set when the operand signs differ */
+"iadd r0.x, r0.x, r1.x\n" /* same add-then-xor step, now negating the quotient when the mask is set */
+"ixor r0.x, r0.x, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches in(2) / out(1) above; field meaning presumed -- confirm against the table struct */
+},
+{ "__sdiv_i64",
+"mdef(372)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0.__zw, r1.yyxy\n"
+"dcl_literal l1, 0x80000000, 0x80000000, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.ywyy, l1\n"
+"inegate r2, r0\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r1.__zw, l2, r2.zzzx\n"
+"iadd r1.__zw, r1.zzzw, r2.wwwy\n"
+"cmov_logical r0._y_w, r1.yyyx, r1.zzzw, r0.wwwy\n"
+"cmov_logical r0.x_z_, r1.xxyx, r2.xxzx, r0.xxzx\n"
+"ult r1.__z_, r0.w, r0.y\n"
+"ieq r1.___w, r0.w, r0.y\n"
+"ult r2.x___, r0.x, r0.z\n"
+"and r2.x___, r1.w, r2.x\n"
+"ior r2.x___, r1.z, r2.x\n"
+"dcl_literal l3, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2.x___, r2.x, l3, l4\n"
+"ieq r2._y__, r0.x, r0.z\n"
+"and r2._y__, r1.w, r2.y\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.x___, r2.y, l5, r2.x\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r2._yz_, r0.yyzy, l6\n"
+"and r2.__z_, r2.z, r2.y\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.__z_, r2.z, l7, r0.z\n"
+"dcl_literal l8, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3.xy__, r0.ywyy, l8\n"
+"dcl_literal l9, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.__zw, r0.yyyw, l9\n"
+"dcl_literal l10, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r2.___w, r0.z, l10\n"
+"ior r2.___w, r3.z, r2.w\n"
+"dcl_literal l11, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l11\n"
+"dcl_literal l12, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r0.z, l12\n"
+"dcl_literal l13, 0x3F800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"ior r4.xy__, r3.xyxx, l13\n"
+"dcl_literal l14, 0xBF800000, 0xBF800000, 0x00000000, 0x00000000\n"
+"add r4.xy__, r4.xyxx, l14\n"
+"dcl_literal l15, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r4.xy__, r4.xyxx, l15\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"iadd r4.xy__, l16, r4.xyxx_neg(xyzw)\n"
+"dcl_literal l17, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r4.xy__, r4.xyxx, l17\n"
+"dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.xy__, r4.xyxx, l18\n"
+"dcl_literal l19, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"iadd r4.xy__, r4.xyxx, l19\n"
+"dcl_literal l20, 0x00000017, 0x00000017, 0x00000000, 0x00000000\n"
+"cmov_logical r3.xy__, r3.xyxx, r4.xyxx, l20\n"
+"dcl_literal l21, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r2.w, l21\n"
+"dcl_literal l22, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l22\n"
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l23\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l24, r4.x_neg(xyzw)\n"
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l25\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l26\n"
+"dcl_literal l27, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l27\n"
+"dcl_literal l28, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r2.___w, r2.w, r4.x, l28\n"
+"dcl_literal l29, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.z, l29\n"
+"dcl_literal l30, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l30\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l31\n"
+"dcl_literal l32, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l32, r4.x_neg(xyzw)\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l33\n"
+"dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l34\n"
+"dcl_literal l35, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r4.x___, r4.x, l35\n"
+"dcl_literal l36, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r4.x, l36\n"
+"dcl_literal l37, 0x00000017, 0x00000017, 0x00000000, 0x00000000\n"
+"ieq r4.xy__, r3.xyxx, l37\n"
+"iadd r2.___w, r3.x, r2.w\n"
+"cmov_logical r3.x___, r4.x, r2.w, r3.x\n"
+"dcl_literal l38, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r4.x___, r2.w, l38\n"
+"iadd r2.___w, r2.w, r3.z\n"
+"cmov_logical r2.___w, r4.x, r2.w, r3.x\n"
+"dcl_literal l39, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ige r3.x___, r2.w, l39\n"
+"dcl_literal l40, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.__z_, r2.w, l40\n"
+"dcl_literal l41, 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F\n"
+"cmov_logical r3.x___, r3.x, r3.z, l41\n"
+"ult r3.__z_, r0.x, r0.z\n"
+"and r3.__z_, r1.w, r3.z\n"
+"ior r1.__z_, r1.z, r3.z\n"
+"dcl_literal l42, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l43, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.__z_, r1.z, l42, l43\n"
+"ieq r3.__z_, r0.x, r0.z\n"
+"and r1.___w, r1.w, r3.z\n"
+"dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.w, l44, r1.z\n"
+"dcl_literal l45, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r1.___w, r0.x, l45\n"
+"ior r1.___w, r3.w, r1.w\n"
+"dcl_literal l46, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.___w, r1.w, l46\n"
+"dcl_literal l47, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r0.x, l47\n"
+"dcl_literal l48, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r1.w, l48\n"
+"dcl_literal l49, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l49\n"
+"dcl_literal l50, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l50\n"
+"dcl_literal l51, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l51, r3.w_neg(xyzw)\n"
+"dcl_literal l52, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l52\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l53\n"
+"dcl_literal l54, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r1.___w, r1.w, r3.w, l55\n"
+"dcl_literal l56, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.z, l56\n"
+"dcl_literal l57, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l57\n"
+"dcl_literal l58, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l58\n"
+"dcl_literal l59, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l59, r3.w_neg(xyzw)\n"
+"dcl_literal l60, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l60\n"
+"dcl_literal l61, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l61\n"
+"dcl_literal l62, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.___w, r3.w, l62\n"
+"dcl_literal l63, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r3.w, l63\n"
+"iadd r1.___w, r3.y, r1.w\n"
+"cmov_logical r3._y__, r4.y, r1.w, r3.y\n"
+"dcl_literal l64, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.___w, r1.w, l64\n"
+"iadd r1.___w, r1.w, r3.z\n"
+"cmov_logical r1.___w, r3.w, r1.w, r3.y\n"
+"ilt r3._y__, r1.w, r2.w\n"
+"ieq r3.__z_, r2.w, r1.w\n"
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r1.z, l65\n"
+"and r1.__z_, r3.z, r1.z\n"
+"ior r1.__z_, r3.y, r1.z\n"
+"iadd r3._y__, r2.w, r1.w_neg(xyzw)\n"
+"dcl_literal l66, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.__z_, r1.z, r3.y, l66\n"
+"ilt r3._y__, r3.x, r1.z\n"
+"iadd r3.__z_, r1.z, r3.x_neg(xyzw)\n"
+"and r3._y__, r3.y, r3.z\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.__z_, r1.z, l67\n"
+"dcl_literal l68, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3._y__, r3.z, l68, r3.y\n"
+"dcl_literal l69, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r1.z, l69\n"
+"if_logicalnz r1.z\n"
+" \n"
+" dcl_literal l70, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r3.y, l70\n"
+" \n"
+" dcl_literal l71, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r3.y, l71\n"
+" cmov_logical r3.___w, r3.z, r3.w, r3.y\n"
+" \n"
+" dcl_literal l72, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r3.y, l72\n"
+" \n"
+" dcl_literal l73, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4._y__, l73, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r0.z, r4.y\n"
+" ishl r3.___w, r0.z, r3.w\n"
+" ishl r4.x___, r0.y, r4.x\n"
+" ior r4.x___, r4.y, r4.x\n"
+" cmov_logical r4.x___, r3.z, r0.z, r4.x\n"
+" \n"
+" dcl_literal l74, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.__z_, r3.z, l74, r3.w\n"
+" cmov_logical r3.___w, r3.y, r4.x, r0.y\n"
+" cmov_logical r3.__z_, r3.y, r3.z, r0.z\n"
+" \n"
+" dcl_literal l75, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r4.x___, r1.w, l75\n"
+" \n"
+" dcl_literal l76, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4._y__, r1.w, l76\n"
+" cmov_logical r4._y__, r4.x, r4.y, r1.w\n"
+" \n"
+" dcl_literal l77, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.__z_, r1.w, l77\n"
+" \n"
+" dcl_literal l78, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4._y__, l78, r4.y_neg(xyzw)\n"
+" ushr r4.___w, r0.x, r4.y\n"
+" ishl r5.x___, r0.w, r4.z\n"
+" ior r4.___w, r4.w, r5.x\n"
+" cmov_logical r4.___w, r4.x, r0.x, r4.w\n"
+" cmov_logical r4.___w, r1.w, r4.w, r0.w\n"
+" ushr r4._y__, r3.z, r4.y\n"
+" ishl r4.__z_, r3.w, r4.z\n"
+" ior r4._y__, r4.y, r4.z\n"
+" cmov_logical r4.x___, r4.x, r3.z, r4.y\n"
+" cmov_logical r1.___w, r1.w, r4.x, r3.w\n"
+" udiv r4.x___, r4.w, r1.w\n"
+" umul r4._y__, r4.x, r1.w\n"
+" ilt r4.__z_, r4.w, r4.y\n"
+" iadd r5.x___, r4.y, r4.w_neg(xyzw)\n"
+" iadd r5.x___, r5.x, r1.w\n"
+" \n"
+" dcl_literal l79, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5.x___, r5.x, l79\n"
+" iadd r4._y__, r4.w, r4.y_neg(xyzw)\n"
+" cmov_logical r4._y__, r4.z, r5.x, r4.y\n"
+" udiv r1.___w, r4.y, r1.w\n"
+" iadd r4._y__, r4.x, r1.w_neg(xyzw)\n"
+" iadd r1.___w, r4.x, r1.w\n"
+" cmov_logical r1.___w, r4.z, r4.y, r1.w\n"
+" \n"
+" dcl_literal l80, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.z, l80\n"
+" \n"
+" dcl_literal l81, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r3.z, l81\n"
+" \n"
+" dcl_literal l82, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r1.w, l82\n"
+" \n"
+" dcl_literal l83, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r1.w, l83\n"
+" umul r5.x___, r4.y, r4.w\n"
+" umul r4._y__, r4.y, r4.z\n"
+" umul r5._y__, r4.x, r4.w\n"
+" umul r4.x___, r4.x, r4.z\n"
+" \n"
+" dcl_literal l84, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r4.x, l84\n"
+" \n"
+" dcl_literal l85, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.___w, r5.y, l85\n"
+" iadd r5.__z_, r5.z, r5.w\n"
+" iadd r4._y__, r4.y, r5.z\n"
+" \n"
+" dcl_literal l86, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r4.y, l86\n"
+" iadd r5.x___, r5.x, r5.z\n"
+" \n"
+" dcl_literal l87, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r5.y, l87\n"
+" iadd r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l88, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4._y__, r4.y, l88\n"
+" \n"
+" dcl_literal l89, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r4.x, l89\n"
+" ior r4.x___, r4.y, r4.x\n"
+" \n"
+" dcl_literal l90, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4._y__, r3.w, l90\n"
+" \n"
+" dcl_literal l91, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r3.w, l91\n"
+" umul r5._y__, r5.y, r4.z\n"
+" umul r4._y_w, r4.y, r4.zzzw\n"
+" \n"
+" dcl_literal l92, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r4.y, l92\n"
+" \n"
+" dcl_literal l93, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.___w, r4.w, l93\n"
+" iadd r4.__z_, r4.z, r4.w\n"
+" iadd r4.__z_, r5.y, r4.z\n"
+" \n"
+" dcl_literal l94, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4.__z_, r4.z, l94\n"
+" \n"
+" dcl_literal l95, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4._y__, r4.y, l95\n"
+" ior r4._y__, r4.z, r4.y\n"
+" iadd r4.__z_, r4.y, r5.x\n"
+" ult r4._y__, r4.z, r4.y\n"
+" ult r4.___w, r0.w, r4.z\n"
+" ieq r5.x___, r0.w, r4.z\n"
+" ult r5._y__, r0.x, r4.x\n"
+" and r5._y__, r5.x, r5.y\n"
+" ior r4.___w, r4.w, r5.y\n"
+" \n"
+" dcl_literal l96, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l97, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4.___w, r4.w, l96, l97\n"
+" ieq r5._y__, r0.x, r4.x\n"
+" and r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l98, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.___w, r5.x, l98, r4.w\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.___w, r4.w, l99\n"
+" ior r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l100, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.___w, r1.w, l100\n"
+" cmov_logical r1.___w, r4.y, r4.w, r1.w\n"
+" iadd r3.__z_, r4.x, r3.z_neg(xyzw)\n"
+" ult r4.___w, r4.x, r3.z\n"
+" iadd r4.___w, r4.z, r4.w\n"
+" iadd r3.___w, r4.w, r3.w_neg(xyzw)\n"
+" cmov_logical r3.__zw, r4.y, r3.zzzw, r4.xxxz\n"
+" iadd r4._y__, r0.x, r3.z_neg(xyzw)\n"
+" ult r3.__z_, r0.x, r4.y\n"
+" iadd r3.__z_, r0.w, r3.z\n"
+" iadd r4.x___, r3.z, r3.w_neg(xyzw)\n"
+"else\n"
+" mov r4.xy__, r0.wxww\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r1.___w, l101\n"
+"endif\n"
+"ult r3.__z_, r4.x, r0.y\n"
+"ieq r3.___w, r4.x, r0.y\n"
+"ult r4.__z_, r4.y, r0.z\n"
+"and r4.__z_, r3.w, r4.z\n"
+"ior r3.__z_, r3.z, r4.z\n"
+"dcl_literal l102, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l103, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r3.__z_, r3.z, l102, l103\n"
+"ieq r4.__z_, r4.y, r0.z\n"
+"and r3.___w, r3.w, r4.z\n"
+"dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.__z_, r3.w, l104, r3.z\n"
+"dcl_literal l105, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.___w, r4.x, l105\n"
+"dcl_literal l106, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r4.__z_, r4.x, l106\n"
+"dcl_literal l107, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r4.___w, r4.y, l107\n"
+"ior r3.___w, r3.w, r4.w\n"
+"dcl_literal l108, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.___w, r3.w, l108\n"
+"dcl_literal l109, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r4.___w, r4.y, l109\n"
+"dcl_literal l110, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r4.z, l110\n"
+"dcl_literal l111, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l111\n"
+"dcl_literal l112, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l112\n"
+"dcl_literal l113, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l113, r5.x_neg(xyzw)\n"
+"dcl_literal l114, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l114\n"
+"dcl_literal l115, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l115\n"
+"dcl_literal l116, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.x___, r5.x, l116\n"
+"dcl_literal l117, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4.__z_, r4.z, r5.x, l117\n"
+"dcl_literal l118, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r3.w, l118\n"
+"dcl_literal l119, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l119\n"
+"dcl_literal l120, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l120\n"
+"dcl_literal l121, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l121, r5.x_neg(xyzw)\n"
+"dcl_literal l122, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l122\n"
+"dcl_literal l123, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l123\n"
+"dcl_literal l124, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.x___, r5.x, l124\n"
+"dcl_literal l125, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.___w, r3.w, r5.x, l125\n"
+"dcl_literal l126, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r4.w, l126\n"
+"dcl_literal l127, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l127\n"
+"dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l128\n"
+"dcl_literal l129, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l129, r5.x_neg(xyzw)\n"
+"dcl_literal l130, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l130\n"
+"dcl_literal l131, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l131\n"
+"dcl_literal l132, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r5.x___, r5.x, l132\n"
+"dcl_literal l133, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r4.___w, r4.w, r5.x, l133\n"
+"dcl_literal l134, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r5.x___, r4.z, l134\n"
+"iadd r3.___w, r4.z, r3.w\n"
+"cmov_logical r4.__z_, r5.x, r3.w, r4.z\n"
+"dcl_literal l135, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r5.x___, r3.w, l135\n"
+"iadd r3.___w, r3.w, r4.w\n"
+"cmov_logical r3.___w, r5.x, r3.w, r4.z\n"
+"ilt r4.__z_, r3.w, r2.w\n"
+"ieq r4.___w, r2.w, r3.w\n"
+"dcl_literal l136, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r3.__z_, r3.z, l136\n"
+"and r3.__z_, r4.w, r3.z\n"
+"ior r3.__z_, r4.z, r3.z\n"
+"and r1.__z_, r1.z, r3.z\n"
+"iadd r3.__z_, r2.w, r3.w_neg(xyzw)\n"
+"dcl_literal l137, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.__z_, r1.z, r3.z, l137\n"
+"ilt r3.__z_, r3.x, r1.z\n"
+"iadd r4.__z_, r1.z, r3.x_neg(xyzw)\n"
+"and r3.__z_, r3.z, r4.z\n"
+"dcl_literal l138, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.__z_, r1.z, l138\n"
+"dcl_literal l139, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.__z_, r4.z, l139, r3.z\n"
+"dcl_literal l140, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r1.z, l140\n"
+"if_logicalnz r1.z\n"
+" \n"
+" dcl_literal l141, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r4.__z_, r3.z, l141\n"
+" \n"
+" dcl_literal l142, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.___w, r3.z, l142\n"
+" cmov_logical r4.___w, r4.z, r4.w, r3.z\n"
+" \n"
+" dcl_literal l143, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.x___, r3.z, l143\n"
+" \n"
+" dcl_literal l144, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5._y__, l144, r4.w_neg(xyzw)\n"
+" ushr r5._y__, r0.z, r5.y\n"
+" ishl r4.___w, r0.z, r4.w\n"
+" ishl r5.x___, r0.y, r5.x\n"
+" ior r5.x___, r5.y, r5.x\n"
+" cmov_logical r5.x___, r4.z, r0.z, r5.x\n"
+" \n"
+" dcl_literal l145, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.__z_, r4.z, l145, r4.w\n"
+" cmov_logical r4.___w, r3.z, r5.x, r0.y\n"
+" cmov_logical r4.__z_, r3.z, r4.z, r0.z\n"
+" \n"
+" dcl_literal l146, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r5.x___, r3.w, l146\n"
+" \n"
+" dcl_literal l147, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r5._y__, r3.w, l147\n"
+" cmov_logical r5._y__, r5.x, r5.y, r3.w\n"
+" \n"
+" dcl_literal l148, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.__z_, r3.w, l148\n"
+" \n"
+" dcl_literal l149, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5._y__, l149, r5.y_neg(xyzw)\n"
+" ushr r5.___w, r4.y, r5.y\n"
+" ishl r6.x___, r4.x, r5.z\n"
+" ior r5.___w, r5.w, r6.x\n"
+" cmov_logical r5.___w, r5.x, r4.y, r5.w\n"
+" cmov_logical r5.___w, r3.w, r5.w, r4.x\n"
+" ushr r5._y__, r4.z, r5.y\n"
+" ishl r5.__z_, r4.w, r5.z\n"
+" ior r5._y__, r5.y, r5.z\n"
+" cmov_logical r5.x___, r5.x, r4.z, r5.y\n"
+" cmov_logical r3.___w, r3.w, r5.x, r4.w\n"
+" udiv r5.x___, r5.w, r3.w\n"
+" umul r5._y__, r5.x, r3.w\n"
+" ilt r5.__z_, r5.w, r5.y\n"
+" iadd r6.x___, r5.y, r5.w_neg(xyzw)\n"
+" iadd r6.x___, r6.x, r3.w\n"
+" \n"
+" dcl_literal l150, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.x___, r6.x, l150\n"
+" iadd r5._y__, r5.w, r5.y_neg(xyzw)\n"
+" cmov_logical r5._y__, r5.z, r6.x, r5.y\n"
+" udiv r3.___w, r5.y, r3.w\n"
+" iadd r5._y__, r5.x, r3.w_neg(xyzw)\n"
+" iadd r3.___w, r5.x, r3.w\n"
+" cmov_logical r3.___w, r5.z, r5.y, r3.w\n"
+" \n"
+" dcl_literal l151, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r4.z, l151\n"
+" \n"
+" dcl_literal l152, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r4.z, l152\n"
+" \n"
+" dcl_literal l153, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.__z_, r3.w, l153\n"
+" \n"
+" dcl_literal l154, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r3.w, l154\n"
+" umul r6.x___, r5.y, r5.w\n"
+" umul r5._y__, r5.y, r5.z\n"
+" umul r6._y__, r5.x, r5.w\n"
+" umul r5.x___, r5.x, r5.z\n"
+" \n"
+" dcl_literal l155, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.__z_, r5.x, l155\n"
+" \n"
+" dcl_literal l156, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6.___w, r6.y, l156\n"
+" iadd r6.__z_, r6.z, r6.w\n"
+" iadd r5._y__, r5.y, r6.z\n"
+" \n"
+" dcl_literal l157, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.__z_, r5.y, l157\n"
+" iadd r6.x___, r6.x, r6.z\n"
+" \n"
+" dcl_literal l158, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r6.y, l158\n"
+" iadd r6.x___, r6.x, r6.y\n"
+" \n"
+" dcl_literal l159, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5._y__, r5.y, l159\n"
+" \n"
+" dcl_literal l160, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r5.x, l160\n"
+" ior r5.x___, r5.y, r5.x\n"
+" \n"
+" dcl_literal l161, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r4.w, l161\n"
+" \n"
+" dcl_literal l162, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r4.w, l162\n"
+" umul r6._y__, r6.y, r5.z\n"
+" umul r5._y_w, r5.y, r5.zzzw\n"
+" \n"
+" dcl_literal l163, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r5.y, l163\n"
+" \n"
+" dcl_literal l164, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.___w, r5.w, l164\n"
+" iadd r5.__z_, r5.z, r5.w\n"
+" iadd r5.__z_, r6.y, r5.z\n"
+" \n"
+" dcl_literal l165, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5.__z_, r5.z, l165\n"
+" \n"
+" dcl_literal l166, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r5.y, l166\n"
+" ior r5._y__, r5.z, r5.y\n"
+" iadd r5.__z_, r5.y, r6.x\n"
+" ult r5._y__, r5.z, r5.y\n"
+" ult r5.___w, r4.x, r5.z\n"
+" ieq r6.x___, r4.x, r5.z\n"
+" ult r6._y__, r4.y, r5.x\n"
+" and r6._y__, r6.x, r6.y\n"
+" ior r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l167, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l168, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r5.___w, r5.w, l167, l168\n"
+" ieq r6._y__, r4.y, r5.x\n"
+" and r6.x___, r6.x, r6.y\n"
+" \n"
+" dcl_literal l169, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r5.___w, r6.x, l169, r5.w\n"
+" \n"
+" dcl_literal l170, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.___w, r5.w, l170\n"
+" ior r5._y__, r5.y, r5.w\n"
+" \n"
+" dcl_literal l171, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5.___w, r3.w, l171\n"
+" cmov_logical r3.___w, r5.y, r5.w, r3.w\n"
+" iadd r4.__z_, r5.x, r4.z_neg(xyzw)\n"
+" ult r5.___w, r5.x, r4.z\n"
+" iadd r5.___w, r5.z, r5.w\n"
+" iadd r4.___w, r5.w, r4.w_neg(xyzw)\n"
+" cmov_logical r4.__zw, r5.y, r4.zzzw, r5.xxxz\n"
+" iadd r4.__z_, r4.y, r4.z_neg(xyzw)\n"
+" ult r5.x___, r4.y, r4.z\n"
+" iadd r5.x___, r4.x, r5.x\n"
+" iadd r4.x___, r5.x, r4.w_neg(xyzw)\n"
+" mov r4.xy__, r4.xzxx\n"
+"else\n"
+" \n"
+" dcl_literal l172, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r3.___w, l172\n"
+"endif\n"
+"ult r4.__z_, r4.x, r0.y\n"
+"ieq r4.___w, r4.x, r0.y\n"
+"ult r5.x___, r4.y, r0.z\n"
+"and r5.x___, r4.w, r5.x\n"
+"ior r4.__z_, r4.z, r5.x\n"
+"dcl_literal l173, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l174, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r4.__z_, r4.z, l173, l174\n"
+"ieq r5.x___, r4.y, r0.z\n"
+"and r4.___w, r4.w, r5.x\n"
+"dcl_literal l175, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.__z_, r4.w, l175, r4.z\n"
+"dcl_literal l176, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r4.___w, r4.x, l176\n"
+"dcl_literal l177, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r5.x___, r4.x, l177\n"
+"dcl_literal l178, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r5._y__, r4.y, l178\n"
+"ior r4.___w, r4.w, r5.y\n"
+"dcl_literal l179, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r4.___w, r4.w, l179\n"
+"dcl_literal l180, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r5._y__, r4.y, l180\n"
+"dcl_literal l181, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r5.x, l181\n"
+"dcl_literal l182, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l182\n"
+"dcl_literal l183, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l183\n"
+"dcl_literal l184, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l184, r5.z_neg(xyzw)\n"
+"dcl_literal l185, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l185\n"
+"dcl_literal l186, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l186\n"
+"dcl_literal l187, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.__z_, r5.z, l187\n"
+"dcl_literal l188, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r5.x___, r5.x, r5.z, l188\n"
+"dcl_literal l189, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r4.w, l189\n"
+"dcl_literal l190, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l190\n"
+"dcl_literal l191, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l191\n"
+"dcl_literal l192, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l192, r5.z_neg(xyzw)\n"
+"dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l193\n"
+"dcl_literal l194, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l194\n"
+"dcl_literal l195, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.__z_, r5.z, l195\n"
+"dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4.___w, r4.w, r5.z, l196\n"
+"dcl_literal l197, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r5.y, l197\n"
+"dcl_literal l198, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l198\n"
+"dcl_literal l199, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l199\n"
+"dcl_literal l200, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l200, r5.z_neg(xyzw)\n"
+"dcl_literal l201, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l201\n"
+"dcl_literal l202, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l202\n"
+"dcl_literal l203, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r5.__z_, r5.z, l203\n"
+"dcl_literal l204, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r5._y__, r5.y, r5.z, l204\n"
+"dcl_literal l205, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r5.__z_, r5.x, l205\n"
+"iadd r4.___w, r5.x, r4.w\n"
+"cmov_logical r5.x___, r5.z, r4.w, r5.x\n"
+"dcl_literal l206, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r5.__z_, r4.w, l206\n"
+"iadd r4.___w, r4.w, r5.y\n"
+"cmov_logical r4.___w, r5.z, r4.w, r5.x\n"
+"ilt r5.x___, r4.w, r2.w\n"
+"ieq r5._y__, r2.w, r4.w\n"
+"dcl_literal l207, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r4.__z_, r4.z, l207\n"
+"and r4.__z_, r5.y, r4.z\n"
+"ior r4.__z_, r5.x, r4.z\n"
+"and r1.__z_, r1.z, r4.z\n"
+"iadd r4.__z_, r2.w, r4.w_neg(xyzw)\n"
+"dcl_literal l208, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.__z_, r1.z, r4.z, l208\n"
+"ilt r4.__z_, r3.x, r1.z\n"
+"iadd r5.x___, r1.z, r3.x_neg(xyzw)\n"
+"and r4.__z_, r4.z, r5.x\n"
+"dcl_literal l209, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r5.x___, r1.z, l209\n"
+"dcl_literal l210, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.__z_, r5.x, l210, r4.z\n"
+"dcl_literal l211, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r1.z, l211\n"
+"if_logicalnz r1.z\n"
+" \n"
+" dcl_literal l212, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r5.x___, r4.z, l212\n"
+" \n"
+" dcl_literal l213, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r5._y__, r4.z, l213\n"
+" cmov_logical r5._y__, r5.x, r5.y, r4.z\n"
+" \n"
+" dcl_literal l214, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.__z_, r4.z, l214\n"
+" \n"
+" dcl_literal l215, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5.___w, l215, r5.y_neg(xyzw)\n"
+" ushr r5.___w, r0.z, r5.w\n"
+" ishl r5._y__, r0.z, r5.y\n"
+" ishl r5.__z_, r0.y, r5.z\n"
+" ior r5.__z_, r5.w, r5.z\n"
+" cmov_logical r5.__z_, r5.x, r0.z, r5.z\n"
+" \n"
+" dcl_literal l216, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r5.x___, r5.x, l216, r5.y\n"
+" cmov_logical r5.xy__, r4.z, r5.xzxx, r0.zyzz\n"
+" \n"
+" dcl_literal l217, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r5.__z_, r4.w, l217\n"
+" \n"
+" dcl_literal l218, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r5.___w, r4.w, l218\n"
+" cmov_logical r5.___w, r5.z, r5.w, r4.w\n"
+" \n"
+" dcl_literal l219, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r6.x___, r4.w, l219\n"
+" \n"
+" dcl_literal l220, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5.___w, l220, r5.w_neg(xyzw)\n"
+" ushr r6._y__, r4.y, r5.w\n"
+" ishl r6.__z_, r4.x, r6.x\n"
+" ior r6._y__, r6.y, r6.z\n"
+" cmov_logical r6._y__, r5.z, r4.y, r6.y\n"
+" cmov_logical r6._y__, r4.w, r6.y, r4.x\n"
+" ushr r5.___w, r5.x, r5.w\n"
+" ishl r6.x___, r5.y, r6.x\n"
+" ior r5.___w, r5.w, r6.x\n"
+" cmov_logical r5.__z_, r5.z, r5.x, r5.w\n"
+" cmov_logical r4.___w, r4.w, r5.z, r5.y\n"
+" udiv r5.__z_, r6.y, r4.w\n"
+" umul r5.___w, r5.z, r4.w\n"
+" ilt r6.x___, r6.y, r5.w\n"
+" iadd r6.__z_, r5.w, r6.y_neg(xyzw)\n"
+" iadd r6.__z_, r6.z, r4.w\n"
+" \n"
+" dcl_literal l221, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.__z_, r6.z, l221\n"
+" iadd r5.___w, r6.y, r5.w_neg(xyzw)\n"
+" cmov_logical r5.___w, r6.x, r6.z, r5.w\n"
+" udiv r4.___w, r5.w, r4.w\n"
+" iadd r5.___w, r5.z, r4.w_neg(xyzw)\n"
+" iadd r4.___w, r5.z, r4.w\n"
+" cmov_logical r4.___w, r6.x, r5.w, r4.w\n"
+" \n"
+" dcl_literal l222, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.__z_, r5.x, l222\n"
+" \n"
+" dcl_literal l223, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r5.x, l223\n"
+" \n"
+" dcl_literal l224, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6.x___, r4.w, l224\n"
+" \n"
+" dcl_literal l225, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r4.w, l225\n"
+" umul r6.__z_, r5.w, r6.y\n"
+" umul r5.___w, r5.w, r6.x\n"
+" umul r6.___w, r5.z, r6.y\n"
+" umul r5.__z_, r5.z, r6.x\n"
+" \n"
+" dcl_literal l226, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r7.x___, r5.z, l226\n"
+" \n"
+" dcl_literal l227, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r7._y__, r6.w, l227\n"
+" iadd r7.x___, r7.x, r7.y\n"
+" iadd r5.___w, r5.w, r7.x\n"
+" \n"
+" dcl_literal l228, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r7.x___, r5.w, l228\n"
+" iadd r6.__z_, r6.z, r7.x\n"
+" \n"
+" dcl_literal l229, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.___w, r6.w, l229\n"
+" iadd r6.__z_, r6.z, r6.w\n"
+" \n"
+" dcl_literal l230, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5.___w, r5.w, l230\n"
+" \n"
+" dcl_literal l231, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.__z_, r5.z, l231\n"
+" ior r5.__z_, r5.w, r5.z\n"
+" \n"
+" dcl_literal l232, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.___w, r5.y, l232\n"
+" \n"
+" dcl_literal l233, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.___w, r5.y, l233\n"
+" umul r6.___w, r6.w, r6.x\n"
+" umul r6._y__, r5.w, r6.y\n"
+" umul r5.___w, r5.w, r6.x\n"
+" \n"
+" dcl_literal l234, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x___, r5.w, l234\n"
+" \n"
+" dcl_literal l235, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6._y__, r6.y, l235\n"
+" iadd r6.x___, r6.x, r6.y\n"
+" iadd r6.x___, r6.w, r6.x\n"
+" \n"
+" dcl_literal l236, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r6.x___, r6.x, l236\n"
+" \n"
+" dcl_literal l237, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.___w, r5.w, l237\n"
+" ior r5.___w, r6.x, r5.w\n"
+" iadd r6.x___, r5.w, r6.z\n"
+" ult r5.___w, r6.x, r5.w\n"
+" ult r6._y__, r4.x, r6.x\n"
+" ieq r6.__z_, r4.x, r6.x\n"
+" ult r6.___w, r4.y, r5.z\n"
+" and r6.___w, r6.z, r6.w\n"
+" ior r6._y__, r6.y, r6.w\n"
+" \n"
+" dcl_literal l238, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l239, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r6._y__, r6.y, l238, l239\n"
+" ieq r6.___w, r4.y, r5.z\n"
+" and r6.__z_, r6.z, r6.w\n"
+" \n"
+" dcl_literal l240, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r6._y__, r6.z, l240, r6.y\n"
+" \n"
+" dcl_literal l241, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r6._y__, r6.y, l241\n"
+" ior r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l242, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6._y__, r4.w, l242\n"
+" cmov_logical r4.___w, r5.w, r6.y, r4.w\n"
+" iadd r5.x___, r5.z, r5.x_neg(xyzw)\n"
+" ult r6._y__, r5.z, r5.x\n"
+" iadd r6._y__, r6.x, r6.y\n"
+" iadd r5._y__, r6.y, r5.y_neg(xyzw)\n"
+" cmov_logical r5._y__, r5.w, r5.y, r6.x\n"
+" cmov_logical r5.x___, r5.w, r5.x, r5.z\n"
+" iadd r5.x___, r4.y, r5.x_neg(xyzw)\n"
+" ult r5.__z_, r4.y, r5.x\n"
+" iadd r5.__z_, r4.x, r5.z\n"
+" iadd r4.x___, r5.z, r5.y_neg(xyzw)\n"
+" mov r4._y__, r5.x\n"
+"else\n"
+" \n"
+" dcl_literal l243, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4.___w, l243\n"
+"endif\n"
+"ult r5.x___, r4.x, r0.y\n"
+"ieq r5._y__, r4.x, r0.y\n"
+"ult r5.__z_, r4.y, r0.z\n"
+"and r5.__z_, r5.y, r5.z\n"
+"ior r5.x___, r5.x, r5.z\n"
+"dcl_literal l244, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l245, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r5.x___, r5.x, l244, l245\n"
+"ieq r5.__z_, r4.y, r0.z\n"
+"and r5._y__, r5.y, r5.z\n"
+"dcl_literal l246, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r5.x___, r5.y, l246, r5.x\n"
+"dcl_literal l247, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r5._y__, r4.x, l247\n"
+"dcl_literal l248, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r5.__z_, r4.x, l248\n"
+"dcl_literal l249, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r5.___w, r4.y, l249\n"
+"ior r5._y__, r5.y, r5.w\n"
+"dcl_literal l250, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r5._y__, r5.y, l250\n"
+"dcl_literal l251, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r5.___w, r4.y, l251\n"
+"dcl_literal l252, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r6.x___, r5.z, l252\n"
+"dcl_literal l253, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r6.x___, r6.x, l253\n"
+"dcl_literal l254, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l254\n"
+"dcl_literal l255, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r6.x___, l255, r6.x_neg(xyzw)\n"
+"dcl_literal l256, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l256\n"
+"dcl_literal l257, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r6.x___, r6.x, l257\n"
+"dcl_literal l258, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r6.x___, r6.x, l258\n"
+"dcl_literal l259, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r5.__z_, r5.z, r6.x, l259\n"
+"dcl_literal l260, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r6.x___, r5.y, l260\n"
+"dcl_literal l261, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r6.x___, r6.x, l261\n"
+"dcl_literal l262, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l262\n"
+"dcl_literal l263, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r6.x___, l263, r6.x_neg(xyzw)\n"
+"dcl_literal l264, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l264\n"
+"dcl_literal l265, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r6.x___, r6.x, l265\n"
+"dcl_literal l266, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r6.x___, r6.x, l266\n"
+"dcl_literal l267, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r5._y__, r5.y, r6.x, l267\n"
+"dcl_literal l268, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r6.x___, r5.w, l268\n"
+"dcl_literal l269, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r6.x___, r6.x, l269\n"
+"dcl_literal l270, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l270\n"
+"dcl_literal l271, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r6.x___, l271, r6.x_neg(xyzw)\n"
+"dcl_literal l272, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r6.x___, r6.x, l272\n"
+"dcl_literal l273, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r6.x___, r6.x, l273\n"
+"dcl_literal l274, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r6.x___, r6.x, l274\n"
+"dcl_literal l275, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r5.___w, r5.w, r6.x, l275\n"
+"dcl_literal l276, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r6.x___, r5.z, l276\n"
+"iadd r5._y__, r5.z, r5.y\n"
+"cmov_logical r5.__z_, r6.x, r5.y, r5.z\n"
+"dcl_literal l277, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r6.x___, r5.y, l277\n"
+"iadd r5._y__, r5.y, r5.w\n"
+"cmov_logical r5._y__, r6.x, r5.y, r5.z\n"
+"ilt r5.__z_, r5.y, r2.w\n"
+"ieq r5.___w, r2.w, r5.y\n"
+"dcl_literal l278, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r5.x___, r5.x, l278\n"
+"and r5.x___, r5.w, r5.x\n"
+"ior r5.x___, r5.z, r5.x\n"
+"and r1.__z_, r1.z, r5.x\n"
+"iadd r2.___w, r2.w, r5.y_neg(xyzw)\n"
+"dcl_literal l279, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.__z_, r1.z, r2.w, l279\n"
+"ilt r2.___w, r3.x, r1.z\n"
+"iadd r3.x___, r1.z, r3.x_neg(xyzw)\n"
+"and r2.___w, r2.w, r3.x\n"
+"dcl_literal l280, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.x___, r1.z, l280\n"
+"dcl_literal l281, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r3.x, l281, r2.w\n"
+"dcl_literal l282, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.__z_, r1.z, l282\n"
+"if_logicalnz r1.z\n"
+" \n"
+" dcl_literal l283, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r1.__z_, r2.w, l283\n"
+" \n"
+" dcl_literal l284, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.x___, r2.w, l284\n"
+" cmov_logical r3.x___, r1.z, r3.x, r2.w\n"
+" \n"
+" dcl_literal l285, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.x___, r2.w, l285\n"
+" \n"
+" dcl_literal l286, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5.__z_, l286, r3.x_neg(xyzw)\n"
+" ushr r5.__z_, r0.z, r5.z\n"
+" ishl r3.x___, r0.z, r3.x\n"
+" ishl r5.x___, r0.y, r5.x\n"
+" ior r5.x___, r5.z, r5.x\n"
+" cmov_logical r5.x___, r1.z, r0.z, r5.x\n"
+" \n"
+" dcl_literal l287, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.__z_, r1.z, l287, r3.x\n"
+" cmov_logical r0._y__, r2.w, r5.x, r0.y\n"
+" cmov_logical r1.__z_, r2.w, r1.z, r0.z\n"
+" \n"
+" dcl_literal l288, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.x___, r5.y, l288\n"
+" \n"
+" dcl_literal l289, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r5.x___, r5.y, l289\n"
+" cmov_logical r5.x___, r3.x, r5.x, r5.y\n"
+" \n"
+" dcl_literal l290, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.__z_, r5.y, l290\n"
+" \n"
+" dcl_literal l291, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5.x___, l291, r5.x_neg(xyzw)\n"
+" ushr r5.___w, r4.y, r5.x\n"
+" ishl r6.x___, r4.x, r5.z\n"
+" ior r5.___w, r5.w, r6.x\n"
+" cmov_logical r5.___w, r3.x, r4.y, r5.w\n"
+" cmov_logical r5.___w, r5.y, r5.w, r4.x\n"
+" ushr r5.x___, r1.z, r5.x\n"
+" ishl r5.__z_, r0.y, r5.z\n"
+" ior r5.x___, r5.x, r5.z\n"
+" cmov_logical r3.x___, r3.x, r1.z, r5.x\n"
+" cmov_logical r3.x___, r5.y, r3.x, r0.y\n"
+" udiv r5.x___, r5.w, r3.x\n"
+" umul r5._y__, r5.x, r3.x\n"
+" ilt r5.__z_, r5.w, r5.y\n"
+" iadd r6.x___, r5.y, r5.w_neg(xyzw)\n"
+" iadd r6.x___, r6.x, r3.x\n"
+" \n"
+" dcl_literal l292, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.x___, r6.x, l292\n"
+" iadd r5._y__, r5.w, r5.y_neg(xyzw)\n"
+" cmov_logical r5._y__, r5.z, r6.x, r5.y\n"
+" udiv r3.x___, r5.y, r3.x\n"
+" iadd r5._y__, r5.x, r3.x_neg(xyzw)\n"
+" iadd r3.x___, r5.x, r3.x\n"
+" cmov_logical r3.x___, r5.z, r5.y, r3.x\n"
+" \n"
+" dcl_literal l293, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r1.z, l293\n"
+" \n"
+" dcl_literal l294, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.__z_, r1.z, l294\n"
+" \n"
+" dcl_literal l295, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r3.x, l295\n"
+" \n"
+" dcl_literal l296, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r3.x, l296\n"
+" umul r5.___w, r1.z, r5.z\n"
+" umul r1.__z_, r1.z, r5.y\n"
+" umul r6.x___, r5.x, r5.z\n"
+" umul r5.x___, r5.x, r5.y\n"
+" \n"
+" dcl_literal l297, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r5.x, l297\n"
+" \n"
+" dcl_literal l298, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6.__z_, r6.x, l298\n"
+" iadd r6._y__, r6.y, r6.z\n"
+" iadd r1.__z_, r1.z, r6.y\n"
+" \n"
+" dcl_literal l299, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r1.z, l299\n"
+" iadd r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l300, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x___, r6.x, l300\n"
+" iadd r5.___w, r5.w, r6.x\n"
+" \n"
+" dcl_literal l301, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.__z_, r1.z, l301\n"
+" \n"
+" dcl_literal l302, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r5.x, l302\n"
+" ior r1.__z_, r1.z, r5.x\n"
+" \n"
+" dcl_literal l303, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r0.y, l303\n"
+" \n"
+" dcl_literal l304, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r0._y__, r0.y, l304\n"
+" umul r0._y__, r0.y, r5.y\n"
+" umul r5.x_z_, r5.x, r5.yyzy\n"
+" \n"
+" dcl_literal l305, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r5.x, l305\n"
+" \n"
+" dcl_literal l306, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.__z_, r5.z, l306\n"
+" iadd r5._y__, r5.y, r5.z\n"
+" iadd r0._y__, r0.y, r5.y\n"
+" \n"
+" dcl_literal l307, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r0._y__, r0.y, l307\n"
+" \n"
+" dcl_literal l308, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r5.x, l308\n"
+" ior r0._y__, r0.y, r5.x\n"
+" iadd r5.x___, r0.y, r5.w\n"
+" ult r0._y__, r5.x, r0.y\n"
+" ult r5._y__, r4.x, r5.x\n"
+" ieq r4.x___, r4.x, r5.x\n"
+" ult r5.x___, r4.y, r1.z\n"
+" and r5.x___, r4.x, r5.x\n"
+" ior r5.x___, r5.y, r5.x\n"
+" \n"
+" dcl_literal l309, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l310, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r5.x___, r5.x, l309, l310\n"
+" ieq r1.__z_, r4.y, r1.z\n"
+" and r1.__z_, r4.x, r1.z\n"
+" \n"
+" dcl_literal l311, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.__z_, r1.z, l311, r5.x\n"
+" \n"
+" dcl_literal l312, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.__z_, r1.z, l312\n"
+" ior r0._y__, r0.y, r1.z\n"
+" \n"
+" dcl_literal l313, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r3.x, l313\n"
+" cmov_logical r0._y__, r0.y, r1.z, r3.x\n"
+"else\n"
+" \n"
+" dcl_literal l314, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0._y__, l314\n"
+"endif\n"
+"dcl_literal l315, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r3.y, l315\n"
+"dcl_literal l316, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.x___, r3.y, l316\n"
+"cmov_logical r3.x___, r1.z, r3.x, r3.y\n"
+"dcl_literal l317, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r4.x___, l317, r3.x_neg(xyzw)\n"
+"ushr r4.x___, r1.w, r4.x\n"
+"ishl r3.x___, r1.w, r3.x\n"
+"cmov_logical r4.x___, r1.z, r1.w, r4.x\n"
+"dcl_literal l318, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.z, l318, r3.x\n"
+"dcl_literal l319, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r3.y, r4.x, l319\n"
+"cmov_logical r1.__z_, r3.y, r1.z, r1.w\n"
+"dcl_literal l320, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.___w, r3.z, l320\n"
+"dcl_literal l321, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3._y__, r3.z, l321\n"
+"cmov_logical r3._y__, r1.w, r3.y, r3.z\n"
+"dcl_literal l322, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r4.x___, l322, r3.y_neg(xyzw)\n"
+"ushr r4.x___, r3.w, r4.x\n"
+"ishl r3._y__, r3.w, r3.y\n"
+"cmov_logical r4.x___, r1.w, r3.w, r4.x\n"
+"dcl_literal l323, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r1.w, l323, r3.y\n"
+"dcl_literal l324, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3._y__, r3.z, r4.x, l324\n"
+"cmov_logical r1.___w, r3.z, r1.w, r3.w\n"
+"iadd r1.___w, r1.z, r1.w\n"
+"ult r1.__z_, r1.w, r1.z\n"
+"dcl_literal l325, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.__z_, r1.z, l325\n"
+"iadd r1.__z_, r3.x, r1.z\n"
+"iadd r1.__z_, r3.y, r1.z\n"
+"dcl_literal l326, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r3.x___, r4.z, l326\n"
+"dcl_literal l327, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3._y__, r4.z, l327\n"
+"cmov_logical r3._y__, r3.x, r3.y, r4.z\n"
+"dcl_literal l328, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.__z_, l328, r3.y_neg(xyzw)\n"
+"ushr r3.__z_, r4.w, r3.z\n"
+"ishl r3._y__, r4.w, r3.y\n"
+"cmov_logical r3.__z_, r3.x, r4.w, r3.z\n"
+"dcl_literal l329, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r3.x, l329, r3.y\n"
+"dcl_literal l330, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3._y__, r4.z, r3.z, l330\n"
+"cmov_logical r3.x___, r4.z, r3.x, r4.w\n"
+"iadd r3.x___, r1.w, r3.x\n"
+"ult r1.___w, r3.x, r1.w\n"
+"dcl_literal l331, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.___w, r1.w, l331\n"
+"iadd r1.__z_, r1.z, r1.w\n"
+"iadd r1.__z_, r3.y, r1.z\n"
+"dcl_literal l332, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.___w, r2.w, l332\n"
+"dcl_literal l333, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3._y__, r2.w, l333\n"
+"cmov_logical r3._y__, r1.w, r3.y, r2.w\n"
+"dcl_literal l334, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.__z_, l334, r3.y_neg(xyzw)\n"
+"ushr r3.__z_, r0.y, r3.z\n"
+"ishl r3._y__, r0.y, r3.y\n"
+"cmov_logical r3.__z_, r1.w, r0.y, r3.z\n"
+"dcl_literal l335, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r1.w, l335, r3.y\n"
+"dcl_literal l336, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3._y__, r2.w, r3.z, l336\n"
+"cmov_logical r0._y__, r2.w, r1.w, r0.y\n"
+"iadd r0._y__, r3.x, r0.y\n"
+"ult r1.___w, r0.y, r3.x\n"
+"dcl_literal l337, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.___w, r1.w, l337\n"
+"iadd r1.__z_, r1.z, r1.w\n"
+"iadd r1.__z_, r3.y, r1.z\n"
+"dcl_literal l338, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.___w, r2.x, l338\n"
+"dcl_literal l339, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.w, l339, r1.z\n"
+"dcl_literal l340, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r1.w, l340, r0.y\n"
+"dcl_literal l341, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ieq r0.__z_, r0.z, l341\n"
+"and r0.__z_, r2.y, r0.z\n"
+"cmov_logical r0.___w, r0.z, r0.w, r1.z\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n"
+"dcl_literal l342, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r2.z, l342, r0.xwxx\n"
+"inegate r0.__z_, r0.x\n"
+"dcl_literal l343, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r0.___w, l343, r0.z\n"
+"iadd r0.___w, r0.w, r0.y_neg(xyzw)\n"
+"ixor r1.x___, r1.y, r1.x\n"
+"cmov_logical r0.xy__, r1.x, r0.zwzz, r0.xyxx\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__sdiv_i8", /* AMDIL macro: signed 8-bit divide of in0.x by in1.x; the 8-bit quotient is returned in out0.x, and a zero divisor yields 0 */
+"mdef(373)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.x = dividend, r0.y = divisor */
+"dcl_literal l1, 0x000000FF, 0x000000FF, 0x00000080, 0x00000000\n"
+"and r1.xyz_, r0.xyxx, l1\n" /* r1.x = dividend & 0xFF, r1.y = divisor & 0xFF, r1.z = dividend & 0x80 (sign bit of the byte) */
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l2\n" /* r0.w = divisor byte, replaced by 1 when it is zero so udiv never receives a zero divisor */
+"dcl_literal l3, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00\n"
+"ior r1.___w, r1.x, l3\n" /* candidate sign-extended dividend (upper 24 bits set) */
+"cmov_logical r1.x___, r1.z, r1.w, r1.x\n" /* keep the extended form only when the byte's sign bit was set */
+"dcl_literal l4, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+"and r1.__z_, r0.w, l4\n" /* sign bit of the divisor byte */
+"dcl_literal l5, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00\n"
+"ior r1.___w, r0.w, l5\n" /* candidate sign-extended divisor */
+"cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* r0.w = sign-extended divisor */
+"ixor r1.__z_, r1.x, r0.w\n" /* bit 31 of the xor is set when the operand signs differ */
+"imax r1.x___, r1.x, r1.x_neg(xyzw)\n" /* |dividend| computed as max(v, -v) */
+"imax r0.___w, r0.w, r0.w_neg(xyzw)\n" /* |divisor| */
+"udiv r0.___w, r1.x, r0.w\n" /* unsigned quotient of the magnitudes */
+"inegate r1.x___, r0.w\n" /* negated quotient, used when the signs differ */
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r1.__z_, r1.z, l6\n" /* isolate the sign-difference bit */
+"cmov_logical r0.___w, r1.z, r1.x, r0.w\n" /* pick -q when signs differ, q otherwise */
+"dcl_literal l7, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.___w, r0.w, l7\n" /* truncate the quotient back to 8 bits */
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r1.y, r0.w, l8\n" /* original divisor byte was zero -> result forced to 0 */
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shadd_i16", /* AMDIL macro: halving add, out0.x = (in0.x + in1.x) >> 1 using an arithmetic shift */
+"mdef(374)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.y = second operand */
+"iadd r0.___w, r0.x, r0.y\n" /* 32-bit sum; NOTE(review): assumes the 16-bit operands arrive widened so this add cannot overflow - confirm with callers */
+"\n"
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r0.x___, r0.w, l2\n" /* shift right by one halves the sum */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shadd_i32", /* AMDIL macro: halving add of in0.x and in1.x computed as (x >> 1) + (y >> 1) + (x & y & 1), so the full-width sum is never formed */
+"mdef(375)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.x = x, r0.y = y */
+"and r0.___w, r0.y, r0.x\n"
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.___w, r0.w, l4\n" /* r0.w = 1 only when both operands have bit 0 set (the carry lost by halving each operand separately) */
+"\n"
+"dcl_literal l5, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r1.xy__, r0.xyxx, l5\n" /* halve both operands with an arithmetic shift */
+"iadd r1.x___, r1.x, r1.y\n" /* sum of the two halves */
+"iadd r0.x___, r1.x, r0.w\n" /* add back the shared low-bit carry */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shadd_i8", /* AMDIL macro: halving add, out0.x = (in0.x + in1.x) >> 1 using an arithmetic shift */
+"mdef(376)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.y = second operand */
+"iadd r0.___w, r0.x, r0.y\n" /* 32-bit sum; NOTE(review): assumes the 8-bit operands arrive widened so this add cannot overflow - confirm with callers */
+"\n"
+"dcl_literal l0, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r0.___w, r0.w, l0\n" /* shift right by one halves the sum */
+"mov r0.x___, r0.w\n" /* move the result into the x lane */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shl_i64_i64", /* AMDIL macro: 64-bit left shift built from 32-bit ops; value in in0.xy (carry flows from x into y, so x is the low dword), shift amount in in1.x */
+"mdef(377)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x, r1.x, l0.x\n" /* r2.x = shift amount modulo 32 */
+"iand r3.x, r1.x, l0.y\n" /* r3.x is non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* r4.x = 32 - (shift modulo 32) */
+"ishl r5.x, r0.x, r2.x\n" /* low dword shifted left */
+"ishl r6.x, r0.y, r2.x\n" /* high dword shifted left */
+"ushr r7.x, r0.x, r4.x\n" /* bits of the low dword that spill into the high dword */
+"cmov_logical r8.x, r2.x, r7.x, l0.z\n" /* no spill when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x, r6.x, r8.x\n" /* high dword for amounts below 32 */
+"cmov_logical r0.x___, r3.x, l0.z, r5.x\n" /* low result: 0 when crossing a dword, shifted low dword otherwise */
+"cmov_logical r0._y__, r3.x, r5.x, r9.x\n" /* high result: shifted low dword when crossing a dword, combined value otherwise */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shl_i64_v2i64", /* AMDIL macro: two 64-bit left shifts at once; values in in0.xy and in0.zw, per-element shift amounts read from in1.x and in1.z */
+"mdef(378)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x_z, r1.x0z, l0.x\n" /* shift amounts modulo 32; the 0 swizzle component presumably supplies a constant for the unwritten lane - confirm against the IL spec */
+"iand r3.x_z, r1.x0z, l0.y\n" /* non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - (shift modulo 32) */
+"ishl r5.x_z, r0.x0z, r2.x0z\n" /* low dwords (x, z) shifted left */
+"ishl r6.x_z, r0.y0w, r2.x0z\n" /* high dwords (y, w) shifted left */
+"ushr r7.x_z, r0.x0z, r4.x0z\n" /* bits of each low dword that spill into its high dword */
+"cmov_logical r8.x_z, r2.x0z, r7.x0z, l0.z\n" /* no spill when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x_z, r6.x0z, r8.x0z\n" /* high dwords for amounts below 32 */
+"cmov_logical r0.x_z_, r3.x0z, l0.z, r5.x0z\n" /* low results: 0 when crossing a dword, shifted low dword otherwise */
+"cmov_logical r0._y_w, r3.0x0z, r5.0x0z, r9.0x0z\n" /* high results: the 0x0z swizzle moves the x and z lane values into the y and w slots */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shl_i64", /* AMDIL macro: 64-bit left shift built from 32-bit ops; value in in0.xy (carry flows from x into y, so x is the low dword), shift amount in in1.x */
+"mdef(379)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x, r1.x, l0.x\n" /* r2.x = shift amount modulo 32 */
+"iand r3.x, r1.x, l0.y\n" /* r3.x is non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* r4.x = 32 - (shift modulo 32) */
+"ishl r5.x, r0.x, r2.x\n" /* low dword shifted left */
+"ishl r6.x, r0.y, r2.x\n" /* high dword shifted left */
+"ushr r7.x, r0.x, r4.x\n" /* bits of the low dword that spill into the high dword */
+"cmov_logical r8.x, r2.x, r7.x, l0.z\n" /* no spill when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x, r6.x, r8.x\n" /* high dword for amounts below 32 */
+"cmov_logical r0.x___, r3.x, l0.z, r5.x\n" /* low result: 0 when crossing a dword, shifted low dword otherwise */
+"cmov_logical r0._y__, r3.x, r5.x, r9.x\n" /* high result: shifted low dword when crossing a dword, combined value otherwise */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shl_v2i64", /* AMDIL macro: two 64-bit left shifts at once; values in in0.xy and in0.zw, per-element shift amounts read from in1.x and in1.z */
+"mdef(380)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x_z, r1.x0z, l0.x\n" /* shift amounts modulo 32; the 0 swizzle component presumably supplies a constant for the unwritten lane - confirm against the IL spec */
+"iand r3.x_z, r1.x0z, l0.y\n" /* non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - (shift modulo 32) */
+"ishl r5.x_z, r0.x0z, r2.x0z\n" /* low dwords (x, z) shifted left */
+"ishl r6.x_z, r0.y0w, r2.x0z\n" /* high dwords (y, w) shifted left */
+"ushr r7.x_z, r0.x0z, r4.x0z\n" /* bits of each low dword that spill into its high dword */
+"cmov_logical r8.x_z, r2.x0z, r7.x0z, l0.z\n" /* no spill when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x_z, r6.x0z, r8.x0z\n" /* high dwords for amounts below 32 */
+"cmov_logical r0.x_z_, r3.x0z, l0.z, r5.x0z\n" /* low results: 0 when crossing a dword, shifted low dword otherwise */
+"cmov_logical r0._y_w, r3.0x0z, r5.0x0z, r9.0x0z\n" /* high results: the 0x0z swizzle moves the x and z lane values into the y and w slots */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shr_i64_i64", /* AMDIL macro: 64-bit right shift built from 32-bit ops; value in in0.xy (bits flow from y down into x, so y is the high dword), shift amount in in1.x. NOTE(review): vacated bits are zero-filled (ushr); confirm that a logical rather than arithmetic shift is intended */
+"mdef(381)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x, r1.x, l0.x\n" /* r2.x = shift amount modulo 32 */
+"iand r3.x, r1.x, l0.y\n" /* r3.x is non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* r4.x = 32 - (shift modulo 32) */
+"ushr r5.x, r0.y, r2.x\n" /* high dword shifted right, zero-filled */
+"ushr r6.x, r0.x, r2.x\n" /* low dword shifted right, zero-filled */
+"ishl r7.x, r0.y, r4.x\n" /* bits of the high dword that drop into the low dword */
+"cmov_logical r8.x, r2.x, r7.x, l0.z\n" /* nothing drops down when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x, r6.x, r8.x\n" /* low dword for amounts below 32 */
+"cmov_logical r0.x___, r3.x, r5.x, r9.x\n" /* low result: shifted high dword when crossing a dword, combined value otherwise */
+"cmov_logical r0._y__, r3.x, l0.z, r5.x\n" /* high result: 0 when crossing a dword, shifted high dword otherwise */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shr_i64_v2i64", /* AMDIL macro: two 64-bit right shifts at once; values in in0.xy and in0.zw, per-element shift amounts read from in1.x and in1.z. NOTE(review): vacated bits are zero-filled (ushr); confirm that a logical rather than arithmetic shift is intended */
+"mdef(382)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x_z, r1.x0z, l0.x\n" /* shift amounts modulo 32; the 0 swizzle component presumably supplies a constant for the unwritten lane - confirm against the IL spec */
+"iand r3.x_z, r1.x0z, l0.y\n" /* non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - (shift modulo 32) */
+"ushr r5.x_z, r0.y0w, r2.x0z\n" /* high dwords (y, w) shifted right, zero-filled */
+"ushr r6.x_z, r0.x0z, r2.x0z\n" /* low dwords (x, z) shifted right, zero-filled */
+"ishl r7.x_z, r0.y0w, r4.x0z\n" /* bits of each high dword that drop into its low dword */
+"cmov_logical r8.x_z, r2.x0z, r7.x0z, l0.z\n" /* nothing drops down when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x_z, r6.x0z, r8.x0z\n" /* low dwords for amounts below 32 */
+"cmov_logical r0.x_z_, r3.x0z, r5.x0z, r9.x0z\n" /* low results: shifted high dword when crossing a dword, combined value otherwise */
+"cmov_logical r0._y_w, r3.0x0z, l0.z, r5.0x0z\n" /* high results: 0 when crossing a dword; the 0x0z swizzle moves the x and z lane values into the y and w slots */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shr_i64", /* AMDIL macro: 64-bit right shift built from 32-bit ops; value in in0.xy (bits flow from y down into x, so y is the high dword), shift amount in in1.x. NOTE(review): vacated bits are zero-filled (ushr); confirm that a logical rather than arithmetic shift is intended */
+"mdef(383)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x, r1.x, l0.x\n" /* r2.x = shift amount modulo 32 */
+"iand r3.x, r1.x, l0.y\n" /* r3.x is non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* r4.x = 32 - (shift modulo 32) */
+"ushr r5.x, r0.y, r2.x\n" /* high dword shifted right, zero-filled */
+"ushr r6.x, r0.x, r2.x\n" /* low dword shifted right, zero-filled */
+"ishl r7.x, r0.y, r4.x\n" /* bits of the high dword that drop into the low dword */
+"cmov_logical r8.x, r2.x, r7.x, l0.z\n" /* nothing drops down when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x, r6.x, r8.x\n" /* low dword for amounts below 32 */
+"cmov_logical r0.x___, r3.x, r5.x, r9.x\n" /* low result: shifted high dword when crossing a dword, combined value otherwise */
+"cmov_logical r0._y__, r3.x, l0.z, r5.x\n" /* high result: 0 when crossing a dword, shifted high dword otherwise */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__shr_v2i64", /* AMDIL macro: two 64-bit right shifts at once; values in in0.xy and in0.zw, per-element shift amounts read from in1.x and in1.z. NOTE(review): vacated bits are zero-filled (ushr); confirm that a logical rather than arithmetic shift is intended */
+"mdef(384)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n" /* l0.x = 31 (low-shift mask), l0.y = 32, l0.z = 0 */
+"iand r2.x_z, r1.x0z, l0.x\n" /* shift amounts modulo 32; the 0 swizzle component presumably supplies a constant for the unwritten lane - confirm against the IL spec */
+"iand r3.x_z, r1.x0z, l0.y\n" /* non-zero when bit 5 of the amount is set, i.e. a whole dword must be crossed */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - (shift modulo 32) */
+"ushr r5.x_z, r0.y0w, r2.x0z\n" /* high dwords (y, w) shifted right, zero-filled */
+"ushr r6.x_z, r0.x0z, r2.x0z\n" /* low dwords (x, z) shifted right, zero-filled */
+"ishl r7.x_z, r0.y0w, r4.x0z\n" /* bits of each high dword that drop into its low dword */
+"cmov_logical r8.x_z, r2.x0z, r7.x0z, l0.z\n" /* nothing drops down when the sub-dword shift is zero (avoids using a shift count of 32) */
+"ior r9.x_z, r6.x0z, r8.x0z\n" /* low dwords for amounts below 32 */
+"cmov_logical r0.x_z_, r3.x0z, r5.x0z, r9.x0z\n" /* low results: shifted high dword when crossing a dword, combined value otherwise */
+"cmov_logical r0._y_w, r3.0x0z, l0.z, r5.0x0z\n" /* high results: 0 when crossing a dword; the 0x0z swizzle moves the x and z lane values into the y and w slots */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* presumably the input and output counts (they match _in(2) and _out(1) in the mdef header) - confirm against the table's struct definition */
+},
+{ "__sign_f32", /* AMDIL macro text: sign of a float. Traced result for in0.x: 1.0 when x > 0, -1.0 when x < 0, x itself when x is +0 or -0, and 0 when x is a NaN. */
+"mdef(385)_out(1)_in(1)\n" /* macro id 385, one output, one input */
+"mov r0, in0\n" /* r0.x = input x */
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0._y__, r0.x, l0\n" /* r0.y = (x < 0.0) */
+"mov r0.__z_, r0.x_abs\n" /* r0.z = |x| */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r0.z, l1\n" /* r0.w = bits of |x| differ from 0, i.e. x is neither +0 nor -0 */
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* 0x7F800000 is the bit pattern of +infinity */
+"ult r0.__z_, l2, r0.z\n" /* r0.z = bits of |x| are above +infinity, i.e. x is a NaN */
+"and r0._y__, r0.y, r0.w\n" /* r0.y = x is negative and non-zero */
+"\n"
+"dcl_literal l3, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* 0xBF800000 = -1.0f */
+"cmov_logical r0._y__, r0.y, l3, r0.x\n" /* r0.y = negative ? -1.0 : x */
+"\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.x___, l4, r0.x\n" /* r1.x = (0.0 < x) */
+"and r0.x___, r0.w, r1.x\n" /* r0.x = x is positive and non-zero */
+"\n"
+"dcl_literal l5, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 0x3F800000 = 1.0f */
+"cmov_logical r0.x___, r0.x, l5, r0.y\n" /* r0.x = positive ? 1.0 : r0.y (which is -1.0 or the original x) */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l6, r0.x\n" /* a NaN input overrides the result with 0 */
+"mov out0, r0\n" /* only r0.x holds the result; r0.y/z/w still carry the intermediates computed above */
+"mend\n"
+,1,1 /* matches _in(1)/_out(1) above; presumably the input and output counts - confirm against the table's struct definition */
+},
+{ "__sincos_2f32f32",
+"mdef(386)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0, r0.yxzw\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__z_, r0.y, l0\n"
+"dcl_literal l1, 0x32000000, 0x7F800000, 0x00000000, 0x00000000\n"
+"ige r1.xy__, r0.z, l1\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, r0.z, l2\n"
+"and r0.__z_, r1.x, r0.z\n"
+"if_logicalnz r0.z\n"
+" mov r0.__z_, r0.y_abs\n"
+" \n"
+" dcl_literal l3, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680\n"
+" lt r0.___w, r0.z, l3\n"
+" if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l4, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n"
+" mul_ieee r0.___w, r0.z, l4\n"
+" round_nearest r0.___w, r0.w\n"
+" \n"
+" dcl_literal l5, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r1.x___, r0.w, l5\n"
+" add r1.__z_, r0.w, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l6, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+" mul_ieee r1.___w, r0.w, l6\n"
+" \n"
+" dcl_literal l7, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n"
+" mul_ieee r2.xyz_, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l8, r2.xyxx_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l9, r3.xyxx\n"
+" \n"
+" dcl_literal l10, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.z, l10, r3.xyxx\n"
+" \n"
+" dcl_literal l11, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.z, l11, r3.xyxx\n"
+" add r0.___w, r0.z, r2.x_neg(xyzw)\n"
+" add r2.___w, r0.z, r0.w_neg(xyzw)\n"
+" add r2.x___, r2.x_neg(xyzw), r2.w\n"
+" add r2.x___, r3.x_neg(xyzw), r2.x\n"
+" add r0.___w, r0.w, r2.x\n"
+" add r2.x___, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r2.x_neg(xyzw)\n"
+" add r0.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r3.y_neg(xyzw), r0.w\n"
+" add r0.___w, r2.x, r0.w\n"
+" \n"
+" dcl_literal l12, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r2.x___, r1.x, l12, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l13, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.x, l13, r2.x\n"
+" \n"
+" dcl_literal l14, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.x___, r1.z, l14, r1.x\n"
+" \n"
+" dcl_literal l15, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.__z_, r1.z, l15, r1.x\n"
+" add r2.x___, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r2.x_neg(xyzw)\n"
+" add r0.___w, r2.z_neg(xyzw), r0.w\n"
+" add r1.x___, r2.x, r0.w\n"
+" frc r0.___w, r1.w\n"
+" \n"
+" dcl_literal l16, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+" mul_ieee r0.___w, r0.w, l16\n"
+" round_nearest r0.___w, r0.w\n"
+" mov r1.__z_, r1.z_neg(xyzw)\n"
+" else\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r1.___w, r0.z, l17\n"
+" \n"
+" dcl_literal l18, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.__z_, r0.z, l18\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r0.__z_, r0.z, l19\n"
+" \n"
+" dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r0.z, l20\n"
+" \n"
+" dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r0.__z_, r0.z, l21\n"
+" \n"
+" dcl_literal l22, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r3, l22, r0.z\n"
+" \n"
+" dcl_literal l23, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r4, l23, r2.x\n"
+" \n"
+" dcl_literal l24, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2._yz_, r4.yywy, l24\n"
+" \n"
+" dcl_literal l25, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r3.ywyy, l25\n"
+" iadd r2._yz_, r2.yyzy, r5.xxyx\n"
+" iadd r2._yz_, r4.xxzx, r2.yyzy\n"
+" \n"
+" dcl_literal l26, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x_z_, r2.yyzy, l26\n"
+" iadd r3.x_z_, r3.xxzx, r4.xxzx\n"
+" \n"
+" dcl_literal l27, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l27\n"
+" \n"
+" dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2._yz_, r2.yyzy, l28\n"
+" \n"
+" dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.ywyy, l29\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" ior r4.xy__, r2.yzyy, r4.xyxx\n"
+" iadd r5.__z_, r3.x, r4.y\n"
+" ult r2._y__, r5.z, r4.y\n"
+" \n"
+" dcl_literal l30, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r3.y, l30\n"
+" cmov_logical r2._y__, r2.y, r2.z, r3.y\n"
+" \n"
+" dcl_literal l31, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r3, l31, r0.z\n"
+" \n"
+" dcl_literal l32, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r6, l32, r2.x\n"
+" \n"
+" dcl_literal l33, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l33\n"
+" \n"
+" dcl_literal l34, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l34\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l35, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l35\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l36, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l36\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l37\n"
+" \n"
+" dcl_literal l38, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l38\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r5._y__, r2.y, r2.z\n"
+" ult r2._y__, r5.y, r2.z\n"
+" \n"
+" dcl_literal l39, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+" iadd r3.__zw, r3.xxxy, l39\n"
+" cmov_logical r2._y__, r2.y, r3.z, r3.x\n"
+" iadd r5.x___, r2.y, r2.w\n"
+" ult r2._y__, r5.x, r2.w\n"
+" cmov_logical r2._y__, r2.y, r3.w, r3.y\n"
+" \n"
+" dcl_literal l40, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r3, l40, r0.z\n"
+" \n"
+" dcl_literal l41, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r6, l41, r2.x\n"
+" \n"
+" dcl_literal l42, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l42\n"
+" \n"
+" dcl_literal l43, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l43\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l44, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l44\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l45, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l45\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l46, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l46\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l47\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r3.___w, r2.y, r2.z\n"
+" ult r2._y__, r3.w, r2.z\n"
+" \n"
+" dcl_literal l48, 0x00000001, 0x00000001, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r3.xyxx, l48\n"
+" cmov_logical r2._y__, r2.y, r6.x, r3.x\n"
+" iadd r3.__z_, r2.y, r2.w\n"
+" ult r2._y__, r3.z, r2.w\n"
+" cmov_logical r2._y__, r2.y, r6.y, r3.y\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x0000A2F9, 0x0000836E\n"
+" umul r2.__zw, l49, r0.z\n"
+" \n"
+" dcl_literal l50, 0x0000A2F9, 0x0000836E, 0x00000000, 0x00000000\n"
+" umul r3.xy__, l50, r2.x\n"
+" \n"
+" dcl_literal l51, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r0.__z_, r3.y, l51\n"
+" \n"
+" dcl_literal l52, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r2.w, l52\n"
+" iadd r0.__z_, r0.z, r2.x\n"
+" iadd r0.__z_, r3.x, r0.z\n"
+" \n"
+" dcl_literal l53, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.x___, r0.z, l53\n"
+" iadd r2.x___, r2.z, r2.x\n"
+" \n"
+" dcl_literal l54, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__z_, r2.w, l54\n"
+" iadd r2.x___, r2.x, r2.z\n"
+" \n"
+" dcl_literal l55, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r0.__z_, r0.z, l55\n"
+" \n"
+" dcl_literal l56, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r3.y, l56\n"
+" ior r0.__z_, r0.z, r2.z\n"
+" iadd r2._y__, r2.y, r0.z\n"
+" ult r0.__z_, r2.y, r0.z\n"
+" \n"
+" dcl_literal l57, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r2.x, l57\n"
+" cmov_logical r0.__z_, r0.z, r2.z, r2.x\n"
+" \n"
+" dcl_literal l58, 0xFFFFFF89, 0x00000000, 0xFFFFFF8A, 0x00000000\n"
+" iadd r2.x_z_, r1.w, l58\n"
+" \n"
+" dcl_literal l59, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r6.x___, r2.x, l59\n"
+" \n"
+" dcl_literal l60, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r1.___w, r6.x, l60\n"
+" iadd r1.___w, r2.x, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l61, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l61, r6.x\n"
+" \n"
+" dcl_literal l62, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r7, r2.x, l62\n"
+" inegate r2.___w, r7.x\n"
+" \n"
+" dcl_literal l63, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r8.x___, r7.y, l63\n"
+" \n"
+" dcl_literal l64, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r8._y__, r7.z, l64\n"
+" \n"
+" dcl_literal l65, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r8.__z_, r7.w, l65\n"
+" inegate r7.xyz_, r8.xyzx\n"
+" \n"
+" dcl_literal l66, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r8, r2.x, l66\n"
+" \n"
+" dcl_literal l67, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r9.x___, r8.x, l67\n"
+" \n"
+" dcl_literal l68, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r9._yzw, r8.yyzw, l68\n"
+" inegate r8, r9\n"
+" and r2.x___, r0.z, r2.w\n"
+" and r3.xy__, r2.y, r7.xyxx\n"
+" ior r2.x___, r2.x, r3.x\n"
+" and r7._yzw, r3.zzzw, r7.yyzz\n"
+" ior r2.x___, r2.x, r7.y\n"
+" ior r2.x___, r7.w, r2.x\n"
+" and r9, r5.xxyy, r8.xyyz\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.z, r2.x\n"
+" and r7._y_w, r5.z, r8.zzzw\n"
+" ior r2.x___, r2.x, r7.y\n"
+" and r2.___w, r4.x, r8.w\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r2.___w, r0.z, r7.x\n"
+" ior r2.___w, r3.y, r2.w\n"
+" ior r2.___w, r7.z, r2.w\n"
+" and r3.x___, r3.w, r8.x\n"
+" ior r2.___w, r2.w, r3.x\n"
+" ior r2.___w, r9.y, r2.w\n"
+" ior r2.___w, r9.w, r2.w\n"
+" ior r2.___w, r7.w, r2.w\n"
+" \n"
+" dcl_literal l69, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r3.x___, l69, r1.w\n"
+" and r3.x___, r2.x, r3.x\n"
+" inegate r3._y__, r1.w\n"
+" \n"
+" dcl_literal l70, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+" iadd r6._y__, l70, r3.y\n"
+" \n"
+" dcl_literal l71, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ult r4._y__, l71, r1.w\n"
+" \n"
+" dcl_literal l72, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.___w, r1.w, l72\n"
+" \n"
+" dcl_literal l73, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r1.___w, l73, r1.w\n"
+" and r1.___w, r2.x, r1.w\n"
+" ior r1.___w, r3.x, r1.w\n"
+" ushr r1.___w, r1.w, r6.y\n"
+" ushr r2.x___, r3.x, r6.y\n"
+" \n"
+" dcl_literal l74, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" and r2.___w, r2.w, l74\n"
+" \n"
+" dcl_literal l75, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.___w, r2.w, l75\n"
+" ior r2.x___, r2.x, r2.w\n"
+" cmov_logical r7.__z_, r4.y, r1.w, r2.x\n"
+" \n"
+" dcl_literal l76, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r8.x___, r6.x, l76\n"
+" \n"
+" dcl_literal l77, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ishl r6.__z_, l77, r3.y\n"
+" \n"
+" dcl_literal l78, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.___w, r6.z, l78\n"
+" \n"
+" dcl_literal l79, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFFF\n"
+" mov r8._yzw, l79\n"
+" cmov_logical r6, r6.y, r6, r8\n"
+" \n"
+" dcl_literal l80, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.___w, r6.y, l80\n"
+" \n"
+" dcl_literal l81, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r1.___w, l81, r1.w\n"
+" \n"
+" dcl_literal l82, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r2.x___, r1.w, l82\n"
+" switch r6.x\n"
+" case 0\n"
+" and r3.x___, r0.z, r1.w\n"
+" \n"
+" dcl_literal l83, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l84, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l83, l84\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l85, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l85\n"
+" \n"
+" dcl_literal l86, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l86\n"
+" \n"
+" dcl_literal l87, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.zzwz_neg(xyzw), l87\n"
+" \n"
+" dcl_literal l88, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r11.x___, r2.y_neg(xyzw), l88\n"
+" and r3._y__, r0.z, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.x___, r2.y\n"
+" mov r11._yz_, r3.zzwz\n"
+" mov r3._y__, r0.z\n"
+" endif\n"
+" and r8.x___, r2.x, r3.y\n"
+" mov r8._yzw, r11.xxyz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 1\n"
+" and r3.x___, r2.y, r1.w\n"
+" \n"
+" dcl_literal l89, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l89, l90\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l91, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l91\n"
+" \n"
+" dcl_literal l92, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l92\n"
+" \n"
+" dcl_literal l93, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.wwzw_neg(xyzw), l93\n"
+" and r3._y__, r2.y, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" mov r11.xy__, r11.zyzz\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.xy__, r3.zwzz\n"
+" mov r3._y__, r2.y\n"
+" endif\n"
+" and r11.__z_, r2.x, r3.y\n"
+" \n"
+" dcl_literal l94, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l94\n"
+" mov r8, r11.wzxy\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 2\n"
+" and r3.x___, r3.z, r1.w\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l95, l96\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l97, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l97\n"
+" \n"
+" dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6._y__, r3.w_neg(xyzw), l98\n"
+" and r4._y__, r3.z, r6.w\n"
+" iadd r6.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r5.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r5\n"
+" cmov_logical r11._yz_, r3.x, r6.xxyx, r3.zzwz\n"
+" and r11.x___, r2.x, r11.y\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l99\n"
+" mov r8, r11.wwxz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 3\n"
+" and r3.x___, r3.w, r1.w\n"
+" \n"
+" dcl_literal l100, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l100, l101\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l102, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l102\n"
+" and r4._y__, r3.w, r6.w\n"
+" iadd r4._y__, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" cmov_logical r3.x___, r3.x, r4.y, r3.w\n"
+" and r11.___w, r2.x, r3.x\n"
+" \n"
+" dcl_literal l103, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.xyz_, l103\n"
+" mov r8, r11\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 4\n"
+" and r3.x___, r5.x, r1.w\n"
+" \n"
+" dcl_literal l104, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l104, l105\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10._yz_, r5.yyzy_neg(xyzw), l106\n"
+" and r4._y__, r5.x, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" and r3.x___, r2.x, r10.x\n"
+" \n"
+" dcl_literal l107, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l107\n"
+" mov r9._yzw, r10.yyzw\n"
+" mov r9.x___, r3.x\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 5\n"
+" and r3.x___, r5.y, r1.w\n"
+" \n"
+" dcl_literal l108, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l109, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l108, l109\n"
+" inegate r10.__z_, r4.x\n"
+" \n"
+" dcl_literal l110, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.z_neg(xyzw), l110\n"
+" and r4._y__, r5.y, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r4.__zw, r5.yyyz\n"
+" cmov_logical r10.x_zw, r3.x, r10.xxyz, r4.zzwx\n"
+" and r10._y__, r2.x, r10.x\n"
+" \n"
+" dcl_literal l111, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.x___, l111\n"
+" \n"
+" dcl_literal l112, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l112\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 6\n"
+" and r3.x___, r5.z, r1.w\n"
+" \n"
+" dcl_literal l113, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l114, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l113, l114\n"
+" inegate r4.__z_, r4.x\n"
+" and r4.___w, r5.z, r6.w\n"
+" iadd r4._y__, r6.w, r4.w_neg(xyzw)\n"
+" mov r6.x___, r5.z\n"
+" mov r6._y__, r4.x\n"
+" cmov_logical r10._yz_, r3.x, r4.yyzy, r6.xxyx\n"
+" and r10.x___, r2.x, r10.y\n"
+" \n"
+" dcl_literal l115, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.___w, l115\n"
+" \n"
+" dcl_literal l116, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l116\n"
+" mov r9, r10.wwxz\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 7\n"
+" and r1.___w, r4.x, r1.w\n"
+" \n"
+" dcl_literal l117, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l118, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r1.w, l117, l118\n"
+" and r3._y__, r4.x, r6.w\n"
+" iadd r3._y__, r6.z, r3.y_neg(xyzw)\n"
+" cmov_logical r1.___w, r1.w, r3.y, r4.x\n"
+" and r6.___w, r2.x, r1.w\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r6.xyz_, l119\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l120\n"
+" mov r9, r6\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" default\n"
+" mov r8.__zw, r3.zzzw\n"
+" mov r8._y__, r2.y\n"
+" mov r8.x___, r0.z\n"
+" mov r9.___w, r4.x\n"
+" mov r9.xyz_, r5.xyzx\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r2.___w, l121\n"
+" break\n"
+" endswitch\n"
+" mov r3, r8\n"
+" mov r4, r9\n"
+" mov r0.__z_, r2.w\n"
+" \n"
+" dcl_literal l122, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5, r3, l122\n"
+" \n"
+" dcl_literal l123, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5, r5, l123\n"
+" \n"
+" dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r6, r5, l124\n"
+" \n"
+" dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r6, r6, l125\n"
+" \n"
+" dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l126\n"
+" \n"
+" dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r6, l127, r6_neg(xyzw)\n"
+" \n"
+" dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l128\n"
+" \n"
+" dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r6, r6, l129\n"
+" \n"
+" dcl_literal l130, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r6, r6, l130\n"
+" \n"
+" dcl_literal l131, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r5, r5, r6, l131\n"
+" \n"
+" dcl_literal l132, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r3, l132\n"
+" \n"
+" dcl_literal l133, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l133\n"
+" \n"
+" dcl_literal l134, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l134\n"
+" \n"
+" dcl_literal l135, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l135\n"
+" \n"
+" dcl_literal l136, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l136, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l137\n"
+" \n"
+" dcl_literal l138, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l138\n"
+" \n"
+" dcl_literal l139, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l139\n"
+" \n"
+" dcl_literal l140, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l140\n"
+" \n"
+" dcl_literal l141, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r8, r5, l141\n"
+" iadd r6, r5, r6\n"
+" cmov_logical r5, r8, r6, r5\n"
+" \n"
+" dcl_literal l142, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6, r4, l142\n"
+" \n"
+" dcl_literal l143, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r6, l143\n"
+" \n"
+" dcl_literal l144, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l144\n"
+" \n"
+" dcl_literal l145, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l145\n"
+" \n"
+" dcl_literal l146, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l146\n"
+" \n"
+" dcl_literal l147, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l147, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l148, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l148\n"
+" \n"
+" dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l149\n"
+" \n"
+" dcl_literal l150, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l150\n"
+" \n"
+" dcl_literal l151, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l151\n"
+" \n"
+" dcl_literal l152, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r8, r4, l152\n"
+" \n"
+" dcl_literal l153, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r9, r8, l153\n"
+" \n"
+" dcl_literal l154, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r9, r9, l154\n"
+" \n"
+" dcl_literal l155, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l155\n"
+" \n"
+" dcl_literal l156, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r9, l156, r9_neg(xyzw)\n"
+" \n"
+" dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l157\n"
+" \n"
+" dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r9, r9, l158\n"
+" \n"
+" dcl_literal l159, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r9, r9, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r8, r8, r9, l160\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r9, r6, l161\n"
+" iadd r8, r6, r8\n"
+" cmov_logical r6, r9, r8, r6\n"
+" \n"
+" dcl_literal l162, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ieq r1.___w, r5.x, l162\n"
+" \n"
+" dcl_literal l163, 0x00000020, 0x00000040, 0x00000000, 0x00000060\n"
+" iadd r2.xy_w, r5.yzyw, l163\n"
+" cmov_logical r1.___w, r1.w, r2.x, r5.x\n"
+" \n"
+" dcl_literal l164, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+" ieq r2.x___, r1.w, l164\n"
+" cmov_logical r1.___w, r2.x, r2.y, r1.w\n"
+" \n"
+" dcl_literal l165, 0x00000060, 0x00000060, 0x00000060, 0x00000060\n"
+" ieq r2.x___, r1.w, l165\n"
+" cmov_logical r1.___w, r2.x, r2.w, r1.w\n"
+" \n"
+" dcl_literal l166, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+" ieq r2.x___, r1.w, l166\n"
+" \n"
+" dcl_literal l167, 0x00000080, 0x000000A0, 0x000000C0, 0x000000E0\n"
+" iadd r5, r6, l167\n"
+" cmov_logical r1.___w, r2.x, r5.x, r1.w\n"
+" \n"
+" dcl_literal l168, 0x000000A0, 0x000000A0, 0x000000A0, 0x000000A0\n"
+" ieq r2.x___, r1.w, l168\n"
+" cmov_logical r1.___w, r2.x, r5.y, r1.w\n"
+" \n"
+" dcl_literal l169, 0x000000C0, 0x000000C0, 0x000000C0, 0x000000C0\n"
+" ieq r2.x___, r1.w, l169\n"
+" cmov_logical r1.___w, r2.x, r5.z, r1.w\n"
+" \n"
+" dcl_literal l170, 0x000000E0, 0x000000E0, 0x000000E0, 0x000000E0\n"
+" ieq r2.x___, r1.w, l170\n"
+" cmov_logical r1.___w, r2.x, r5.w, r1.w\n"
+" \n"
+" dcl_literal l171, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishr r2.x___, r1.w, l171\n"
+" \n"
+" dcl_literal l172, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r2._y__, r2.x, l172\n"
+" iadd r2._y__, r1.w, r2.y_neg(xyzw)\n"
+" iadd r1.___w, r2.z, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l173, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l173, r2.x\n"
+" \n"
+" dcl_literal l174, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r5, r2.x, l174\n"
+" inegate r2.__z_, r5.x\n"
+" \n"
+" dcl_literal l175, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r6.x__w, r5.y, l175\n"
+" \n"
+" dcl_literal l176, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r6._y__, r5.z, l176\n"
+" \n"
+" dcl_literal l177, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r6.__z_, r5.w, l177\n"
+" inegate r5, r6\n"
+" \n"
+" dcl_literal l178, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r6, r2.x, l178\n"
+" \n"
+" dcl_literal l179, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r8.x___, r6.x, l179\n"
+" \n"
+" dcl_literal l180, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r8._y__, r6.y, l180\n"
+" \n"
+" dcl_literal l181, 0x00000006, 0x00000006, 0x00000006, 0x00000006\n"
+" ushr r8.__z_, r6.z, l181\n"
+" \n"
+" dcl_literal l182, 0x00000007, 0x00000007, 0x00000007, 0x00000007\n"
+" ushr r8.___w, r6.w, l182\n"
+" inegate r6, r8\n"
+" and r2.x__w, r3.xxxy, r2.z\n"
+" and r8, r3.yzwz, r5\n"
+" ior r2.x__w, r2.xxxw, r8.xxxw\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" and r8, r4, r6\n"
+" ior r2.x___, r2.x, r8.x\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" ior r8._y_w, r8.w, r2.x\n"
+" and r2.x___, r3.w, r5.y\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.___w, r4.x, r5.z\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r9.xyz_, r4.yzwy, r6.xyzx\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.y, r2.x\n"
+" ior r8.x___, r9.z, r2.x\n"
+" \n"
+" dcl_literal l183, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r2.y, l183\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r8.y, r2.y\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l184, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2.___w, l184, r2.y_neg(xyzw)\n"
+" ushr r8.___w, r2.x, r2.w\n"
+" \n"
+" dcl_literal l185, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r3._y__, r2.y, l185\n"
+" ishl r2.x___, r2.x, r3.y\n"
+" ushr r3.x___, r2.x, r3.y\n"
+" else\n"
+" and r2.x___, r3.z, r2.z\n"
+" and r2.__z_, r3.w, r5.w\n"
+" ior r2.x___, r2.x, r2.z\n"
+" and r2.__zw, r4.xxxy, r5.yyyz\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.__zw, r4.zzzw, r6.xxxy\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r4._y__, r2.w, r2.x\n"
+" \n"
+" dcl_literal l186, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, l186, r2.y\n"
+" \n"
+" dcl_literal l187, 0xFFFFFFF8, 0x00000000, 0xFFFFFFF7, 0x00000000\n"
+" iadd r4.x_z_, r2.y, l187\n"
+" ishl r2.__z_, r8.y, r4.x\n"
+" ishl r2.___w, r8.x, r4.z\n"
+" \n"
+" dcl_literal l188, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r2._y__, l188, r2.y_neg(xyzw)\n"
+" ushr r2._y__, r8.x, r2.y\n"
+" ior r4.___w, r2.z, r2.y\n"
+" ushr r4.x___, r2.w, r4.z\n"
+" \n"
+" dcl_literal l189, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8.__z_, l189\n"
+" cmov_logical r8, r2.x, r4.yxzw, r8\n"
+" mov r3.xy__, r8.yzyy\n"
+" endif\n"
+" \n"
+" dcl_literal l190, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r8.w, l190\n"
+" \n"
+" dcl_literal l191, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.___w, r8.w, l191\n"
+" \n"
+" dcl_literal l192, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.___w, r2.w, l192\n"
+" \n"
+" dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l193\n"
+" \n"
+" dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.___w, l194, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l195\n"
+" \n"
+" dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r2.w, l196\n"
+" inegate r8._y__, r2.w\n"
+" \n"
+" dcl_literal l197, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r2.__z_, l197\n"
+" cmov_logical r2.xy__, r2.x, r2.yzyy, r8.wyww\n"
+" iadd r2._y__, r1.w, r2.y\n"
+" \n"
+" dcl_literal l198, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r2._y__, r2.y, l198\n"
+" \n"
+" dcl_literal l199, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l199\n"
+" ior r7.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l200, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r3.y, l200\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r3.x, r3.y\n"
+" ushr r2.x___, r2.x, r3.y\n"
+" \n"
+" dcl_literal l201, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2._y__, l201, r3.y_neg(xyzw)\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" else\n"
+" \n"
+" dcl_literal l202, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.__z_, l202, r3.y\n"
+" \n"
+" dcl_literal l203, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r2.___w, r3.y, l203\n"
+" ishl r2.___w, r3.x, r2.w\n"
+" \n"
+" dcl_literal l204, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r3._y__, l204, r3.y_neg(xyzw)\n"
+" ushr r3._y__, r8.x, r3.y\n"
+" ior r2.___w, r2.w, r3.y\n"
+" cmov_logical r2.x___, r2.z, r2.w, r3.x\n"
+" endif\n"
+" \n"
+" dcl_literal l205, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r1.___w, r1.w, l205\n"
+" \n"
+" dcl_literal l206, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l206\n"
+" \n"
+" dcl_literal l207, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.__z_, r2.x, l207\n"
+" \n"
+" dcl_literal l208, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.__z_, r2.z, l208\n"
+" \n"
+" dcl_literal l209, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l209\n"
+" \n"
+" dcl_literal l210, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.__z_, l210, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l211, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l211\n"
+" \n"
+" dcl_literal l212, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r2.z, l212\n"
+" inegate r2._y__, r2.z\n"
+" \n"
+" dcl_literal l213, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r3.__z_, l213\n"
+" cmov_logical r2._yz_, r3.x, r3.yyzy, r2.xxyx\n"
+" iadd r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l214, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r1.___w, r1.w, l214\n"
+" \n"
+" dcl_literal l215, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l215\n"
+" ior r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l216, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r7._y__, r2.x, r1.w, l216\n"
+" \n"
+" dcl_literal l217, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r7.z, l217\n"
+" mov r2.xy__, r7.xyxx_neg(xyzw)\n"
+" cmov_logical r2.xyz_, r0.z, r2.xyzx, r7.xyzx\n"
+" \n"
+" dcl_literal l218, 0x00000000, 0x00000000, 0xFFFFF000, 0x00000003\n"
+" and r2.__zw, r2.xxxz, l218\n"
+" add r0.__z_, r2.x, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l219, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mul_ieee r1.x___, r2.x, l219\n"
+" \n"
+" dcl_literal l220, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r1.___w, r2.z, l220, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l221, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.___w, r2.z, l221, r1.w\n"
+" \n"
+" dcl_literal l222, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r1.___w, r0.z, l222, r1.w\n"
+" \n"
+" dcl_literal l223, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r0.__z_, r0.z, l223, r1.w\n"
+" utof r0.___w, r2.w\n"
+" \n"
+" dcl_literal l224, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mad_ieee r1.__z_, r2.y, l224, r0.z\n"
+" endif\n"
+" mul_ieee r2.xy__, r1.xzxx, r1.x\n"
+" mul_ieee r0.__z_, r1.x, r2.x\n"
+" \n"
+" dcl_literal l225, 0x00000000, 0x00000000, 0x2F2EC9D3, 0xAD47D74E\n"
+" \n"
+" dcl_literal l226, 0x00000000, 0x00000000, 0xB2D72F34, 0x310F74F6\n"
+" mad_ieee r2.__zw, r2.x, l225, l226\n"
+" \n"
+" dcl_literal l227, 0x00000000, 0x00000000, 0x3636DF25, 0xB492923A\n"
+" mad_ieee r2.__zw, r2.x, r2.zzzw, l227\n"
+" \n"
+" dcl_literal l228, 0x00000000, 0x00000000, 0xB95009D4, 0x37D00AE2\n"
+" mad_ieee r2.__zw, r2.x, r2.zzzw, l228\n"
+" \n"
+" dcl_literal l229, 0x00000000, 0x00000000, 0x3C088887, 0xBAB60B60\n"
+" mad_ieee r2.__zw, r2.x, r2.zzzw, l229\n"
+" mul_ieee r1.___w, r0.z, r2.z\n"
+" \n"
+" dcl_literal l230, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1.___w, r1.z, l230, r1.w_neg(xyzw)\n"
+" mad_ieee r1.__z_, r2.x, r1.w, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l231, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mad_ieee r0.__z_, r0.z_neg(xyzw), l231, r1.z\n"
+" add r3.__z_, r1.x, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l232, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r0.__z_, r2.x, r2.w, l232\n"
+" mul_ieee r1.__z_, r2.x, r2.x\n"
+" \n"
+" dcl_literal l233, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.x___, r1.x, l233\n"
+" \n"
+" dcl_literal l234, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r1.___w, r1.x, l234\n"
+" \n"
+" dcl_literal l235, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r2.__z_, l235, r1.x\n"
+" and r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l236, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r2.__z_, r1.x, l236\n"
+" and r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l237, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r1.x___, l237, r1.x\n"
+" \n"
+" dcl_literal l238, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r1.x___, r1.x, l238, r1.w\n"
+" \n"
+" dcl_literal l239, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1.___w, r2.x, l239, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l240, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1.x___, r1.x_neg(xyzw), l240\n"
+" mad_ieee r0.__z_, r0.z, r1.z, r2.y_neg(xyzw)\n"
+" add r0.__z_, r1.w, r0.z_neg(xyzw)\n"
+" add r1.x___, r1.x, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l241, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+" eq r2, r0.w, l241\n"
+" \n"
+" dcl_literal l242, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r0.__z_, r0.y, l242\n"
+" and r4, r2, r0.z\n"
+" mov r3.x___, r3.z_neg(xyzw)\n"
+" and r1.__z_, r4.x, r3.x\n"
+" mov r3._y__, r1.x_neg(xyzw)\n"
+" cmov_logical r0.__zw, r4.y, r3.xxxy, r1.xxxz\n"
+" cmov_logical r0.__zw, r4.z, r3.yyyz, r0.zzzw\n"
+" mov r3.___w, r1.x\n"
+" cmov_logical r0.__zw, r4.w, r3.zzzw, r0.zzzw\n"
+" \n"
+" dcl_literal l243, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ge r1.x___, r0.y, l243\n"
+" and r2, r2, r1.x\n"
+" cmov_logical r0.__zw, r2.x, r3.wwwz, r0.zzzw\n"
+" cmov_logical r0.__zw, r2.y, r3.xxxw, r0.zzzw\n"
+" cmov_logical r0.__zw, r2.z, r3.yyyx, r0.zzzw\n"
+" cmov_logical r0.xy__, r2.w, r3.zyzz, r0.zwzz\n"
+"else\n"
+" \n"
+" dcl_literal l244, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" mov r0.x___, l244\n"
+"endif\n"
+"dcl_literal l245, 0x7FC00000, 0x7FC00000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r1.y, l245, r0.yxyy\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__sinh_f32", /* table entry: macro name, IL macro text, then two integer fields; name suggests float32 sinh - TODO confirm against the table's struct definition (not visible here) */
+"mdef(387)_out(1)_in(1)\n" /* macro header as written: id 387, one output, one input */
+"mov r0, in0\n" /* copy the input; only r0.x (called x below) is read before the other lanes are overwritten */
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n"
+"and r0._yz_, r0.x, l0\n" /* r0.y = x & 0x7FFFFFFF (sign bit cleared: |x| bits); r0.z = x & 0x7F800000 (float32 exponent field) */
+"\n"
+"dcl_literal l1, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n" /* 0x3FB8AA3B ~= 1.442695f, i.e. log2(e) */
+"\n"
+"dcl_literal l2, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"mad_ieee r0.___w, r0.y, l1, l2\n" /* r0.w = |x| * l1 + 0.5 */
+"round_z r0.___w, r0.w\n" /* k = round_z(r0.w); presumably round-toward-zero - confirm in the IL spec */
+"\n"
+"dcl_literal l3, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n" /* ~= 0.6931381f; l3 + l6 together ~= ln(2) */
+"mad_ieee r1.x___, r0.w_neg(xyzw), l3, r0.y\n" /* r1.x = |x| - k * l3 */
+"\n"
+"dcl_literal l4, 0x42000000, 0x42000000, 0x42000000, 0x42000000\n" /* 32.0f */
+"ge r1._y__, r0.w, l4\n" /* r1.y = (k >= 32.0) */
+"\n"
+"dcl_literal l5, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0f */
+"add r1.__z_, r0.w, l5\n" /* r1.z = k - 1 */
+"cmov_logical r1._y__, r1.y, r1.z, r0.w\n" /* r1.y = (k >= 32) ? k - 1 : k; used below as the power-of-two scale exponent */
+"\n"
+"dcl_literal l6, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* ~= 9.058e-6f, the small second part of the ln(2) split */
+"mad_ieee r1.__z_, r0.w_neg(xyzw), l6, r1.x\n" /* r = r1.x - k * l6 (reduced argument) */
+"mul_ieee r1.___w, r1.z, r1.z\n" /* r1.w = r * r */
+"\n"
+"dcl_literal l7, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l8, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r2.x___, r1.w, l7, l8\n" /* start of a Horner-form polynomial in r*r with coefficients l7..l11 */
+"\n"
+"dcl_literal l9, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r2.x___, r1.w, r2.x, l9\n"
+"\n"
+"dcl_literal l10, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2.x___, r1.w, r2.x, l10\n"
+"\n"
+"dcl_literal l11, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n" /* ~= 0.16666667f */
+"mad_ieee r2.x___, r1.w, r2.x, l11\n" /* r2.x = P(r*r) */
+"mad_ieee r1.___w, r1.w_neg(xyzw), r2.x, r1.z\n" /* c = r - r*r * P */
+"mul_ieee r1.__z_, r1.z, r1.w\n" /* r1.z = r * c */
+"\n"
+"dcl_literal l12, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"add r1.___w, r1.w_neg(xyzw), l12\n" /* r1.w = 2 - c */
+"div_zeroop(infinity) r1.__z_, r1.z, r1.w\n" /* r1.z = r*c / (2 - c) */
+"\n"
+"dcl_literal l13, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n" /* same value as l6 */
+"mad_ieee r0.___w, r0.w, l13, r1.z_neg(xyzw)\n" /* r0.w = k * l13 - r*c/(2-c) */
+"add r0.___w, r1.x_neg(xyzw), r0.w\n" /* r0.w -= (|x| - k*l3) */
+"\n"
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"add r1.x___, r0.w_neg(xyzw), l14\n" /* r1.x = 1 - r0.w; NOTE(review): this sequence resembles an exp(r) evaluation - confirm */
+"\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r1.__z_, r1.y, l15\n" /* r1.z = (scale exponent r1.y != 0.0) */
+"if_logicalnz r1.z\n" /* nonzero scale exponent: rescale r1.x using integer bit manipulation */
+" ftoi r1.__z_, r1.y\n" /* r1.z = scale exponent as an integer */
+" \n"
+" dcl_literal l16, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r2.xyz_, r1.x, l16\n" /* r2.x = sign bit, r2.y = exponent field, r2.z = mantissa field of r1.x */
+" if_logicalz r2.y\n" /* exponent field of r1.x is zero; NOTE(review): both branches appear to build the bits of r1.x scaled by 2^r1.z - verify */
+" itof r1.___w, r2.z\n"
+" \n"
+" dcl_literal l17, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r1.w, l17\n" /* r3.x = exponent field, r3.y = mantissa field of the converted value */
+" \n"
+" dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 = bit position of the exponent field */
+" ishr r1.___w, r3.x, l18\n"
+" iadd r1.___w, r1.w, r1.z\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n" /* bit 23 */
+" ior r2.___w, r3.y, l19\n"
+" \n"
+" dcl_literal l20, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+" iadd r1.___w, l20, r1.w_neg(xyzw)\n" /* r1.w = 150 - r1.w */
+" \n"
+" dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3.x___, l21, r1.w\n" /* r3.x = (23 < r1.w) */
+" \n"
+" dcl_literal l22, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.___w, r3.x, l22, r1.w\n" /* values above 23 are replaced by 24 */
+" \n"
+" dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.x___, l23, r1.w\n" /* r3.x = (0 < r1.w) */
+" ishr r3._y__, r2.w, r1.w\n"
+" inegate r1.___w, r1.w\n"
+" \n"
+" dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l24\n"
+" iadd r1.___w, r2.w, r1.w\n"
+" cmov_logical r1.___w, r3.x, r3.y, r1.w\n" /* pick the right-shifted mantissa when the shift count is positive, else the exponent-adjusted bits */
+" else\n"
+" \n"
+" dcl_literal l25, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.___w, r1.x, l25\n" /* r2.w = r1.x with the sign bit cleared */
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r2.y, l26\n" /* r2.y = exponent field shifted down by 23 */
+" iadd r2._y__, r2.y, r1.z\n" /* add the integer scale exponent */
+" \n"
+" dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l27\n"
+" iadd r1.__z_, r2.w, r1.z\n" /* r1.z = |r1.x| bits + (scale << 23) */
+" \n"
+" dcl_literal l28, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n" /* -127 as a 32-bit integer */
+" iadd r2._y__, r2.y, l28\n"
+" \n"
+" dcl_literal l29, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n" /* 127 */
+" ilt r2.___w, l29, r2.y\n" /* r2.w = (127 < r2.y) */
+" \n"
+" dcl_literal l30, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n" /* float32 +infinity bit pattern */
+" cmov_logical r1.__z_, r2.w, l30, r1.z\n"
+" \n"
+" dcl_literal l31, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n" /* -126 as a 32-bit integer */
+" iadd r2._y__, l31, r2.y_neg(xyzw)\n" /* r2.y = -126 - r2.y */
+" \n"
+" dcl_literal l32, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2.__z_, r2.z, l32\n" /* mantissa field with bit 23 set */
+" \n"
+" dcl_literal l33, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r3.xy__, l33, r2.y\n" /* r3.x = (0 < r2.y), r3.y = (23 < r2.y) */
+" \n"
+" dcl_literal l34, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r3.y, l34, r2.y\n" /* values above 23 are replaced by 24 */
+" ishr r2._y__, r2.z, r2.y\n"
+" cmov_logical r1.___w, r3.x, r2.y, r1.z\n"
+" endif\n"
+" \n"
+" dcl_literal l35, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n" /* 254.0f */
+" lt r1._y__, l35, r1.y\n" /* r1.y = (254.0 < scale exponent) */
+" \n"
+" dcl_literal l36, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1._y__, r1.y, l36, r1.w\n" /* +infinity bits if the scale exponent exceeds 254, else the rebuilt bits */
+" ior r1.x___, r2.x, r1.y\n" /* reattach the sign bit saved in r2.x */
+" \n"
+" dcl_literal l37, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0f */
+" add r0.___w, r1.x, l37\n" /* r0.w = r1.x - 1 */
+"else\n"
+" mov r0.___w, r0.w_neg(xyzw)\n" /* scale exponent is zero: r0.w = -r0.w, which equals r1.x - 1 since r1.x = 1 - r0.w */
+"endif\n"
+"\n"
+"dcl_literal l38, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.__z_, r0.z, l38\n" /* r0.z = (exponent field of x == 0) */
+"\n"
+"dcl_literal l39, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1._y__, r0.y, l39\n" /* r1.y = (|x| bits != 0) */
+"and r0.__z_, r0.z, r1.y\n" /* true for a nonzero input whose exponent field is zero */
+"cmov_logical r0.__z_, r0.z, r0.y, r0.w\n" /* t = that case ? |x| : r0.w */
+"\n"
+"dcl_literal l40, 0x41AEAC50, 0x41AEAC50, 0x41AEAC50, 0x41AEAC50\n" /* ~= 21.834f */
+"lt r0.___w, r0.y, l40\n" /* r0.w = (|x| < l40) */
+"mul_ieee r1._y__, r0.z, r0.z\n" /* t * t */
+"div_zeroop(infinity) r1._y__, r1.y, r1.x\n" /* t*t / r1.x */
+"\n"
+"dcl_literal l41, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"mad_ieee r0.__z_, r0.z, l41, r1.y_neg(xyzw)\n" /* 2*t - t*t/r1.x */
+"\n"
+"dcl_literal l42, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"mul_ieee r0.__z_, r0.z, l42\n" /* halve it */
+"cmov_logical r0.__z_, r0.w, r0.z, r1.x\n" /* below l40 use the formula above, otherwise use r1.x directly */
+"\n"
+"dcl_literal l43, 0x31800000, 0x31800000, 0x31800000, 0x31800000\n" /* bit pattern of 2^-28 */
+"ilt r0.___w, r0.y, l43\n" /* integer compare on the |x| bit pattern */
+"cmov_logical r0.__z_, r0.w, r0.y, r0.z\n" /* tiny |x|: result magnitude is |x| itself */
+"\n"
+"dcl_literal l44, 0x42B2D4FD, 0x42B2D4FD, 0x42B2D4FD, 0x42B2D4FD\n" /* ~= 89.416f */
+"ige r0.___w, r0.y, l44\n"
+"\n"
+"dcl_literal l45, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ige r1.x___, l45, r0.y\n"
+"and r0.___w, r0.w, r1.x\n" /* l44 <= |x| bits <= 0x7F800000 */
+"\n"
+"dcl_literal l46, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0.__z_, r0.w, l46, r0.z\n" /* in that range the magnitude becomes the +infinity bit pattern */
+"\n"
+"dcl_literal l47, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l47, r0.y\n" /* r0.y = (|x| bits > 0x7F800000), i.e. a NaN bit pattern */
+"\n"
+"dcl_literal l48, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.___w, r0.x, l48\n" /* x with the 0x7FC00000 bits forced on */
+"cmov_logical r0._y__, r0.y, r0.w, r0.z\n" /* NaN input yields x | 0x7FC00000, otherwise the computed magnitude */
+"\n"
+"dcl_literal l49, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l49\n" /* r0.x = (x as a signed integer < 0), i.e. sign bit set */
+"cmov_logical r0.x___, r0.x, r0.y_neg(xyzw), r0.y\n" /* negate the result for sign-bit-set inputs */
+"mov out0, r0\n" /* final value is in r0.x */
+"mend\n"
+,1,1 /* two trailing integer fields of the entry; their meaning comes from the struct definition, not visible here */
+},
+{ "__sinpi_f32", /* table entry: macro name, IL macro text, then two integer fields; name suggests float32 sin(pi*x) - TODO confirm against the table's struct definition (not visible here) */
+"mdef(388)_out(1)_in(1)\n" /* macro header as written: id 388, one output, one input */
+"mov r0, in0\n" /* copy the input; only r0.x (called x below) is read before the other lanes are overwritten */
+"\n"
+"dcl_literal l0, 0x00000000, 0x7F800000, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0._yzw, r0.x, l0\n" /* r0.y = exponent field of x; r0.z = r0.w = x & 0x7FFFFFFF (|x| bits) */
+"add r1.x___, r0.w, r0.w\n" /* 2 * |x| */
+"round_nearest r1.x___, r1.x\n" /* n = round_nearest(2 * |x|) */
+"\n"
+"dcl_literal l1, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n" /* 2.0f */
+"mad_ieee r1._y__, r0.w, l1, r1.x_neg(xyzw)\n" /* f = 2*|x| - n */
+"\n"
+"dcl_literal l2, 0x3E800000, 0x3FC90FDB, 0x00000000, 0x00000000\n" /* 0.25f and ~= 1.5707964f (pi/2) */
+"mul_ieee r1.xy__, r1.xyxx, l2\n" /* r1.x = n * 0.25; r1.y = y = f * pi/2 */
+"mul_ieee r1.__z_, r1.y, r1.y\n" /* z = y * y */
+"\n"
+"dcl_literal l3, 0x3636DF25, 0xB492923A, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l4, 0xB95009D4, 0x37D00AE2, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, l3, l4\n" /* two Horner chains in z run in parallel: x lane and y lane */
+"\n"
+"dcl_literal l5, 0x3C088887, 0xBAB60B60, 0x00000000, 0x00000000\n"
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l5\n"
+"\n"
+"dcl_literal l6, 0xBE2AAAAB, 0x3D2AAAAB, 0x00000000, 0x00000000\n" /* ~= -0.16666667f and ~= 0.041666668f */
+"mad_ieee r2.xy__, r1.z, r2.xyxx, l6\n"
+"mul_ieee r1.___w, r1.y, r1.z\n" /* y^3 */
+"mad_ieee r1._y__, r1.w, r2.x, r1.y\n" /* S = y + y^3 * r2.x; NOTE(review): looks like a sine series in y - confirm */
+"frc r1.x___, r1.x\n" /* fractional part of n / 4 */
+"\n"
+"dcl_literal l7, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n" /* 4.0f */
+"mul_ieee r1.x___, r1.x, l7\n"
+"round_nearest r1.x___, r1.x\n" /* q = round_nearest(4 * frac(n/4)), compared against 0..3 below */
+"\n"
+"dcl_literal l8, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n" /* 0.0f, 1.0f, 2.0f, 3.0f */
+"eq r3, r1.x, l8\n" /* r3.xyzw = (q == 0, q == 1, q == 2, q == 3) */
+"\n"
+"dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ge r1.x___, r0.w, l9\n" /* r1.x = (|x| >= 0.0) */
+"and r3, r3, r1.x\n"
+"and r1.x___, r1.y, r3.x\n" /* S bits when q == 0 (and the mask above holds), else zero bits */
+"\n"
+"dcl_literal l10, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"\n"
+"dcl_literal l11, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"mad_ieee r1.___w, r1.z_neg(xyzw), l10, l11\n" /* 1 - 0.5 * z */
+"mul_ieee r1.__z_, r1.z, r1.z\n" /* z * z */
+"mad_ieee r1.__z_, r1.z, r2.y, r1.w\n" /* C = 1 - z/2 + z^2 * r2.y; NOTE(review): looks like a cosine series in y - confirm */
+"cmov_logical r1.x___, r3.y, r1.z, r1.x\n" /* q == 1 selects C */
+"cmov_logical r1.x___, r3.z, r1.y_neg(xyzw), r1.x\n" /* q == 2 selects -S */
+"cmov_logical r1.x___, r3.w, r1.z_neg(xyzw), r1.x\n" /* q == 3 selects -C */
+"itof r1._y__, r0.w\n" /* converts the raw |x| bit pattern to float; only selected below when the exponent field is zero and |x| != 0 */
+"\n"
+"dcl_literal l12, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n" /* ~= 3.1415927f (pi) */
+"mul_ieee r1._y__, r1.y, l12\n"
+"\n"
+"dcl_literal l13, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r1._yz_, r1.y, l13\n" /* r1.y = exponent field, r1.z = mantissa field of the product */
+"\n"
+"dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n" /* 23 */
+"ishr r1._y__, r1.y, l14\n"
+"\n"
+"dcl_literal l15, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.__z_, r1.z, l15\n" /* mantissa field with bit 23 set */
+"\n"
+"dcl_literal l16, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n" /* 150 */
+"iadd r1._y__, l16, r1.y_neg(xyzw)\n" /* shift = 150 - exponent */
+"inegate r1.___w, r1.y\n"
+"\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1.___w, r1.w, l17\n"
+"iadd r1.___w, r1.z, r1.w\n" /* mantissa bits + (-shift << 23) */
+"ishr r1.__z_, r1.z, r1.y\n" /* mantissa bits >> shift */
+"\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, l18, r1.y\n" /* r1.y = (0 < shift) */
+"cmov_logical r1._y__, r1.y, r1.z, r1.w\n" /* NOTE(review): appears to rebuild pi * x as raw bits for subnormal x - verify */
+"\n"
+"dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.__zw, r0.wwwy, l19\n" /* r1.z = (|x| bits != 0); r1.w = (exponent field != 0) */
+"\n"
+"dcl_literal l20, 0x00000000, 0x00000000, 0x7F800000, 0x00000000\n"
+"ieq r2.xyz_, r0.yzwy, l20\n" /* r2.x = (exponent field == 0); r2.y = (|x| bits == 0); r2.z = (|x| bits == 0x7F800000, the infinity pattern) */
+"and r0._y__, r1.z, r2.x\n" /* nonzero input with zero exponent field */
+"cmov_logical r0._y__, r0.y, r1.y, r1.x\n" /* that case takes the integer-path value, otherwise the quadrant-selected value */
+"\n"
+"dcl_literal l21, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n" /* bit pattern of 2^-27 */
+"ilt r0.__z_, r0.w, l21\n" /* integer compare on the |x| bit pattern */
+"and r0.__z_, r1.w, r0.z\n" /* exponent field nonzero and |x| below 2^-27 */
+"\n"
+"dcl_literal l22, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n" /* ~= 3.1415927f (pi) */
+"mul_ieee r1.x___, r0.w, l22\n" /* pi * |x| */
+"cmov_logical r0._y__, r0.z, r1.x, r0.y\n" /* small |x|: use pi * |x| directly */
+"frc r0.__z_, r0.w\n" /* fractional part of |x| */
+"add r1.x___, r0.w, r0.z_neg(xyzw)\n" /* |x| minus its fractional part */
+"\n"
+"dcl_literal l23, 0x00000000, 0x00000000, 0x3F000000, 0x00000000\n"
+"eq r1._yz_, r0.z, l23\n" /* r1.y = (frac == 0.0); r1.z = (frac == 0.5) */
+"\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l24, r0.w\n" /* r0.z = (|x| bits > 0x7F800000), i.e. a NaN bit pattern */
+"\n"
+"dcl_literal l25, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"ge r0.___w, r1.x, l25\n" /* integer part >= 1.0 */
+"\n"
+"dcl_literal l26, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n" /* 0.5f */
+"mul_ieee r1.x___, r1.x, l26\n" /* half of the integer part */
+"and r0.___w, r1.y, r0.w\n" /* |x| is a whole number >= 1 */
+"ior r0.___w, r2.y, r0.w\n" /* ... or x is zero */
+"\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0._y__, r0.w, l27, r0.y\n" /* result magnitude is 0 in those cases */
+"frc r0.___w, r1.x\n"
+"\n"
+"dcl_literal l28, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r0.___w, r0.w, l28\n" /* true when the integer part of |x| is even */
+"\n"
+"dcl_literal l29, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n" /* 1.0f */
+"\n"
+"dcl_literal l30, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n" /* -1.0f */
+"cmov_logical r0.___w, r0.w, l29, l30\n" /* +1 for an even integer part, -1 otherwise */
+"cmov_logical r0._y__, r1.z, r0.w, r0.y\n" /* frac == 0.5: result magnitude is exactly +1 or -1 */
+"\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l31\n" /* r0.w = (x as a signed integer < 0), i.e. sign bit set */
+"\n"
+"dcl_literal l32, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l32\n" /* x with the 0x7FC00000 bits forced on (used for NaN input below) */
+"cmov_logical r0._y__, r0.w, r0.y_neg(xyzw), r0.y\n" /* negate the result for sign-bit-set inputs */
+"\n"
+"dcl_literal l33, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r2.z, l33, r0.y\n" /* infinite input yields the 0xFFC00000 bit pattern */
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n" /* NaN input yields x | 0x7FC00000, otherwise the computed value */
+"mov out0, r0\n" /* final value is in r0.x */
+"mend\n"
+,1,1 /* two trailing integer fields of the entry; their meaning comes from the struct definition, not visible here */
+},
+{ "__sin_f32",
+"mdef(389)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0._y__, r0.x_abs\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__z_, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ige r0.___w, r0.z, l1\n"
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.x___, r0.z, l2\n"
+"and r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l3, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680\n"
+" lt r0.___w, r0.y, l3\n"
+" if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l4, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n"
+" mul_ieee r0.___w, r0.y, l4\n"
+" round_nearest r0.___w, r0.w\n"
+" \n"
+" dcl_literal l5, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r1.x___, r0.w, l5\n"
+" add r1._y__, r0.w, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l6, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+" mul_ieee r1.__z_, r0.w, l6\n"
+" \n"
+" dcl_literal l7, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n"
+" mul_ieee r2.xyz_, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l8, r2.xyxx_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l9, r3.xyxx\n"
+" \n"
+" dcl_literal l10, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l10, r3.xyxx\n"
+" \n"
+" dcl_literal l11, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l11, r3.xyxx\n"
+" add r0.___w, r0.y, r2.x_neg(xyzw)\n"
+" add r1.___w, r0.y, r0.w_neg(xyzw)\n"
+" add r1.___w, r2.x_neg(xyzw), r1.w\n"
+" add r1.___w, r3.x_neg(xyzw), r1.w\n"
+" add r0.___w, r0.w, r1.w\n"
+" add r1.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r3.y_neg(xyzw), r0.w\n"
+" add r0.___w, r1.w, r0.w\n"
+" \n"
+" dcl_literal l12, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.___w, r1.x, l12, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l13, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.x, l13, r1.w\n"
+" \n"
+" dcl_literal l14, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.x___, r1.y, l14, r1.x\n"
+" \n"
+" dcl_literal l15, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1._y__, r1.y, l15, r1.x\n"
+" add r1.___w, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.z_neg(xyzw), r0.w\n"
+" add r1.x___, r1.w, r0.w\n"
+" frc r0.___w, r1.z\n"
+" \n"
+" dcl_literal l16, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+" mul_ieee r0.___w, r0.w, l16\n"
+" round_nearest r0.___w, r0.w\n"
+" mov r1._y__, r1.y_neg(xyzw)\n"
+" else\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r1.__z_, r0.y, l17\n"
+" \n"
+" dcl_literal l18, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.___w, r0.y, l18\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l19\n"
+" \n"
+" dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r1.w, l20\n"
+" \n"
+" dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r1.w, l21\n"
+" \n"
+" dcl_literal l22, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r3, l22, r1.w\n"
+" \n"
+" dcl_literal l23, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r4, l23, r2.x\n"
+" \n"
+" dcl_literal l24, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2._yz_, r4.yywy, l24\n"
+" \n"
+" dcl_literal l25, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r3.ywyy, l25\n"
+" iadd r2._yz_, r2.yyzy, r5.xxyx\n"
+" iadd r2._yz_, r4.xxzx, r2.yyzy\n"
+" \n"
+" dcl_literal l26, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x_z_, r2.yyzy, l26\n"
+" iadd r3.x_z_, r3.xxzx, r4.xxzx\n"
+" \n"
+" dcl_literal l27, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l27\n"
+" \n"
+" dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2._yz_, r2.yyzy, l28\n"
+" \n"
+" dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.ywyy, l29\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" ior r4.xy__, r2.yzyy, r4.xyxx\n"
+" iadd r5.__z_, r3.x, r4.y\n"
+" ult r2._y__, r5.z, r4.y\n"
+" \n"
+" dcl_literal l30, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r3.y, l30\n"
+" cmov_logical r2._y__, r2.y, r2.z, r3.y\n"
+" \n"
+" dcl_literal l31, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r3, l31, r1.w\n"
+" \n"
+" dcl_literal l32, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r6, l32, r2.x\n"
+" \n"
+" dcl_literal l33, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l33\n"
+" \n"
+" dcl_literal l34, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l34\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l35, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l35\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l36, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l36\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l37\n"
+" \n"
+" dcl_literal l38, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l38\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r5._y__, r2.y, r2.z\n"
+" ult r2._y__, r5.y, r2.z\n"
+" \n"
+" dcl_literal l39, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+" iadd r3.__zw, r3.xxxy, l39\n"
+" cmov_logical r2._y__, r2.y, r3.z, r3.x\n"
+" iadd r5.x___, r2.y, r2.w\n"
+" ult r2._y__, r5.x, r2.w\n"
+" cmov_logical r2._y__, r2.y, r3.w, r3.y\n"
+" \n"
+" dcl_literal l40, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r3, l40, r1.w\n"
+" \n"
+" dcl_literal l41, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r6, l41, r2.x\n"
+" \n"
+" dcl_literal l42, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l42\n"
+" \n"
+" dcl_literal l43, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l43\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l44, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l44\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l45, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l45\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l46, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l46\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l47\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r3.___w, r2.y, r2.z\n"
+" ult r2._y__, r3.w, r2.z\n"
+" \n"
+" dcl_literal l48, 0x00000001, 0x00000001, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r3.xyxx, l48\n"
+" cmov_logical r2._y__, r2.y, r6.x, r3.x\n"
+" iadd r3.__z_, r2.y, r2.w\n"
+" ult r2._y__, r3.z, r2.w\n"
+" cmov_logical r2._y__, r2.y, r6.y, r3.y\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x0000A2F9, 0x0000836E\n"
+" umul r2.__zw, l49, r1.w\n"
+" \n"
+" dcl_literal l50, 0x0000A2F9, 0x0000836E, 0x00000000, 0x00000000\n"
+" umul r3.xy__, l50, r2.x\n"
+" \n"
+" dcl_literal l51, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r3.y, l51\n"
+" \n"
+" dcl_literal l52, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r2.w, l52\n"
+" iadd r1.___w, r1.w, r2.x\n"
+" iadd r1.___w, r3.x, r1.w\n"
+" \n"
+" dcl_literal l53, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.x___, r1.w, l53\n"
+" iadd r2.x___, r2.z, r2.x\n"
+" \n"
+" dcl_literal l54, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__z_, r2.w, l54\n"
+" iadd r2.x___, r2.x, r2.z\n"
+" \n"
+" dcl_literal l55, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.___w, r1.w, l55\n"
+" \n"
+" dcl_literal l56, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r3.y, l56\n"
+" ior r1.___w, r1.w, r2.z\n"
+" iadd r2._y__, r2.y, r1.w\n"
+" ult r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l57, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r2.x, l57\n"
+" cmov_logical r1.___w, r1.w, r2.z, r2.x\n"
+" \n"
+" dcl_literal l58, 0xFFFFFF89, 0x00000000, 0xFFFFFF8A, 0x00000000\n"
+" iadd r2.x_z_, r1.z, l58\n"
+" \n"
+" dcl_literal l59, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r6.x___, r2.x, l59\n"
+" \n"
+" dcl_literal l60, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r1.__z_, r6.x, l60\n"
+" iadd r1.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l61, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l61, r6.x\n"
+" \n"
+" dcl_literal l62, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r7, r2.x, l62\n"
+" inegate r2.___w, r7.x\n"
+" \n"
+" dcl_literal l63, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r8.x___, r7.y, l63\n"
+" \n"
+" dcl_literal l64, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r8._y__, r7.z, l64\n"
+" \n"
+" dcl_literal l65, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r8.__z_, r7.w, l65\n"
+" inegate r7.xyz_, r8.xyzx\n"
+" \n"
+" dcl_literal l66, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r8, r2.x, l66\n"
+" \n"
+" dcl_literal l67, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r9.x___, r8.x, l67\n"
+" \n"
+" dcl_literal l68, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r9._yzw, r8.yyzw, l68\n"
+" inegate r8, r9\n"
+" and r2.x___, r1.w, r2.w\n"
+" and r3.xy__, r2.y, r7.xyxx\n"
+" ior r2.x___, r2.x, r3.x\n"
+" and r7._yzw, r3.zzzw, r7.yyzz\n"
+" ior r2.x___, r2.x, r7.y\n"
+" ior r2.x___, r7.w, r2.x\n"
+" and r9, r5.xxyy, r8.xyyz\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.z, r2.x\n"
+" and r7._y_w, r5.z, r8.zzzw\n"
+" ior r2.x___, r2.x, r7.y\n"
+" and r2.___w, r4.x, r8.w\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r2.___w, r1.w, r7.x\n"
+" ior r2.___w, r3.y, r2.w\n"
+" ior r2.___w, r7.z, r2.w\n"
+" and r3.x___, r3.w, r8.x\n"
+" ior r2.___w, r2.w, r3.x\n"
+" ior r2.___w, r9.y, r2.w\n"
+" ior r2.___w, r9.w, r2.w\n"
+" ior r2.___w, r7.w, r2.w\n"
+" \n"
+" dcl_literal l69, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r3.x___, l69, r1.z\n"
+" and r3.x___, r2.x, r3.x\n"
+" inegate r3._y__, r1.z\n"
+" \n"
+" dcl_literal l70, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+" iadd r6._y__, l70, r3.y\n"
+" \n"
+" dcl_literal l71, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ult r4._y__, l71, r1.z\n"
+" \n"
+" dcl_literal l72, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r1.z, l72\n"
+" \n"
+" dcl_literal l73, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r1.__z_, l73, r1.z\n"
+" and r1.__z_, r2.x, r1.z\n"
+" ior r1.__z_, r3.x, r1.z\n"
+" ushr r1.__z_, r1.z, r6.y\n"
+" ushr r2.x___, r3.x, r6.y\n"
+" \n"
+" dcl_literal l74, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" and r2.___w, r2.w, l74\n"
+" \n"
+" dcl_literal l75, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.___w, r2.w, l75\n"
+" ior r2.x___, r2.x, r2.w\n"
+" cmov_logical r7.__z_, r4.y, r1.z, r2.x\n"
+" \n"
+" dcl_literal l76, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r8.x___, r6.x, l76\n"
+" \n"
+" dcl_literal l77, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ishl r6.__z_, l77, r3.y\n"
+" \n"
+" dcl_literal l78, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.___w, r6.z, l78\n"
+" \n"
+" dcl_literal l79, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFFF\n"
+" mov r8._yzw, l79\n"
+" cmov_logical r6, r6.y, r6, r8\n"
+" \n"
+" dcl_literal l80, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r6.y, l80\n"
+" \n"
+" dcl_literal l81, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r1.__z_, l81, r1.z\n"
+" \n"
+" dcl_literal l82, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r2.x___, r1.z, l82\n"
+" switch r6.x\n"
+" case 0\n"
+" and r3.x___, r1.w, r1.z\n"
+" \n"
+" dcl_literal l83, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l84, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l83, l84\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l85, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l85\n"
+" \n"
+" dcl_literal l86, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l86\n"
+" \n"
+" dcl_literal l87, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.zzwz_neg(xyzw), l87\n"
+" \n"
+" dcl_literal l88, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r11.x___, r2.y_neg(xyzw), l88\n"
+" and r3._y__, r1.w, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.x___, r2.y\n"
+" mov r11._yz_, r3.zzwz\n"
+" mov r3._y__, r1.w\n"
+" endif\n"
+" and r8.x___, r2.x, r3.y\n"
+" mov r8._yzw, r11.xxyz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 1\n"
+" and r3.x___, r2.y, r1.z\n"
+" \n"
+" dcl_literal l89, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l89, l90\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l91, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l91\n"
+" \n"
+" dcl_literal l92, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l92\n"
+" \n"
+" dcl_literal l93, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.wwzw_neg(xyzw), l93\n"
+" and r3._y__, r2.y, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" mov r11.xy__, r11.zyzz\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.xy__, r3.zwzz\n"
+" mov r3._y__, r2.y\n"
+" endif\n"
+" and r11.__z_, r2.x, r3.y\n"
+" \n"
+" dcl_literal l94, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l94\n"
+" mov r8, r11.wzxy\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 2\n"
+" and r3.x___, r3.z, r1.z\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l95, l96\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l97, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l97\n"
+" \n"
+" dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6._y__, r3.w_neg(xyzw), l98\n"
+" and r4._y__, r3.z, r6.w\n"
+" iadd r6.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r5.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r5\n"
+" cmov_logical r11._yz_, r3.x, r6.xxyx, r3.zzwz\n"
+" and r11.x___, r2.x, r11.y\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l99\n"
+" mov r8, r11.wwxz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 3\n"
+" and r3.x___, r3.w, r1.z\n"
+" \n"
+" dcl_literal l100, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l100, l101\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l102, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l102\n"
+" and r4._y__, r3.w, r6.w\n"
+" iadd r4._y__, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" cmov_logical r3.x___, r3.x, r4.y, r3.w\n"
+" and r11.___w, r2.x, r3.x\n"
+" \n"
+" dcl_literal l103, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.xyz_, l103\n"
+" mov r8, r11\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 4\n"
+" and r3.x___, r5.x, r1.z\n"
+" \n"
+" dcl_literal l104, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l104, l105\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10._yz_, r5.yyzy_neg(xyzw), l106\n"
+" and r4._y__, r5.x, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" and r3.x___, r2.x, r10.x\n"
+" \n"
+" dcl_literal l107, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l107\n"
+" mov r9._yzw, r10.yyzw\n"
+" mov r9.x___, r3.x\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 5\n"
+" and r3.x___, r5.y, r1.z\n"
+" \n"
+" dcl_literal l108, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l109, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l108, l109\n"
+" inegate r10.__z_, r4.x\n"
+" \n"
+" dcl_literal l110, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.z_neg(xyzw), l110\n"
+" and r4._y__, r5.y, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r4.__zw, r5.yyyz\n"
+" cmov_logical r10.x_zw, r3.x, r10.xxyz, r4.zzwx\n"
+" and r10._y__, r2.x, r10.x\n"
+" \n"
+" dcl_literal l111, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.x___, l111\n"
+" \n"
+" dcl_literal l112, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l112\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 6\n"
+" and r3.x___, r5.z, r1.z\n"
+" \n"
+" dcl_literal l113, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l114, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l113, l114\n"
+" inegate r4.__z_, r4.x\n"
+" and r4.___w, r5.z, r6.w\n"
+" iadd r4._y__, r6.w, r4.w_neg(xyzw)\n"
+" mov r6.x___, r5.z\n"
+" mov r6._y__, r4.x\n"
+" cmov_logical r10._yz_, r3.x, r4.yyzy, r6.xxyx\n"
+" and r10.x___, r2.x, r10.y\n"
+" \n"
+" dcl_literal l115, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.___w, l115\n"
+" \n"
+" dcl_literal l116, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l116\n"
+" mov r9, r10.wwxz\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 7\n"
+" and r1.__z_, r4.x, r1.z\n"
+" \n"
+" dcl_literal l117, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l118, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r1.z, l117, l118\n"
+" and r3._y__, r4.x, r6.w\n"
+" iadd r3._y__, r6.z, r3.y_neg(xyzw)\n"
+" cmov_logical r1.__z_, r1.z, r3.y, r4.x\n"
+" and r6.___w, r2.x, r1.z\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r6.xyz_, l119\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l120\n"
+" mov r9, r6\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" default\n"
+" mov r8.__zw, r3.zzzw\n"
+" mov r8._y__, r2.y\n"
+" mov r8.x___, r1.w\n"
+" mov r9.___w, r4.x\n"
+" mov r9.xyz_, r5.xyzx\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r2.___w, l121\n"
+" break\n"
+" endswitch\n"
+" mov r3, r8\n"
+" mov r4, r9\n"
+" mov r1.__z_, r2.w\n"
+" \n"
+" dcl_literal l122, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5, r3, l122\n"
+" \n"
+" dcl_literal l123, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5, r5, l123\n"
+" \n"
+" dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r6, r5, l124\n"
+" \n"
+" dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r6, r6, l125\n"
+" \n"
+" dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l126\n"
+" \n"
+" dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r6, l127, r6_neg(xyzw)\n"
+" \n"
+" dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l128\n"
+" \n"
+" dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r6, r6, l129\n"
+" \n"
+" dcl_literal l130, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r6, r6, l130\n"
+" \n"
+" dcl_literal l131, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r5, r5, r6, l131\n"
+" \n"
+" dcl_literal l132, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r3, l132\n"
+" \n"
+" dcl_literal l133, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l133\n"
+" \n"
+" dcl_literal l134, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l134\n"
+" \n"
+" dcl_literal l135, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l135\n"
+" \n"
+" dcl_literal l136, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l136, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l137\n"
+" \n"
+" dcl_literal l138, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l138\n"
+" \n"
+" dcl_literal l139, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l139\n"
+" \n"
+" dcl_literal l140, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l140\n"
+" \n"
+" dcl_literal l141, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r8, r5, l141\n"
+" iadd r6, r5, r6\n"
+" cmov_logical r5, r8, r6, r5\n"
+" \n"
+" dcl_literal l142, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6, r4, l142\n"
+" \n"
+" dcl_literal l143, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r6, l143\n"
+" \n"
+" dcl_literal l144, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l144\n"
+" \n"
+" dcl_literal l145, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l145\n"
+" \n"
+" dcl_literal l146, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l146\n"
+" \n"
+" dcl_literal l147, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l147, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l148, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l148\n"
+" \n"
+" dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l149\n"
+" \n"
+" dcl_literal l150, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l150\n"
+" \n"
+" dcl_literal l151, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l151\n"
+" \n"
+" dcl_literal l152, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r8, r4, l152\n"
+" \n"
+" dcl_literal l153, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r9, r8, l153\n"
+" \n"
+" dcl_literal l154, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r9, r9, l154\n"
+" \n"
+" dcl_literal l155, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l155\n"
+" \n"
+" dcl_literal l156, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r9, l156, r9_neg(xyzw)\n"
+" \n"
+" dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l157\n"
+" \n"
+" dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r9, r9, l158\n"
+" \n"
+" dcl_literal l159, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r9, r9, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r8, r8, r9, l160\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r9, r6, l161\n"
+" iadd r8, r6, r8\n"
+" cmov_logical r6, r9, r8, r6\n"
+" \n"
+" dcl_literal l162, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ieq r1.___w, r5.x, l162\n"
+" \n"
+" dcl_literal l163, 0x00000020, 0x00000040, 0x00000000, 0x00000060\n"
+" iadd r2.xy_w, r5.yzyw, l163\n"
+" cmov_logical r1.___w, r1.w, r2.x, r5.x\n"
+" \n"
+" dcl_literal l164, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+" ieq r2.x___, r1.w, l164\n"
+" cmov_logical r1.___w, r2.x, r2.y, r1.w\n"
+" \n"
+" dcl_literal l165, 0x00000060, 0x00000060, 0x00000060, 0x00000060\n"
+" ieq r2.x___, r1.w, l165\n"
+" cmov_logical r1.___w, r2.x, r2.w, r1.w\n"
+" \n"
+" dcl_literal l166, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+" ieq r2.x___, r1.w, l166\n"
+" \n"
+" dcl_literal l167, 0x00000080, 0x000000A0, 0x000000C0, 0x000000E0\n"
+" iadd r5, r6, l167\n"
+" cmov_logical r1.___w, r2.x, r5.x, r1.w\n"
+" \n"
+" dcl_literal l168, 0x000000A0, 0x000000A0, 0x000000A0, 0x000000A0\n"
+" ieq r2.x___, r1.w, l168\n"
+" cmov_logical r1.___w, r2.x, r5.y, r1.w\n"
+" \n"
+" dcl_literal l169, 0x000000C0, 0x000000C0, 0x000000C0, 0x000000C0\n"
+" ieq r2.x___, r1.w, l169\n"
+" cmov_logical r1.___w, r2.x, r5.z, r1.w\n"
+" \n"
+" dcl_literal l170, 0x000000E0, 0x000000E0, 0x000000E0, 0x000000E0\n"
+" ieq r2.x___, r1.w, l170\n"
+" cmov_logical r1.___w, r2.x, r5.w, r1.w\n"
+" \n"
+" dcl_literal l171, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishr r2.x___, r1.w, l171\n"
+" \n"
+" dcl_literal l172, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r2._y__, r2.x, l172\n"
+" iadd r2._y__, r1.w, r2.y_neg(xyzw)\n"
+" iadd r1.___w, r2.z, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l173, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l173, r2.x\n"
+" \n"
+" dcl_literal l174, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r5, r2.x, l174\n"
+" inegate r2.__z_, r5.x\n"
+" \n"
+" dcl_literal l175, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r6.x__w, r5.y, l175\n"
+" \n"
+" dcl_literal l176, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r6._y__, r5.z, l176\n"
+" \n"
+" dcl_literal l177, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r6.__z_, r5.w, l177\n"
+" inegate r5, r6\n"
+" \n"
+" dcl_literal l178, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r6, r2.x, l178\n"
+" \n"
+" dcl_literal l179, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r8.x___, r6.x, l179\n"
+" \n"
+" dcl_literal l180, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r8._y__, r6.y, l180\n"
+" \n"
+" dcl_literal l181, 0x00000006, 0x00000006, 0x00000006, 0x00000006\n"
+" ushr r8.__z_, r6.z, l181\n"
+" \n"
+" dcl_literal l182, 0x00000007, 0x00000007, 0x00000007, 0x00000007\n"
+" ushr r8.___w, r6.w, l182\n"
+" inegate r6, r8\n"
+" and r2.x__w, r3.xxxy, r2.z\n"
+" and r8, r3.yzwz, r5\n"
+" ior r2.x__w, r2.xxxw, r8.xxxw\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" and r8, r4, r6\n"
+" ior r2.x___, r2.x, r8.x\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" ior r8._y_w, r8.w, r2.x\n"
+" and r2.x___, r3.w, r5.y\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.___w, r4.x, r5.z\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r9.xyz_, r4.yzwy, r6.xyzx\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.y, r2.x\n"
+" ior r8.x___, r9.z, r2.x\n"
+" \n"
+" dcl_literal l183, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r2.y, l183\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r8.y, r2.y\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l184, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2.___w, l184, r2.y_neg(xyzw)\n"
+" ushr r8.___w, r2.x, r2.w\n"
+" \n"
+" dcl_literal l185, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r3._y__, r2.y, l185\n"
+" ishl r2.x___, r2.x, r3.y\n"
+" ushr r3.x___, r2.x, r3.y\n"
+" else\n"
+" and r2.x___, r3.z, r2.z\n"
+" and r2.__z_, r3.w, r5.w\n"
+" ior r2.x___, r2.x, r2.z\n"
+" and r2.__zw, r4.xxxy, r5.yyyz\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.__zw, r4.zzzw, r6.xxxy\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r4._y__, r2.w, r2.x\n"
+" \n"
+" dcl_literal l186, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, l186, r2.y\n"
+" \n"
+" dcl_literal l187, 0xFFFFFFF8, 0x00000000, 0xFFFFFFF7, 0x00000000\n"
+" iadd r4.x_z_, r2.y, l187\n"
+" ishl r2.__z_, r8.y, r4.x\n"
+" ishl r2.___w, r8.x, r4.z\n"
+" \n"
+" dcl_literal l188, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r2._y__, l188, r2.y_neg(xyzw)\n"
+" ushr r2._y__, r8.x, r2.y\n"
+" ior r4.___w, r2.z, r2.y\n"
+" ushr r4.x___, r2.w, r4.z\n"
+" \n"
+" dcl_literal l189, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8.__z_, l189\n"
+" cmov_logical r8, r2.x, r4.yxzw, r8\n"
+" mov r3.xy__, r8.yzyy\n"
+" endif\n"
+" \n"
+" dcl_literal l190, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r8.w, l190\n"
+" \n"
+" dcl_literal l191, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.___w, r8.w, l191\n"
+" \n"
+" dcl_literal l192, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.___w, r2.w, l192\n"
+" \n"
+" dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l193\n"
+" \n"
+" dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.___w, l194, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l195\n"
+" \n"
+" dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r2.w, l196\n"
+" inegate r8._y__, r2.w\n"
+" \n"
+" dcl_literal l197, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r2.__z_, l197\n"
+" cmov_logical r2.xy__, r2.x, r2.yzyy, r8.wyww\n"
+" iadd r2._y__, r1.w, r2.y\n"
+" \n"
+" dcl_literal l198, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r2._y__, r2.y, l198\n"
+" \n"
+" dcl_literal l199, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l199\n"
+" ior r7.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l200, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r3.y, l200\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r3.x, r3.y\n"
+" ushr r2.x___, r2.x, r3.y\n"
+" \n"
+" dcl_literal l201, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2._y__, l201, r3.y_neg(xyzw)\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" else\n"
+" \n"
+" dcl_literal l202, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.__z_, l202, r3.y\n"
+" \n"
+" dcl_literal l203, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r2.___w, r3.y, l203\n"
+" ishl r2.___w, r3.x, r2.w\n"
+" \n"
+" dcl_literal l204, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r3._y__, l204, r3.y_neg(xyzw)\n"
+" ushr r3._y__, r8.x, r3.y\n"
+" ior r2.___w, r2.w, r3.y\n"
+" cmov_logical r2.x___, r2.z, r2.w, r3.x\n"
+" endif\n"
+" \n"
+" dcl_literal l205, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r1.___w, r1.w, l205\n"
+" \n"
+" dcl_literal l206, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l206\n"
+" \n"
+" dcl_literal l207, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.__z_, r2.x, l207\n"
+" \n"
+" dcl_literal l208, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.__z_, r2.z, l208\n"
+" \n"
+" dcl_literal l209, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l209\n"
+" \n"
+" dcl_literal l210, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.__z_, l210, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l211, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l211\n"
+" \n"
+" dcl_literal l212, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r2.z, l212\n"
+" inegate r2._y__, r2.z\n"
+" \n"
+" dcl_literal l213, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r3.__z_, l213\n"
+" cmov_logical r2._yz_, r3.x, r3.yyzy, r2.xxyx\n"
+" iadd r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l214, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r1.___w, r1.w, l214\n"
+" \n"
+" dcl_literal l215, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l215\n"
+" ior r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l216, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r7._y__, r2.x, r1.w, l216\n"
+" \n"
+" dcl_literal l217, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r7.z, l217\n"
+" mov r2.xy__, r7.xyxx_neg(xyzw)\n"
+" cmov_logical r2.xyz_, r1.z, r2.xyzx, r7.xyzx\n"
+" \n"
+" dcl_literal l218, 0x00000000, 0x00000000, 0xFFFFF000, 0x00000003\n"
+" and r1.__zw, r2.xxxz, l218\n"
+" add r2.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l219, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mul_ieee r1.x___, r2.x, l219\n"
+" \n"
+" dcl_literal l220, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r2.x___, r1.z, l220, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l221, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r1.z, l221, r2.x\n"
+" \n"
+" dcl_literal l222, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r1.__z_, r2.z, l222, r1.z\n"
+" \n"
+" dcl_literal l223, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r2.z, l223, r1.z\n"
+" utof r0.___w, r1.w\n"
+" \n"
+" dcl_literal l224, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mad_ieee r1._y__, r2.y, l224, r1.z\n"
+" endif\n"
+" mul_ieee r1.__zw, r1.xxxy, r1.x\n"
+" mul_ieee r2.x___, r1.x, r1.z\n"
+" \n"
+" dcl_literal l225, 0x00000000, 0x2F2EC9D3, 0xAD47D74E, 0x00000000\n"
+" \n"
+" dcl_literal l226, 0x00000000, 0xB2D72F34, 0x310F74F6, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, l225, l226\n"
+" \n"
+" dcl_literal l227, 0x00000000, 0x3636DF25, 0xB492923A, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l227\n"
+" \n"
+" dcl_literal l228, 0x00000000, 0xB95009D4, 0x37D00AE2, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l228\n"
+" \n"
+" dcl_literal l229, 0x00000000, 0x3C088887, 0xBAB60B60, 0x00000000\n"
+" mad_ieee r2._yz_, r1.z, r2.yyzy, l229\n"
+" mul_ieee r2._y__, r2.x, r2.y\n"
+" \n"
+" dcl_literal l230, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r2._y__, r1.y, l230, r2.y_neg(xyzw)\n"
+" mad_ieee r1._y__, r1.z, r2.y, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l231, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB, 0xBE2AAAAB\n"
+" mad_ieee r1._y__, r2.x_neg(xyzw), l231, r1.y\n"
+" add r1._y__, r1.x, r1.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l232, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB, 0x3D2AAAAB\n"
+" mad_ieee r2.x___, r1.z, r2.z, l232\n"
+" mul_ieee r2._y__, r1.z, r1.z\n"
+" \n"
+" dcl_literal l233, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.x___, r1.x, l233\n"
+" \n"
+" dcl_literal l234, 0x3E99999A, 0x3E99999A, 0x3E99999A, 0x3E99999A\n"
+" ige r2.__z_, r1.x, l234\n"
+" \n"
+" dcl_literal l235, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ige r2.___w, l235, r1.x\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l236, 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000\n"
+" iadd r2.___w, r1.x, l236\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l237, 0x3F480000, 0x3F480000, 0x3F480000, 0x3F480000\n"
+" ilt r1.x___, l237, r1.x\n"
+" \n"
+" dcl_literal l238, 0x3E900000, 0x3E900000, 0x3E900000, 0x3E900000\n"
+" cmov_logical r1.x___, r1.x, l238, r2.z\n"
+" \n"
+" dcl_literal l239, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mad_ieee r1.__z_, r1.z, l239, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l240, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1.x___, r1.x_neg(xyzw), l240\n"
+" mad_ieee r1.___w, r2.x, r2.y, r1.w_neg(xyzw)\n"
+" add r1.__z_, r1.z, r1.w_neg(xyzw)\n"
+" add r1.x___, r1.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l241, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+" eq r2, r0.w, l241\n"
+" and r0.___w, r1.y, r2.x\n"
+" cmov_logical r0.___w, r2.y, r1.x, r0.w\n"
+" cmov_logical r0.___w, r2.z, r1.y_neg(xyzw), r0.w\n"
+" cmov_logical r0._y__, r2.w, r1.x_neg(xyzw), r0.w\n"
+"endif\n"
+"\n"
+"dcl_literal l242, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l242\n"
+"cmov_logical r0._y__, r0.w, r0.y_neg(xyzw), r0.y\n"
+"\n"
+"dcl_literal l243, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.z, l243\n"
+"\n"
+"dcl_literal l244, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.w, l244, r0.y\n"
+"\n"
+"dcl_literal l245, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l245, r0.z\n"
+"\n"
+"dcl_literal l246, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l246\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__smad24_i32", /* AMDIL macro: out0.x = ((in0.x << 8) >> 8) * ((in1.x << 8) >> 8) + in2.x */
+"mdef(390)_out(1)_in(3)\n" /* macro id 390, declared with three inputs and one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather the scalar operands: r0.y = in1.x */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (the addend) */
+"\n"
+"dcl_literal l0, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishl r1.xy__, r0.xyxx, l0\n" /* push bit 23 of both factors up into bit 31 */
+"\n"
+"dcl_literal l1, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishr r1.xy__, r1.xyxx, l1\n" /* shift back down; presumably arithmetic, i.e. sign-extends the low 24 bits -- confirm against the AMD IL spec */
+"imad r0.x___, r1.x, r1.y, r0.z\n" /* r0.x = r1.x * r1.y + r0.z */
+"mov out0, r0\n" /* only .x was recomputed; .y/.z still carry in1.x/in2.x and .w is in0.w */
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_hi_i16", /* AMDIL macro: out0.x derived from ((in0.x * in1.x) >> 16) + in2.x; a positive sum is masked to 16 bits */
+"mdef(391)_out(1)_in(3)\n" /* macro id 391, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (the addend) */
+"imul r1.x___, r0.x, r0.y\n" /* 32-bit product of the two factors */
+"\n"
+"dcl_literal l5, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishr r1.x___, r1.x, l5\n" /* drop the low 16 bits; assumes the inputs are 16-bit values held in 32-bit lanes -- TODO confirm */
+"iadd r1.x___, r1.x, r0.z\n" /* add in2.x */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, l6, r1.x\n" /* r1.y = (0 < sum) */
+"\n"
+"dcl_literal l7, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r1.__z_, r1.x, l7\n" /* r1.z = sum & 0xFFFF */
+"cmov_logical r0.x___, r1.y, r1.z, r1.x\n" /* positive sum -> masked value, otherwise the unmasked sum */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_hi_i32", /* AMDIL macro: out0.x = high 32 bits of the signed product in0.x * in1.x, plus in2.x (ishr taken as arithmetic shift, ushr as logical) */
+"mdef(392)_out(1)_in(3)\n" /* macro id 392, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (the addend) */
+"\n"
+"dcl_literal l10, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ishr r1.xy__, r0.xyxx, l10\n" /* r1.xy = sign masks of the two factors (value >> 31) */
+"ixor r1.__z_, r0.y, r0.x\n"
+"\n"
+"dcl_literal l11, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ishr r1.__z_, r1.z, l11\n" /* r1.z = sign mask of the product (signs differ -> all ones) */
+"ixor r2.xy__, r0.xyxx, r1.xyxx\n"
+"iadd r1.xy__, r2.xyxx, r1.xyxx_neg(xyzw)\n" /* (v ^ mask) - mask: r1.xy = magnitudes of the factors */
+"\n"
+"dcl_literal l12, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r2.xy__, r1.xyxx, l12\n" /* r2.xy = low 16-bit halves of the magnitudes */
+"\n"
+"dcl_literal l13, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.xy__, r1.xyxx, l13\n" /* r1.xy = high 16-bit halves of the magnitudes */
+"umul r1.___w, r1.x, r1.y\n" /* a_hi * b_hi */
+"umul r1.x___, r1.x, r2.y\n" /* a_hi * b_lo (r1.x is overwritten; r1.y still holds b_hi) */
+"umul r1._y__, r2.x, r1.y\n" /* a_lo * b_hi */
+"umul r2.x___, r2.x, r2.y\n" /* a_lo * b_lo */
+"\n"
+"dcl_literal l14, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r2._y__, r2.x, l14\n" /* upper half of a_lo*b_lo feeds the middle column */
+"\n"
+"dcl_literal l15, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r2.__z_, r1.y, l15\n" /* lower half of a_lo*b_hi */
+"iadd r2._y__, r2.y, r2.z\n"
+"iadd r1.x___, r1.x, r2.y\n" /* middle column sum: a_hi*b_lo + the two halves above */
+"\n"
+"dcl_literal l16, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r2._y__, r1.x, l16\n" /* carry out of the middle column */
+"iadd r1.___w, r1.w, r2.y\n"
+"\n"
+"dcl_literal l17, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1._y__, r1.y, l17\n" /* upper half of a_lo*b_hi */
+"iadd r1._y__, r1.w, r1.y\n" /* r1.y = high 32 bits of the unsigned magnitude product */
+"\n"
+"dcl_literal l18, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishl r1.x___, r1.x, l18\n"
+"\n"
+"dcl_literal l19, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r1.___w, r2.x, l19\n"
+"ior r1.x___, r1.x, r1.w\n" /* r1.x = low 32 bits of the unsigned magnitude product */
+"ixor r1.x__w, r1.z, r1.xxxy\n" /* r1.x = low ^ sign, r1.w = high ^ sign */
+"iadd r1.x___, r1.x, r1.z_neg(xyzw)\n" /* low word of the negated product when the sign mask is set */
+"iadd r2.x___, r1.w, r1.z_neg(xyzw)\n" /* high word with the +1 carry applied */
+"cmov_logical r1.x___, r1.x, r1.w, r2.x\n" /* the carry reaches the high word only when the negated low word is zero */
+"cmov_logical r1.x___, r1.z, r1.x, r1.y\n" /* negative product -> negated high word, else the unsigned high word */
+"iadd r0.x___, r1.x, r0.z\n" /* add in2.x */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_hi_i8", /* AMDIL macro: out0.x derived from ((in0.x * in1.x) >> 8) + in2.x; a positive sum is masked to 8 bits */
+"mdef(393)_out(1)_in(3)\n" /* macro id 393, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (the addend) */
+"imul r1.x___, r0.x, r0.y\n" /* 32-bit product of the two factors */
+"\n"
+"dcl_literal l0, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishr r1.x___, r1.x, l0\n" /* drop the low 8 bits; assumes the inputs are 8-bit values held in 32-bit lanes -- TODO confirm */
+"iadd r1.x___, r1.x, r0.z\n" /* add in2.x */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, l1, r1.x\n" /* r1.y = (0 < sum) */
+"\n"
+"dcl_literal l2, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r1.__z_, r1.x, l2\n" /* r1.z = sum & 0xFF */
+"cmov_logical r0.x___, r1.y, r1.z, r1.x\n" /* positive sum -> masked value, otherwise the unmasked sum */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_sat_i16", /* AMDIL macro: out0.x = clamp(in0.x * in1.x + in2.x, -32768, 32767) */
+"mdef(394)_out(1)_in(3)\n" /* macro id 394, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x (multiplier) */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (addend) */
+"imad r1.x___, r0.x, r0.y, r0.z\n" /* a*b + c: the addend must be r0.z (in2.x), not the multiplier r0.y */
+"\n"
+"dcl_literal l3, 0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF\n"
+"imin r1.x___, r1.x, l3\n" /* clamp to the upper bound 0x7FFF */
+"\n"
+"dcl_literal l4, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000\n"
+"imax r0.___w, r1.x, l4\n" /* clamp to the lower bound -0x8000 */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_sat_i32", /* AMDIL macro: out0.x = in0.x * in1.x + in2.x, saturated when the final addition overflows 32-bit signed range */
+"mdef(395)_out(1)_in(3)\n" /* macro id 395, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x (multiplier) */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (addend) */
+"imul r1.x___, r0.x, r0.y\n" /* r1.x = a*b; NOTE(review): overflow of the multiply itself is not detected here */
+"imad r1._y__, r0.x, r0.y, r0.z\n" /* r1.y = a*b + c: the addend must be r0.z (in2.x), not the multiplier r0.y */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.__z_, l6, r0.z\n" /* addend is positive ... */
+"ilt r1.___w, r1.y, r1.x\n" /* ... yet the sum came out below the product */
+"and r1.__z_, r1.z, r1.w\n" /* => the addition overflowed upwards */
+"\n"
+"dcl_literal l7, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"cmov_logical r1.__z_, r1.z, l7, r1.y\n" /* saturate to INT_MAX, else keep the sum */
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.___w, r0.z, l8\n" /* addend is negative ... */
+"ilt r1.x___, r1.x, r1.y\n" /* ... yet the sum came out above the product */
+"and r1.x___, r1.w, r1.x\n" /* => the addition overflowed downwards */
+"\n"
+"dcl_literal l9, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.___w, r1.x, l9, r1.z\n" /* saturate to INT_MIN, else keep the previous result */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smad_sat_i8", /* AMDIL macro: out0.x = clamp(in0.x * in1.x + in2.x, -128, 127) */
+"mdef(396)_out(1)_in(3)\n" /* macro id 396, three inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x (multiplier) */
+"mov r0.__z_, r2.x\n" /* r0.z = in2.x (addend) */
+"imad r1.x___, r0.x, r0.y, r0.z\n" /* a*b + c: the addend must be r0.z (in2.x), not the multiplier r0.y */
+"\n"
+"dcl_literal l0, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"imin r1.x___, r1.x, l0\n" /* clamp to the upper bound 0x7F */
+"\n"
+"dcl_literal l1, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80\n"
+"imax r0.___w, r1.x, l1\n" /* clamp to the lower bound -0x80 */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* NOTE(review): presumably input count, output count (matches _in(3)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smax_i16", /* AMDIL macro: out0.x = imax(in1.x, in0.x); same body as the i32 variant, so inputs are presumably already sign-extended -- TODO confirm */
+"mdef(397)_out(1)_in(2)\n" /* macro id 397, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imax r0.x___, r0.y, r0.x\n" /* r0.x = larger of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smax_i32", /* AMDIL macro: out0.x = imax(in1.x, in0.x) */
+"mdef(398)_out(1)_in(2)\n" /* macro id 398, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imax r0.x___, r0.y, r0.x\n" /* r0.x = larger of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smax_i8", /* AMDIL macro: out0.x = imax(in1.x, in0.x); same body as the i32 variant, so inputs are presumably already sign-extended -- TODO confirm */
+"mdef(399)_out(1)_in(2)\n" /* macro id 399, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imax r0.x___, r0.y, r0.x\n" /* r0.x = larger of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smin_i16", /* AMDIL macro: out0.x = imin(in1.x, in0.x); same body as the i32 variant, so inputs are presumably already sign-extended -- TODO confirm */
+"mdef(400)_out(1)_in(2)\n" /* macro id 400, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imin r0.x___, r0.y, r0.x\n" /* r0.x = smaller of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smin_i32", /* AMDIL macro: out0.x = imin(in1.x, in0.x) */
+"mdef(401)_out(1)_in(2)\n" /* macro id 401, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imin r0.x___, r0.y, r0.x\n" /* r0.x = smaller of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smin_i8", /* AMDIL macro: out0.x = imin(in1.x, in0.x); same body as the i32 variant, so inputs are presumably already sign-extended -- TODO confirm */
+"mdef(402)_out(1)_in(2)\n" /* macro id 402, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x */
+"imin r0.x___, r0.y, r0.x\n" /* r0.x = smaller of the two operands */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smod_i16", /* AMDIL macro: 16-bit signed remainder, out0.x = (a - d*q) & 0xFFFF with a = in0.x, d = in1.x, q = quotient truncated toward zero; q is forced to 0 when d is zero */
+"mdef(403)_out(1)_in(2)\n" /* macro id 403, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x (divisor) */
+"dcl_literal l13, 0x0000FFFF, 0x0000FFFF, 0x00008000, 0x00000000\n"
+"and r1.xyz_, r0.xyxx, l13\n" /* r1.x = a & 0xFFFF, r1.y = d & 0xFFFF, r1.z = bit 15 (sign bit) of a */
+"dcl_literal l14, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l14\n" /* working divisor: d, or 1 when d is zero, so udiv never sees 0 */
+"dcl_literal l15, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"ior r1.___w, r1.x, l15\n"
+"cmov_logical r1.x___, r1.z, r1.w, r1.x\n" /* sign-extend a from 16 to 32 bits */
+"dcl_literal l16, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n"
+"and r1.__z_, r0.w, l16\n"
+"dcl_literal l17, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000\n"
+"ior r1.___w, r0.w, l17\n"
+"cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* sign-extend the working divisor from 16 to 32 bits */
+"ixor r1.__z_, r1.x, r0.w\n" /* bit 31 set when the operand signs differ */
+"imax r1.___w, r1.x, r1.x_neg(xyzw)\n" /* |a| as max(a, -a) */
+"imax r2.x___, r0.w, r0.w_neg(xyzw)\n" /* |d| as max(d, -d) */
+"udiv r1.___w, r1.w, r2.x\n" /* unsigned quotient of the magnitudes */
+"inegate r2.x___, r1.w\n"
+"dcl_literal l18, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r1.__z_, r1.z, l18\n"
+"cmov_logical r1.__z_, r1.z, r2.x, r1.w\n" /* negate the quotient when the signs differ */
+"dcl_literal l19, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, r1.z, l19\n" /* quotient becomes 0 when the original divisor was zero */
+"imul r0.___w, r0.w, r1.y\n"
+"iadd r0.___w, r1.x, r0.w_neg(xyzw)\n" /* remainder = a - d*q */
+"dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.w, l20\n" /* keep only the low 16 bits of the remainder */
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smod_i32", /* AMDIL macro: 32-bit signed remainder, out0.x = |in0.x| mod |in1.x| carrying the sign of in0.x */
+"mdef(404)_out(1)_in(2)\n" /* macro id 404, two inputs, one output */
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l25, 0, 0, 0, 0\n"
+"mov r0._y__, r1.x\n" /* r0.y = in1.x (divisor) */
+"ilt r1.xy, r0, l25\n" /* r1.xy = per-operand "is negative" masks; the add/xor below relies on true being all ones */
+"iadd r0.xy, r0, r1\n"
+"ixor r0.xy, r0, r1\n" /* (v + mask) ^ mask: r0.xy = magnitudes of dividend and divisor */
+"udiv r2.x, r0.x, r0.y\n" /* unsigned quotient; NOTE(review): no guard for a zero divisor in this variant */
+"umul r2.x, r2.x, r0.y\n"
+"iadd r0.x, r0.x, r2.x_neg(xyzw)\n" /* unsigned remainder = |a| - q*|d| */
+"iadd r0.x, r0.x, r1.x\n"
+"ixor r0.x, r0.x, r1.x\n" /* re-apply the dividend's sign with the same (v + mask) ^ mask step */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* NOTE(review): presumably input count, output count (matches _in(2)/_out(1)) -- confirm against the table's struct */
+},
+{ "__smod_i64",
+"mdef(405)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0.__zw, r1.yyxy\n"
+"dcl_literal l1, 0x80000000, 0x80000000, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.ywyy, l1\n"
+"inegate r2, r0\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r1.__zw, l2, r2.zzzx\n"
+"iadd r1.__zw, r1.zzzw, r2.wwwy\n"
+"cmov_logical r0._y_w, r1.yyyx, r1.zzzw, r0.wwwy\n"
+"cmov_logical r0.x_z_, r1.xxyx, r2.xxzx, r0.xxzx\n"
+"ult r1._y__, r0.w, r0.y\n"
+"ieq r1.__z_, r0.w, r0.y\n"
+"ult r1.___w, r0.x, r0.z\n"
+"and r1.___w, r1.z, r1.w\n"
+"ior r1.___w, r1.y, r1.w\n"
+"dcl_literal l3, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.___w, r1.w, l3, l4\n"
+"ieq r2.x___, r0.x, r0.z\n"
+"and r2.x___, r1.z, r2.x\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.___w, r2.x, l5, r1.w\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r2.xy__, r0.yzyy, l6\n"
+"and r2.x___, r2.y, r2.x\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.__z_, r2.x, l7, r0.z\n"
+"dcl_literal l8, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r2._yz_, r0.yywy, l8\n"
+"dcl_literal l9, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.xy__, r0.ywyy, l9\n"
+"dcl_literal l10, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r2.___w, r0.z, l10\n"
+"ior r2.___w, r3.x, r2.w\n"
+"dcl_literal l11, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l11\n"
+"dcl_literal l12, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.x___, r0.z, l12\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0x3F800000, 0x3F800000\n"
+"ior r3.__zw, r2.yyyz, l13\n"
+"dcl_literal l14, 0x00000000, 0x00000000, 0xBF800000, 0xBF800000\n"
+"add r3.__zw, r3.zzzw, l14\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"and r3.__zw, r3.zzzw, l15\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__zw, l16, r3.zzzw_neg(xyzw)\n"
+"dcl_literal l17, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"and r3.__zw, r3.zzzw, l17\n"
+"dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__zw, r3.zzzw, l18\n"
+"dcl_literal l19, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.__zw, r3.zzzw, l19\n"
+"dcl_literal l20, 0x00000000, 0x00000017, 0x00000017, 0x00000000\n"
+"cmov_logical r2._yz_, r2.yyzy, r3.zzwz, l20\n"
+"dcl_literal l21, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.__z_, r2.w, l21\n"
+"dcl_literal l22, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.__z_, r3.z, l22\n"
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l23\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__z_, l24, r3.z_neg(xyzw)\n"
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l25\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__z_, r3.z, l26\n"
+"dcl_literal l27, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.__z_, r3.z, l27\n"
+"dcl_literal l28, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r2.___w, r2.w, r3.z, l28\n"
+"dcl_literal l29, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.__z_, r3.x, l29\n"
+"dcl_literal l30, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.__z_, r3.z, l30\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l31\n"
+"dcl_literal l32, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__z_, l32, r3.z_neg(xyzw)\n"
+"dcl_literal l33, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l33\n"
+"dcl_literal l34, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__z_, r3.z, l34\n"
+"dcl_literal l35, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.__z_, r3.z, l35\n"
+"dcl_literal l36, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.x___, r3.x, r3.z, l36\n"
+"dcl_literal l37, 0x00000000, 0x00000000, 0x00000017, 0x00000017\n"
+"ieq r3.__zw, r2.yyyz, l37\n"
+"iadd r2.___w, r2.y, r2.w\n"
+"cmov_logical r2._y__, r3.z, r2.w, r2.y\n"
+"dcl_literal l38, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.__z_, r2.w, l38\n"
+"iadd r2.___w, r2.w, r3.x\n"
+"cmov_logical r2._y__, r3.z, r2.w, r2.y\n"
+"dcl_literal l39, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ige r2.___w, r2.y, l39\n"
+"dcl_literal l40, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.x___, r2.y, l40\n"
+"dcl_literal l41, 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F\n"
+"cmov_logical r2.___w, r2.w, r3.x, l41\n"
+"ult r3.x___, r0.x, r0.z\n"
+"and r3.x___, r1.z, r3.x\n"
+"ior r1._y__, r1.y, r3.x\n"
+"dcl_literal l42, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l43, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1._y__, r1.y, l42, l43\n"
+"ieq r3.x___, r0.x, r0.z\n"
+"and r1.__z_, r1.z, r3.x\n"
+"dcl_literal l44, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l44, r1.y\n"
+"dcl_literal l45, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r1.__z_, r0.x, l45\n"
+"ior r1.__z_, r3.y, r1.z\n"
+"dcl_literal l46, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.z, l46\n"
+"dcl_literal l47, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.x___, r0.x, l47\n"
+"dcl_literal l48, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3._y__, r1.z, l48\n"
+"dcl_literal l49, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3._y__, r3.y, l49\n"
+"dcl_literal l50, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l50\n"
+"dcl_literal l51, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3._y__, l51, r3.y_neg(xyzw)\n"
+"dcl_literal l52, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l52\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3._y__, r3.y, l53\n"
+"dcl_literal l54, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3._y__, r3.y, l54\n"
+"dcl_literal l55, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r1.__z_, r1.z, r3.y, l55\n"
+"dcl_literal l56, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3._y__, r3.x, l56\n"
+"dcl_literal l57, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3._y__, r3.y, l57\n"
+"dcl_literal l58, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l58\n"
+"dcl_literal l59, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3._y__, l59, r3.y_neg(xyzw)\n"
+"dcl_literal l60, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l60\n"
+"dcl_literal l61, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3._y__, r3.y, l61\n"
+"dcl_literal l62, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3._y__, r3.y, l62\n"
+"dcl_literal l63, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.x___, r3.x, r3.y, l63\n"
+"iadd r1.__z_, r2.z, r1.z\n"
+"cmov_logical r2.__z_, r3.w, r1.z, r2.z\n"
+"dcl_literal l64, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3._y__, r1.z, l64\n"
+"iadd r1.__z_, r1.z, r3.x\n"
+"cmov_logical r1.__z_, r3.y, r1.z, r2.z\n"
+"ilt r2.__z_, r1.z, r2.y\n"
+"ieq r3.x___, r2.y, r1.z\n"
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1._y__, r1.y, l65\n"
+"and r1._y__, r3.x, r1.y\n"
+"ior r1._y__, r2.z, r1.y\n"
+"iadd r2.__z_, r2.y, r1.z_neg(xyzw)\n"
+"dcl_literal l66, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1._y__, r1.y, r2.z, l66\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r1.y, l67\n"
+"if_logicalnz r2.z\n"
+" ilt r3.x___, r2.w, r1.y\n"
+" iadd r3._y__, r1.y, r2.w_neg(xyzw)\n"
+" and r3.x___, r3.x, r3.y\n"
+" \n"
+" dcl_literal l68, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1._y__, r1.y, l68\n"
+" \n"
+" dcl_literal l69, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1._y__, r1.y, l69, r3.x\n"
+" \n"
+" dcl_literal l70, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.x___, r1.y, l70\n"
+" \n"
+" dcl_literal l71, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3._y__, r1.y, l71\n"
+" cmov_logical r3._y__, r3.x, r3.y, r1.y\n"
+" \n"
+" dcl_literal l72, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.__z_, r1.y, l72\n"
+" \n"
+" dcl_literal l73, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.___w, l73, r3.y_neg(xyzw)\n"
+" ushr r3.___w, r0.z, r3.w\n"
+" ishl r3._y__, r0.z, r3.y\n"
+" ishl r3.__z_, r0.y, r3.z\n"
+" ior r3.__z_, r3.w, r3.z\n"
+" cmov_logical r3.__z_, r3.x, r0.z, r3.z\n"
+" \n"
+" dcl_literal l74, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r3.x, l74, r3.y\n"
+" cmov_logical r3._y__, r1.y, r3.z, r0.y\n"
+" cmov_logical r1._y__, r1.y, r3.x, r0.z\n"
+" \n"
+" dcl_literal l75, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.x___, r1.z, l75\n"
+" \n"
+" dcl_literal l76, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.__z_, r1.z, l76\n"
+" cmov_logical r3.__z_, r3.x, r3.z, r1.z\n"
+" \n"
+" dcl_literal l77, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.___w, r1.z, l77\n"
+" \n"
+" dcl_literal l78, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.__z_, l78, r3.z_neg(xyzw)\n"
+" ushr r4.x___, r0.x, r3.z\n"
+" ishl r4._y__, r0.w, r3.w\n"
+" ior r4.x___, r4.x, r4.y\n"
+" cmov_logical r4.x___, r3.x, r0.x, r4.x\n"
+" cmov_logical r4.x___, r1.z, r4.x, r0.w\n"
+" ushr r3.__z_, r1.y, r3.z\n"
+" ishl r3.___w, r3.y, r3.w\n"
+" ior r3.__z_, r3.z, r3.w\n"
+" cmov_logical r3.x___, r3.x, r1.y, r3.z\n"
+" cmov_logical r1.__z_, r1.z, r3.x, r3.y\n"
+" udiv r3.x___, r4.x, r1.z\n"
+" umul r3.__z_, r3.x, r1.z\n"
+" ilt r3.___w, r4.x, r3.z\n"
+" iadd r4._y__, r3.z, r4.x_neg(xyzw)\n"
+" iadd r4._y__, r4.y, r1.z\n"
+" \n"
+" dcl_literal l79, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4._y__, r4.y, l79\n"
+" iadd r3.__z_, r4.x, r3.z_neg(xyzw)\n"
+" cmov_logical r3.__z_, r3.w, r4.y, r3.z\n"
+" udiv r1.__z_, r3.z, r1.z\n"
+" iadd r3.__z_, r3.x, r1.z_neg(xyzw)\n"
+" iadd r1.__z_, r3.x, r1.z\n"
+" cmov_logical r1.__z_, r3.w, r3.z, r1.z\n"
+" \n"
+" dcl_literal l80, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.x___, r1.y, l80\n"
+" \n"
+" dcl_literal l81, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r1.y, l81\n"
+" \n"
+" dcl_literal l82, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r1.z, l82\n"
+" \n"
+" dcl_literal l83, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.__z_, r1.z, l83\n"
+" umul r4.x___, r3.z, r1.z\n"
+" umul r3.__z_, r3.z, r3.w\n"
+" umul r4._y__, r3.x, r1.z\n"
+" umul r3.x___, r3.x, r3.w\n"
+" \n"
+" dcl_literal l84, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r3.x, l84\n"
+" \n"
+" dcl_literal l85, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.___w, r4.y, l85\n"
+" iadd r4.__z_, r4.z, r4.w\n"
+" iadd r3.__z_, r3.z, r4.z\n"
+" \n"
+" dcl_literal l86, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r3.z, l86\n"
+" iadd r4.x___, r4.x, r4.z\n"
+" \n"
+" dcl_literal l87, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r4.y, l87\n"
+" iadd r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l88, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.__z_, r3.z, l88\n"
+" \n"
+" dcl_literal l89, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.x___, r3.x, l89\n"
+" ior r3.x___, r3.z, r3.x\n"
+" \n"
+" dcl_literal l90, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r3.y, l90\n"
+" \n"
+" dcl_literal l91, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r3.y, l91\n"
+" umul r4._y__, r4.y, r3.w\n"
+" umul r1.__z_, r3.z, r1.z\n"
+" umul r3.__z_, r3.z, r3.w\n"
+" \n"
+" dcl_literal l92, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r3.z, l92\n"
+" \n"
+" dcl_literal l93, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r1.__z_, r1.z, l93\n"
+" iadd r1.__z_, r3.w, r1.z\n"
+" iadd r1.__z_, r4.y, r1.z\n"
+" \n"
+" dcl_literal l94, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.__z_, r1.z, l94\n"
+" \n"
+" dcl_literal l95, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r3.z, l95\n"
+" ior r1.__z_, r1.z, r3.z\n"
+" iadd r3.__z_, r1.z, r4.x\n"
+" ult r1.__z_, r3.z, r1.z\n"
+" ult r3.___w, r0.w, r3.z\n"
+" ieq r4.x___, r0.w, r3.z\n"
+" ult r4._y__, r0.x, r3.x\n"
+" and r4._y__, r4.x, r4.y\n"
+" ior r3.___w, r3.w, r4.y\n"
+" \n"
+" dcl_literal l96, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l97, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r3.___w, r3.w, l96, l97\n"
+" ieq r4._y__, r0.x, r3.x\n"
+" and r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l98, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.___w, r4.x, l98, r3.w\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.___w, r3.w, l99\n"
+" ior r1.__z_, r1.z, r3.w\n"
+" iadd r1._y__, r3.x, r1.y_neg(xyzw)\n"
+" ult r3.___w, r3.x, r1.y\n"
+" iadd r3.___w, r3.z, r3.w\n"
+" iadd r3._y__, r3.w, r3.y_neg(xyzw)\n"
+" cmov_logical r3._y__, r1.z, r3.y, r3.z\n"
+" cmov_logical r1._y__, r1.z, r1.y, r3.x\n"
+" iadd r1.__z_, r0.x, r1.y_neg(xyzw)\n"
+" ult r3.x___, r0.x, r1.z\n"
+" iadd r3.x___, r0.w, r3.x\n"
+" iadd r1._y__, r3.x, r3.y_neg(xyzw)\n"
+"else\n"
+" mov r1._yz_, r0.wwxw\n"
+"endif\n"
+"ult r3.x___, r1.y, r0.y\n"
+"ieq r3._y__, r1.y, r0.y\n"
+"ult r3.__z_, r1.z, r0.z\n"
+"and r3.__z_, r3.y, r3.z\n"
+"ior r3.x___, r3.x, r3.z\n"
+"dcl_literal l100, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l101, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r3.x___, r3.x, l100, l101\n"
+"ieq r3.__z_, r1.z, r0.z\n"
+"and r3._y__, r3.y, r3.z\n"
+"dcl_literal l102, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r3.y, l102, r3.x\n"
+"dcl_literal l103, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3._y__, r1.y, l103\n"
+"dcl_literal l104, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3.__z_, r1.y, l104\n"
+"dcl_literal l105, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.___w, r1.z, l105\n"
+"ior r3._y__, r3.y, r3.w\n"
+"dcl_literal l106, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3._y__, r3.y, l106\n"
+"dcl_literal l107, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.___w, r1.z, l107\n"
+"dcl_literal l108, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.z, l108\n"
+"dcl_literal l109, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l109\n"
+"dcl_literal l110, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l110\n"
+"dcl_literal l111, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l111, r4.x_neg(xyzw)\n"
+"dcl_literal l112, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l112\n"
+"dcl_literal l113, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l113\n"
+"dcl_literal l114, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l114\n"
+"dcl_literal l115, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.__z_, r3.z, r4.x, l115\n"
+"dcl_literal l116, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.y, l116\n"
+"dcl_literal l117, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l117\n"
+"dcl_literal l118, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l118\n"
+"dcl_literal l119, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l119, r4.x_neg(xyzw)\n"
+"dcl_literal l120, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l120\n"
+"dcl_literal l121, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l121\n"
+"dcl_literal l122, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l122\n"
+"dcl_literal l123, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r4.x, l123\n"
+"dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.w, l124\n"
+"dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l125\n"
+"dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l126\n"
+"dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l127, r4.x_neg(xyzw)\n"
+"dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l128\n"
+"dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l129\n"
+"dcl_literal l130, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r4.x___, r4.x, l130\n"
+"dcl_literal l131, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.___w, r3.w, r4.x, l131\n"
+"dcl_literal l132, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r4.x___, r3.z, l132\n"
+"iadd r3._y__, r3.z, r3.y\n"
+"cmov_logical r3.__z_, r4.x, r3.y, r3.z\n"
+"dcl_literal l133, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r4.x___, r3.y, l133\n"
+"iadd r3._y__, r3.y, r3.w\n"
+"cmov_logical r3._y__, r4.x, r3.y, r3.z\n"
+"ilt r3.__z_, r3.y, r2.y\n"
+"ieq r3.___w, r2.y, r3.y\n"
+"dcl_literal l134, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r3.x___, r3.x, l134\n"
+"and r3.x___, r3.w, r3.x\n"
+"ior r3.x___, r3.z, r3.x\n"
+"and r2.__z_, r2.z, r3.x\n"
+"iadd r3.x___, r2.y, r3.y_neg(xyzw)\n"
+"dcl_literal l135, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2.__z_, r2.z, r3.x, l135\n"
+"dcl_literal l136, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r3.x___, r2.z, l136\n"
+"if_logicalnz r3.x\n"
+" ilt r3.__z_, r2.w, r2.z\n"
+" iadd r3.___w, r2.z, r2.w_neg(xyzw)\n"
+" and r3.__z_, r3.z, r3.w\n"
+" \n"
+" dcl_literal l137, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, r2.z, l137\n"
+" \n"
+" dcl_literal l138, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.__z_, r2.z, l138, r3.z\n"
+" \n"
+" dcl_literal l139, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r2.z, l139\n"
+" \n"
+" dcl_literal l140, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r2.z, l140\n"
+" cmov_logical r3.___w, r3.z, r3.w, r2.z\n"
+" \n"
+" dcl_literal l141, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r2.z, l141\n"
+" \n"
+" dcl_literal l142, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4._y__, l142, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r0.z, r4.y\n"
+" ishl r3.___w, r0.z, r3.w\n"
+" ishl r4.x___, r0.y, r4.x\n"
+" ior r4.x___, r4.y, r4.x\n"
+" cmov_logical r4.x___, r3.z, r0.z, r4.x\n"
+" \n"
+" dcl_literal l143, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.__z_, r3.z, l143, r3.w\n"
+" cmov_logical r3.___w, r2.z, r4.x, r0.y\n"
+" cmov_logical r2.__z_, r2.z, r3.z, r0.z\n"
+" \n"
+" dcl_literal l144, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r3.y, l144\n"
+" \n"
+" dcl_literal l145, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.x___, r3.y, l145\n"
+" cmov_logical r4.x___, r3.z, r4.x, r3.y\n"
+" \n"
+" dcl_literal l146, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4._y__, r3.y, l146\n"
+" \n"
+" dcl_literal l147, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.x___, l147, r4.x_neg(xyzw)\n"
+" ushr r4.__z_, r1.z, r4.x\n"
+" ishl r4.___w, r1.y, r4.y\n"
+" ior r4.__z_, r4.z, r4.w\n"
+" cmov_logical r4.__z_, r3.z, r1.z, r4.z\n"
+" cmov_logical r4.__z_, r3.y, r4.z, r1.y\n"
+" ushr r4.x___, r2.z, r4.x\n"
+" ishl r4._y__, r3.w, r4.y\n"
+" ior r4.x___, r4.x, r4.y\n"
+" cmov_logical r3.__z_, r3.z, r2.z, r4.x\n"
+" cmov_logical r3._y__, r3.y, r3.z, r3.w\n"
+" udiv r3.__z_, r4.z, r3.y\n"
+" umul r4.x___, r3.z, r3.y\n"
+" ilt r4._y__, r4.z, r4.x\n"
+" iadd r4.___w, r4.x, r4.z_neg(xyzw)\n"
+" iadd r4.___w, r4.w, r3.y\n"
+" \n"
+" dcl_literal l148, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.___w, r4.w, l148\n"
+" iadd r4.x___, r4.z, r4.x_neg(xyzw)\n"
+" cmov_logical r4.x___, r4.y, r4.w, r4.x\n"
+" udiv r3._y__, r4.x, r3.y\n"
+" iadd r4.x___, r3.z, r3.y_neg(xyzw)\n"
+" iadd r3._y__, r3.z, r3.y\n"
+" cmov_logical r3._y__, r4.y, r4.x, r3.y\n"
+" \n"
+" dcl_literal l149, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r2.z, l149\n"
+" \n"
+" dcl_literal l150, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r2.z, l150\n"
+" \n"
+" dcl_literal l151, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4._y__, r3.y, l151\n"
+" \n"
+" dcl_literal l152, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y__, r3.y, l152\n"
+" umul r4.__z_, r4.x, r3.y\n"
+" umul r4.x___, r4.x, r4.y\n"
+" umul r4.___w, r3.z, r3.y\n"
+" umul r3.__z_, r3.z, r4.y\n"
+" \n"
+" dcl_literal l153, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r3.z, l153\n"
+" \n"
+" dcl_literal l154, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r4.w, l154\n"
+" iadd r5.x___, r5.x, r5.y\n"
+" iadd r4.x___, r4.x, r5.x\n"
+" \n"
+" dcl_literal l155, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r4.x, l155\n"
+" iadd r4.__z_, r4.z, r5.x\n"
+" \n"
+" dcl_literal l156, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r4.w, l156\n"
+" iadd r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l157, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4.x___, r4.x, l157\n"
+" \n"
+" dcl_literal l158, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r3.z, l158\n"
+" ior r3.__z_, r4.x, r3.z\n"
+" \n"
+" dcl_literal l159, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.w, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.w, l160\n"
+" umul r4.___w, r4.w, r4.y\n"
+" umul r3._y__, r4.x, r3.y\n"
+" umul r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r4.x, l161\n"
+" \n"
+" dcl_literal l162, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.y, l162\n"
+" iadd r3._y__, r4.y, r3.y\n"
+" iadd r3._y__, r4.w, r3.y\n"
+" \n"
+" dcl_literal l163, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3._y__, r3.y, l163\n"
+" \n"
+" dcl_literal l164, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r4.x, l164\n"
+" ior r3._y__, r3.y, r4.x\n"
+" iadd r4.x___, r3.y, r4.z\n"
+" ult r3._y__, r4.x, r3.y\n"
+" ult r4._y__, r1.y, r4.x\n"
+" ieq r4.__z_, r1.y, r4.x\n"
+" ult r4.___w, r1.z, r3.z\n"
+" and r4.___w, r4.z, r4.w\n"
+" ior r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l165, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l166, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4._y__, r4.y, l165, l166\n"
+" ieq r4.___w, r1.z, r3.z\n"
+" and r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l167, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4._y__, r4.z, l167, r4.y\n"
+" \n"
+" dcl_literal l168, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, r4.y, l168\n"
+" ior r3._y__, r3.y, r4.y\n"
+" iadd r2.__z_, r3.z, r2.z_neg(xyzw)\n"
+" ult r4._y__, r3.z, r2.z\n"
+" iadd r4._y__, r4.x, r4.y\n"
+" iadd r3.___w, r4.y, r3.w_neg(xyzw)\n"
+" cmov_logical r3.___w, r3.y, r3.w, r4.x\n"
+" cmov_logical r2.__z_, r3.y, r2.z, r3.z\n"
+" iadd r2.__z_, r1.z, r2.z_neg(xyzw)\n"
+" ult r3._y__, r1.z, r2.z\n"
+" iadd r3._y__, r1.y, r3.y\n"
+" iadd r1._y__, r3.y, r3.w_neg(xyzw)\n"
+" mov r1.__z_, r2.z\n"
+"endif\n"
+"ult r2.__z_, r1.y, r0.y\n"
+"ieq r3._y__, r1.y, r0.y\n"
+"ult r3.__z_, r1.z, r0.z\n"
+"and r3.__z_, r3.y, r3.z\n"
+"ior r2.__z_, r2.z, r3.z\n"
+"dcl_literal l169, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l170, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2.__z_, r2.z, l169, l170\n"
+"ieq r3.__z_, r1.z, r0.z\n"
+"and r3._y__, r3.y, r3.z\n"
+"dcl_literal l171, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r3.y, l171, r2.z\n"
+"dcl_literal l172, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3._y__, r1.y, l172\n"
+"dcl_literal l173, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3.__z_, r1.y, l173\n"
+"dcl_literal l174, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.___w, r1.z, l174\n"
+"ior r3._y__, r3.y, r3.w\n"
+"dcl_literal l175, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3._y__, r3.y, l175\n"
+"dcl_literal l176, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.___w, r1.z, l176\n"
+"dcl_literal l177, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.z, l177\n"
+"dcl_literal l178, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l178\n"
+"dcl_literal l179, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l179\n"
+"dcl_literal l180, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l180, r4.x_neg(xyzw)\n"
+"dcl_literal l181, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l181\n"
+"dcl_literal l182, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l182\n"
+"dcl_literal l183, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l183\n"
+"dcl_literal l184, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.__z_, r3.z, r4.x, l184\n"
+"dcl_literal l185, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.y, l185\n"
+"dcl_literal l186, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l186\n"
+"dcl_literal l187, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l187\n"
+"dcl_literal l188, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l188, r4.x_neg(xyzw)\n"
+"dcl_literal l189, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l189\n"
+"dcl_literal l190, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l190\n"
+"dcl_literal l191, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l191\n"
+"dcl_literal l192, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r4.x, l192\n"
+"dcl_literal l193, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.w, l193\n"
+"dcl_literal l194, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l194\n"
+"dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l195\n"
+"dcl_literal l196, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l196, r4.x_neg(xyzw)\n"
+"dcl_literal l197, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l197\n"
+"dcl_literal l198, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l198\n"
+"dcl_literal l199, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r4.x___, r4.x, l199\n"
+"dcl_literal l200, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.___w, r3.w, r4.x, l200\n"
+"dcl_literal l201, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r4.x___, r3.z, l201\n"
+"iadd r3._y__, r3.z, r3.y\n"
+"cmov_logical r3.__z_, r4.x, r3.y, r3.z\n"
+"dcl_literal l202, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r4.x___, r3.y, l202\n"
+"iadd r3._y__, r3.y, r3.w\n"
+"cmov_logical r3._y__, r4.x, r3.y, r3.z\n"
+"ilt r3.__z_, r3.y, r2.y\n"
+"ieq r3.___w, r2.y, r3.y\n"
+"dcl_literal l203, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.z, l203\n"
+"and r2.__z_, r3.w, r2.z\n"
+"ior r2.__z_, r3.z, r2.z\n"
+"and r2.__z_, r3.x, r2.z\n"
+"iadd r3.x___, r2.y, r3.y_neg(xyzw)\n"
+"dcl_literal l204, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2.__z_, r2.z, r3.x, l204\n"
+"dcl_literal l205, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r3.x___, r2.z, l205\n"
+"if_logicalnz r3.x\n"
+" ilt r3.__z_, r2.w, r2.z\n"
+" iadd r3.___w, r2.z, r2.w_neg(xyzw)\n"
+" and r3.__z_, r3.z, r3.w\n"
+" \n"
+" dcl_literal l206, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, r2.z, l206\n"
+" \n"
+" dcl_literal l207, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.__z_, r2.z, l207, r3.z\n"
+" \n"
+" dcl_literal l208, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r2.z, l208\n"
+" \n"
+" dcl_literal l209, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r2.z, l209\n"
+" cmov_logical r3.___w, r3.z, r3.w, r2.z\n"
+" \n"
+" dcl_literal l210, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r2.z, l210\n"
+" \n"
+" dcl_literal l211, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4._y__, l211, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r0.z, r4.y\n"
+" ishl r3.___w, r0.z, r3.w\n"
+" ishl r4.x___, r0.y, r4.x\n"
+" ior r4.x___, r4.y, r4.x\n"
+" cmov_logical r4.x___, r3.z, r0.z, r4.x\n"
+" \n"
+" dcl_literal l212, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.__z_, r3.z, l212, r3.w\n"
+" cmov_logical r3.___w, r2.z, r4.x, r0.y\n"
+" cmov_logical r2.__z_, r2.z, r3.z, r0.z\n"
+" \n"
+" dcl_literal l213, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r3.y, l213\n"
+" \n"
+" dcl_literal l214, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.x___, r3.y, l214\n"
+" cmov_logical r4.x___, r3.z, r4.x, r3.y\n"
+" \n"
+" dcl_literal l215, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4._y__, r3.y, l215\n"
+" \n"
+" dcl_literal l216, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.x___, l216, r4.x_neg(xyzw)\n"
+" ushr r4.__z_, r1.z, r4.x\n"
+" ishl r4.___w, r1.y, r4.y\n"
+" ior r4.__z_, r4.z, r4.w\n"
+" cmov_logical r4.__z_, r3.z, r1.z, r4.z\n"
+" cmov_logical r4.__z_, r3.y, r4.z, r1.y\n"
+" ushr r4.x___, r2.z, r4.x\n"
+" ishl r4._y__, r3.w, r4.y\n"
+" ior r4.x___, r4.x, r4.y\n"
+" cmov_logical r3.__z_, r3.z, r2.z, r4.x\n"
+" cmov_logical r3._y__, r3.y, r3.z, r3.w\n"
+" udiv r3.__z_, r4.z, r3.y\n"
+" umul r4.x___, r3.z, r3.y\n"
+" ilt r4._y__, r4.z, r4.x\n"
+" iadd r4.___w, r4.x, r4.z_neg(xyzw)\n"
+" iadd r4.___w, r4.w, r3.y\n"
+" \n"
+" dcl_literal l217, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.___w, r4.w, l217\n"
+" iadd r4.x___, r4.z, r4.x_neg(xyzw)\n"
+" cmov_logical r4.x___, r4.y, r4.w, r4.x\n"
+" udiv r3._y__, r4.x, r3.y\n"
+" iadd r4.x___, r3.z, r3.y_neg(xyzw)\n"
+" iadd r3._y__, r3.z, r3.y\n"
+" cmov_logical r3._y__, r4.y, r4.x, r3.y\n"
+" \n"
+" dcl_literal l218, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r2.z, l218\n"
+" \n"
+" dcl_literal l219, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r2.z, l219\n"
+" \n"
+" dcl_literal l220, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4._y__, r3.y, l220\n"
+" \n"
+" dcl_literal l221, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y__, r3.y, l221\n"
+" umul r4.__z_, r4.x, r3.y\n"
+" umul r4.x___, r4.x, r4.y\n"
+" umul r4.___w, r3.z, r3.y\n"
+" umul r3.__z_, r3.z, r4.y\n"
+" \n"
+" dcl_literal l222, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r3.z, l222\n"
+" \n"
+" dcl_literal l223, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r4.w, l223\n"
+" iadd r5.x___, r5.x, r5.y\n"
+" iadd r4.x___, r4.x, r5.x\n"
+" \n"
+" dcl_literal l224, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r4.x, l224\n"
+" iadd r4.__z_, r4.z, r5.x\n"
+" \n"
+" dcl_literal l225, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r4.w, l225\n"
+" iadd r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l226, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4.x___, r4.x, l226\n"
+" \n"
+" dcl_literal l227, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r3.z, l227\n"
+" ior r3.__z_, r4.x, r3.z\n"
+" \n"
+" dcl_literal l228, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.w, l228\n"
+" \n"
+" dcl_literal l229, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.w, l229\n"
+" umul r4.___w, r4.w, r4.y\n"
+" umul r3._y__, r4.x, r3.y\n"
+" umul r4.x___, r4.x, r4.y\n"
+" \n"
+" dcl_literal l230, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r4.x, l230\n"
+" \n"
+" dcl_literal l231, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.y, l231\n"
+" iadd r3._y__, r4.y, r3.y\n"
+" iadd r3._y__, r4.w, r3.y\n"
+" \n"
+" dcl_literal l232, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3._y__, r3.y, l232\n"
+" \n"
+" dcl_literal l233, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r4.x, l233\n"
+" ior r3._y__, r3.y, r4.x\n"
+" iadd r4.x___, r3.y, r4.z\n"
+" ult r3._y__, r4.x, r3.y\n"
+" ult r4._y__, r1.y, r4.x\n"
+" ieq r4.__z_, r1.y, r4.x\n"
+" ult r4.___w, r1.z, r3.z\n"
+" and r4.___w, r4.z, r4.w\n"
+" ior r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l234, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l235, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4._y__, r4.y, l234, l235\n"
+" ieq r4.___w, r1.z, r3.z\n"
+" and r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l236, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4._y__, r4.z, l236, r4.y\n"
+" \n"
+" dcl_literal l237, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, r4.y, l237\n"
+" ior r3._y__, r3.y, r4.y\n"
+" iadd r2.__z_, r3.z, r2.z_neg(xyzw)\n"
+" ult r4._y__, r3.z, r2.z\n"
+" iadd r4._y__, r4.x, r4.y\n"
+" iadd r3.___w, r4.y, r3.w_neg(xyzw)\n"
+" cmov_logical r3.___w, r3.y, r3.w, r4.x\n"
+" cmov_logical r2.__z_, r3.y, r2.z, r3.z\n"
+" iadd r2.__z_, r1.z, r2.z_neg(xyzw)\n"
+" ult r3._y__, r1.z, r2.z\n"
+" iadd r3._y__, r1.y, r3.y\n"
+" iadd r1._y__, r3.y, r3.w_neg(xyzw)\n"
+" mov r1.__z_, r2.z\n"
+"endif\n"
+"ult r2.__z_, r1.y, r0.y\n"
+"ieq r3._y__, r1.y, r0.y\n"
+"ult r3.__z_, r1.z, r0.z\n"
+"and r3.__z_, r3.y, r3.z\n"
+"ior r2.__z_, r2.z, r3.z\n"
+"dcl_literal l238, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l239, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2.__z_, r2.z, l238, l239\n"
+"ieq r3.__z_, r1.z, r0.z\n"
+"and r3._y__, r3.y, r3.z\n"
+"dcl_literal l240, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r3.y, l240, r2.z\n"
+"dcl_literal l241, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3._y__, r1.y, l241\n"
+"dcl_literal l242, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3.__z_, r1.y, l242\n"
+"dcl_literal l243, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.___w, r1.z, l243\n"
+"ior r3._y__, r3.y, r3.w\n"
+"dcl_literal l244, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3._y__, r3.y, l244\n"
+"dcl_literal l245, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.___w, r1.z, l245\n"
+"dcl_literal l246, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.z, l246\n"
+"dcl_literal l247, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l247\n"
+"dcl_literal l248, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l248\n"
+"dcl_literal l249, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l249, r4.x_neg(xyzw)\n"
+"dcl_literal l250, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l250\n"
+"dcl_literal l251, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l251\n"
+"dcl_literal l252, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l252\n"
+"dcl_literal l253, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.__z_, r3.z, r4.x, l253\n"
+"dcl_literal l254, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.y, l254\n"
+"dcl_literal l255, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l255\n"
+"dcl_literal l256, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l256\n"
+"dcl_literal l257, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l257, r4.x_neg(xyzw)\n"
+"dcl_literal l258, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l258\n"
+"dcl_literal l259, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l259\n"
+"dcl_literal l260, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.x___, r4.x, l260\n"
+"dcl_literal l261, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r4.x, l261\n"
+"dcl_literal l262, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.x___, r3.w, l262\n"
+"dcl_literal l263, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.x___, r4.x, l263\n"
+"dcl_literal l264, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l264\n"
+"dcl_literal l265, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.x___, l265, r4.x_neg(xyzw)\n"
+"dcl_literal l266, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.x___, r4.x, l266\n"
+"dcl_literal l267, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.x___, r4.x, l267\n"
+"dcl_literal l268, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r4.x___, r4.x, l268\n"
+"dcl_literal l269, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.___w, r3.w, r4.x, l269\n"
+"dcl_literal l270, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r4.x___, r3.z, l270\n"
+"iadd r3._y__, r3.z, r3.y\n"
+"cmov_logical r3.__z_, r4.x, r3.y, r3.z\n"
+"dcl_literal l271, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r4.x___, r3.y, l271\n"
+"iadd r3._y__, r3.y, r3.w\n"
+"cmov_logical r3._y__, r4.x, r3.y, r3.z\n"
+"ilt r3.__z_, r3.y, r2.y\n"
+"ieq r3.___w, r2.y, r3.y\n"
+"dcl_literal l272, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.z, l272\n"
+"and r2.__z_, r3.w, r2.z\n"
+"ior r2.__z_, r3.z, r2.z\n"
+"and r2.__z_, r3.x, r2.z\n"
+"iadd r2._y__, r2.y, r3.y_neg(xyzw)\n"
+"dcl_literal l273, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2._y__, r2.z, r2.y, l273\n"
+"dcl_literal l274, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.y, l274\n"
+"if_logicalnz r2.z\n"
+" ilt r2.__z_, r2.w, r2.y\n"
+" iadd r2.___w, r2.y, r2.w_neg(xyzw)\n"
+" and r2.__z_, r2.z, r2.w\n"
+" \n"
+" dcl_literal l275, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, r2.y, l275\n"
+" \n"
+" dcl_literal l276, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2._y__, r2.y, l276, r2.z\n"
+" \n"
+" dcl_literal l277, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2.__z_, r2.y, l277\n"
+" \n"
+" dcl_literal l278, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r2.___w, r2.y, l278\n"
+" cmov_logical r2.___w, r2.z, r2.w, r2.y\n"
+" \n"
+" dcl_literal l279, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.x___, r2.y, l279\n"
+" \n"
+" dcl_literal l280, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.__z_, l280, r2.w_neg(xyzw)\n"
+" ushr r3.__z_, r0.z, r3.z\n"
+" ishl r2.___w, r0.z, r2.w\n"
+" ishl r3.x___, r0.y, r3.x\n"
+" ior r3.x___, r3.z, r3.x\n"
+" cmov_logical r3.x___, r2.z, r0.z, r3.x\n"
+" \n"
+" dcl_literal l281, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.__z_, r2.z, l281, r2.w\n"
+" cmov_logical r0._y__, r2.y, r3.x, r0.y\n"
+" cmov_logical r0.__z_, r2.y, r2.z, r0.z\n"
+" \n"
+" dcl_literal l282, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2._y__, r3.y, l282\n"
+" \n"
+" dcl_literal l283, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r2.__z_, r3.y, l283\n"
+" cmov_logical r2.__z_, r2.y, r2.z, r3.y\n"
+" \n"
+" dcl_literal l284, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r2.___w, r3.y, l284\n"
+" \n"
+" dcl_literal l285, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r2.__z_, l285, r2.z_neg(xyzw)\n"
+" ushr r3.x___, r1.z, r2.z\n"
+" ishl r3.__z_, r1.y, r2.w\n"
+" ior r3.x___, r3.x, r3.z\n"
+" cmov_logical r3.x___, r2.y, r1.z, r3.x\n"
+" cmov_logical r3.x___, r3.y, r3.x, r1.y\n"
+" ushr r2.__z_, r0.z, r2.z\n"
+" ishl r2.___w, r0.y, r2.w\n"
+" ior r2.__z_, r2.z, r2.w\n"
+" cmov_logical r2._y__, r2.y, r0.z, r2.z\n"
+" cmov_logical r2._y__, r3.y, r2.y, r0.y\n"
+" udiv r2.__z_, r3.x, r2.y\n"
+" umul r2.___w, r2.z, r2.y\n"
+" ilt r3._y__, r3.x, r2.w\n"
+" iadd r3.__z_, r2.w, r3.x_neg(xyzw)\n"
+" iadd r3.__z_, r3.z, r2.y\n"
+" \n"
+" dcl_literal l286, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r3.__z_, r3.z, l286\n"
+" iadd r2.___w, r3.x, r2.w_neg(xyzw)\n"
+" cmov_logical r2.___w, r3.y, r3.z, r2.w\n"
+" udiv r2._y__, r2.w, r2.y\n"
+" iadd r2.___w, r2.z, r2.y_neg(xyzw)\n"
+" iadd r2._y__, r2.z, r2.y\n"
+" cmov_logical r2._y__, r3.y, r2.w, r2.y\n"
+" \n"
+" dcl_literal l287, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r0.z, l287\n"
+" \n"
+" dcl_literal l288, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.___w, r0.z, l288\n"
+" \n"
+" dcl_literal l289, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.x___, r2.y, l289\n"
+" \n"
+" dcl_literal l290, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2._y__, r2.y, l290\n"
+" umul r3._y__, r2.w, r2.y\n"
+" umul r2.___w, r2.w, r3.x\n"
+" umul r3.__z_, r2.z, r2.y\n"
+" umul r2.__z_, r2.z, r3.x\n"
+" \n"
+" dcl_literal l291, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r2.z, l291\n"
+" \n"
+" dcl_literal l292, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.z, l292\n"
+" iadd r3.___w, r3.w, r4.x\n"
+" iadd r2.___w, r2.w, r3.w\n"
+" \n"
+" dcl_literal l293, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r2.w, l293\n"
+" iadd r3._y__, r3.y, r3.w\n"
+" \n"
+" dcl_literal l294, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r3.z, l294\n"
+" iadd r3._y__, r3.y, r3.z\n"
+" \n"
+" dcl_literal l295, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.___w, r2.w, l295\n"
+" \n"
+" dcl_literal l296, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r2.z, l296\n"
+" ior r2.__z_, r2.w, r2.z\n"
+" \n"
+" dcl_literal l297, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.___w, r0.y, l297\n"
+" \n"
+" dcl_literal l298, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r0.y, l298\n"
+" umul r3.__z_, r3.z, r3.x\n"
+" umul r2._y__, r2.w, r2.y\n"
+" umul r2.___w, r2.w, r3.x\n"
+" \n"
+" dcl_literal l299, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.x___, r2.w, l299\n"
+" \n"
+" dcl_literal l300, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2._y__, r2.y, l300\n"
+" iadd r2._y__, r3.x, r2.y\n"
+" iadd r2._y__, r3.z, r2.y\n"
+" \n"
+" dcl_literal l301, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2._y__, r2.y, l301\n"
+" \n"
+" dcl_literal l302, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.___w, r2.w, l302\n"
+" ior r2._y__, r2.y, r2.w\n"
+" iadd r2.___w, r2.y, r3.y\n"
+" ult r2._y__, r2.w, r2.y\n"
+" ult r3.x___, r1.y, r2.w\n"
+" ieq r3._y__, r1.y, r2.w\n"
+" ult r3.__z_, r1.z, r2.z\n"
+" and r3.__z_, r3.y, r3.z\n"
+" ior r3.x___, r3.x, r3.z\n"
+" \n"
+" dcl_literal l303, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l304, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r3.x___, r3.x, l303, l304\n"
+" ieq r3.__z_, r1.z, r2.z\n"
+" and r3._y__, r3.y, r3.z\n"
+" \n"
+" dcl_literal l305, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r3.y, l305, r3.x\n"
+" \n"
+" dcl_literal l306, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.x___, r3.x, l306\n"
+" ior r2._y__, r2.y, r3.x\n"
+" iadd r0.__z_, r2.z, r0.z_neg(xyzw)\n"
+" ult r3.x___, r2.z, r0.z\n"
+" iadd r3.x___, r2.w, r3.x\n"
+" iadd r0._y__, r3.x, r0.y_neg(xyzw)\n"
+" cmov_logical r0._yz_, r2.y, r0.yyzy, r2.wwzw\n"
+" iadd r0.__z_, r1.z, r0.z_neg(xyzw)\n"
+" ult r2._y__, r1.z, r0.z\n"
+" iadd r2._y__, r1.y, r2.y\n"
+" iadd r1._y__, r2.y, r0.y_neg(xyzw)\n"
+" mov r1.__z_, r0.z\n"
+"endif\n"
+"dcl_literal l307, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0._y__, r1.w, l307\n"
+"cmov_logical r0.x_z_, r0.y, r0.xxwx, r1.zzyz\n"
+"dcl_literal l308, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r1.w, r0.xzxx, l308\n"
+"dcl_literal l309, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r2.x, l309, r0.xyxx\n"
+"inegate r0.__z_, r0.x\n"
+"dcl_literal l310, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r0.___w, l310, r0.z\n"
+"iadd r0.___w, r0.w, r0.y_neg(xyzw)\n"
+"cmov_logical r0.xy__, r1.x, r0.zwzz, r0.xyxx\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__smod_i8", /* Signed 8-bit remainder: out0.x = (a - b * q) & 0xFF, with a = in0.x and b = in1.x sign-extended from their low bytes and q = a / b truncated toward zero. */
+"mdef(406)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"dcl_literal l1, 0x000000FF, 0x000000FF, 0x00000080, 0x00000000\n"
+"and r1.xyz_, r0.xyxx, l1\n" /* r1.x = a & 0xFF, r1.y = b & 0xFF, r1.z = a & 0x80 (byte sign bit of a) */
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l2\n" /* use 1 as divisor when b's byte is zero so udiv never sees 0 */
+"dcl_literal l3, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00\n"
+"ior r1.___w, r1.x, l3\n"
+"cmov_logical r1.x___, r1.z, r1.w, r1.x\n" /* r1.x = a sign-extended from 8 to 32 bits */
+"dcl_literal l4, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+"and r1.__z_, r0.w, l4\n"
+"dcl_literal l5, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00\n"
+"ior r1.___w, r0.w, l5\n"
+"cmov_logical r0.___w, r1.z, r1.w, r0.w\n" /* r0.w = divisor sign-extended from 8 to 32 bits */
+"ixor r1.__z_, r1.x, r0.w\n" /* bit 31 set when the operand signs differ */
+"imax r1.___w, r1.x, r1.x_neg(xyzw)\n" /* |a| */
+"imax r2.x___, r0.w, r0.w_neg(xyzw)\n" /* |b| */
+"udiv r1.___w, r1.w, r2.x\n"
+"inegate r2.x___, r1.w\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r1.__z_, r1.z, l6\n"
+"cmov_logical r1.__z_, r1.z, r2.x, r1.w\n" /* apply the sign to the unsigned quotient */
+"dcl_literal l7, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, r1.z, l7\n" /* quotient is forced to 0 when b's byte was zero, so the result is then a & 0xFF */
+"imul r0.___w, r0.w, r1.y\n"
+"iadd r0.___w, r1.x, r0.w_neg(xyzw)\n" /* a - b * q */
+"dcl_literal l8, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.__z_, r0.w, l8\n"
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__smoothstep_f32", /* out0.x = t*t*(3 - 2*t) with t = saturate((x - edge0) / (edge1 - edge0)); in0.x = edge0, in1.x = edge1, in2.x = x. Yields 0x7FC00001 when edge0 >= edge1 or any input is NaN. */
+"mdef(407)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"mov r0._y__, r1.x\n"
+"mov r0.__z_, r2.x\n" /* r0.xyz = (edge0, edge1, x) */
+"ge r0.___w, r0.x, r0.y\n" /* invalid-range flag: edge0 >= edge1 */
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000000\n"
+"and r1.xyz_, r0.xyzx, l0\n" /* absolute-value bit patterns of the three inputs */
+"add r0.xy__, r0.x_neg(xyzw), r0.zyzz\n" /* r0.x = x - edge0, r0.y = edge1 - edge0 */
+"\n"
+"dcl_literal l1, 0x7F800000, 0x7F800000, 0x7F800000, 0x00000000\n"
+"ilt r1.xyz_, l1, r1.xyzx\n" /* bits above the infinity pattern => NaN */
+"ior r0.__z_, r0.w, r1.x\n"
+"ior r0.__z_, r1.y, r0.z\n"
+"ior r0.__z_, r1.z, r0.z\n" /* r0.z = any NaN input or invalid range */
+"div_zeroop(infinity)_sat r0.x___, r0.x, r0.y\n" /* t, via the _sat modifier */
+"mul_ieee r0._y__, r0.x, r0.x\n" /* t*t */
+"\n"
+"dcl_literal l2, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"\n"
+"dcl_literal l3, 0x40400000, 0x40400000, 0x40400000, 0x40400000\n"
+"mad_ieee r0.x___, r0.x_neg(xyzw), l2, l3\n" /* 3.0 - 2.0*t */
+"mul_ieee r0.x___, r0.y, r0.x\n"
+"\n"
+"dcl_literal l4, 0x7FC00001, 0x7FC00001, 0x7FC00001, 0x7FC00001\n"
+"cmov_logical r0.x___, r0.z, l4, r0.x\n" /* replace the result with the NaN pattern when flagged */
+"mov out0, r0\n"
+"mend\n"
+,3,1 /* same counts as _in(3) / _out(1) in the mdef header */
+},
+{ "__smul24_i32", /* Signed 24-bit multiply: sign-extends the low 24 bits of in0.x and in1.x, then imul; result in out0.x. */
+"mdef(408)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.xy = (a, b) */
+"\n"
+"dcl_literal l0, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishl r1.xy__, r0.xyxx, l0\n" /* drop the top 8 bits ... */
+"\n"
+"dcl_literal l1, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishr r1.xy__, r1.xyxx, l1\n" /* ... and shift back arithmetically, sign-extending bit 23 */
+"imul r0.x___, r1.x, r1.y\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__smul_hi_i16", /* out0.x = (in0.x * in1.x) >> 16, arithmetic shift; presumably the operands arrive already sign-extended from 16 bits - confirm against the caller. */
+"mdef(409)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y_, r1.x\n" /* NOTE(review): 3-character mask `_y_` here vs `_y__` in sibling macros - confirm the IL parser accepts the short form */
+"imul r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l2, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishr r0.x___, r0.w, l2\n" /* keep the upper half of the 32-bit product */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__smul_hi_i32", /* High 32 bits of the signed 64-bit product in0.x * in1.x: multiplies the magnitudes with 16x16-bit partial products, then negates the 64-bit result if the operand signs differ. */
+"mdef(410)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y_, r1.x\n" /* NOTE(review): 3-character mask `_y_` here vs `_y__` in sibling macros - confirm the IL parser accepts the short form */
+"\n"
+"dcl_literal l4, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ishr r1.xy__, r0.xyxx, l4\n" /* per-operand sign masks (0 or -1) */
+"ixor r0.___w, r0.y, r0.x\n"
+"\n"
+"dcl_literal l5, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"ishr r0.___w, r0.w, l5\n" /* r0.w = sign mask of the product */
+"ixor r2.xy__, r0.xyxx, r1.xyxx\n"
+"iadd r1.xy__, r2.xyxx, r1.xyxx_neg(xyzw)\n" /* (v ^ mask) - mask = |v| for both operands */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+"and r1.__zw, r1.xxxy, l6\n" /* r1.z = low half of |a|, r1.w = low half of |b| */
+"\n"
+"dcl_literal l7, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.xy__, r1.xyxx, l7\n" /* r1.x = high half of |a|, r1.y = high half of |b| */
+"umul r2.x___, r1.x, r1.y\n" /* hi*hi */
+"umul r1.xyz_, r1.xzzx, r1.wyww\n" /* r1.x = ah*bl, r1.y = al*bh, r1.z = al*bl */
+"\n"
+"dcl_literal l8, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.___w, r1.z, l8\n"
+"\n"
+"dcl_literal l9, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r2._y__, r1.y, l9\n"
+"iadd r1.___w, r1.w, r2.y\n"
+"iadd r1.x___, r1.x, r1.w\n" /* middle column sum, carries kept in its upper half */
+"\n"
+"dcl_literal l10, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.___w, r1.x, l10\n"
+"iadd r1.___w, r2.x, r1.w\n"
+"\n"
+"dcl_literal l11, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1._y__, r1.y, l11\n"
+"iadd r1._y__, r1.w, r1.y\n" /* r1.y = high 32 bits of |a|*|b| */
+"\n"
+"dcl_literal l12, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishl r1.x___, r1.x, l12\n"
+"\n"
+"dcl_literal l13, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r1.__z_, r1.z, l13\n"
+"ior r1.x___, r1.x, r1.z\n" /* r1.x = low 32 bits of |a|*|b| */
+"ixor r1.x_z_, r0.w, r1.xxyx\n" /* with a -1 mask: complement of the low and high words */
+"iadd r1.x__w, r1.xxxz, r0.w_neg(xyzw)\n" /* with a -1 mask: each complemented word + 1 */
+"cmov_logical r1.x___, r1.x, r1.z, r1.w\n" /* negated high word: ~hi if the negated low word is nonzero, else ~hi + 1 (carry) */
+"cmov_logical r0.x___, r0.w, r1.x, r1.y\n" /* pick the negated or the plain high word by product sign */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__smul_hi_i8", /* out0.x = (in0.x * in1.x) >> 8, arithmetic shift; presumably the operands arrive already sign-extended from 8 bits - confirm against the caller. */
+"mdef(411)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y_, r1.x\n" /* NOTE(review): 3-character mask `_y_` here vs `_y__` in sibling macros - confirm the IL parser accepts the short form */
+"imul r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l0, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ishr r0.___w, r0.w, l0\n" /* drop the low byte of the product */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__sqrt_f32", /* Single-precision square root of in0.x. Inputs with a zero exponent field but nonzero bits, negative nonzero inputs and NaNs take the slow path below; everything else is a plain sqrt_vec. */
+"mdef(412)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x7F800000, 0x00000000\n"
+"and r0._yz_, r0.x, l0\n" /* r0.y = bits without sign, r0.z = exponent field */
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.__z_, r0.z, l1\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r0.___w, r0.y, l2\n"
+"and r0.__z_, r0.z, r0.w\n" /* exponent field zero but value nonzero (denormal encoding) */
+"\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.x, l3\n"
+"and r0.___w, r0.w, r1.x\n" /* r0.w = sign bit set and value nonzero */
+"ior r0.__z_, r0.z, r0.w\n"
+"\n"
+"dcl_literal l4, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l4, r0.y\n" /* r0.y = NaN flag (bits above the infinity pattern) */
+"ior r0.__z_, r0.z, r0.y\n"
+"if_logicalnz r0.z\n" /* slow path for any of the three flags above */
+" \n"
+" dcl_literal l5, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.__z_, r0.x, l5\n"
+" itof r0.__z_, r0.z\n" /* mantissa bits converted as an integer to float; presumably to obtain a normalized form of a denormal - TODO confirm */
+" \n"
+" dcl_literal l6, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.z, l6\n" /* r1.x = exponent field, r1.y = mantissa field of that float */
+" \n"
+" dcl_literal l7, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.__z_, r1.x, l7\n"
+" \n"
+" dcl_literal l8, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r0.__z_, r0.z, l8\n"
+" \n"
+" dcl_literal l9, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.x___, r1.y, l9\n" /* mantissa with bit 23 set */
+" \n"
+" dcl_literal l10, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.__z_, l10, r0.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r1._y__, l11, r0.z\n"
+" \n"
+" dcl_literal l12, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.__z_, r1.y, l12, r0.z\n" /* shift amounts above 23 become 24 */
+" \n"
+" dcl_literal l13, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1._y__, l13, r0.z\n"
+" ishr r1.__z_, r1.x, r0.z\n"
+" inegate r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l14, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.__z_, r0.z, l14\n"
+" iadd r0.__z_, r1.x, r0.z\n"
+" cmov_logical r0.__z_, r1.y, r1.z, r0.z\n" /* positive amount: shifted mantissa; otherwise mantissa plus (-amount << 23) */
+" sqrt_vec r0.__z_, r0.z\n"
+" \n"
+" dcl_literal l15, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r1.xy__, r0.z, l15\n"
+" if_logicalz r1.x\n" /* sqrt result has a zero exponent field */
+" itof r1.__z_, r1.y\n"
+" \n"
+" dcl_literal l16, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r1.__zw, r1.z, l16\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l17\n"
+" \n"
+" dcl_literal l18, 0xFFFFFFF4, 0xFFFFFFF4, 0xFFFFFFF4, 0xFFFFFFF4\n"
+" iadd r1.__z_, r1.z, l18\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l19\n"
+" \n"
+" dcl_literal l20, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.__z_, l20, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l21, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.x___, l21, r1.z\n"
+" \n"
+" dcl_literal l22, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.x, l22, r1.z\n"
+" \n"
+" dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, l23, r1.z\n"
+" ishr r2._y__, r1.w, r1.z\n"
+" inegate r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l24, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l24\n"
+" iadd r1.__z_, r1.w, r1.z\n"
+" cmov_logical r1.__z_, r2.x, r2.y, r1.z\n"
+" else\n"
+" \n"
+" dcl_literal l25, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r0.__z_, r0.z, l25\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.x___, r1.x, l26\n"
+" \n"
+" dcl_literal l27, 0xFA000000, 0xFA000000, 0xFA000000, 0xFA000000\n"
+" iadd r0.__z_, r0.z, l27\n" /* adds 0xFA000000 to the bit pattern, i.e. lowers the exponent field by 12 */
+" \n"
+" dcl_literal l28, 0xFFFFFF75, 0xFFFFFF75, 0xFFFFFF75, 0xFFFFFF75\n"
+" iadd r1.x___, r1.x, l28\n"
+" \n"
+" dcl_literal l29, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.x___, l29, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l30, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.___w, l30, r1.x\n"
+" \n"
+" dcl_literal l31, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1._y__, r1.y, l31\n"
+" ishr r1.x___, r1.y, r1.x\n"
+" cmov_logical r1.__z_, r1.w, r1.x, r0.z\n"
+" endif\n"
+" \n"
+" dcl_literal l32, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+" cmov_logical r0.__z_, r0.w, l32, r1.z\n" /* negative nonzero input: result is the pattern 0xFFC00000 */
+" \n"
+" dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+" ior r0.___w, r0.x, l33\n"
+" cmov_logical r0.x___, r0.y, r0.w, r0.z\n" /* NaN input: return the input with the 0x7FC00000 bits set */
+"else\n"
+" sqrt_vec r0.x___, r0.x\n"
+"endif\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same counts as _in(1) / _out(1) in the mdef header */
+},
+{ "__sqrt_f64", /* Double-precision square root of in0.xy (y holds the high word): a dsqrt seed refined with dmad steps. Sign-bit-set nonzero input gives the pattern 0xfff80000:00000000; zero and +infinity are returned unchanged. */
+"mdef(413)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x00000001, 0x3ff00000, 0x000fffff, 0x3ff80000\n"
+"dcl_literal l2, 0xfff80000, 0x3fe00000, -1, 0\n"
+"dcl_literal l3, 0x10000000, 256, -128, 0x7FF00000\n"
+"ilt r10.x, r0.y, l2.0\n" /* high word negative as a signed int, i.e. sign bit set; `0` in a swizzle presumably selects constant zero - confirm */
+"deq r10.y, r0.xy, l2.00\n" /* input == 0.0 */
+"ult r10.z, r0.y, l3.x\n" /* high word below 0x10000000: small input, gets pre-scaled */
+"deq r10.w, r0.xy, l3.0w\n" /* input == the 0x7FF00000:00000000 pattern (+infinity) */
+"ior r10.w, r10.w, r10.y\n" /* r10.w = return the input as-is */
+"cmov_logical r4.x, r10.z, l3.y, l3.0\n" /* pre-scale exponent: 256 or 0 */
+"cmov_logical r10.z, r10.z, l3.z, l3.0\n" /* matching post-scale exponent: -128 or 0 */
+"dldexp r5.xy, r0.xy, r4.x\n"
+"dsqrt r1.xy, r5.xy\n"
+"mov r1.x, l2.0\n" /* clear the low word of the seed */
+"drcp_zeroop(infinity) r3.xy, r1.xy\n"
+"mov r3.x, l2.0\n"
+"dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)\n" /* residual r*r - x */
+"dldexp r4.xy, r4.xy, l2.z\n" /* scaled by 2^-1 */
+"dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy\n" /* r -= residual/2 * (1/r) */
+"dmad r4.xy, r1.xy_neg(yw), r3.xy, l1.0y\n" /* 1.0 - r * recip */
+"dmad r3.xy, r3.xy, r4.xy, r3.xy\n" /* refine the reciprocal */
+"dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)\n"
+"dldexp r4.xy, r4.xy, l2.z\n"
+"dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy\n"
+"dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)\n"
+"dldexp r4.xy, r4.xy, l2.z\n"
+"dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy\n"
+"dldexp r1.xy, r1.xy, r10.z\n" /* undo the pre-scale (half the exponent) */
+"cmov_logical r1.xy, r10.xx, l2.0x, r1.xy\n" /* sign bit set: 0xfff80000 in the high word, 0 in the low word */
+"cmov_logical r0.xy, r10.ww, r0.xy, r1.xy\n" /* zero / +infinity pass through, taking priority over the line above */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same counts as _in(1) / _out(1) in the mdef header */
+},
+{ "__sqrt_f64_7XX", /* Double-precision square root of in0.xy that avoids dsqrt/drcp: splits the input with dfrexp, seeds from single-precision sqrt/rsq, refines with dmad/dmul, then rescales. Presumably the variant for 7XX-class devices (from the name) - confirm. */
+"mdef(414)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x00000001, 0x3ff00000, 0x40000000, 0x3fe00000\n"
+"dcl_literal l2, 0xfff80000, 0x7FF00000, -1, 0\n"
+"dfrexp r20, r0\n"
+"iand r21.x, r20.y, l1.x\n" /* exponent parity */
+"ishr r20.y, r20.y, l1.x\n" /* exponent >> 1 */
+"dldexp r20.xy, l1.0y, r20.y\n" /* the 0x3ff00000:0 pattern (1.0) scaled by the halved exponent */
+"dldexp r20.zw, r20.zw, r21.x\n" /* fraction scaled by the parity bit */
+"d2f r21.x, r20.zw\n"
+"sqrt r21.y, r21.x\n"
+"iadd r21.y, r21.y, l2.z\n" /* adds -1 to the float's bit pattern */
+"rsq r21.z, r21.x\n"
+"f2d r22.xy, r21.y\n"
+"f2d r21.xy, r21.z\n"
+"dmad r23.xy, r22.xy, r21.xy_neg(yw),l1.0y\n"
+"dmad r21.xy, r21.xy, r23.xy, r21.xy\n"
+"dmad r22.xy, r20.zw, r21.xy, r22.xy\n"
+"dmul r22.xy, r22.xy, l1.0w\n" /* multiply by the 0x3fe00000:0 pattern (0.5) */
+"dmad r23.xy, r22.xy, r21.xy_neg(yw),l1.0y\n"
+"dmad r21.xy, r21.xy, r23.xy, r21.xy\n"
+"dmad r22.xy, r20.zw, r21.xy, r22.xy\n"
+"dmul r22.xy, r22.xy, l1.0w\n"
+"dadd r21.zw, r22.xy, r22.0y_neg(yw)\n" /* r22 minus its own high word only; presumably a high/low split for an exact residual - TODO confirm */
+"dmad r23.xy, r22.0y, r22.0y, r20.zw_neg(yw)\n"
+"dmul r23.zw, r22.0y, r21.zw\n"
+"dmad r23.xy, r23.zw, l1.0z, r23.xy\n"
+"dmad r23.xy, r21.zw, r21.zw, r23.xy\n"
+"dmul r23.xy, r23.xy_neg(yw), r21.xy\n"
+"dmad r22.xy, r23.xy, l1.0w, r22.xy\n"
+"dmul r20.xy, r22.xy, r20.xy\n" /* apply the halved-exponent scale */
+"ilt r10.x, r0.y, l2.0\n" /* high word negative as a signed int (sign bit set) */
+"deq r10.y, r0.xy, l2.00\n" /* input == 0.0 */
+"deq r10.w, r0.xy, l2.0y\n" /* input == the 0x7FF00000:0 pattern (+infinity) */
+"ixor r10.z, r0.y, r0.y_abs\n" /* presumably isolates the sign bit of the high word - confirm _abs semantics on this operand */
+"cmov_logical r20.xy, r10.xx, l2.0x, r20.xy\n" /* sign bit set: 0xfff80000 in the high word */
+"cmov_logical r20.xy, r10.yy, r10.0z, r20.xy\n" /* zero input: low word 0, high word r10.z */
+"cmov_logical r0.xy, r10.ww, r0.xy, r20.xy\n" /* +infinity passes through */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* same counts as _in(1) / _out(1) in the mdef header */
+},
+{ "__sra_i64_i64", /* 64-bit arithmetic shift right of in0.xy (x = low word, y = high word) by in1.x; only bits 0-5 of the count are used. */
+"mdef(415)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n"
+"iand r2.x, r1.x, l0.x\n" /* n = count & 31 */
+"iand r3.x, r1.x, l0.y\n" /* nonzero when bit 5 of the count is set (shift by 32 or more) */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* 32 - n */
+"ishr r5.x, r0.y, r2.x\n" /* high >> n, arithmetic */
+"ishr r6.x, r0.y, l0.x\n" /* sign fill: high >> 31 */
+"ushr r7.x, r0.x, r2.x\n" /* low >> n, logical */
+"ishl r8.x, r0.y, r4.x\n" /* bits that move from the high word into the low word */
+"cmov_logical r9.x, r2.x, r8.x, l0.z\n" /* no carried bits when n == 0 */
+"ior r10.x, r7.x, r9.x\n"
+"cmov_logical r0.x___, r3.x, r5.x, r10.x\n" /* low word */
+"cmov_logical r0._y__, r3.x, r6.x, r5.x\n" /* high word */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__sra_i64_v2i64", /* Two-lane 64-bit arithmetic shift right: in0.xy is shifted by in1.x and in0.zw by in1.z; only bits 0-5 of each count are used. */
+"mdef(416)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n"
+"iand r2.x_z, r1.x0z, l0.x\n" /* n = count & 31, per lane in .x and .z */
+"iand r3.x_z, r1.x0z, l0.y\n" /* nonzero when bit 5 of the count is set */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - n */
+"ishr r5.x_z, r0.y0w, r2.x0z\n" /* high >> n, arithmetic */
+"ishr r6.x_z, r0.y0w, l0.x\n" /* sign fill: high >> 31 */
+"ushr r7.x_z, r0.x0z, r2.x0z\n" /* low >> n, logical */
+"ishl r8.x_z, r0.y0w, r4.x0z\n" /* bits that move from the high words into the low words */
+"cmov_logical r9.x_z, r2.x0z, r8.x0z, l0.z\n" /* no carried bits when n == 0 */
+"ior r10.x_z, r7.x0z, r9.x0z\n"
+"cmov_logical r0.x_z_, r3.x0z, r5.x0z, r10.x0z\n" /* low words */
+"cmov_logical r0._y_w, r3.0x0z, r6.0x0z, r5.0x0z\n" /* high words */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__sra_i64", /* 64-bit arithmetic shift right of in0.xy (x = low word, y = high word) by in1.x; only bits 0-5 of the count are used. Instruction sequence is the same as __sra_i64_i64. */
+"mdef(417)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n"
+"iand r2.x, r1.x, l0.x\n" /* n = count & 31 */
+"iand r3.x, r1.x, l0.y\n" /* nonzero when bit 5 of the count is set (shift by 32 or more) */
+"iadd r4.x, l0.y, r2_neg(x)\n" /* 32 - n */
+"ishr r5.x, r0.y, r2.x\n" /* high >> n, arithmetic */
+"ishr r6.x, r0.y, l0.x\n" /* sign fill: high >> 31 */
+"ushr r7.x, r0.x, r2.x\n" /* low >> n, logical */
+"ishl r8.x, r0.y, r4.x\n" /* bits that move from the high word into the low word */
+"cmov_logical r9.x, r2.x, r8.x, l0.z\n" /* no carried bits when n == 0 */
+"ior r10.x, r7.x, r9.x\n"
+"cmov_logical r0.x___, r3.x, r5.x, r10.x\n" /* low word */
+"cmov_logical r0._y__, r3.x, r6.x, r5.x\n" /* high word */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__sra_v2i64", /* Two-lane 64-bit arithmetic shift right: in0.xy is shifted by in1.x and in0.zw by in1.z; only bits 0-5 of each count are used. Instruction sequence is the same as __sra_i64_v2i64. */
+"mdef(418)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"dcl_literal l0, 31, 32, 0, 0\n"
+"iand r2.x_z, r1.x0z, l0.x\n" /* n = count & 31, per lane in .x and .z */
+"iand r3.x_z, r1.x0z, l0.y\n" /* nonzero when bit 5 of the count is set */
+"iadd r4.x_z, l0.y, r2_neg(xyz)\n" /* 32 - n */
+"ishr r5.x_z, r0.y0w, r2.x0z\n" /* high >> n, arithmetic */
+"ishr r6.x_z, r0.y0w, l0.x\n" /* sign fill: high >> 31 */
+"ushr r7.x_z, r0.x0z, r2.x0z\n" /* low >> n, logical */
+"ishl r8.x_z, r0.y0w, r4.x0z\n" /* bits that move from the high words into the low words */
+"cmov_logical r9.x_z, r2.x0z, r8.x0z, l0.z\n" /* no carried bits when n == 0 */
+"ior r10.x_z, r7.x0z, r9.x0z\n"
+"cmov_logical r0.x_z_, r3.x0z, r5.x0z, r10.x0z\n" /* low words */
+"cmov_logical r0._y_w, r3.0x0z, r6.0x0z, r5.0x0z\n" /* high words */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srhadd_i16", /* Rounding halving add: out0.x = (in0.x + in1.x + 1) >> 1, arithmetic shift, computed in 32 bits. */
+"mdef(419)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.xy = (a, b) */
+"iadd r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r0.___w, r0.w, l4\n" /* + 1 for rounding */
+"\n"
+"dcl_literal l5, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r0.x___, r0.w, l5\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srhadd_i32", /* Rounding halving add without a 33-bit intermediate: out0.x = (a >> 1) + (b >> 1) + ((a | b) & 1), with a = in0.x, b = in1.x. */
+"mdef(420)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.xy = (a, b) */
+"ior r0.___w, r0.y, r0.x\n"
+"\n"
+"dcl_literal l8, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.___w, r0.w, l8\n" /* rounding term: 1 if either operand is odd */
+"\n"
+"dcl_literal l9, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r1.xy__, r0.xyxx, l9\n" /* halve each operand first so the sum cannot overflow */
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r0.x___, r1.x, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srhadd_i8", /* Rounding halving add: out0.x = (in0.x + in1.x + 1) >> 1, arithmetic shift, computed in 32 bits. */
+"mdef(421)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.xy = (a, b) */
+"iadd r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l0, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r0.___w, r1.x, l0\n" /* + 1 for rounding */
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ishr r0.___w, r0.w, l1\n"
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srotate_i16", /* Rotates the low 16 bits of in0.x left by (in1.x & 15) and sign-extends the 16-bit result into out0.x. */
+"mdef(422)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l8, 0x0000000F, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.yxyy, l8\n" /* r1.x = count & 15, r1.y = value & 0xFFFF */
+"ishl r0.___w, r1.y, r1.x\n"
+"\n"
+"dcl_literal l9, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"iadd r1.x___, l9, r1.x_neg(xyzw)\n" /* 16 - count */
+"ushr r1.x___, r1.y, r1.x\n" /* bits that wrap around to the bottom */
+"ior r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l10, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.___w, r0.w, l10\n"
+"\n"
+"dcl_literal l11, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishl r0.___w, r0.w, l11\n"
+"\n"
+"dcl_literal l12, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ishr r0.x___, r0.w, l12\n" /* shift up then arithmetic shift down: sign-extend from bit 15 */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srotate_i32", /* Rotates in0.x left by (in1.x & 31): out0.x = (x << n) | (x >> (32 - n)), logical right shift. */
+"mdef(423)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l16, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"and r0.___w, r0.y, l16\n" /* n = count & 31 */
+"ishl r1.x___, r0.x, r0.w\n"
+"\n"
+"dcl_literal l17, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r0.___w, l17, r0.w_neg(xyzw)\n" /* 32 - n */
+"ushr r0.___w, r0.x, r0.w\n" /* NOTE(review): for n == 0 the shift amount is 32; correct only if ushr uses just the low 5 bits of its count - confirm */
+"ior r0.x___, r1.x, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__srotate_i8", /* Rotates the low 8 bits of in0.x left by (in1.x & 7) and sign-extends the 8-bit result into out0.x. */
+"mdef(424)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l0, 0x00000007, 0x000000FF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.yxyy, l0\n" /* r1.x = count & 7, r1.y = value & 0xFF */
+"ishl r0.___w, r1.y, r1.x\n"
+"\n"
+"dcl_literal l1, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"iadd r1.x___, l1, r1.x_neg(xyzw)\n" /* 8 - count */
+"ushr r1.x___, r1.y, r1.x\n" /* bits that wrap around to the bottom */
+"ior r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l2, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.___w, r0.w, l2\n"
+"\n"
+"dcl_literal l3, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishl r0.___w, r0.w, l3\n"
+"\n"
+"dcl_literal l4, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishr r0.x___, r0.w, l4\n" /* shift up 24 then arithmetic shift down: sign-extend from bit 7 */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__ssub_sat_i16", /* Saturating subtract: out0.x = in0.x - in1.x clamped to [0xFFFF8000, 0x00007FFF] (signed 16-bit range). */
+"mdef(425)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.xy = (a, b) */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* a - b in 32 bits */
+"\n"
+"dcl_literal l3, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000, 0xFFFF8000\n"
+"imax r0.___w, r0.w, l3\n" /* lower clamp */
+"\n"
+"dcl_literal l4, 0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF\n"
+"imin r0.x___, r0.w, l4\n" /* upper clamp */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__ssub_sat_i32", /* Saturating 32-bit subtract: out0.x = in0.x - in1.x, replaced by 0x7FFFFFFF or 0x80000000 when the wrapped difference moved in the wrong direction. */
+"mdef(426)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.xy = (a, b) */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* d = a - b, wrapping */
+"\n"
+"dcl_literal l6, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.y, l6\n"
+"ilt r1._y__, r0.w, r0.x\n"
+"and r1.x___, r1.x, r1.y\n" /* b < 0 but d < a: positive overflow */
+"\n"
+"dcl_literal l7, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"cmov_logical r1.x___, r1.x, l7, r0.w\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, l8, r0.y\n"
+"ilt r0.___w, r0.x, r0.w\n"
+"and r0.___w, r1.y, r0.w\n" /* b > 0 but d > a: negative overflow */
+"\n"
+"dcl_literal l9, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"cmov_logical r0.x___, r0.w, l9, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__ssub_sat_i8", /* Saturating subtract: out0.x = in0.x - in1.x clamped to [0xFFFFFF80, 0x0000007F] (signed 8-bit range). */
+"mdef(427)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.xy = (a, b) */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* a - b in 32 bits */
+"\n"
+"dcl_literal l0, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80, 0xFFFFFF80\n"
+"imax r0.___w, r0.w, l0\n" /* lower clamp */
+"\n"
+"dcl_literal l1, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+"imin r0.x___, r0.w, l1\n" /* upper clamp */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__step_f32", /* Step function: out0.x = 0.0 when in1.x < in0.x (x below edge), otherwise 1.0. */
+"mdef(428)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* fix: pack the second operand into r0.y; previously r1 was loaded but never read and the compare used in0.y */
+"lt r0.__z_, r0.y, r0.x\n" /* x < edge */
+"\n"
+"dcl_literal l0, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r0.z, l0, l1\n" /* 0.0 below the edge, 1.0 (0x3F800000) otherwise */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* same counts as _in(2) / _out(1) in the mdef header */
+},
+{ "__store_128bit_global", /* Writes all four components of in1 to g[in0.x]; no outputs. */
+"mdef(429)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x], r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_128bit_local", /* Stores in1 with lds_store_vec_id(0), addressed by in0.x; no outputs. */
+"mdef(430)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"lds_store_vec_id(0) mem, r0.x, r0.x, r1\n" /* r0.x is passed for both address operands; no destination mask, unlike the 64/96-bit variants */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_128bit_private", /* Writes all four components of in1 to x0[in0.x]; no outputs. */
+"mdef(431)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x], r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_128bit_uav", /* Stores in1 with uav_raw_store_id(0) at address in0.x; no outputs. */
+"mdef(432)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"uav_raw_store_id(0) mem0, r0.x, r1\n" /* no component mask on mem0, unlike the narrower variants */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_32bit_local", /* Stores in1 with lds_store_id(0) (the non-_vec form), addressed by in0.x; no outputs. */
+"mdef(433)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"lds_store_id(0) mem, r0.x, r0.x, r1\n" /* r0.x is passed for both address operands */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_32bit_uav", /* Stores in1 with uav_raw_store_id(0) at address in0.x, destination masked to .x; no outputs. */
+"mdef(434)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"uav_raw_store_id(0) mem0.x, r0.x, r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_64bit_local", /* Stores in1 with lds_store_vec_id(0), destination masked to .xy, addressed by in0.x; no outputs. */
+"mdef(435)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"lds_store_vec_id(0) mem.xy__, r0.x, r0.x, r1\n" /* r0.x is passed for both address operands */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_64bit_uav", /* Stores in1 with uav_raw_store_id(0) at address in0.x, destination masked to .xy; no outputs. */
+"mdef(436)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"uav_raw_store_id(0) mem0.xy, r0.x, r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_96bit_global", /* Writes the x, y and z components of in1 to g[in0.x]; w is masked off. No outputs. */
+"mdef(437)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].xyz_, r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_96bit_local", /* Stores in1 with lds_store_vec_id(0), destination masked to .xyz, addressed by in0.x; no outputs. */
+"mdef(438)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"lds_store_vec_id(0) mem.xyz_, r0.x, r0.x, r1\n" /* r0.x is passed for both address operands */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_96bit_private", /* Writes the x, y and z components of in1 to x0[in0.x]; w is masked off. No outputs. */
+"mdef(439)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].xyz_, r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_96bit_uav", /* Stores in1 with uav_raw_store_id(0) at address in0.x, destination masked to .xyz; no outputs. */
+"mdef(440)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"uav_raw_store_id(0) mem0.xyz, r0.x, r1\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_hi_64bit_global", /* Writes in1.xy into the z and w components of g[in0.x]; x and y are masked off. No outputs. */
+"mdef(441)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].__zw, r1.00xy\n" /* source swizzle routes x -> z and y -> w */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_hi_64bit_private", /* Writes in1.xy into the z and w components of x0[in0.x]; x and y are masked off. No outputs. */
+"mdef(442)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].__zw, r1.00xy\n" /* source swizzle routes x -> z and y -> w */
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_lo_64bit_global", /* Writes in1.xy into the x and y components of g[in0.x]; z and w are masked off. No outputs. */
+"mdef(443)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].xy__, r1.xy\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_lo_64bit_private", /* Writes in1.xy into the x and y components of x0[in0.x]; z and w are masked off. No outputs. */
+"mdef(444)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].xy__, r1.xy\n"
+"mend\n"
+,2,0 /* same counts as _in(2) / _out(0) in the mdef header */
+},
+{ "__store_w_32bit_global",
+"mdef(445)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].w, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_w_32bit_private",
+"mdef(446)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].w, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_x_32bit_global",
+"mdef(447)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].x, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_x_32bit_private",
+"mdef(448)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].x, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_y_32bit_global",
+"mdef(449)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].y, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_y_32bit_private",
+"mdef(450)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].y, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_z_32bit_global",
+"mdef(451)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov g[r0.x].z, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__store_z_32bit_private",
+"mdef(452)_out(0)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov x0[r0.x].z, r1.x\n"
+"mend\n"
+,2,0
+},
+{ "__tanh_f32",
+"mdef(453)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0._y__, r0.x_abs\n"
+"\n"
+"dcl_literal l0, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"mul_ieee r0.__z_, r0.y, l0\n"
+"\n"
+"dcl_literal l1, 0x4038AA3B, 0x4038AA3B, 0x4038AA3B, 0x4038AA3B\n"
+"\n"
+"dcl_literal l2, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r0.___w, r0.y, l1, l2\n"
+"round_z r0.___w, r0.w\n"
+"\n"
+"dcl_literal l3, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mul_ieee r1.x___, r0.w, l3\n"
+"\n"
+"dcl_literal l4, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r1.x___, r0.y, l4, r1.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l5, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1._y__, r0.w_neg(xyzw), l5, r1.x\n"
+"mul_ieee r1.__z_, r1.y, r1.y\n"
+"\n"
+"dcl_literal l6, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l7, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r1.___w, r1.z, l6, l7\n"
+"\n"
+"dcl_literal l8, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r1.___w, r1.z, r1.w, l8\n"
+"\n"
+"dcl_literal l9, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r1.___w, r1.z, r1.w, l9\n"
+"\n"
+"dcl_literal l10, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r1.___w, r1.z, r1.w, l10\n"
+"mad_ieee r1.__z_, r1.z_neg(xyzw), r1.w, r1.y\n"
+"mul_ieee r1._y__, r1.y, r1.z\n"
+"\n"
+"dcl_literal l11, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r1.__z_, r1.z_neg(xyzw), l11\n"
+"div_zeroop(infinity) r1._y__, r1.y, r1.z\n"
+"\n"
+"dcl_literal l12, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1._y__, r0.w, l12, r1.y_neg(xyzw)\n"
+"add r1.x___, r1.x_neg(xyzw), r1.y\n"
+"\n"
+"dcl_literal l13, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.x___, r1.x_neg(xyzw), l13\n"
+"\n"
+"dcl_literal l14, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r1._y__, r0.w, l14\n"
+"if_logicalnz r1.y\n"
+" ftoi r1._y__, r0.w\n"
+" \n"
+" dcl_literal l15, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r2.xyz_, r1.x, l15\n"
+" if_logicalz r2.y\n"
+" itof r1.__z_, r2.z\n"
+" \n"
+" dcl_literal l16, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+" and r1.__zw, r1.z, l16\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.__z_, r1.z, l17\n"
+" iadd r1.__z_, r1.z, r1.y\n"
+" \n"
+" dcl_literal l18, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l18\n"
+" \n"
+" dcl_literal l19, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r1.__z_, l19, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.___w, l20, r1.z\n"
+" \n"
+" dcl_literal l21, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.__z_, r2.w, l21, r1.z\n"
+" \n"
+" dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, l22, r1.z\n"
+" ishr r3.x___, r1.w, r1.z\n"
+" inegate r1.__z_, r1.z\n"
+" \n"
+" dcl_literal l23, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.__z_, r1.z, l23\n"
+" iadd r1.__z_, r1.w, r1.z\n"
+" cmov_logical r1.__z_, r2.w, r3.x, r1.z\n"
+" else\n"
+" \n"
+" dcl_literal l24, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.___w, r1.x, l24\n"
+" \n"
+" dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r2.y, l25\n"
+" iadd r2._y__, r2.y, r1.y\n"
+" \n"
+" dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1._y__, r1.y, l26\n"
+" iadd r1._y__, r1.w, r1.y\n"
+" \n"
+" dcl_literal l27, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.___w, r2.y, l27\n"
+" \n"
+" dcl_literal l28, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2._y__, l28, r1.w\n"
+" \n"
+" dcl_literal l29, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1._y__, r2.y, l29, r1.y\n"
+" \n"
+" dcl_literal l30, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.___w, l30, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l31, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.z, l31\n"
+" \n"
+" dcl_literal l32, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r2.__zw, l32, r1.w\n"
+" \n"
+" dcl_literal l33, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.___w, r2.w, l33, r1.w\n"
+" ishr r1.___w, r2.y, r1.w\n"
+" cmov_logical r1.__z_, r2.z, r1.w, r1.y\n"
+" endif\n"
+" \n"
+" dcl_literal l34, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+" lt r0.___w, l34, r0.w\n"
+" \n"
+" dcl_literal l35, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r0.w, l35, r1.z\n"
+" ior r1.x___, r2.x, r0.w\n"
+"endif\n"
+"\n"
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r0.z, l36\n"
+"\n"
+"dcl_literal l37, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l38, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r0.___w, r0.w, l37, l38\n"
+"\n"
+"dcl_literal l39, 0xC038AA3B, 0xC038AA3B, 0xC038AA3B, 0xC038AA3B\n"
+"mad_ieee r0.___w, r0.y, l39, r0.w\n"
+"round_z r0.___w, r0.w\n"
+"\n"
+"dcl_literal l40, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mul_ieee r1._y__, r0.w, l40\n"
+"\n"
+"dcl_literal l41, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"mad_ieee r1._y__, r0.y, l41, r1.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l42, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1.__z_, r0.w_neg(xyzw), l42, r1.y\n"
+"mul_ieee r1.___w, r1.z, r1.z\n"
+"\n"
+"dcl_literal l43, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l44, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r2.x___, r1.w, l43, l44\n"
+"\n"
+"dcl_literal l45, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r2.x___, r1.w, r2.x, l45\n"
+"\n"
+"dcl_literal l46, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r2.x___, r1.w, r2.x, l46\n"
+"\n"
+"dcl_literal l47, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r2.x___, r1.w, r2.x, l47\n"
+"mad_ieee r1.___w, r1.w_neg(xyzw), r2.x, r1.z\n"
+"mul_ieee r1.__z_, r1.z, r1.w\n"
+"\n"
+"dcl_literal l48, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r1.___w, r1.w_neg(xyzw), l48\n"
+"div_zeroop(infinity) r1.__z_, r1.z, r1.w\n"
+"\n"
+"dcl_literal l49, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r1.__z_, r0.w, l49, r1.z_neg(xyzw)\n"
+"add r1._y__, r1.y_neg(xyzw), r1.z\n"
+"mov r1.__z_, r1.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l50, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ne r1.___w, r0.w, l50\n"
+"if_logicalnz r1.w\n"
+" \n"
+" dcl_literal l51, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1.___w, r1.z, l51\n"
+" ftoi r0.___w, r0.w\n"
+" \n"
+" dcl_literal l52, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r2.xyz_, r1.w, l52\n"
+" if_logicalz r2.y\n"
+" itof r2.___w, r2.z\n"
+" \n"
+" dcl_literal l53, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.w, l53\n"
+" \n"
+" dcl_literal l54, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r3.x, l54\n"
+" iadd r2.___w, r2.w, r0.w\n"
+" \n"
+" dcl_literal l55, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.x___, r3.y, l55\n"
+" \n"
+" dcl_literal l56, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2.___w, l56, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l57, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r3._y__, l57, r2.w\n"
+" \n"
+" dcl_literal l58, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r3.y, l58, r2.w\n"
+" \n"
+" dcl_literal l59, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3._y__, l59, r2.w\n"
+" ishr r3.__z_, r3.x, r2.w\n"
+" inegate r2.___w, r2.w\n"
+" \n"
+" dcl_literal l60, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.___w, r2.w, l60\n"
+" iadd r2.___w, r3.x, r2.w\n"
+" cmov_logical r2.___w, r3.y, r3.z, r2.w\n"
+" else\n"
+" \n"
+" dcl_literal l61, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.___w, r1.w, l61\n"
+" \n"
+" dcl_literal l62, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r2.y, l62\n"
+" iadd r2._y__, r2.y, r0.w\n"
+" \n"
+" dcl_literal l63, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l63\n"
+" iadd r0.___w, r1.w, r0.w\n"
+" \n"
+" dcl_literal l64, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r1.___w, r2.y, l64\n"
+" \n"
+" dcl_literal l65, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r2._y__, l65, r1.w\n"
+" \n"
+" dcl_literal l66, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r2.y, l66, r0.w\n"
+" \n"
+" dcl_literal l67, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r1.___w, l67, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l68, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.z, l68\n"
+" \n"
+" dcl_literal l69, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r3.xy__, l69, r1.w\n"
+" \n"
+" dcl_literal l70, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r1.___w, r3.y, l70, r1.w\n"
+" ishr r1.___w, r2.y, r1.w\n"
+" cmov_logical r2.___w, r3.x, r1.w, r0.w\n"
+" endif\n"
+" ior r1._y__, r2.x, r2.w\n"
+" \n"
+" dcl_literal l71, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r1.__z_, r1.y, l71\n"
+"else\n"
+" \n"
+" dcl_literal l72, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" add r1._y__, r1.z, l72\n"
+"endif\n"
+"\n"
+"dcl_literal l73, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.z, l73\n"
+"\n"
+"dcl_literal l74, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.___w, r0.w, l74\n"
+"\n"
+"dcl_literal l75, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r1.___w, r0.z, l75\n"
+"and r0.___w, r0.w, r1.w\n"
+"cmov_logical r0.__z_, r0.w, r0.z, r1.z\n"
+"\n"
+"dcl_literal l76, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r0.___w, r0.x, l76\n"
+"mul_ieee r0.___w, r0.x, r0.w\n"
+"\n"
+"dcl_literal l77, 0x00000000, 0x00000000, 0x3F800000, 0x41B00000\n"
+"ge r1.__zw, l77, r0.y\n"
+"\n"
+"dcl_literal l78, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1._y__, r1.y, l78\n"
+"div_zeroop(infinity) r0.__z_, r0.z_neg(xyzw), r1.y\n"
+"cmov_logical r0.__z_, r1.z, r0.z, r0.w\n"
+"\n"
+"dcl_literal l79, 0x00000000, 0x3F800000, 0x41B00000, 0x00000000\n"
+"lt r1._yz_, l79, r0.y\n"
+"and r0.___w, r1.w, r1.y\n"
+"\n"
+"dcl_literal l80, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.x___, r1.x, l80\n"
+"\n"
+"dcl_literal l81, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"div_zeroop(infinity) r1.x___, l81, r1.x\n"
+"\n"
+"dcl_literal l82, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.x___, r1.x_neg(xyzw), l82\n"
+"cmov_logical r0.__z_, r0.w, r1.x, r0.z\n"
+"\n"
+"dcl_literal l83, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r1.z, l83, r0.z\n"
+"\n"
+"dcl_literal l84, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.y, l84\n"
+"cmov_logical r0.__z_, r0.w, r0.z, r0.x\n"
+"\n"
+"dcl_literal l85, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.y, l85\n"
+"\n"
+"dcl_literal l86, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.__z_, r0.w, l86, r0.z\n"
+"\n"
+"dcl_literal l87, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r0.___w, r0.x, l87\n"
+"cmov_logical r0.__z_, r0.w, r0.z_neg(xyzw), r0.z\n"
+"\n"
+"dcl_literal l88, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ult r0.___w, l88, r0.y\n"
+"\n"
+"dcl_literal l89, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r1.x___, r0.x, l89\n"
+"cmov_logical r0.__z_, r0.w, r1.x, r0.z\n"
+"cmov_logical r0.x___, r0.y, r0.z, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__tanpi_f32",
+"mdef(454)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"add r0._y__, r0.x_abs, r0.x_abs\n"
+"round_nearest r0._y__, r0.y\n"
+"dcl_literal l0, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r0.__z_, r0.x_abs, l0, r0.y_neg(xyzw)\n"
+"dcl_literal l1, 0x00000000, 0x3E800000, 0x00000000, 0x3FC90FDB\n"
+"mul_ieee r0._y_w, r0.yyyz, l1\n"
+"mul_ieee r1.x___, r0.w, r0.w\n"
+"mul_ieee r0.___w, r0.w, r1.x\n"
+"dcl_literal l2, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3\n"
+"dcl_literal l3, 0x3EC54587, 0x3EC54587, 0x3EC54587, 0x3EC54587\n"
+"mad_ieee r1._y__, r1.x_neg(xyzw), l2, l3\n"
+"mul_ieee r0.___w, r0.w, r1.y\n"
+"dcl_literal l4, 0x3C971480, 0x3C971480, 0x3C971480, 0x3C971480\n"
+"dcl_literal l5, 0xBF039337, 0xBF039337, 0xBF039337, 0xBF039337\n"
+"mad_ieee r1._y__, r1.x, l4, l5\n"
+"dcl_literal l6, 0x3F93F425, 0x3F93F425, 0x3F93F425, 0x3F93F425\n"
+"mad_ieee r1.x___, r1.y, r1.x, l6\n"
+"div_zeroop(infinity) r0.___w, r0.w, r1.x\n"
+"dcl_literal l7, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"mad_ieee r0.__z_, r0.z, l7, r0.w\n"
+"dcl_literal l8, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r0.___w, l8, r0.z\n"
+"frc r0._y__, r0.y\n"
+"dcl_literal l9, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+"mul_ieee r0._y__, r0.y, l9\n"
+"round_nearest r0._y__, r0.y\n"
+"dcl_literal l10, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r1.x___, r0.y, l10\n"
+"dcl_literal l11, 0x40400000, 0x40400000, 0x40400000, 0x40400000\n"
+"eq r0._y__, r0.y, l11\n"
+"frc r1.x___, r1.x\n"
+"add r1.x___, r1.x, r1.x\n"
+"round_nearest r1.x___, r1.x\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1.x___, l12, r1.x\n"
+"cmov_logical r0.__z_, r1.x, r0.w_neg(xyzw), r0.z\n"
+"dcl_literal l13, 0x7F800000, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000000\n"
+"and r1.xyz_, r0.x, l13\n"
+"itof r0.___w, r1.z\n"
+"dcl_literal l14, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r0.___w, r0.w, l14\n"
+"dcl_literal l15, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r2.xy__, r0.w, l15\n"
+"dcl_literal l16, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r0.___w, r2.x, l16\n"
+"dcl_literal l17, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r1.___w, r2.y, l17\n"
+"dcl_literal l18, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r0.___w, l18, r0.w_neg(xyzw)\n"
+"inegate r2.x___, r0.w\n"
+"dcl_literal l19, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.x___, r2.x, l19\n"
+"iadd r2.x___, r1.w, r2.x\n"
+"ishr r1.___w, r1.w, r0.w\n"
+"dcl_literal l20, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l20, r0.w\n"
+"cmov_logical r0.___w, r0.w, r1.w, r2.x\n"
+"dcl_literal l21, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r2.xy__, r1.zxzz, l21\n"
+"dcl_literal l22, 0x00000000, 0x00000000, 0x00000000, 0x7F800000\n"
+"ieq r1.xy_w, r1.xyxz, l22\n"
+"and r1.x___, r2.x, r1.x\n"
+"cmov_logical r0.__z_, r1.x, r0.w, r0.z\n"
+"dcl_literal l23, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ilt r0.___w, r1.z, l23\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.x___, l24, r1.z\n"
+"and r0.___w, r2.y, r0.w\n"
+"dcl_literal l25, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r1.__z_, r0.x_abs, l25\n"
+"cmov_logical r0.__z_, r0.w, r1.z, r0.z\n"
+"frc r0.___w, r0.x_abs\n"
+"add r1.__z_, r0.x_abs, r0.w_neg(xyzw)\n"
+"dcl_literal l26, 0x00000000, 0x3F000000, 0x00000000, 0x00000000\n"
+"eq r2.xy__, r0.w, l26\n"
+"dcl_literal l27, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.x___, r0.x, l27\n"
+"dcl_literal l28, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ge r0.___w, r1.z, l28\n"
+"and r0.___w, r2.x, r0.w\n"
+"ior r0.___w, r1.y, r0.w\n"
+"dcl_literal l29, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r0.w, l29, r0.z\n"
+"dcl_literal l30, 0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000\n"
+"dcl_literal l31, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r0._y__, r0.y, l30, l31\n"
+"cmov_logical r0._y__, r2.y, r0.y, r0.z\n"
+"cmov_logical r0.x___, r0.x, r0.y_neg(xyzw), r0.y\n"
+"dcl_literal l32, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0.x___, r1.w, l32, r0.x\n"
+"dcl_literal l33, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"cmov_logical r0.x___, r1.x, l33, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__tan_f32",
+"mdef(455)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0._y__, r0.x_abs\n"
+"\n"
+"dcl_literal l0, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.__z_, r0.x, l0\n"
+"\n"
+"dcl_literal l1, 0x32000000, 0x32000000, 0x32000000, 0x32000000\n"
+"ige r0.___w, r0.z, l1\n"
+"\n"
+"dcl_literal l2, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r1.x___, r0.z, l2\n"
+"and r0.___w, r0.w, r1.x\n"
+"if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l3, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680, 0x4A3FA680\n"
+" lt r0.___w, r0.y, l3\n"
+" if_logicalnz r0.w\n"
+" \n"
+" dcl_literal l4, 0x3F22F983, 0x3F22F983, 0x3F22F983, 0x3F22F983\n"
+" mul_ieee r0.___w, r0.y, l4\n"
+" round_nearest r0.___w, r0.w\n"
+" \n"
+" dcl_literal l5, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+" and r1.x___, r0.w, l5\n"
+" add r1._y__, r0.w, r1.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l6, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+" mul_ieee r1.__z_, r0.w, l6\n"
+" \n"
+" dcl_literal l7, 0x3FC90FDA, 0x33A22168, 0x27C234C4, 0x00000000\n"
+" mul_ieee r2.xyz_, r0.w, l7\n"
+" \n"
+" dcl_literal l8, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l8, r2.xyxx_neg(xyzw)\n"
+" \n"
+" dcl_literal l9, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.x, l9, r3.xyxx\n"
+" \n"
+" dcl_literal l10, 0x3FC90000, 0x33A22000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l10, r3.xyxx\n"
+" \n"
+" dcl_literal l11, 0x39FDA000, 0x2C340000, 0x00000000, 0x00000000\n"
+" mad_ieee r3.xy__, r1.y, l11, r3.xyxx\n"
+" add r0.___w, r0.y, r2.x_neg(xyzw)\n"
+" add r1.___w, r0.y, r0.w_neg(xyzw)\n"
+" add r1.___w, r2.x_neg(xyzw), r1.w\n"
+" add r1.___w, r3.x_neg(xyzw), r1.w\n"
+" add r0.___w, r0.w, r1.w\n"
+" add r1.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.w_neg(xyzw)\n"
+" add r0.___w, r2.y_neg(xyzw), r0.w\n"
+" add r0.___w, r3.y_neg(xyzw), r0.w\n"
+" add r0.___w, r1.w, r0.w\n"
+" \n"
+" dcl_literal l12, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.___w, r1.x, l12, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l13, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.x, l13, r1.w\n"
+" \n"
+" dcl_literal l14, 0x27C23000, 0x27C23000, 0x27C23000, 0x27C23000\n"
+" mad_ieee r1.x___, r1.y, l14, r1.x\n"
+" \n"
+" dcl_literal l15, 0x21188000, 0x21188000, 0x21188000, 0x21188000\n"
+" mad_ieee r1.x___, r1.y, l15, r1.x\n"
+" add r1._y__, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r0.w, r1.y_neg(xyzw)\n"
+" add r0.___w, r2.z_neg(xyzw), r0.w\n"
+" add r0.___w, r1.y, r0.w\n"
+" frc r1._y__, r1.z\n"
+" \n"
+" dcl_literal l16, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+" mul_ieee r1._y__, r1.y, l16\n"
+" round_nearest r1._y__, r1.y\n"
+" mov r1.x___, r1.x_neg(xyzw)\n"
+" else\n"
+" \n"
+" dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r1.__z_, r0.y, l17\n"
+" \n"
+" dcl_literal l18, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r1.___w, r0.y, l18\n"
+" \n"
+" dcl_literal l19, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r1.___w, r1.w, l19\n"
+" \n"
+" dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r1.w, l20\n"
+" \n"
+" dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r1.w, l21\n"
+" \n"
+" dcl_literal l22, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r3, l22, r1.w\n"
+" \n"
+" dcl_literal l23, 0x0000FE51, 0x000063AB, 0x00003C43, 0x00009041\n"
+" umul r4, l23, r2.x\n"
+" \n"
+" dcl_literal l24, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2._yz_, r4.yywy, l24\n"
+" \n"
+" dcl_literal l25, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r3.ywyy, l25\n"
+" iadd r2._yz_, r2.yyzy, r5.xxyx\n"
+" iadd r2._yz_, r4.xxzx, r2.yyzy\n"
+" \n"
+" dcl_literal l26, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x_z_, r2.yyzy, l26\n"
+" iadd r3.x_z_, r3.xxzx, r4.xxzx\n"
+" \n"
+" dcl_literal l27, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l27\n"
+" \n"
+" dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2._yz_, r2.yyzy, l28\n"
+" \n"
+" dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r4.ywyy, l29\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" ior r4.xy__, r2.yzyy, r4.xyxx\n"
+" iadd r5.__z_, r3.x, r4.y\n"
+" ult r2._y__, r5.z, r4.y\n"
+" \n"
+" dcl_literal l30, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r3.y, l30\n"
+" cmov_logical r2._y__, r2.y, r2.z, r3.y\n"
+" \n"
+" dcl_literal l31, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r3, l31, r1.w\n"
+" \n"
+" dcl_literal l32, 0x0000DB62, 0x00009599, 0x0000F534, 0x0000DDC0\n"
+" umul r6, l32, r2.x\n"
+" \n"
+" dcl_literal l33, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l33\n"
+" \n"
+" dcl_literal l34, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l34\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l35, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l35\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l36, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l36\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l37\n"
+" \n"
+" dcl_literal l38, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l38\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r5._y__, r2.y, r2.z\n"
+" ult r2._y__, r5.y, r2.z\n"
+" \n"
+" dcl_literal l39, 0x00000000, 0x00000000, 0x00000001, 0x00000001\n"
+" iadd r3.__zw, r3.xxxy, l39\n"
+" cmov_logical r2._y__, r2.y, r3.z, r3.x\n"
+" iadd r5.x___, r2.y, r2.w\n"
+" ult r2._y__, r5.x, r2.w\n"
+" cmov_logical r2._y__, r2.y, r3.w, r3.y\n"
+" \n"
+" dcl_literal l40, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r3, l40, r1.w\n"
+" \n"
+" dcl_literal l41, 0x0000FC27, 0x000057D1, 0x00004E44, 0x00001529\n"
+" umul r6, l41, r2.x\n"
+" \n"
+" dcl_literal l42, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__zw, r6.yyyw, l42\n"
+" \n"
+" dcl_literal l43, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+" and r7.xy__, r3.ywyy, l43\n"
+" iadd r2.__zw, r2.zzzw, r7.xxxy\n"
+" iadd r2.__zw, r6.xxxz, r2.zzzw\n"
+" \n"
+" dcl_literal l44, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x_z_, r2.zzwz, l44\n"
+" iadd r3.x_z_, r3.xxzx, r6.xxzx\n"
+" \n"
+" dcl_literal l45, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y_w, r3.yyyw, l45\n"
+" iadd r3.xy__, r3.xzxx, r3.ywyy\n"
+" \n"
+" dcl_literal l46, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__zw, r2.zzzw, l46\n"
+" \n"
+" dcl_literal l47, 0x00000000, 0x00000000, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__zw, r6.yyyw, l47\n"
+" ior r2.__zw, r2.zzzw, r3.zzzw\n"
+" iadd r3.___w, r2.y, r2.z\n"
+" ult r2._y__, r3.w, r2.z\n"
+" \n"
+" dcl_literal l48, 0x00000001, 0x00000001, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r3.xyxx, l48\n"
+" cmov_logical r2._y__, r2.y, r6.x, r3.x\n"
+" iadd r3.__z_, r2.y, r2.w\n"
+" ult r2._y__, r3.z, r2.w\n"
+" cmov_logical r2._y__, r2.y, r6.y, r3.y\n"
+" \n"
+" dcl_literal l49, 0x00000000, 0x00000000, 0x0000A2F9, 0x0000836E\n"
+" umul r2.__zw, l49, r1.w\n"
+" \n"
+" dcl_literal l50, 0x0000A2F9, 0x0000836E, 0x00000000, 0x00000000\n"
+" umul r3.xy__, l50, r2.x\n"
+" \n"
+" dcl_literal l51, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.___w, r3.y, l51\n"
+" \n"
+" dcl_literal l52, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r2.w, l52\n"
+" iadd r1.___w, r1.w, r2.x\n"
+" iadd r1.___w, r3.x, r1.w\n"
+" \n"
+" dcl_literal l53, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.x___, r1.w, l53\n"
+" iadd r2.x___, r2.z, r2.x\n"
+" \n"
+" dcl_literal l54, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__z_, r2.w, l54\n"
+" iadd r2.x___, r2.x, r2.z\n"
+" \n"
+" dcl_literal l55, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.___w, r1.w, l55\n"
+" \n"
+" dcl_literal l56, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r3.y, l56\n"
+" ior r1.___w, r1.w, r2.z\n"
+" iadd r2._y__, r2.y, r1.w\n"
+" ult r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l57, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r2.x, l57\n"
+" cmov_logical r1.___w, r1.w, r2.z, r2.x\n"
+" \n"
+" dcl_literal l58, 0xFFFFFF89, 0x00000000, 0xFFFFFF8A, 0x00000000\n"
+" iadd r2.x_z_, r1.z, l58\n"
+" \n"
+" dcl_literal l59, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r6.x___, r2.x, l59\n"
+" \n"
+" dcl_literal l60, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r1.__z_, r6.x, l60\n"
+" iadd r1.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l61, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l61, r6.x\n"
+" \n"
+" dcl_literal l62, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r7, r2.x, l62\n"
+" inegate r2.___w, r7.x\n"
+" \n"
+" dcl_literal l63, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r8.x___, r7.y, l63\n"
+" \n"
+" dcl_literal l64, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r8._y__, r7.z, l64\n"
+" \n"
+" dcl_literal l65, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r8.__z_, r7.w, l65\n"
+" inegate r7.xyz_, r8.xyzx\n"
+" \n"
+" dcl_literal l66, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r8, r2.x, l66\n"
+" \n"
+" dcl_literal l67, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r9.x___, r8.x, l67\n"
+" \n"
+" dcl_literal l68, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r9._yzw, r8.yyzw, l68\n"
+" inegate r8, r9\n"
+" and r2.x___, r1.w, r2.w\n"
+" and r3.xy__, r2.y, r7.xyxx\n"
+" ior r2.x___, r2.x, r3.x\n"
+" and r7._yzw, r3.zzzw, r7.yyzz\n"
+" ior r2.x___, r2.x, r7.y\n"
+" ior r2.x___, r7.w, r2.x\n"
+" and r9, r5.xxyy, r8.xyyz\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.z, r2.x\n"
+" and r7._y_w, r5.z, r8.zzzw\n"
+" ior r2.x___, r2.x, r7.y\n"
+" and r2.___w, r4.x, r8.w\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r2.___w, r1.w, r7.x\n"
+" ior r2.___w, r3.y, r2.w\n"
+" ior r2.___w, r7.z, r2.w\n"
+" and r3.x___, r3.w, r8.x\n"
+" ior r2.___w, r2.w, r3.x\n"
+" ior r2.___w, r9.y, r2.w\n"
+" ior r2.___w, r9.w, r2.w\n"
+" ior r2.___w, r7.w, r2.w\n"
+" \n"
+" dcl_literal l69, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r3.x___, l69, r1.z\n"
+" and r3.x___, r2.x, r3.x\n"
+" inegate r3._y__, r1.z\n"
+" \n"
+" dcl_literal l70, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+" iadd r6._y__, l70, r3.y\n"
+" \n"
+" dcl_literal l71, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ult r4._y__, l71, r1.z\n"
+" \n"
+" dcl_literal l72, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r1.z, l72\n"
+" \n"
+" dcl_literal l73, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ushr r1.__z_, l73, r1.z\n"
+" and r1.__z_, r2.x, r1.z\n"
+" ior r1.__z_, r3.x, r1.z\n"
+" ushr r1.__z_, r1.z, r6.y\n"
+" ushr r2.x___, r3.x, r6.y\n"
+" \n"
+" dcl_literal l74, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" and r2.___w, r2.w, l74\n"
+" \n"
+" dcl_literal l75, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.___w, r2.w, l75\n"
+" ior r2.x___, r2.x, r2.w\n"
+" cmov_logical r7.__z_, r4.y, r1.z, r2.x\n"
+" \n"
+" dcl_literal l76, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r8.x___, r6.x, l76\n"
+" \n"
+" dcl_literal l77, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+" ishl r6.__z_, l77, r3.y\n"
+" \n"
+" dcl_literal l78, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.___w, r6.z, l78\n"
+" \n"
+" dcl_literal l79, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFFF\n"
+" mov r8._yzw, l79\n"
+" cmov_logical r6, r6.y, r6, r8\n"
+" \n"
+" dcl_literal l80, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.__z_, r6.y, l80\n"
+" \n"
+" dcl_literal l81, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r1.__z_, l81, r1.z\n"
+" \n"
+" dcl_literal l82, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r2.x___, r1.z, l82\n"
+" switch r6.x\n"
+" case 0\n"
+" and r3.x___, r1.w, r1.z\n"
+" \n"
+" dcl_literal l83, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l84, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l83, l84\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l85, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l85\n"
+" \n"
+" dcl_literal l86, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l86\n"
+" \n"
+" dcl_literal l87, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.zzwz_neg(xyzw), l87\n"
+" \n"
+" dcl_literal l88, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r11.x___, r2.y_neg(xyzw), l88\n"
+" and r3._y__, r1.w, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.x___, r2.y\n"
+" mov r11._yz_, r3.zzwz\n"
+" mov r3._y__, r1.w\n"
+" endif\n"
+" and r8.x___, r2.x, r3.y\n"
+" mov r8._yzw, r11.xxyz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 1\n"
+" and r3.x___, r2.y, r1.z\n"
+" \n"
+" dcl_literal l89, 0x00000001, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" \n"
+" dcl_literal l90, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.xy__, r3.x, l89, l90\n"
+" if_logicalnz r3.y\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l91, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.y_neg(xyzw), l91\n"
+" \n"
+" dcl_literal l92, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+" iadd r6.xy__, r5.zxzz_neg(xyzw), l92\n"
+" \n"
+" dcl_literal l93, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r11._yz_, r3.wwzw_neg(xyzw), l93\n"
+" and r3._y__, r2.y, r6.w\n"
+" iadd r3._y__, r6.w, r3.y_neg(xyzw)\n"
+" mov r10.x___, r6.y\n"
+" mov r10.__z_, r6.x\n"
+" mov r11.xy__, r11.zyzz\n"
+" else\n"
+" mov r10.xyz_, r5.xyzx\n"
+" mov r10.___w, r4.x\n"
+" mov r11.xy__, r3.zwzz\n"
+" mov r3._y__, r2.y\n"
+" endif\n"
+" and r11.__z_, r2.x, r3.y\n"
+" \n"
+" dcl_literal l94, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l94\n"
+" mov r8, r11.wzxy\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" case 2\n"
+" and r3.x___, r3.z, r1.z\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l95, l96\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l97, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l97\n"
+" \n"
+" dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6._y__, r3.w_neg(xyzw), l98\n"
+" and r4._y__, r3.z, r6.w\n"
+" iadd r6.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r5.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r5\n"
+" cmov_logical r11._yz_, r3.x, r6.xxyx, r3.zzwz\n"
+" and r11.x___, r2.x, r11.y\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.___w, l99\n"
+" mov r8, r11.wwxz\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 3\n"
+" and r3.x___, r3.w, r1.z\n"
+" \n"
+" dcl_literal l100, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l101, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l100, l101\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l102, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10.xyz_, r5.xyzx_neg(xyzw), l102\n"
+" and r4._y__, r3.w, r6.w\n"
+" iadd r4._y__, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" cmov_logical r3.x___, r3.x, r4.y, r3.w\n"
+" and r11.___w, r2.x, r3.x\n"
+" \n"
+" dcl_literal l103, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r11.xyz_, l103\n"
+" mov r8, r11\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 4\n"
+" and r3.x___, r5.x, r1.z\n"
+" \n"
+" dcl_literal l104, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l105, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l104, l105\n"
+" inegate r10.___w, r4.x\n"
+" \n"
+" dcl_literal l106, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000\n"
+" iadd r10._yz_, r5.yyzy_neg(xyzw), l106\n"
+" and r4._y__, r5.x, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r11.xyz_, r5.xyzx\n"
+" mov r11.___w, r4.x\n"
+" cmov_logical r10, r3.x, r10, r11\n"
+" and r3.x___, r2.x, r10.x\n"
+" \n"
+" dcl_literal l107, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l107\n"
+" mov r9._yzw, r10.yyzw\n"
+" mov r9.x___, r3.x\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 5\n"
+" and r3.x___, r5.y, r1.z\n"
+" \n"
+" dcl_literal l108, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l109, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l108, l109\n"
+" inegate r10.__z_, r4.x\n"
+" \n"
+" dcl_literal l110, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r10._y__, r5.z_neg(xyzw), l110\n"
+" and r4._y__, r5.y, r6.w\n"
+" iadd r10.x___, r6.w, r4.y_neg(xyzw)\n"
+" mov r4.__zw, r5.yyyz\n"
+" cmov_logical r10.x_zw, r3.x, r10.xxyz, r4.zzwx\n"
+" and r10._y__, r2.x, r10.x\n"
+" \n"
+" dcl_literal l111, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.x___, l111\n"
+" \n"
+" dcl_literal l112, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l112\n"
+" mov r9, r10\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 6\n"
+" and r3.x___, r5.z, r1.z\n"
+" \n"
+" dcl_literal l113, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l114, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.x, l113, l114\n"
+" inegate r4.__z_, r4.x\n"
+" and r4.___w, r5.z, r6.w\n"
+" iadd r4._y__, r6.w, r4.w_neg(xyzw)\n"
+" mov r6.x___, r5.z\n"
+" mov r6._y__, r4.x\n"
+" cmov_logical r10._yz_, r3.x, r4.yyzy, r6.xxyx\n"
+" and r10.x___, r2.x, r10.y\n"
+" \n"
+" dcl_literal l115, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r10.___w, l115\n"
+" \n"
+" dcl_literal l116, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l116\n"
+" mov r9, r10.wwxz\n"
+" mov r2.___w, r3.y\n"
+" break\n"
+" case 7\n"
+" and r1.__z_, r4.x, r1.z\n"
+" \n"
+" dcl_literal l117, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" \n"
+" dcl_literal l118, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.x___, r1.z, l117, l118\n"
+" and r3._y__, r4.x, r6.w\n"
+" iadd r3._y__, r6.z, r3.y_neg(xyzw)\n"
+" cmov_logical r1.__z_, r1.z, r3.y, r4.x\n"
+" and r6.___w, r2.x, r1.z\n"
+" \n"
+" dcl_literal l119, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r6.xyz_, l119\n"
+" \n"
+" dcl_literal l120, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8, l120\n"
+" mov r9, r6\n"
+" mov r2.___w, r3.x\n"
+" break\n"
+" default\n"
+" mov r8.__zw, r3.zzzw\n"
+" mov r8._y__, r2.y\n"
+" mov r8.x___, r1.w\n"
+" mov r9.___w, r4.x\n"
+" mov r9.xyz_, r5.xyzx\n"
+" \n"
+" dcl_literal l121, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r2.___w, l121\n"
+" break\n"
+" endswitch\n"
+" mov r3, r8\n"
+" mov r4, r9\n"
+" mov r1.__z_, r2.w\n"
+" \n"
+" dcl_literal l122, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5, r3, l122\n"
+" \n"
+" dcl_literal l123, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5, r5, l123\n"
+" \n"
+" dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r6, r5, l124\n"
+" \n"
+" dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r6, r6, l125\n"
+" \n"
+" dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l126\n"
+" \n"
+" dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r6, l127, r6_neg(xyzw)\n"
+" \n"
+" dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r6, r6, l128\n"
+" \n"
+" dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r6, r6, l129\n"
+" \n"
+" dcl_literal l130, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r6, r6, l130\n"
+" \n"
+" dcl_literal l131, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r5, r5, r6, l131\n"
+" \n"
+" dcl_literal l132, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r3, l132\n"
+" \n"
+" dcl_literal l133, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l133\n"
+" \n"
+" dcl_literal l134, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l134\n"
+" \n"
+" dcl_literal l135, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l135\n"
+" \n"
+" dcl_literal l136, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l136, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l137, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l137\n"
+" \n"
+" dcl_literal l138, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l138\n"
+" \n"
+" dcl_literal l139, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l139\n"
+" \n"
+" dcl_literal l140, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l140\n"
+" \n"
+" dcl_literal l141, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r8, r5, l141\n"
+" iadd r6, r5, r6\n"
+" cmov_logical r5, r8, r6, r5\n"
+" \n"
+" dcl_literal l142, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6, r4, l142\n"
+" \n"
+" dcl_literal l143, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6, r6, l143\n"
+" \n"
+" dcl_literal l144, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r8, r6, l144\n"
+" \n"
+" dcl_literal l145, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r8, r8, l145\n"
+" \n"
+" dcl_literal l146, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l146\n"
+" \n"
+" dcl_literal l147, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r8, l147, r8_neg(xyzw)\n"
+" \n"
+" dcl_literal l148, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r8, r8, l148\n"
+" \n"
+" dcl_literal l149, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r8, r8, l149\n"
+" \n"
+" dcl_literal l150, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r8, r8, l150\n"
+" \n"
+" dcl_literal l151, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r6, r6, r8, l151\n"
+" \n"
+" dcl_literal l152, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r8, r4, l152\n"
+" \n"
+" dcl_literal l153, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r9, r8, l153\n"
+" \n"
+" dcl_literal l154, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r9, r9, l154\n"
+" \n"
+" dcl_literal l155, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l155\n"
+" \n"
+" dcl_literal l156, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r9, l156, r9_neg(xyzw)\n"
+" \n"
+" dcl_literal l157, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r9, r9, l157\n"
+" \n"
+" dcl_literal l158, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ushr r9, r9, l158\n"
+" \n"
+" dcl_literal l159, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r9, r9, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" cmov_logical r8, r8, r9, l160\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ieq r9, r6, l161\n"
+" iadd r8, r6, r8\n"
+" cmov_logical r6, r9, r8, r6\n"
+" \n"
+" dcl_literal l162, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ieq r1.___w, r5.x, l162\n"
+" \n"
+" dcl_literal l163, 0x00000020, 0x00000040, 0x00000000, 0x00000060\n"
+" iadd r2.xy_w, r5.yzyw, l163\n"
+" cmov_logical r1.___w, r1.w, r2.x, r5.x\n"
+" \n"
+" dcl_literal l164, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+" ieq r2.x___, r1.w, l164\n"
+" cmov_logical r1.___w, r2.x, r2.y, r1.w\n"
+" \n"
+" dcl_literal l165, 0x00000060, 0x00000060, 0x00000060, 0x00000060\n"
+" ieq r2.x___, r1.w, l165\n"
+" cmov_logical r1.___w, r2.x, r2.w, r1.w\n"
+" \n"
+" dcl_literal l166, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+" ieq r2.x___, r1.w, l166\n"
+" \n"
+" dcl_literal l167, 0x00000080, 0x000000A0, 0x000000C0, 0x000000E0\n"
+" iadd r5, r6, l167\n"
+" cmov_logical r1.___w, r2.x, r5.x, r1.w\n"
+" \n"
+" dcl_literal l168, 0x000000A0, 0x000000A0, 0x000000A0, 0x000000A0\n"
+" ieq r2.x___, r1.w, l168\n"
+" cmov_logical r1.___w, r2.x, r5.y, r1.w\n"
+" \n"
+" dcl_literal l169, 0x000000C0, 0x000000C0, 0x000000C0, 0x000000C0\n"
+" ieq r2.x___, r1.w, l169\n"
+" cmov_logical r1.___w, r2.x, r5.z, r1.w\n"
+" \n"
+" dcl_literal l170, 0x000000E0, 0x000000E0, 0x000000E0, 0x000000E0\n"
+" ieq r2.x___, r1.w, l170\n"
+" cmov_logical r1.___w, r2.x, r5.w, r1.w\n"
+" \n"
+" dcl_literal l171, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishr r2.x___, r1.w, l171\n"
+" \n"
+" dcl_literal l172, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ishl r2._y__, r2.x, l172\n"
+" iadd r2._y__, r1.w, r2.y_neg(xyzw)\n"
+" iadd r1.___w, r2.z, r1.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l173, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ishl r2.x___, l173, r2.x\n"
+" \n"
+" dcl_literal l174, 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
+" and r5, r2.x, l174\n"
+" inegate r2.__z_, r5.x\n"
+" \n"
+" dcl_literal l175, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" ushr r6.x__w, r5.y, l175\n"
+" \n"
+" dcl_literal l176, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+" ushr r6._y__, r5.z, l176\n"
+" \n"
+" dcl_literal l177, 0x00000003, 0x00000003, 0x00000003, 0x00000003\n"
+" ushr r6.__z_, r5.w, l177\n"
+" inegate r5, r6\n"
+" \n"
+" dcl_literal l178, 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
+" and r6, r2.x, l178\n"
+" \n"
+" dcl_literal l179, 0x00000004, 0x00000004, 0x00000004, 0x00000004\n"
+" ushr r8.x___, r6.x, l179\n"
+" \n"
+" dcl_literal l180, 0x00000005, 0x00000005, 0x00000005, 0x00000005\n"
+" ushr r8._y__, r6.y, l180\n"
+" \n"
+" dcl_literal l181, 0x00000006, 0x00000006, 0x00000006, 0x00000006\n"
+" ushr r8.__z_, r6.z, l181\n"
+" \n"
+" dcl_literal l182, 0x00000007, 0x00000007, 0x00000007, 0x00000007\n"
+" ushr r8.___w, r6.w, l182\n"
+" inegate r6, r8\n"
+" and r2.x__w, r3.xxxy, r2.z\n"
+" and r8, r3.yzwz, r5\n"
+" ior r2.x__w, r2.xxxw, r8.xxxw\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" and r8, r4, r6\n"
+" ior r2.x___, r2.x, r8.x\n"
+" ior r2.x___, r8.y, r2.x\n"
+" ior r2.x___, r8.z, r2.x\n"
+" ior r8._y_w, r8.w, r2.x\n"
+" and r2.x___, r3.w, r5.y\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.___w, r4.x, r5.z\n"
+" ior r2.x___, r2.x, r2.w\n"
+" and r9.xyz_, r4.yzwy, r6.xyzx\n"
+" ior r2.x___, r2.x, r9.x\n"
+" ior r2.x___, r9.y, r2.x\n"
+" ior r8.x___, r9.z, r2.x\n"
+" \n"
+" dcl_literal l183, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r2.y, l183\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r8.y, r2.y\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l184, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2.___w, l184, r2.y_neg(xyzw)\n"
+" ushr r8.___w, r2.x, r2.w\n"
+" \n"
+" dcl_literal l185, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r3._y__, r2.y, l185\n"
+" ishl r2.x___, r2.x, r3.y\n"
+" ushr r3.x___, r2.x, r3.y\n"
+" else\n"
+" and r2.x___, r3.z, r2.z\n"
+" and r2.__z_, r3.w, r5.w\n"
+" ior r2.x___, r2.x, r2.z\n"
+" and r2.__zw, r4.xxxy, r5.yyyz\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r2.x___, r2.w, r2.x\n"
+" and r2.__zw, r4.zzzw, r6.xxxy\n"
+" ior r2.x___, r2.x, r2.z\n"
+" ior r4._y__, r2.w, r2.x\n"
+" \n"
+" dcl_literal l186, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, l186, r2.y\n"
+" \n"
+" dcl_literal l187, 0xFFFFFFF8, 0x00000000, 0xFFFFFFF7, 0x00000000\n"
+" iadd r4.x_z_, r2.y, l187\n"
+" ishl r2.__z_, r8.y, r4.x\n"
+" ishl r2.___w, r8.x, r4.z\n"
+" \n"
+" dcl_literal l188, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r2._y__, l188, r2.y_neg(xyzw)\n"
+" ushr r2._y__, r8.x, r2.y\n"
+" ior r4.___w, r2.z, r2.y\n"
+" ushr r4.x___, r2.w, r4.z\n"
+" \n"
+" dcl_literal l189, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r8.__z_, l189\n"
+" cmov_logical r8, r2.x, r4.yxzw, r8\n"
+" mov r3.xy__, r8.yzyy\n"
+" endif\n"
+" \n"
+" dcl_literal l190, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r2.xy__, r8.w, l190\n"
+" \n"
+" dcl_literal l191, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.___w, r8.w, l191\n"
+" \n"
+" dcl_literal l192, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.___w, r2.w, l192\n"
+" \n"
+" dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l193\n"
+" \n"
+" dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.___w, l194, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.___w, r2.w, l195\n"
+" \n"
+" dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.___w, r2.w, l196\n"
+" inegate r8._y__, r2.w\n"
+" \n"
+" dcl_literal l197, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r2.__z_, l197\n"
+" cmov_logical r2.xy__, r2.x, r2.yzyy, r8.wyww\n"
+" iadd r2._y__, r1.w, r2.y\n"
+" \n"
+" dcl_literal l198, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r2._y__, r2.y, l198\n"
+" \n"
+" dcl_literal l199, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l199\n"
+" ior r7.x___, r2.x, r2.y\n"
+" \n"
+" dcl_literal l200, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.x___, r3.y, l200\n"
+" if_logicalnz r2.x\n"
+" ishl r2.x___, r3.x, r3.y\n"
+" ushr r2.x___, r2.x, r3.y\n"
+" \n"
+" dcl_literal l201, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" iadd r2._y__, l201, r3.y_neg(xyzw)\n"
+" ushr r2.x___, r2.x, r2.y\n"
+" else\n"
+" \n"
+" dcl_literal l202, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+" ult r2.__z_, l202, r3.y\n"
+" \n"
+" dcl_literal l203, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+" iadd r2.___w, r3.y, l203\n"
+" ishl r2.___w, r3.x, r2.w\n"
+" \n"
+" dcl_literal l204, 0x00000028, 0x00000028, 0x00000028, 0x00000028\n"
+" iadd r3._y__, l204, r3.y_neg(xyzw)\n"
+" ushr r3._y__, r8.x, r3.y\n"
+" ior r2.___w, r2.w, r3.y\n"
+" cmov_logical r2.x___, r2.z, r2.w, r3.x\n"
+" endif\n"
+" \n"
+" dcl_literal l205, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8, 0xFFFFFFE8\n"
+" iadd r1.___w, r1.w, l205\n"
+" \n"
+" dcl_literal l206, 0x00800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r3.xy__, r2.x, l206\n"
+" \n"
+" dcl_literal l207, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" ior r2.__z_, r2.x, l207\n"
+" \n"
+" dcl_literal l208, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+" add r2.__z_, r2.z, l208\n"
+" \n"
+" dcl_literal l209, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l209\n"
+" \n"
+" dcl_literal l210, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" iadd r2.__z_, l210, r2.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l211, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" and r2.__z_, r2.z, l211\n"
+" \n"
+" dcl_literal l212, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2.__z_, r2.z, l212\n"
+" inegate r2._y__, r2.z\n"
+" \n"
+" dcl_literal l213, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" mov r3.__z_, l213\n"
+" cmov_logical r2._yz_, r3.x, r3.yyzy, r2.xxyx\n"
+" iadd r1.___w, r1.w, r2.z\n"
+" \n"
+" dcl_literal l214, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" iadd r1.___w, r1.w, l214\n"
+" \n"
+" dcl_literal l215, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r1.___w, r1.w, l215\n"
+" ior r1.___w, r2.y, r1.w\n"
+" \n"
+" dcl_literal l216, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r7._y__, r2.x, r1.w, l216\n"
+" \n"
+" dcl_literal l217, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" iadd r2.__z_, r7.z, l217\n"
+" mov r2.xy__, r7.xyxx_neg(xyzw)\n"
+" cmov_logical r2.xyz_, r1.z, r2.xyzx, r7.xyzx\n"
+" \n"
+" dcl_literal l218, 0x00000000, 0x00000000, 0xFFFFF000, 0x00000003\n"
+" and r1.__zw, r2.xxxz, l218\n"
+" add r2.__z_, r2.x, r1.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l219, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mul_ieee r0.___w, r2.x, l219\n"
+" \n"
+" dcl_literal l220, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r2.x___, r1.z, l220, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l221, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r1.z, l221, r2.x\n"
+" \n"
+" dcl_literal l222, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000\n"
+" mad_ieee r1.__z_, r2.z, l222, r1.z\n"
+" \n"
+" dcl_literal l223, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22, 0x39FDAA22\n"
+" mad_ieee r1.__z_, r2.z, l223, r1.z\n"
+" utof r1._y__, r1.w\n"
+" \n"
+" dcl_literal l224, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+" mad_ieee r1.x___, r2.y, l224, r1.z\n"
+" endif\n"
+" \n"
+" dcl_literal l225, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+" mul_ieee r1._y__, r1.y, l225\n"
+" frc r1._y__, r1.y\n"
+" add r1._y__, r1.y, r1.y\n"
+" round_nearest r1._y__, r1.y\n"
+" add r0.___w, r0.w, r1.x\n"
+" mul_ieee r1.x___, r0.w, r0.w\n"
+" mul_ieee r1.__z_, r0.w, r1.x\n"
+" \n"
+" dcl_literal l226, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3, 0x3C8CEDD3\n"
+" \n"
+" dcl_literal l227, 0x3EC54587, 0x3EC54587, 0x3EC54587, 0x3EC54587\n"
+" mad_ieee r1.___w, r1.x_neg(xyzw), l226, l227\n"
+" mul_ieee r1.__z_, r1.z, r1.w\n"
+" \n"
+" dcl_literal l228, 0x3C971480, 0x3C971480, 0x3C971480, 0x3C971480\n"
+" \n"
+" dcl_literal l229, 0xBF039337, 0xBF039337, 0xBF039337, 0xBF039337\n"
+" mad_ieee r1.___w, r1.x, l228, l229\n"
+" \n"
+" dcl_literal l230, 0x3F93F425, 0x3F93F425, 0x3F93F425, 0x3F93F425\n"
+" mad_ieee r1.x___, r1.w, r1.x, l230\n"
+" div_zeroop(infinity) r1.x___, r1.z, r1.x\n"
+" add r0.___w, r0.w, r1.x\n"
+" \n"
+" dcl_literal l231, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+" div_zeroop(infinity) r1.x___, l231, r0.w\n"
+" \n"
+" dcl_literal l232, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" lt r1._y__, l232, r1.y\n"
+" cmov_logical r0._y__, r1.y, r1.x_neg(xyzw), r0.w\n"
+"endif\n"
+"\n"
+"dcl_literal l233, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, r0.x, l233\n"
+"cmov_logical r0._y__, r0.w, r0.y_neg(xyzw), r0.y\n"
+"\n"
+"dcl_literal l234, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ieq r0.___w, r0.z, l234\n"
+"\n"
+"dcl_literal l235, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.w, l235, r0.y\n"
+"\n"
+"dcl_literal l236, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0.__z_, l236, r0.z\n"
+"\n"
+"dcl_literal l237, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.x___, r0.x, l237\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.y\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__tgamma_f32",
+"mdef(456)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000000, 0x7FFFFFFF, 0x80000000, 0x7F800000\n"
+"and r0._yzw, r0.x, l0\n"
+"frc r1.x___, r0.y\n"
+"\n"
+"dcl_literal l1, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r1._y__, r0.x, l1\n"
+"\n"
+"dcl_literal l2, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"lt r1.__z_, l2, r0.x\n"
+"and r1.__z_, r1.y, r1.z\n"
+"\n"
+"dcl_literal l3, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.___w, r0.x, l3\n"
+"cmov_logical r1.__z_, r1.z, r1.w, r0.x\n"
+"\n"
+"dcl_literal l4, 0x7FFFFFFF, 0x007FFFFF, 0x00000000, 0x00000000\n"
+"and r2.xy__, r1.z, l4\n"
+"\n"
+"dcl_literal l5, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.___w, r2.x, l5\n"
+"\n"
+"dcl_literal l6, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r1.___w, r1.w, l6\n"
+"\n"
+"dcl_literal l7, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r2.__z_, l7, r2.y\n"
+"\n"
+"dcl_literal l8, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r2.__z_, r2.z, l8\n"
+"\n"
+"dcl_literal l9, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r2.___w, r2.z, l9\n"
+"ior r2._y__, r2.y, r2.w\n"
+"\n"
+"dcl_literal l10, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.z, l10\n"
+"iadd r1.___w, r1.w, r2.z\n"
+"itof r1.___w, r1.w\n"
+"\n"
+"dcl_literal l11, 0x00000000, 0xBF800000, 0x3F800000, 0x00000000\n"
+"add r2._yz_, r2.y, l11\n"
+"div_zeroop(infinity) r2.__z_, r2.y, r2.z\n"
+"mul_ieee r2.___w, r2.z, r2.z\n"
+"mul_ieee r3.x___, r2.w, r2.w\n"
+"\n"
+"dcl_literal l12, 0x00000000, 0x3E1CD04F, 0x3E178897, 0x00000000\n"
+"\n"
+"dcl_literal l13, 0x00000000, 0x3E638E29, 0x3E3A3325, 0x00000000\n"
+"mad_ieee r3._yz_, r3.x, l12, l13\n"
+"\n"
+"dcl_literal l14, 0x00000000, 0x3ECCCCCD, 0x3E924925, 0x00000000\n"
+"mad_ieee r3._yz_, r3.x, r3.yyzy, l14\n"
+"mul_ieee r3._y__, r3.x, r3.y\n"
+"\n"
+"dcl_literal l15, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r3.x___, r3.x, r3.z, l15\n"
+"mad_ieee r2.___w, r2.w, r3.x, r3.y\n"
+"mul_ieee r3.x___, r2.y, r2.y\n"
+"\n"
+"dcl_literal l16, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.___w, r3.x, l16, r2.w\n"
+"\n"
+"dcl_literal l17, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r3._y__, r1.w, l17\n"
+"mad_ieee r2.__z_, r2.z, r2.w, r3.y\n"
+"\n"
+"dcl_literal l18, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.__z_, r3.x, l18, r2.z_neg(xyzw)\n"
+"add r2._y__, r2.y_neg(xyzw), r2.z\n"
+"\n"
+"dcl_literal l19, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r1.___w, r1.w, l19, r2.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l20, 0x00000000, 0x3F666666, 0x41C80000, 0x00000000\n"
+"ge r2._yz_, l20, r2.x\n"
+"mov r2.___w, r1.w_neg(xyzw)\n"
+"and r2.___w, r2.y, r2.w\n"
+"\n"
+"dcl_literal l21, 0x3F3B4A23, 0x3E6B851F, 0x00000000, 0x3F9D70A4\n"
+"ge r3, r2.x, l21\n"
+"and r2._y__, r2.y, r3.x\n"
+"\n"
+"dcl_literal l22, 0x3F800000, 0x40000000, 0x00000000, 0x00000000\n"
+"add r4.xy__, r2.x_neg(xyzw), l22\n"
+"and r2._y__, r2.y, r4.x\n"
+"\n"
+"dcl_literal l23, 0x3F3B4A23, 0x3E6B851F, 0x3F9D70A4, 0x3FDDA512\n"
+"lt r5, r2.x, l23\n"
+"and r3.xyz_, r3.yzwy, r5.xywx\n"
+"\n"
+"dcl_literal l24, 0xBEEC5B0C, 0xBF800000, 0xBFBB16C3, 0xC1C80000\n"
+"add r6, r2.x, l24\n"
+"cmov_logical r2._y__, r3.x, r6.x, r2.y\n"
+"\n"
+"dcl_literal l25, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r3.x___, r3.x, l25\n"
+"cmov_logical r2._y__, r3.y, r2.x, r2.y\n"
+"\n"
+"dcl_literal l26, 0x3F666666, 0x00000000, 0x42040000, 0x00000000\n"
+"lt r4.x_z_, l26, r2.x\n"
+"and r3.___w, r5.z, r4.x\n"
+"cmov_logical r2._y__, r3.w, r6.y, r2.y\n"
+"ior r3._y__, r3.y, r3.w\n"
+"\n"
+"dcl_literal l27, 0x00000002, 0x00000002, 0x00000002, 0x00000002\n"
+"cmov_logical r3.x___, r3.y, l27, r3.x\n"
+"cmov_logical r2._y__, r3.z, r6.z, r2.y\n"
+"\n"
+"dcl_literal l28, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r3.x___, r3.z, l28, r3.x\n"
+"\n"
+"dcl_literal l29, 0x3FDDA512, 0x40000000, 0x42240000, 0x42200000\n"
+"ge r5, r2.x, l29\n"
+"\n"
+"dcl_literal l30, 0x00000000, 0x40000000, 0x41000000, 0x00000000\n"
+"lt r3._yz_, r2.x, l30\n"
+"and r3.__zw, r5.xxxy, r3.yyyz\n"
+"cmov_logical r2._y__, r3.z, r4.y, r2.y\n"
+"\n"
+"dcl_literal l31, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r3.z, l31, r3.x\n"
+"mul_ieee r3.__z_, r2.y, r2.y\n"
+"\n"
+"dcl_literal l32, 0x37D383A2, 0x383C2C75, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l33, 0x39679767, 0x38E28445, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, l32, l33\n"
+"\n"
+"dcl_literal l34, 0x3A9C54A1, 0x3A05B634, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, r4.xyxx, l34\n"
+"\n"
+"dcl_literal l35, 0x3BF2027E, 0x3B3D6EC6, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, r4.xyxx, l35\n"
+"\n"
+"dcl_literal l36, 0x3D89F001, 0x3CA89915, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, r4.xyxx, l36\n"
+"\n"
+"dcl_literal l37, 0x3D9E233F, 0x3EA51A66, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, r4.xyxx, l37\n"
+"mul_ieee r4._y__, r3.z, r4.y\n"
+"mad_ieee r4.x___, r2.y, r4.x, r4.y\n"
+"\n"
+"dcl_literal l38, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r4.x___, r2.y_neg(xyzw), l38, r4.x\n"
+"add r4.x___, r2.w, r4.x\n"
+"mul_ieee r4._y__, r2.y, r3.z\n"
+"\n"
+"dcl_literal l39, 0x39A57B6B, 0xB9A3F927, 0x39AFE9F7, 0x00000000\n"
+"\n"
+"dcl_literal l40, 0xBAB7F476, 0x3A66F867, 0xBA0D3085, 0x00000000\n"
+"mad_ieee r6.xyz_, r4.y, l39, l40\n"
+"\n"
+"dcl_literal l41, 0x3BC7E707, 0xBB7177FE, 0x3B141699, 0x00000000\n"
+"mad_ieee r6.xyz_, r4.y, r6.xyzx, l41\n"
+"\n"
+"dcl_literal l42, 0xBD064D47, 0x3C93373D, 0xBC28FCFE, 0x00000000\n"
+"mad_ieee r6.xyz_, r4.y, r6.xyzx, l42\n"
+"\n"
+"dcl_literal l43, 0x3EF7B95E, 0xBE17213C, 0x3D845A15, 0x00000000\n"
+"mad_ieee r6.xyz_, r4.y, r6.xyzx, l43\n"
+"mad_ieee r4.___w, r2.y, r6.z, r6.y\n"
+"\n"
+"dcl_literal l44, 0x31E61C52, 0x31E61C52, 0x31E61C52, 0x31E61C52\n"
+"mad_ieee r4._y__, r4.y_neg(xyzw), r4.w, l44\n"
+"mad_ieee r3.__z_, r3.z, r6.x, r4.y_neg(xyzw)\n"
+"add r3.__z_, r2.w, r3.z\n"
+"\n"
+"dcl_literal l45, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD, 0xBDF8CDCD\n"
+"add r3.__z_, r3.z, l45\n"
+"\n"
+"dcl_literal l46, 0x00000000, 0x3C5B3C5E, 0x00000000, 0x3B52D5DB\n"
+"\n"
+"dcl_literal l47, 0x00000000, 0x3E6A7578, 0x00000000, 0x3DD572AF\n"
+"mad_ieee r4._y_w, r2.y, l46, l47\n"
+"\n"
+"dcl_literal l48, 0x00000000, 0x3F7A4BB2, 0x00000000, 0x3F44EFDF\n"
+"mad_ieee r4._y_w, r2.y, r4.yyyw, l48\n"
+"\n"
+"dcl_literal l49, 0x00000000, 0x3FBA3AE7, 0x00000000, 0x4008392D\n"
+"mad_ieee r4._y_w, r2.y, r4.yyyw, l49\n"
+"\n"
+"dcl_literal l50, 0x00000000, 0x3F2200F4, 0x00000000, 0x401D2EBE\n"
+"mad_ieee r4._y_w, r2.y, r4.yyyw, l50\n"
+"\n"
+"dcl_literal l51, 0x00000000, 0xBD9E233F, 0x00000000, 0x3F800000\n"
+"mad_ieee r4._y_w, r2.y, r4.yyyw, l51\n"
+"mul_ieee r4._y__, r2.y, r4.y\n"
+"div_zeroop(infinity) r4._y__, r4.y, r4.w\n"
+"\n"
+"dcl_literal l52, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"mad_ieee r2._y__, r2.y, l52, r4.y\n"
+"add r2._y__, r2.w, r2.y\n"
+"ftoi r2.___w, r2.x\n"
+"round_z r4._y__, r2.x\n"
+"add r4._y__, r2.x, r4.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l53, 0x375943FE, 0x3A620FAB, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l54, 0x3B1F8219, 0x3CD01D14, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r4.y, l53, l54\n"
+"\n"
+"dcl_literal l55, 0x3D11C643, 0x3E53B452, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r5.xyxx, r4.y, l55\n"
+"\n"
+"dcl_literal l56, 0x3E11BDA2, 0x3F1E0B56, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r5.xyxx, r4.y, l56\n"
+"\n"
+"dcl_literal l57, 0x3E172A19, 0x3F1B09B3, 0x00000000, 0x00000000\n"
+"mad_ieee r5.xy__, r5.xyxx, r4.y, l57\n"
+"\n"
+"dcl_literal l58, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9, 0xBD3F8AA9\n"
+"mad_ieee r4.___w, r5.x, r4.y, l58\n"
+"mul_ieee r4.___w, r4.y, r4.w\n"
+"div_zeroop(infinity) r4.___w, r4.w, r5.y\n"
+"\n"
+"dcl_literal l59, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r4.___w, r4.y, l59, r4.w\n"
+"\n"
+"dcl_literal l60, 0x00000008, 0x00000007, 0x00000006, 0x00000005\n"
+"ige r7, r2.w, l60\n"
+"\n"
+"dcl_literal l61, 0x40E00000, 0x40C00000, 0x40A00000, 0x40800000\n"
+"add r8, r4.y, l61\n"
+"\n"
+"dcl_literal l62, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r5.x___, r7.x, r8.x, l62\n"
+"mul_ieee r5._y__, r8.y, r5.x\n"
+"cmov_logical r5.x___, r7.y, r5.y, r5.x\n"
+"mul_ieee r5._y__, r8.z, r5.x\n"
+"cmov_logical r5.x___, r7.z, r5.y, r5.x\n"
+"mul_ieee r5._y__, r8.w, r5.x\n"
+"cmov_logical r5.x___, r7.w, r5.y, r5.x\n"
+"\n"
+"dcl_literal l63, 0x00000004, 0x00000003, 0x00000000, 0x00000000\n"
+"ige r6.xy__, r2.w, l63\n"
+"\n"
+"dcl_literal l64, 0x40400000, 0x40000000, 0x00000000, 0x00000000\n"
+"add r7.xy__, r4.y, l64\n"
+"mul_ieee r2.___w, r5.x, r7.x\n"
+"cmov_logical r2.___w, r6.x, r2.w, r5.x\n"
+"mul_ieee r4._y__, r7.y, r2.w\n"
+"cmov_logical r2.___w, r6.y, r4.y, r2.w\n"
+"\n"
+"dcl_literal l65, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r4._y__, r2.w, l65\n"
+"\n"
+"dcl_literal l66, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r4._y__, r4.y, l66\n"
+"\n"
+"dcl_literal l67, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l67\n"
+"\n"
+"dcl_literal l68, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r5.x___, l68, r2.w\n"
+"\n"
+"dcl_literal l69, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r5.x___, r5.x, l69\n"
+"\n"
+"dcl_literal l70, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r5._y__, r5.x, l70\n"
+"ior r2.___w, r2.w, r5.y\n"
+"\n"
+"dcl_literal l71, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r5.x___, r5.x, l71\n"
+"iadd r4._y__, r4.y, r5.x\n"
+"itof r4._y__, r4.y\n"
+"\n"
+"dcl_literal l72, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r5.xy__, r2.w, l72\n"
+"div_zeroop(infinity) r2.___w, r5.x, r5.y\n"
+"mul_ieee r5._y__, r2.w, r2.w\n"
+"mul_ieee r6.x___, r5.y, r5.y\n"
+"\n"
+"dcl_literal l73, 0x00000000, 0x3E1CD04F, 0x3E178897, 0x00000000\n"
+"\n"
+"dcl_literal l74, 0x00000000, 0x3E638E29, 0x3E3A3325, 0x00000000\n"
+"mad_ieee r6._yz_, r6.x, l73, l74\n"
+"\n"
+"dcl_literal l75, 0x00000000, 0x3ECCCCCD, 0x3E924925, 0x00000000\n"
+"mad_ieee r6._yz_, r6.x, r6.yyzy, l75\n"
+"mul_ieee r6._y__, r6.x, r6.y\n"
+"\n"
+"dcl_literal l76, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r6.x___, r6.x, r6.z, l76\n"
+"mad_ieee r5._y__, r5.y, r6.x, r6.y\n"
+"mul_ieee r6.x___, r5.x, r5.x\n"
+"\n"
+"dcl_literal l77, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r5._y__, r6.x, l77, r5.y\n"
+"\n"
+"dcl_literal l78, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mul_ieee r6._y__, r4.y, l78\n"
+"mad_ieee r2.___w, r2.w, r5.y, r6.y\n"
+"\n"
+"dcl_literal l79, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r2.___w, r6.x, l79, r2.w_neg(xyzw)\n"
+"add r2.___w, r5.x_neg(xyzw), r2.w\n"
+"\n"
+"dcl_literal l80, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2.___w, r4.y, l80, r2.w_neg(xyzw)\n"
+"add r2.___w, r4.w, r2.w\n"
+"\n"
+"dcl_literal l81, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.___w, r1.w, l81\n"
+"mul_ieee r1.___w, r2.x, r1.w\n"
+"cmov_logical r1.___w, r3.w, r2.w, r1.w\n"
+"\n"
+"dcl_literal l82, 0x00000000, 0x00000001, 0x00000002, 0x00000000\n"
+"ieq r6.xyz_, r3.x, l82\n"
+"and r3.xy_w, r3.y, r6.xyxz\n"
+"cmov_logical r1.___w, r3.x, r4.x, r1.w\n"
+"cmov_logical r1.___w, r3.y, r3.z, r1.w\n"
+"ior r2.___w, r3.w, r3.w\n"
+"cmov_logical r1.___w, r2.w, r2.y, r1.w\n"
+"\n"
+"dcl_literal l83, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2._y__, r1.w, l83\n"
+"\n"
+"dcl_literal l84, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l85, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r2._y__, r2.y, l84, l85\n"
+"\n"
+"dcl_literal l86, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r2._y__, r1.w, l86, r2.y\n"
+"round_z r2._y__, r2.y\n"
+"\n"
+"dcl_literal l87, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r1.___w, r2.y_neg(xyzw), l87, r1.w\n"
+"\n"
+"dcl_literal l88, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.___w, r2.y_neg(xyzw), l88, r1.w\n"
+"mul_ieee r3.x___, r2.w, r2.w\n"
+"\n"
+"dcl_literal l89, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l90, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3._y__, r3.x, l89, l90\n"
+"\n"
+"dcl_literal l91, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3._y__, r3.x, r3.y, l91\n"
+"\n"
+"dcl_literal l92, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3._y__, r3.x, r3.y, l92\n"
+"\n"
+"dcl_literal l93, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3._y__, r3.x, r3.y, l93\n"
+"mad_ieee r3.x___, r3.x_neg(xyzw), r3.y, r2.w\n"
+"mul_ieee r2.___w, r2.w, r3.x\n"
+"\n"
+"dcl_literal l94, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3.x___, r3.x_neg(xyzw), l94\n"
+"div_zeroop(infinity) r2.___w, r2.w, r3.x\n"
+"\n"
+"dcl_literal l95, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r2.___w, r2.y, l95, r2.w_neg(xyzw)\n"
+"add r1.___w, r1.w_neg(xyzw), r2.w\n"
+"\n"
+"dcl_literal l96, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r1.___w, r1.w_neg(xyzw), l96\n"
+"ftoi r2.___w, r2.y\n"
+"\n"
+"dcl_literal l97, 0x80000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+"and r3.xyz_, r1.w, l97\n"
+"if_logicalz r3.y\n"
+" itof r3.___w, r3.z\n"
+" \n"
+" dcl_literal l98, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r3.w, l98\n"
+" \n"
+" dcl_literal l99, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.___w, r4.x, l99\n"
+" iadd r3.___w, r3.w, r2.w\n"
+" \n"
+" dcl_literal l100, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.x___, r4.y, l100\n"
+" \n"
+" dcl_literal l101, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3.___w, l101, r3.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l102, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4._y__, l102, r3.w\n"
+" \n"
+" dcl_literal l103, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.___w, r4.y, l103, r3.w\n"
+" \n"
+" dcl_literal l104, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, l104, r3.w\n"
+" ishr r4.___w, r4.x, r3.w\n"
+" inegate r3.___w, r3.w\n"
+" \n"
+" dcl_literal l105, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.___w, r3.w, l105\n"
+" iadd r3.___w, r4.x, r3.w\n"
+" cmov_logical r3.___w, r4.y, r4.w, r3.w\n"
+"else\n"
+" \n"
+" dcl_literal l106, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r1.___w, r1.w, l106\n"
+" \n"
+" dcl_literal l107, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l107\n"
+" iadd r3._y__, r3.y, r2.w\n"
+" \n"
+" dcl_literal l108, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2.___w, r2.w, l108\n"
+" iadd r1.___w, r1.w, r2.w\n"
+" \n"
+" dcl_literal l109, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r2.___w, r3.y, l109\n"
+" \n"
+" dcl_literal l110, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3._y__, l110, r2.w\n"
+" \n"
+" dcl_literal l111, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r1.___w, r3.y, l111, r1.w\n"
+" \n"
+" dcl_literal l112, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r2.___w, l112, r2.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l113, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3._y__, r3.z, l113\n"
+" \n"
+" dcl_literal l114, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r4.xy__, l114, r2.w\n"
+" \n"
+" dcl_literal l115, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2.___w, r4.y, l115, r2.w\n"
+" ishr r2.___w, r3.y, r2.w\n"
+" cmov_logical r3.___w, r4.x, r2.w, r1.w\n"
+"endif\n"
+"\n"
+"dcl_literal l116, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r1.___w, l116, r2.y\n"
+"\n"
+"dcl_literal l117, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r1.___w, r1.w, l117, r3.w\n"
+"ior r1.___w, r3.x, r1.w\n"
+"round_z r2._y__, r6.w\n"
+"add r2._y__, r6.w, r2.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l118, 0x41C80000, 0x42200000, 0x421C0000, 0x42180000\n"
+"add r3, r2.y, l118\n"
+"cmov_logical r6.___w, r2.z, r2.x, r3.x\n"
+"\n"
+"dcl_literal l119, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r2.__z_, l119, r6.w\n"
+"mul_ieee r2.___w, r2.z, r2.z\n"
+"\n"
+"dcl_literal l120, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8, 0xBAD5C4E8\n"
+"\n"
+"dcl_literal l121, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2, 0x3A5B3DD2\n"
+"mad_ieee r4.x___, r2.w, l120, l121\n"
+"\n"
+"dcl_literal l122, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C, 0xBA1C065C\n"
+"mad_ieee r4.x___, r2.w, r4.x, l122\n"
+"\n"
+"dcl_literal l123, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD, 0x3A500CFD\n"
+"mad_ieee r4.x___, r2.w, r4.x, l123\n"
+"\n"
+"dcl_literal l124, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r4.x___, r2.w, r4.x, l124\n"
+"\n"
+"dcl_literal l125, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB, 0x3DAAAAAB\n"
+"mad_ieee r2.___w, r2.w, r4.x, l125\n"
+"\n"
+"dcl_literal l126, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D, 0x3ED67F1D\n"
+"mad_ieee r2.__z_, r2.z, r2.w, l126\n"
+"\n"
+"dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r2.___w, r5.z, r3.y, l127\n"
+"mul_ieee r3._y__, r3.z, r2.w\n"
+"cmov_logical r2.___w, r5.w, r3.y, r2.w\n"
+"\n"
+"dcl_literal l128, 0x421C0000, 0x42180000, 0x42140000, 0x42100000\n"
+"ge r5, r2.x, l128\n"
+"mul_ieee r3._y__, r3.w, r2.w\n"
+"cmov_logical r2.___w, r5.x, r3.y, r2.w\n"
+"\n"
+"dcl_literal l129, 0x42140000, 0x42100000, 0x420C0000, 0x42080000\n"
+"add r7, r2.y, l129\n"
+"mul_ieee r3._y__, r2.w, r7.x\n"
+"cmov_logical r2.___w, r5.y, r3.y, r2.w\n"
+"mul_ieee r3._y__, r7.y, r2.w\n"
+"cmov_logical r2.___w, r5.z, r3.y, r2.w\n"
+"mul_ieee r3._y__, r7.z, r2.w\n"
+"cmov_logical r2.___w, r5.w, r3.y, r2.w\n"
+"\n"
+"dcl_literal l130, 0x420C0000, 0x42080000, 0x42040000, 0x42000000\n"
+"ge r5, r2.x, l130\n"
+"mul_ieee r3._y__, r7.w, r2.w\n"
+"cmov_logical r2.___w, r5.x, r3.y, r2.w\n"
+"\n"
+"dcl_literal l131, 0x42040000, 0x42000000, 0x41F80000, 0x41F00000\n"
+"add r7, r2.y, l131\n"
+"mul_ieee r3._y__, r2.w, r7.x\n"
+"cmov_logical r2.___w, r5.y, r3.y, r2.w\n"
+"mul_ieee r3._y__, r7.y, r2.w\n"
+"cmov_logical r2.___w, r5.z, r3.y, r2.w\n"
+"mul_ieee r3._y__, r7.z, r2.w\n"
+"cmov_logical r2.___w, r5.w, r3.y, r2.w\n"
+"\n"
+"dcl_literal l132, 0x41F80000, 0x41F00000, 0x41E80000, 0x41E00000\n"
+"ge r5, r2.x, l132\n"
+"mul_ieee r3._y__, r7.w, r2.w\n"
+"cmov_logical r2.___w, r5.x, r3.y, r2.w\n"
+"\n"
+"dcl_literal l133, 0x41E80000, 0x41E00000, 0x41D80000, 0x41D00000\n"
+"add r7, r2.y, l133\n"
+"mul_ieee r2._y__, r2.w, r7.x\n"
+"cmov_logical r2._y__, r5.y, r2.y, r2.w\n"
+"mul_ieee r2.___w, r7.y, r2.y\n"
+"cmov_logical r2._y__, r5.z, r2.w, r2.y\n"
+"mul_ieee r2.___w, r7.z, r2.y\n"
+"cmov_logical r2._y__, r5.w, r2.w, r2.y\n"
+"\n"
+"dcl_literal l134, 0x00000000, 0x41D80000, 0x41D00000, 0x41000000\n"
+"ge r3._yzw, r2.x, l134\n"
+"mul_ieee r2.x___, r7.w, r2.y\n"
+"cmov_logical r2.x___, r3.y, r2.x, r2.y\n"
+"mul_ieee r2._y__, r3.x, r2.x\n"
+"cmov_logical r2.x___, r3.z, r2.y, r2.x\n"
+"\n"
+"dcl_literal l135, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ilt r2._y__, r6.w, l135\n"
+"\n"
+"dcl_literal l136, 0x4C000000, 0x4C000000, 0x4C000000, 0x4C000000\n"
+"mul_ieee r6._y__, r6.w, l136\n"
+"\n"
+"dcl_literal l137, 0xFFFFFFE7, 0x00000000, 0x00000000, 0x00000000\n"
+"mov r6.x_z_, l137\n"
+"cmov_logical r2._y_w, r2.y, r6.xxxy, r6.zzzw\n"
+"\n"
+"dcl_literal l138, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r2.w, l138\n"
+"iadd r2._y__, r3.x, r2.y\n"
+"\n"
+"dcl_literal l139, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+"iadd r2._y__, l139, r2.y\n"
+"\n"
+"dcl_literal l140, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l140\n"
+"\n"
+"dcl_literal l141, 0x004AFB20, 0x004AFB20, 0x004AFB20, 0x004AFB20\n"
+"iadd r3.x___, l141, r2.w\n"
+"\n"
+"dcl_literal l142, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"and r3.x___, r3.x, l142\n"
+"\n"
+"dcl_literal l143, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ixor r3._y__, r3.x, l143\n"
+"ior r2.___w, r2.w, r3.y\n"
+"\n"
+"dcl_literal l144, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r3.x___, r3.x, l144\n"
+"iadd r2._y__, r2.y, r3.x\n"
+"itof r2._y__, r2.y\n"
+"\n"
+"dcl_literal l145, 0xBF800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"add r3.xy__, r2.w, l145\n"
+"div_zeroop(infinity) r2.___w, r3.x, r3.y\n"
+"mul_ieee r3._y__, r2.w, r2.w\n"
+"mul_ieee r3.__z_, r3.y, r3.y\n"
+"\n"
+"dcl_literal l146, 0x3E1CD04F, 0x3E178897, 0x00000000, 0x00000000\n"
+"\n"
+"dcl_literal l147, 0x3E638E29, 0x3E3A3325, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, l146, l147\n"
+"\n"
+"dcl_literal l148, 0x3ECCCCCD, 0x3E924925, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r3.z, r4.xyxx, l148\n"
+"mul_ieee r4.x___, r3.z, r4.x\n"
+"\n"
+"dcl_literal l149, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB, 0x3F2AAAAB\n"
+"mad_ieee r3.__z_, r3.z, r4.y, l149\n"
+"mad_ieee r3._y__, r3.y, r3.z, r4.x\n"
+"mul_ieee r3.__z_, r3.x, r3.x\n"
+"\n"
+"dcl_literal l150, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r4.x___, r3.z, l150\n"
+"\n"
+"dcl_literal l151, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mad_ieee r3._y__, r3.z, l151, r3.y\n"
+"mad_ieee r2.___w, r2.w_neg(xyzw), r3.y, r4.x\n"
+"add r2.___w, r3.x_neg(xyzw), r2.w\n"
+"\n"
+"dcl_literal l152, 0xBFB8AA3B, 0xBFB8AA3B, 0xBFB8AA3B, 0xBFB8AA3B\n"
+"mul_ieee r2.___w, r2.w, l152\n"
+"frc r3.x___, r2.w_abs\n"
+"add r3._y__, r2.w_abs, r3.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l153, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, r2.w, l153\n"
+"\n"
+"dcl_literal l154, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l155, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r2.___w, r2.w, l154, l155\n"
+"mul_ieee r3.__z_, r3.x, r2.w\n"
+"mad_ieee r2._y__, r3.y, r2.w, r2.y\n"
+"frc r3._y__, r6.w_abs\n"
+"add r4.x___, r6.w_abs, r3.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l156, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r4._y__, r6.w, l156\n"
+"\n"
+"dcl_literal l157, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l158, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r4._y__, r4.y, l157, l158\n"
+"mul_ieee r4.___w, r4.x, r4.y\n"
+"mul_ieee r5.x___, r3.y, r4.y\n"
+"\n"
+"dcl_literal l159, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r5._y__, r3.z, l159\n"
+"mad_ieee r2.___w, r3.x, r2.w, r5.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l160, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r3.x___, r4.w, l160\n"
+"mad_ieee r4.x___, r4.x, r4.y, r3.x_neg(xyzw)\n"
+"mul_ieee r5.__z_, r3.z, r4.w\n"
+"mad_ieee r5.___w, r5.y, r3.x, r5.z_neg(xyzw)\n"
+"mad_ieee r5.___w, r5.y, r4.x, r5.w\n"
+"mad_ieee r3.x___, r2.w, r3.x, r5.w\n"
+"mad_ieee r3.x___, r2.w, r4.x, r3.x\n"
+"\n"
+"dcl_literal l161, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r4.x___, r2.y, l161\n"
+"add r5.___w, r2.y, r4.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l162, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000, 0xFFFFF000\n"
+"and r6.x___, r5.x, l162\n"
+"mad_ieee r3._y__, r3.y, r4.y, r6.x_neg(xyzw)\n"
+"mul_ieee r4._y__, r2.y, r5.x\n"
+"mad_ieee r6._y__, r4.x, r6.x, r4.y_neg(xyzw)\n"
+"mad_ieee r4.x___, r4.x, r3.y, r6.y\n"
+"mad_ieee r4.x___, r5.w, r6.x, r4.x\n"
+"mad_ieee r4.x___, r5.w, r3.y, r4.x\n"
+"mul_ieee r3.__z_, r3.z, r5.x\n"
+"mad_ieee r5.x___, r5.y, r6.x, r3.z_neg(xyzw)\n"
+"mad_ieee r5.x___, r5.y, r3.y, r5.x\n"
+"mad_ieee r5.x___, r2.w, r6.x, r5.x\n"
+"mad_ieee r2.___w, r2.w, r3.y, r5.x\n"
+"frc r3._y__, r5.z_abs\n"
+"add r5.x___, r5.z_abs, r3.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l163, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r5._y__, r5.z, l163\n"
+"\n"
+"dcl_literal l164, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l165, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r5._y__, r5.y, l164, l165\n"
+"mul_ieee r5.x___, r5.x, r5.y\n"
+"mul_ieee r3._y__, r3.y, r5.y\n"
+"mad_ieee r2._y__, r2.y, r4.w, r5.x\n"
+"frc r4.___w, r4.y_abs\n"
+"add r5.x___, r4.y_abs, r4.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l166, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r4._y__, r4.y, l166\n"
+"\n"
+"dcl_literal l167, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l168, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r4._y__, r4.y, l167, l168\n"
+"mul_ieee r4.___w, r4.w, r4.y\n"
+"mad_ieee r2._y__, r5.x, r4.y, r2.y\n"
+"frc r4._y__, r3.z_abs\n"
+"add r5.x___, r3.z_abs, r4.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l169, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.__z_, r3.z, l169\n"
+"\n"
+"dcl_literal l170, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l171, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r3.__z_, r3.z, l170, l171\n"
+"mul_ieee r4._y__, r4.y, r3.z\n"
+"mad_ieee r2._y__, r5.x, r3.z, r2.y\n"
+"ge r3.__z_, r4.y_abs, r4.w_abs\n"
+"cmov_logical r5.x___, r3.z, r4.y, r4.w\n"
+"cmov_logical r3.__z_, r3.z, r4.w, r4.y\n"
+"ge r4._y__, r5.x_abs, r3.y_abs\n"
+"cmov_logical r4.___w, r4.y, r5.x, r3.y\n"
+"cmov_logical r3._y__, r4.y, r3.y, r5.x\n"
+"ge r4._y__, r3.z_abs, r3.y_abs\n"
+"cmov_logical r5.x___, r4.y, r3.z, r3.y\n"
+"cmov_logical r3._y__, r4.y, r3.y, r3.z\n"
+"add r3.__z_, r3.x, r4.x\n"
+"add r3.__z_, r2.w, r3.z\n"
+"add r3.__z_, r3.y, r3.z\n"
+"add r3.__z_, r5.x, r3.z\n"
+"add r3.__z_, r4.w, r3.z\n"
+"add r4._y__, r4.w, r3.z_neg(xyzw)\n"
+"add r4._y__, r5.x, r4.y\n"
+"add r3._y__, r3.y, r4.y\n"
+"add r3.x___, r3.x, r3.y\n"
+"add r3.x___, r4.x, r3.x\n"
+"add r2.___w, r2.w, r3.x\n"
+"frc r3.x___, r3.z_abs\n"
+"add r3._y__, r3.z_abs, r3.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l172, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.__z_, r3.z, l172\n"
+"\n"
+"dcl_literal l173, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"\n"
+"dcl_literal l174, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r3.__z_, r3.z, l173, l174\n"
+"mul_ieee r3.x___, r3.x, r3.z\n"
+"mad_ieee r2._y__, r3.y, r3.z, r2.y\n"
+"\n"
+"dcl_literal l175, 0x00000000, 0x3F317180, 0x3717F7D1, 0x00000000\n"
+"mul_ieee r3._yz_, r3.x, l175\n"
+"mul_ieee r4.xy__, r3.yzyy, r3.yzyy\n"
+"\n"
+"dcl_literal l176, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mul_ieee r4._y__, r4.y, l176\n"
+"\n"
+"dcl_literal l177, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l178, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r4.___w, r4.x, l177, l178\n"
+"\n"
+"dcl_literal l179, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r4.___w, r4.x, r4.w, l179\n"
+"\n"
+"dcl_literal l180, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r4.___w, r4.x, r4.w, l180\n"
+"\n"
+"dcl_literal l181, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r4.___w, r4.x, r4.w, l181\n"
+"mad_ieee r4.x___, r4.x_neg(xyzw), r4.w, r3.y\n"
+"mul_ieee r3._y__, r3.y, r4.x\n"
+"\n"
+"dcl_literal l182, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"add r4.x___, r4.x, l182\n"
+"div_zeroop(infinity) r3._y__, r3.y, r4.x\n"
+"\n"
+"dcl_literal l183, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r3._y__, r3.x_neg(xyzw), l183, r3.y\n"
+"\n"
+"dcl_literal l184, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r4.x___, r3.x, l184, r4.y_neg(xyzw)\n"
+"mul_ieee r3.__z_, r3.z, r4.x\n"
+"\n"
+"dcl_literal l185, 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000\n"
+"add r4.x___, r4.x, l185\n"
+"div_zeroop(infinity) r3.__z_, r3.z, r4.x\n"
+"\n"
+"dcl_literal l186, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3.x___, r3.x_neg(xyzw), l186, r3.z\n"
+"\n"
+"dcl_literal l187, 0x3F317180, 0x3717F7D1, 0x00000000, 0x00000000\n"
+"mul_ieee r4.xy__, r2.w, l187\n"
+"mul_ieee r5.xy__, r4.xyxx, r4.xyxx\n"
+"\n"
+"dcl_literal l188, 0x00000000, 0x00000000, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l189, 0x00000000, 0x00000000, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r5.__zw, r5.xxxy, l188, l189\n"
+"\n"
+"dcl_literal l190, 0x00000000, 0x00000000, 0x388AB355, 0x388AB355\n"
+"mad_ieee r5.__zw, r5.xxxy, r5.zzzw, l190\n"
+"\n"
+"dcl_literal l191, 0x00000000, 0x00000000, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r5.__zw, r5.xxxy, r5.zzzw, l191\n"
+"\n"
+"dcl_literal l192, 0x00000000, 0x00000000, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r5.__zw, r5.xxxy, r5.zzzw, l192\n"
+"mad_ieee r5.xy__, r5.xyxx_neg(xyzw), r5.zwzz, r4.xyxx\n"
+"mul_ieee r4.xy__, r4.xyxx, r5.xyxx\n"
+"\n"
+"dcl_literal l193, 0xC0000000, 0xC0000000, 0x00000000, 0x00000000\n"
+"add r5.xy__, r5.xyxx, l193\n"
+"div_zeroop(infinity) r4.xy__, r4.xyxx, r5.xyxx\n"
+"\n"
+"dcl_literal l194, 0x3F317180, 0x3717F7D1, 0x00000000, 0x00000000\n"
+"mad_ieee r4.xy__, r2.w_neg(xyzw), l194, r4.xyxx\n"
+"mad_ieee r2.___w, r3.y, r4.x, r4.x_neg(xyzw)\n"
+"add r2.___w, r3.y_neg(xyzw), r2.w\n"
+"mad_ieee r3._y__, r3.x, r4.y, r4.y_neg(xyzw)\n"
+"add r3.x___, r3.x_neg(xyzw), r3.y\n"
+"mad_ieee r3.x___, r2.w, r3.x, r3.x\n"
+"add r2.___w, r2.w, r3.x\n"
+"\n"
+"dcl_literal l195, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.___w, r2.w, l195\n"
+"ftoi r3.x___, r2.y\n"
+"\n"
+"dcl_literal l196, 0x80000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r4.xy_w, r2.w, l196\n"
+"if_logicalz r4.y\n"
+" itof r3._y__, r4.w\n"
+" \n"
+" dcl_literal l197, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3._yz_, r3.y, l197\n"
+" \n"
+" dcl_literal l198, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l198\n"
+" iadd r3._y__, r3.y, r3.x\n"
+" \n"
+" dcl_literal l199, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r3.z, l199\n"
+" \n"
+" dcl_literal l200, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3._y__, l200, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l201, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.x___, l201, r3.y\n"
+" \n"
+" dcl_literal l202, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r5.x, l202, r3.y\n"
+" \n"
+" dcl_literal l203, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.x___, l203, r3.y\n"
+" ishr r5._y__, r3.z, r3.y\n"
+" inegate r3._y__, r3.y\n"
+" \n"
+" dcl_literal l204, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3._y__, r3.y, l204\n"
+" iadd r3._y__, r3.z, r3.y\n"
+" cmov_logical r3._y__, r5.x, r5.y, r3.y\n"
+"else\n"
+" \n"
+" dcl_literal l205, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.___w, r2.w, l205\n"
+" \n"
+" dcl_literal l206, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r4.y, l206\n"
+" iadd r3.__z_, r3.z, r3.x\n"
+" \n"
+" dcl_literal l207, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.x___, r3.x, l207\n"
+" iadd r2.___w, r2.w, r3.x\n"
+" \n"
+" dcl_literal l208, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.z, l208\n"
+" \n"
+" dcl_literal l209, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.__z_, l209, r3.x\n"
+" \n"
+" dcl_literal l210, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.___w, r3.z, l210, r2.w\n"
+" \n"
+" dcl_literal l211, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l211, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l212, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r4.w, l212\n"
+" \n"
+" dcl_literal l213, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r4._y_w, l213, r3.x\n"
+" \n"
+" dcl_literal l214, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r4.w, l214, r3.x\n"
+" ishr r3.x___, r3.z, r3.x\n"
+" cmov_logical r3._y__, r4.y, r3.x, r2.w\n"
+"endif\n"
+"\n"
+"dcl_literal l215, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2._y__, l215, r2.y\n"
+"\n"
+"dcl_literal l216, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2._y__, r2.y, l216, r3.y\n"
+"ior r2._y__, r4.x, r2.y\n"
+"\n"
+"dcl_literal l217, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r2.___w, r2.z, l217\n"
+"\n"
+"dcl_literal l218, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l219, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r2.___w, r2.w, l218, l219\n"
+"\n"
+"dcl_literal l220, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r2.___w, r2.z, l220, r2.w\n"
+"round_z r2.___w, r2.w\n"
+"\n"
+"dcl_literal l221, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2.__z_, r2.w_neg(xyzw), l221, r2.z\n"
+"\n"
+"dcl_literal l222, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3.x___, r2.w_neg(xyzw), l222, r2.z\n"
+"mul_ieee r3._y__, r3.x, r3.x\n"
+"\n"
+"dcl_literal l223, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l224, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r3.__z_, r3.y, l223, l224\n"
+"\n"
+"dcl_literal l225, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l225\n"
+"\n"
+"dcl_literal l226, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l226\n"
+"\n"
+"dcl_literal l227, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r3.__z_, r3.y, r3.z, l227\n"
+"mad_ieee r3._y__, r3.y_neg(xyzw), r3.z, r3.x\n"
+"mul_ieee r3.x___, r3.x, r3.y\n"
+"\n"
+"dcl_literal l228, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3._y__, r3.y_neg(xyzw), l228\n"
+"div_zeroop(infinity) r3.x___, r3.x, r3.y\n"
+"\n"
+"dcl_literal l229, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3.x___, r2.w, l229, r3.x_neg(xyzw)\n"
+"add r2.__z_, r2.z_neg(xyzw), r3.x\n"
+"\n"
+"dcl_literal l230, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.__z_, r2.z_neg(xyzw), l230\n"
+"ftoi r3.x___, r2.w\n"
+"\n"
+"dcl_literal l231, 0x80000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r4.xy_w, r2.z, l231\n"
+"if_logicalz r4.y\n"
+" itof r3._y__, r4.w\n"
+" \n"
+" dcl_literal l232, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r3._yz_, r3.y, l232\n"
+" \n"
+" dcl_literal l233, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3._y__, r3.y, l233\n"
+" iadd r3._y__, r3.y, r3.x\n"
+" \n"
+" dcl_literal l234, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r3.z, l234\n"
+" \n"
+" dcl_literal l235, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3._y__, l235, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l236, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5.x___, l236, r3.y\n"
+" \n"
+" dcl_literal l237, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r5.x, l237, r3.y\n"
+" \n"
+" dcl_literal l238, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.x___, l238, r3.y\n"
+" ishr r5._y__, r3.z, r3.y\n"
+" inegate r3._y__, r3.y\n"
+" \n"
+" dcl_literal l239, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3._y__, r3.y, l239\n"
+" iadd r3._y__, r3.z, r3.y\n"
+" cmov_logical r3._y__, r5.x, r5.y, r3.y\n"
+"else\n"
+" \n"
+" dcl_literal l240, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.__z_, r2.z, l240\n"
+" \n"
+" dcl_literal l241, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r4.y, l241\n"
+" iadd r3.__z_, r3.z, r3.x\n"
+" \n"
+" dcl_literal l242, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.x___, r3.x, l242\n"
+" iadd r2.__z_, r2.z, r3.x\n"
+" \n"
+" dcl_literal l243, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3.x___, r3.z, l243\n"
+" \n"
+" dcl_literal l244, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r3.__z_, l244, r3.x\n"
+" \n"
+" dcl_literal l245, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.__z_, r3.z, l245, r2.z\n"
+" \n"
+" dcl_literal l246, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.x___, l246, r3.x_neg(xyzw)\n"
+" \n"
+" dcl_literal l247, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.__z_, r4.w, l247\n"
+" \n"
+" dcl_literal l248, 0x00000000, 0x00000000, 0x00000000, 0x00000017\n"
+" ilt r4._y_w, l248, r3.x\n"
+" \n"
+" dcl_literal l249, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.x___, r4.w, l249, r3.x\n"
+" ishr r3.x___, r3.z, r3.x\n"
+" cmov_logical r3._y__, r4.y, r3.x, r2.z\n"
+"endif\n"
+"\n"
+"dcl_literal l250, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2.__z_, l250, r2.w\n"
+"\n"
+"dcl_literal l251, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.__z_, r2.z, l251, r3.y\n"
+"ior r2.__z_, r4.x, r2.z\n"
+"\n"
+"dcl_literal l252, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"add r2.___w, r6.w, l252\n"
+"\n"
+"dcl_literal l253, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"lt r3.x___, r2.w, l253\n"
+"\n"
+"dcl_literal l254, 0xBF000000, 0xBF000000, 0xBF000000, 0xBF000000\n"
+"\n"
+"dcl_literal l255, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"cmov_logical r3.x___, r3.x, l254, l255\n"
+"\n"
+"dcl_literal l256, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B, 0x3FB8AA3B\n"
+"mad_ieee r3.x___, r2.w, l256, r3.x\n"
+"round_z r3.x___, r3.x\n"
+"\n"
+"dcl_literal l257, 0x3F317180, 0x3F317180, 0x3F317180, 0x3F317180\n"
+"mad_ieee r2.___w, r3.x_neg(xyzw), l257, r2.w\n"
+"\n"
+"dcl_literal l258, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3._y__, r3.x_neg(xyzw), l258, r2.w\n"
+"mul_ieee r3.__z_, r3.y, r3.y\n"
+"\n"
+"dcl_literal l259, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C, 0x3331BB4C\n"
+"\n"
+"dcl_literal l260, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E, 0xB5DDEA0E\n"
+"mad_ieee r4.x___, r3.z, l259, l260\n"
+"\n"
+"dcl_literal l261, 0x388AB355, 0x388AB355, 0x388AB355, 0x388AB355\n"
+"mad_ieee r4.x___, r3.z, r4.x, l261\n"
+"\n"
+"dcl_literal l262, 0xBB360B61, 0xBB360B61, 0xBB360B61, 0xBB360B61\n"
+"mad_ieee r4.x___, r3.z, r4.x, l262\n"
+"\n"
+"dcl_literal l263, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB, 0x3E2AAAAB\n"
+"mad_ieee r4.x___, r3.z, r4.x, l263\n"
+"mad_ieee r3.__z_, r3.z_neg(xyzw), r4.x, r3.y\n"
+"mul_ieee r3._y__, r3.y, r3.z\n"
+"\n"
+"dcl_literal l264, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"add r3.__z_, r3.z_neg(xyzw), l264\n"
+"div_zeroop(infinity) r3._y__, r3.y, r3.z\n"
+"\n"
+"dcl_literal l265, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1, 0x3717F7D1\n"
+"mad_ieee r3._y__, r3.x, l265, r3.y_neg(xyzw)\n"
+"add r2.___w, r2.w_neg(xyzw), r3.y\n"
+"\n"
+"dcl_literal l266, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"add r2.___w, r2.w_neg(xyzw), l266\n"
+"ftoi r3._y__, r3.x\n"
+"\n"
+"dcl_literal l267, 0x80000000, 0x7F800000, 0x00000000, 0x007FFFFF\n"
+"and r4.xy_w, r2.w, l267\n"
+"if_logicalz r4.y\n"
+" itof r3.__z_, r4.w\n"
+" \n"
+" dcl_literal l268, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r5.xy__, r3.z, l268\n"
+" \n"
+" dcl_literal l269, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r5.x, l269\n"
+" iadd r3.__z_, r3.z, r3.y\n"
+" \n"
+" dcl_literal l270, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r5.x___, r5.y, l270\n"
+" \n"
+" dcl_literal l271, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r3.__z_, l271, r3.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l272, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r5._y__, l272, r3.z\n"
+" \n"
+" dcl_literal l273, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.__z_, r5.y, l273, r3.z\n"
+" \n"
+" dcl_literal l274, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5._y__, l274, r3.z\n"
+" ishr r5.__z_, r5.x, r3.z\n"
+" inegate r3.__z_, r3.z\n"
+" \n"
+" dcl_literal l275, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3.__z_, r3.z, l275\n"
+" iadd r3.__z_, r5.x, r3.z\n"
+" cmov_logical r3.__z_, r5.y, r5.z, r3.z\n"
+"else\n"
+" \n"
+" dcl_literal l276, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2.___w, r2.w, l276\n"
+" \n"
+" dcl_literal l277, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r4._y__, r4.y, l277\n"
+" iadd r4._y__, r4.y, r3.y\n"
+" \n"
+" dcl_literal l278, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r3._y__, r3.y, l278\n"
+" iadd r2.___w, r2.w, r3.y\n"
+" \n"
+" dcl_literal l279, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81, 0xFFFFFF81\n"
+" iadd r3._y__, r4.y, l279\n"
+" \n"
+" dcl_literal l280, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r4._y__, l280, r3.y\n"
+" \n"
+" dcl_literal l281, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r2.___w, r4.y, l281, r2.w\n"
+" \n"
+" dcl_literal l282, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3._y__, l282, r3.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l283, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4._y__, r4.w, l283\n"
+" \n"
+" dcl_literal l284, 0x00000000, 0x00000017, 0x00000000, 0x00000000\n"
+" ilt r5.xy__, l284, r3.y\n"
+" \n"
+" dcl_literal l285, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3._y__, r5.y, l285, r3.y\n"
+" ishr r3._y__, r4.y, r3.y\n"
+" cmov_logical r3.__z_, r5.x, r3.y, r2.w\n"
+"endif\n"
+"\n"
+"dcl_literal l286, 0x437E0000, 0x437E0000, 0x437E0000, 0x437E0000\n"
+"lt r2.___w, l286, r3.x\n"
+"\n"
+"dcl_literal l287, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"cmov_logical r2.___w, r2.w, l287, r3.z\n"
+"ior r2.___w, r4.x, r2.w\n"
+"sqrt_vec r3.x___, r6.w\n"
+"\n"
+"dcl_literal l288, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.__z_, r1.z, l288\n"
+"and r1.__z_, r4.z, r1.z\n"
+"\n"
+"dcl_literal l289, 0x2E800000, 0x2E800000, 0x2E800000, 0x2E800000\n"
+"mul_ieee r3._y__, r2.y, l289\n"
+"cmov_logical r2._y__, r1.z, r3.y, r2.y\n"
+"mul_ieee r2.___w, r2.w, r3.x\n"
+"div_zeroop(infinity) r3.x___, r2.y, r2.w\n"
+"mul_ieee r3.x___, r2.z, r3.x\n"
+"mul_ieee r3.x___, r2.x, r3.x\n"
+"div_zeroop(infinity) r2._y__, r2.w, r2.y\n"
+"div_zeroop(infinity) r2.x___, r2.y, r2.x\n"
+"div_zeroop(infinity) r2.x___, r2.x, r2.z\n"
+"cmov_logical r1.__z_, r1.z, r2.x, r3.x\n"
+"cmov_logical r1.__z_, r3.w, r1.z, r1.w\n"
+"\n"
+"dcl_literal l290, 0x00800000, 0x32000000, 0x00000000, 0x00000000\n"
+"ilt r2.xy__, r0.y, l290\n"
+"itof r1.___w, r0.y\n"
+"\n"
+"dcl_literal l291, 0x00000000, 0x00000000, 0x7F800000, 0x007FFFFF\n"
+"and r2.__zw, r1.w, l291\n"
+"\n"
+"dcl_literal l292, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r2.__z_, r2.z, l292\n"
+"\n"
+"dcl_literal l293, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"iadd r2.__z_, r2.z, l293\n"
+"\n"
+"dcl_literal l294, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r2.___w, r2.w, l294\n"
+"\n"
+"dcl_literal l295, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r2.__z_, l295, r2.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l296, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r3.x___, l296, r2.z\n"
+"\n"
+"dcl_literal l297, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r2.__z_, r3.x, l297, r2.z\n"
+"\n"
+"dcl_literal l298, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.x___, l298, r2.z\n"
+"ishr r3._y__, r2.w, r2.z\n"
+"inegate r2.__z_, r2.z\n"
+"\n"
+"dcl_literal l299, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r2.__z_, r2.z, l299\n"
+"iadd r2.__z_, r2.w, r2.z\n"
+"cmov_logical r2.__z_, r3.x, r3.y, r2.z\n"
+"cmov_logical r2.__z_, r2.x, r2.z, r0.y\n"
+"\n"
+"dcl_literal l300, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"div_zeroop(infinity) r2.___w, l300, r2.z\n"
+"ior r0.__z_, r0.z, r2.w\n"
+"\n"
+"dcl_literal l301, 0x35800000, 0x35800000, 0x35800000, 0x35800000\n"
+"ige r2.___w, l301, r0.y\n"
+"cmov_logical r1.__z_, r2.w, r0.z, r1.z\n"
+"add r3.x___, r0.y, r1.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l302, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"mul_ieee r3._y__, r3.x, l302\n"
+"frc r3._y__, r3.y\n"
+"\n"
+"dcl_literal l303, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"eq r3._y__, r3.y, l303\n"
+"add r3.__z_, r0.y, r0.y\n"
+"round_nearest r3.__z_, r3.z\n"
+"\n"
+"dcl_literal l304, 0x40000000, 0x40000000, 0x40000000, 0x40000000\n"
+"mad_ieee r3.___w, r0.y, l304, r3.z_neg(xyzw)\n"
+"\n"
+"dcl_literal l305, 0x3E800000, 0x3E800000, 0x3E800000, 0x3E800000\n"
+"mul_ieee r3.__z_, r3.z, l305\n"
+"frc r3.__z_, r3.z\n"
+"\n"
+"dcl_literal l306, 0x40800000, 0x40800000, 0x40800000, 0x40800000\n"
+"mul_ieee r3.__z_, r3.z, l306\n"
+"round_nearest r3.__z_, r3.z\n"
+"\n"
+"dcl_literal l307, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB\n"
+"mul_ieee r3.___w, r3.w, l307\n"
+"mul_ieee r4.x___, r3.w, r3.w\n"
+"mul_ieee r4._y__, r3.w, r4.x\n"
+"\n"
+"dcl_literal l308, 0x00000000, 0x00000000, 0x3636DF25, 0xB492923A\n"
+"\n"
+"dcl_literal l309, 0x00000000, 0x00000000, 0xB95009D4, 0x37D00AE2\n"
+"mad_ieee r4.__zw, r4.x, l308, l309\n"
+"\n"
+"dcl_literal l310, 0x00000000, 0x00000000, 0x3C088887, 0xBAB60B60\n"
+"mad_ieee r4.__zw, r4.x, r4.zzzw, l310\n"
+"\n"
+"dcl_literal l311, 0x00000000, 0x00000000, 0xBE2AAAAB, 0x3D2AAAAB\n"
+"mad_ieee r4.__zw, r4.x, r4.zzzw, l311\n"
+"mad_ieee r3.___w, r4.y, r4.z, r3.w\n"
+"\n"
+"dcl_literal l312, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
+"\n"
+"dcl_literal l313, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"mad_ieee r4._y__, r4.x_neg(xyzw), l312, l313\n"
+"mul_ieee r4.x___, r4.x, r4.x\n"
+"mad_ieee r4.x___, r4.x, r4.w, r4.y\n"
+"\n"
+"dcl_literal l314, 0x00000000, 0x3F800000, 0x40000000, 0x40400000\n"
+"eq r5, r3.z, l314\n"
+"\n"
+"dcl_literal l315, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ge r3.__z_, r0.y, l315\n"
+"and r5, r5, r3.z\n"
+"and r3.__z_, r3.w, r5.x\n"
+"cmov_logical r3.__z_, r5.y, r4.x, r3.z\n"
+"cmov_logical r3.__z_, r5.z, r3.w_neg(xyzw), r3.z\n"
+"cmov_logical r3.__z_, r5.w, r4.x_neg(xyzw), r3.z\n"
+"\n"
+"dcl_literal l316, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ine r4.xy__, r0.ywyy, l316\n"
+"\n"
+"dcl_literal l317, 0x00000000, 0x00000000, 0x7F800000, 0x00000000\n"
+"ieq r5.xyz_, r0.wyyw, l317\n"
+"and r0.___w, r4.x, r5.x\n"
+"\n"
+"dcl_literal l318, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r1.___w, r1.w, l318\n"
+"\n"
+"dcl_literal l319, 0x7F800000, 0x00000000, 0x007FFFFF, 0x00000000\n"
+"and r4.x_z_, r1.w, l319\n"
+"\n"
+"dcl_literal l320, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishr r1.___w, r4.x, l320\n"
+"\n"
+"dcl_literal l321, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+"ior r3.___w, r4.z, l321\n"
+"\n"
+"dcl_literal l322, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+"iadd r1.___w, l322, r1.w_neg(xyzw)\n"
+"\n"
+"dcl_literal l323, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ilt r4.x___, l323, r1.w\n"
+"\n"
+"dcl_literal l324, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"cmov_logical r1.___w, r4.x, l324, r1.w\n"
+"\n"
+"dcl_literal l325, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.x___, l325, r1.w\n"
+"ishr r4.__z_, r3.w, r1.w\n"
+"inegate r1.___w, r1.w\n"
+"\n"
+"dcl_literal l326, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1.___w, r1.w, l326\n"
+"iadd r1.___w, r3.w, r1.w\n"
+"cmov_logical r1.___w, r4.x, r4.z, r1.w\n"
+"cmov_logical r0.___w, r0.w, r1.w, r3.z\n"
+"and r1.___w, r2.y, r4.y\n"
+"\n"
+"dcl_literal l327, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"mul_ieee r2._y__, r0.y, l327\n"
+"cmov_logical r0.___w, r1.w, r2.y, r0.w\n"
+"\n"
+"dcl_literal l328, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ge r1.___w, r3.x, l328\n"
+"\n"
+"dcl_literal l329, 0x00000000, 0x00000000, 0x3F000000, 0x00000000\n"
+"eq r3.x_z_, r1.x, l329\n"
+"and r1.x___, r1.w, r3.x\n"
+"ior r1.x___, r5.y, r1.x\n"
+"\n"
+"dcl_literal l330, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.___w, r1.x, l330, r0.w\n"
+"\n"
+"dcl_literal l331, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"\n"
+"dcl_literal l332, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"cmov_logical r1.x___, r3.y, l331, l332\n"
+"cmov_logical r0.___w, r3.z, r1.x, r0.w\n"
+"\n"
+"dcl_literal l333, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1.x___, r0.x, l333\n"
+"cmov_logical r0.___w, r1.x, r0.w_neg(xyzw), r0.w\n"
+"mul_ieee r1.___w, r2.z, r0.w\n"
+"\n"
+"dcl_literal l334, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"div_zeroop(infinity) r1.___w, l334, r1.w\n"
+"mul_ieee r1.___w, r1.z, r1.w\n"
+"\n"
+"dcl_literal l335, 0x00000000, 0x80000000, 0x7F800000, 0x007FFFFF\n"
+"and r3._yzw, r1.w, l335\n"
+"if_logicalz r3.z\n"
+" itof r2._y__, r3.w\n"
+" \n"
+" dcl_literal l336, 0x7F800000, 0x007FFFFF, 0x00000000, 0x00000000\n"
+" and r4.xy__, r2.y, l336\n"
+" \n"
+" dcl_literal l337, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r2._y__, r4.x, l337\n"
+" \n"
+" dcl_literal l338, 0xFFFFFFDE, 0xFFFFFFDE, 0xFFFFFFDE, 0xFFFFFFDE\n"
+" iadd r2._y__, r2.y, l338\n"
+" \n"
+" dcl_literal l339, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r4.x___, r4.y, l339\n"
+" \n"
+" dcl_literal l340, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r2._y__, l340, r2.y_neg(xyzw)\n"
+" \n"
+" dcl_literal l341, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r4._y__, l341, r2.y\n"
+" \n"
+" dcl_literal l342, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r2._y__, r4.y, l342, r2.y\n"
+" \n"
+" dcl_literal l343, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, l343, r2.y\n"
+" ishr r4.__z_, r4.x, r2.y\n"
+" inegate r2._y__, r2.y\n"
+" \n"
+" dcl_literal l344, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r2._y__, r2.y, l344\n"
+" iadd r2._y__, r4.x, r2.y\n"
+" cmov_logical r2._y__, r4.y, r4.z, r2.y\n"
+"else\n"
+" \n"
+" dcl_literal l345, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r4.x___, r1.w, l345\n"
+" \n"
+" dcl_literal l346, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r3.__z_, r3.z, l346\n"
+" \n"
+" dcl_literal l347, 0xEF000000, 0xEF000000, 0xEF000000, 0xEF000000\n"
+" iadd r4.x___, r4.x, l347\n"
+" \n"
+" dcl_literal l348, 0xFFFFFF5F, 0xFFFFFF5F, 0xFFFFFF5F, 0xFFFFFF5F\n"
+" iadd r3.__z_, r3.z, l348\n"
+" \n"
+" dcl_literal l349, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+" iadd r3.__z_, l349, r3.z_neg(xyzw)\n"
+" \n"
+" dcl_literal l350, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r3.___w, r3.w, l350\n"
+" \n"
+" dcl_literal l351, 0x00000000, 0x00000000, 0x00000017, 0x00000000\n"
+" ilt r4._yz_, l351, r3.z\n"
+" \n"
+" dcl_literal l352, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r3.__z_, r4.z, l352, r3.z\n"
+" ishr r3.__z_, r3.w, r3.z\n"
+" cmov_logical r2._y__, r4.y, r3.z, r4.x\n"
+"endif\n"
+"ior r2._y__, r3.y, r2.y\n"
+"\n"
+"dcl_literal l353, 0x42040000, 0x42040000, 0x42040000, 0x42040000\n"
+"lt r3._y__, l353, r2.z\n"
+"and r3._y__, r1.y, r3.y\n"
+"cmov_logical r1.___w, r3.y, r2.y, r1.w\n"
+"\n"
+"dcl_literal l354, 0x42040000, 0x42040000, 0x42040000, 0x42040000\n"
+"ge r2._y__, l354, r2.z\n"
+"and r2._y__, r1.y, r2.y\n"
+"mul_ieee r3._y__, r2.z, r1.z\n"
+"mul_ieee r0.___w, r0.w, r3.y\n"
+"\n"
+"dcl_literal l355, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB\n"
+"div_zeroop(infinity) r0.___w, l355, r0.w\n"
+"cmov_logical r0.___w, r2.y, r0.w, r1.w\n"
+"div_zeroop(infinity) r1.___w, r1.z, r0.x\n"
+"\n"
+"dcl_literal l356, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"lt r2._y__, r2.z, l356\n"
+"and r2._y__, r1.y, r2.y\n"
+"cmov_logical r0.___w, r2.y, r1.w, r0.w\n"
+"cmov_logical r0.__z_, r2.w, r0.z, r0.w\n"
+"cmov_logical r0.__z_, r1.y, r0.z, r1.z\n"
+"\n"
+"dcl_literal l357, 0x00000000, 0x00000000, 0x80000000, 0x7F800000\n"
+"and r1.__zw, r0.z, l357\n"
+"if_logicalz r1.w\n"
+" \n"
+" dcl_literal l358, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+" and r0.___w, r0.z, l358\n"
+" itof r0.___w, r0.w\n"
+" \n"
+" dcl_literal l359, 0x00000000, 0x7F800000, 0x007FFFFF, 0x00000000\n"
+" and r2._yz_, r0.w, l359\n"
+" \n"
+" dcl_literal l360, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r0.___w, r2.y, l360\n"
+" \n"
+" dcl_literal l361, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" iadd r0.___w, r0.w, l361\n"
+" \n"
+" dcl_literal l362, 0x00800000, 0x00800000, 0x00800000, 0x00800000\n"
+" ior r2._y__, r2.z, l362\n"
+" \n"
+" dcl_literal l363, 0x00000096, 0x00000096, 0x00000096, 0x00000096\n"
+" iadd r0.___w, l363, r0.w_neg(xyzw)\n"
+" \n"
+" dcl_literal l364, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ilt r2.__z_, l364, r0.w\n"
+" \n"
+" dcl_literal l365, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+" cmov_logical r0.___w, r2.z, l365, r0.w\n"
+" \n"
+" dcl_literal l366, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.__z_, l366, r0.w\n"
+" ishr r2.___w, r2.y, r0.w\n"
+" inegate r0.___w, r0.w\n"
+" \n"
+" dcl_literal l367, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishl r0.___w, r0.w, l367\n"
+" iadd r0.___w, r2.y, r0.w\n"
+" cmov_logical r0.___w, r2.z, r2.w, r0.w\n"
+"else\n"
+" \n"
+" dcl_literal l368, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+" and r2._y__, r0.z, l368\n"
+" \n"
+" dcl_literal l369, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+" ishr r1.___w, r1.w, l369\n"
+" \n"
+" dcl_literal l370, 0x0C000000, 0x0C000000, 0x0C000000, 0x0C000000\n"
+" iadd r2._y__, r2.y, l370\n"
+" \n"
+" dcl_literal l371, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99, 0xFFFFFF99\n"
+" iadd r1.___w, r1.w, l371\n"
+" \n"
+" dcl_literal l372, 0x0000007F, 0x0000007F, 0x0000007F, 0x0000007F\n"
+" ilt r1.___w, l372, r1.w\n"
+" \n"
+" dcl_literal l373, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+" cmov_logical r0.___w, r1.w, l373, r2.y\n"
+"endif\n"
+"ior r0.___w, r1.z, r0.w\n"
+"cmov_logical r0.__z_, r2.x, r0.w, r0.z\n"
+"\n"
+"dcl_literal l374, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.___w, l374, r0.x\n"
+"and r0.___w, r5.z, r0.w\n"
+"cmov_logical r0.__z_, r0.w, r0.x, r0.z\n"
+"\n"
+"dcl_literal l375, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"ilt r0._y__, l375, r0.y\n"
+"and r0.___w, r5.z, r1.x\n"
+"ior r0._y__, r0.y, r0.w\n"
+"\n"
+"dcl_literal l376, 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000\n"
+"ior r0.___w, r0.x, l376\n"
+"cmov_logical r0._y__, r0.y, r0.w, r0.z\n"
+"and r0.__z_, r1.y, r3.x\n"
+"\n"
+"dcl_literal l377, 0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000\n"
+"cmov_logical r0._y__, r0.z, l377, r0.y\n"
+"\n"
+"dcl_literal l378, 0x3F800000, 0x00000000, 0x40000000, 0x00000000\n"
+"ieq r0.x_z_, r0.x, l378\n"
+"ior r0.x___, r0.z, r0.x\n"
+"\n"
+"dcl_literal l379, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"cmov_logical r0.x___, r0.x, l379, r0.y\n"
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__trunc_f32", /* Table entry: IL macro that applies round_z to in0.x (presumably round-toward-zero; confirm in the IL spec). */
+"mdef(457)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"round_z r0.x___, r0.x\n" /* only the x component is written */
+"mov out0, r0\n" /* y/z/w of in0 are passed through unchanged */
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_diff_u16", /* Table entry: |in0.x - in1.x| computed as max(d, -d), then masked to the low 16 bits. */
+"mdef(458)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* w = x - y (second operand carries the _neg modifier) */
+"imax r0.___w, r0.w_neg(xyzw), r0.w\n" /* w = max(-w, w) */
+"\n"
+"dcl_literal l1, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.x___, r0.w, l1\n" /* result = low 16 bits of w */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_diff_u32", /* Table entry: unsigned absolute difference of in0.x and in1.x as umax - umin. */
+"mdef(459)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"umin r0.___w, r0.y, r0.x\n" /* w = smaller operand */
+"umax r1.x___, r0.y, r0.x\n" /* r1.x = larger operand */
+"iadd r0.x___, r1.x, r0.w_neg(xyzw)\n" /* result = larger - smaller */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_diff_u8", /* Table entry: |in0.x - in1.x| computed as max(d, -d), then masked to the low 8 bits. */
+"mdef(460)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n" /* w = x - y (second operand carries the _neg modifier) */
+"imax r0.___w, r0.w_neg(xyzw), r0.w\n" /* w = max(-w, w) */
+"\n"
+"dcl_literal l0, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.x___, r0.w, l0\n" /* result = low 8 bits of w */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_u16", /* Table entry: identity macro; out0 receives in0 unchanged. */
+"mdef(461)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0, r0\n" /* self-move: no change to r0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_u32", /* Table entry: identity macro; out0 receives in0 unchanged. */
+"mdef(462)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0, r0\n" /* self-move: no change to r0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uabs_u8", /* Table entry: identity macro; out0 receives in0 unchanged. */
+"mdef(463)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"mov r0, r0\n" /* self-move: no change to r0 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uadd_sat_i32", /* Table entry: 32-bit add of in0.x and in1.x that yields 0xFFFFFFFF when the sum wraps. NOTE(review): the name ends in i32 although the wrap test is unsigned (ult) - confirm the naming is intended. */
+"mdef(464)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"iadd r0.___w, r0.x, r0.y\n" /* w = x + y */
+"ult r1.x___, r0.w, r0.x\n" /* r1.x = (sum < x) unsigned, i.e. the add wrapped */
+"\n"
+"dcl_literal l10, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r0.x___, r1.x, l10, r0.w\n" /* presumably selects l10 when r1.x is non-zero, else the sum - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uadd_sat_u16", /* Table entry: in0.x + in1.x clamped to at most 0x0000FFFF via umin. */
+"mdef(465)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"iadd r0.___w, r0.x, r0.y\n" /* w = x + y; inputs are not masked here - assumes callers pass values already in 16-bit range, TODO confirm */
+"\n"
+"dcl_literal l5, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"umin r0.x___, r0.w, l5\n" /* result = min(sum, 0xFFFF) unsigned */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uadd_sat_u8", /* Table entry: in0.x + in1.x clamped to at most 0x000000FF via umin. */
+"mdef(466)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = in0.x, y = in1.x */
+"iadd r1.x___, r0.x, r0.y\n" /* r1.x = x + y; inputs are not masked here - assumes callers pass values already in 8-bit range, TODO confirm */
+"\n"
+"dcl_literal l2, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"umin r0.x___, r1.x, l2\n" /* result = min(sum, 0xFF) unsigned */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__uall_u16", /* Table entry: tests bit 15 (mask 0x8000) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(467)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l3, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n"
+"and r0.__z_, r0.x, l3\n" /* z = x & 0x8000 */
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l4, l5\n" /* presumably x = (z != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uall_u32", /* Table entry: tests bit 31 (mask 0x80000000) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(468)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r0.x___, r0.x, l6\n" /* x = x & 0x80000000 (done in place, unlike the 8/16-bit variants which use z) */
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l7, l8\n" /* presumably x = (x != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uall_u8", /* Table entry: tests bit 7 (mask 0x80) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(469)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+"and r0.__z_, r0.x, l0\n" /* z = x & 0x80 */
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l1, l2\n" /* presumably x = (z != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uany_i16", /* Table entry: tests bit 15 (mask 0x8000) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(470)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l3, 0x00008000, 0x00008000, 0x00008000, 0x00008000\n"
+"and r0.__z_, r0.x, l3\n" /* z = x & 0x8000 */
+"\n"
+"dcl_literal l4, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l5, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l4, l5\n" /* presumably x = (z != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uany_i32", /* Table entry: tests bit 31 (mask 0x80000000) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(471)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l6, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"and r0.x___, r0.x, l6\n" /* x = x & 0x80000000 (done in place) */
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l8, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l7, l8\n" /* presumably x = (x != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uany_u8", /* Table entry: tests bit 7 (mask 0x80) of in0.x and produces 1 or 0 in out0.x. */
+"mdef(472)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"dcl_literal l0, 0x00000080, 0x00000080, 0x00000080, 0x00000080\n"
+"and r0.__z_, r0.x, l0\n" /* z = x & 0x80 */
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"\n"
+"dcl_literal l2, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.z, l1, l2\n" /* presumably x = (z != 0) ? 1 : 0 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uclz_u16", /* Table entry: leading-zero count of the low 16 bits of in0.x, derived from a float exponent; yields 16 when those bits are all zero. */
+"mdef(473)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l10, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.x, l10\n" /* z = x & 0xFFFF */
+"\n"
+"dcl_literal l11, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r0.___w, r0.z, l11\n" /* place z in the mantissa bits of the 1.0f bit pattern (0x3F800000) */
+"\n"
+"dcl_literal l12, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l12\n" /* add the -1.0f bit pattern; presumably a float add, leaving the top set bit encoded in the exponent */
+"\n"
+"dcl_literal l13, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l13\n" /* keep only the exponent field */
+"\n"
+"dcl_literal l14, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l14, r0.w_neg(xyzw)\n" /* w = 0x3F800000 - exponent field */
+"\n"
+"dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l15\n"
+"\n"
+"dcl_literal l16, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r0.___w, r0.w, l16\n" /* shift right by 23 to get the exponent difference as an integer */
+"\n"
+"dcl_literal l17, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r0.___w, r0.w, l17\n" /* subtract 8 to rebase onto a 16-bit count */
+"\n"
+"dcl_literal l18, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"cmov_logical r0.x___, r0.z, r0.w, l18\n" /* presumably x = (z != 0) ? w : 16 - confirm cmov_logical operand order in the IL spec */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uclz_u32", /* Table entry: leading-zero count of in0.x built from two 16-bit counts (high half, then low half), each derived from a float exponent. */
+"mdef(474)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l19, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r0.__z_, r0.x, l19\n" /* z = x >> 16 (high half) */
+"\n"
+"dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.z, l20\n"
+"\n"
+"dcl_literal l21, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r0.___w, r0.z, l21\n" /* place the high half in the mantissa bits of the 1.0f bit pattern */
+"\n"
+"dcl_literal l22, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l22\n" /* add the -1.0f bit pattern; presumably a float add, leaving the top set bit encoded in the exponent */
+"\n"
+"dcl_literal l23, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l23\n" /* keep only the exponent field */
+"\n"
+"dcl_literal l24, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l24, r0.w_neg(xyzw)\n" /* w = 0x3F800000 - exponent field */
+"\n"
+"dcl_literal l25, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l25\n"
+"\n"
+"dcl_literal l26, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r0.___w, r0.w, l26\n" /* shift right by 23 to get the exponent difference as an integer */
+"\n"
+"dcl_literal l27, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r0.___w, r0.w, l27\n" /* subtract 8 to rebase onto a 16-bit count */
+"\n"
+"dcl_literal l28, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"cmov_logical r0.__z_, r0.z, r0.w, l28\n" /* presumably z = (high half != 0) ? count : 16 - confirm cmov_logical operand order in the IL spec */
+"\n"
+"dcl_literal l29, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.___w, r0.x, l29\n" /* w = x & 0xFFFF (low half); the same sequence is repeated using r1.x as scratch */
+"\n"
+"dcl_literal l30, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1.x___, r0.w, l30\n"
+"\n"
+"dcl_literal l31, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.x___, r1.x, l31\n"
+"\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l32\n"
+"\n"
+"dcl_literal l33, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1.x___, l33, r1.x_neg(xyzw)\n"
+"\n"
+"dcl_literal l34, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l34\n"
+"\n"
+"dcl_literal l35, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1.x___, r1.x, l35\n"
+"\n"
+"dcl_literal l36, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r1.x___, r1.x, l36\n"
+"\n"
+"dcl_literal l37, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"cmov_logical r0.___w, r0.w, r1.x, l37\n" /* presumably w = (low half != 0) ? count : 16 */
+"\n"
+"dcl_literal l38, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ieq r1.x___, r0.z, l38\n" /* r1.x = (high-half count == 16), i.e. the high half was zero */
+"iadd r0.___w, r0.z, r0.w\n" /* w = high count + low count */
+"cmov_logical r0.x___, r1.x, r0.w, r0.z\n" /* presumably x = high half zero ? combined count : high count alone */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__uclz_u8", /* Table entry: computes a 16-bit leading-zero count of in0.x (float-exponent method) and subtracts 8 to express it as an 8-bit count. */
+"mdef(475)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"\n"
+"\n"
+"dcl_literal l0, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.x, l0\n" /* z = x & 0xFFFF; the mask is 16 bits wide, not 8 - assumes the input already fits in 8 bits, TODO confirm */
+"\n"
+"dcl_literal l1, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r0.___w, r0.z, l1\n" /* place z in the mantissa bits of the 1.0f bit pattern (0x3F800000) */
+"\n"
+"dcl_literal l2, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l2\n" /* add the -1.0f bit pattern; presumably a float add, leaving the top set bit encoded in the exponent */
+"\n"
+"dcl_literal l3, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l3\n" /* keep only the exponent field */
+"\n"
+"dcl_literal l4, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l4, r0.w_neg(xyzw)\n" /* w = 0x3F800000 - exponent field */
+"\n"
+"dcl_literal l5, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l5\n"
+"\n"
+"dcl_literal l6, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r0.___w, r0.w, l6\n" /* shift right by 23 to get the exponent difference as an integer */
+"\n"
+"dcl_literal l7, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r0.___w, r0.w, l7\n" /* subtract 8 to rebase onto a 16-bit count */
+"\n"
+"dcl_literal l8, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"cmov_logical r0.__z_, r0.z, r0.w, l8\n" /* presumably z = (z != 0) ? w : 16 - confirm cmov_logical operand order in the IL spec */
+"\n"
+"dcl_literal l9, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8\n"
+"iadd r0.x___, r0.z, l9\n" /* result = 16-bit count - 8 */
+"mov out0, r0\n"
+"mend\n"
+,1,1 /* matches the in(1)/out(1) counts declared in the mdef header */
+},
+{ "__udiv_i16", /* Table entry: unsigned divide of the low 16 bits of in0.x by the low 16 bits of in1.x; a zero divisor produces 0. */
+"mdef(476)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* gather both operands into r0: x = dividend, y = divisor */
+"dcl_literal l21, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l21\n" /* r1.x = dividend & 0xFFFF, r1.y = divisor & 0xFFFF */
+"dcl_literal l22, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l22\n" /* presumably w = (divisor != 0) ? divisor : 1, so udiv never sees a zero divisor */
+"udiv r0.___w, r1.x, r0.w\n"
+"dcl_literal l23, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.___w, r0.w, l23\n" /* keep the low 16 bits of the quotient */
+"dcl_literal l24, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r1.y, r0.w, l24\n" /* presumably z = (divisor != 0) ? quotient : 0 */
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__udiv_i32", /* Table entry: out0.x = in0.x udiv in1.x, with no explicit handling of a zero divisor in this macro. */
+"mdef(477)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"udiv r0.x, r0.x, r1.x\n" /* NOTE(review): destination is written r0.x here rather than the r0.x___ mask form used by neighbouring entries - confirm both spellings are accepted */
+"mov out0, r0\n"
+"mend\n"
+,2,1 /* matches the in(2)/out(1) counts declared in the mdef header */
+},
+{ "__udiv_i64",
+"mdef(478)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0.__zw, r1.yyxy\n"
+"ieq r1.xy__, r0.yxyy, r0.wzww\n"
+"ult r1.__zw, r0.yyyx, r0.wwwz\n"
+"and r1.___w, r1.x, r1.w\n"
+"ior r1.___w, r1.z, r1.w\n"
+"dcl_literal l1, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.___w, r1.w, l1, l2\n"
+"and r1._y__, r1.y, r1.x\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, l3, r1.w\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r2.xy__, r0.wzww, l4\n"
+"and r1.___w, r2.y, r2.x\n"
+"dcl_literal l5, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.__z_, r1.w, l5, r0.z\n"
+"dcl_literal l6, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r2._yz_, r0.wwyw, l6\n"
+"dcl_literal l7, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r2.___w, r0.x, l7\n"
+"dcl_literal l8, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.xy__, r0.wyww, l8\n"
+"dcl_literal l9, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.__z_, r0.z, l9\n"
+"ior r3.x___, r3.x, r3.z\n"
+"dcl_literal l10, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.x___, r3.x, l10\n"
+"dcl_literal l11, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r0.z, l11\n"
+"dcl_literal l12, 0x3F800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"ior r4.xy__, r2.yzyy, l12\n"
+"dcl_literal l13, 0xBF800000, 0xBF800000, 0x00000000, 0x00000000\n"
+"add r4.xy__, r4.xyxx, l13\n"
+"dcl_literal l14, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r4.xy__, r4.xyxx, l14\n"
+"dcl_literal l15, 0x3F800000, 0x3F800000, 0x00000000, 0x00000000\n"
+"iadd r4.xy__, l15, r4.xyxx_neg(xyzw)\n"
+"dcl_literal l16, 0x7F800000, 0x7F800000, 0x00000000, 0x00000000\n"
+"and r4.xy__, r4.xyxx, l16\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.xy__, r4.xyxx, l17\n"
+"dcl_literal l18, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000\n"
+"iadd r4.xy__, r4.xyxx, l18\n"
+"dcl_literal l19, 0x00000000, 0x00000017, 0x00000017, 0x00000000\n"
+"cmov_logical r2._yz_, r2.yyzy, r4.xxyx, l19\n"
+"dcl_literal l20, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.x, l20\n"
+"dcl_literal l21, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l21\n"
+"dcl_literal l22, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l22\n"
+"dcl_literal l23, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l23, r3.w_neg(xyzw)\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l24\n"
+"dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l25\n"
+"dcl_literal l26, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l26\n"
+"dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.x___, r3.x, r3.w, l27\n"
+"dcl_literal l28, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.z, l28\n"
+"dcl_literal l29, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l29\n"
+"dcl_literal l30, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l30\n"
+"dcl_literal l31, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l31, r3.w_neg(xyzw)\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l32\n"
+"dcl_literal l33, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l33\n"
+"dcl_literal l34, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.___w, r3.w, l34\n"
+"dcl_literal l35, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r3.w, l35\n"
+"dcl_literal l36, 0x00000017, 0x00000017, 0x00000000, 0x00000000\n"
+"ieq r4.xy__, r2.yzyy, l36\n"
+"iadd r3.x___, r2.y, r3.x\n"
+"cmov_logical r2._y__, r4.x, r3.x, r2.y\n"
+"dcl_literal l37, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.___w, r3.x, l37\n"
+"iadd r3.x___, r3.x, r3.z\n"
+"cmov_logical r2._y__, r3.w, r3.x, r2.y\n"
+"dcl_literal l38, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ige r3.x___, r2.y, l38\n"
+"dcl_literal l39, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.__z_, r2.y, l39\n"
+"dcl_literal l40, 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F\n"
+"cmov_logical r3.x___, r3.x, r3.z, l40\n"
+"ult r3.__z_, r0.x, r0.z\n"
+"and r3.__z_, r1.x, r3.z\n"
+"ior r1.__z_, r1.z, r3.z\n"
+"dcl_literal l41, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l42, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.__z_, r1.z, l41, l42\n"
+"ieq r3.__z_, r0.x, r0.z\n"
+"and r1.x___, r1.x, r3.z\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.x, l43, r1.z\n"
+"ior r1.__z_, r2.w, r3.y\n"
+"dcl_literal l44, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.z, l44\n"
+"dcl_literal l45, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r2.___w, r0.x, l45\n"
+"dcl_literal l46, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3._y__, r1.z, l46\n"
+"dcl_literal l47, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3._y__, r3.y, l47\n"
+"dcl_literal l48, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l48\n"
+"dcl_literal l49, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3._y__, l49, r3.y_neg(xyzw)\n"
+"dcl_literal l50, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l50\n"
+"dcl_literal l51, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3._y__, r3.y, l51\n"
+"dcl_literal l52, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3._y__, r3.y, l52\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r1.__z_, r1.z, r3.y, l53\n"
+"dcl_literal l54, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3._y__, r2.w, l54\n"
+"dcl_literal l55, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3._y__, r3.y, l55\n"
+"dcl_literal l56, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l56\n"
+"dcl_literal l57, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3._y__, l57, r3.y_neg(xyzw)\n"
+"dcl_literal l58, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3._y__, r3.y, l58\n"
+"dcl_literal l59, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3._y__, r3.y, l59\n"
+"dcl_literal l60, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3._y__, r3.y, l60\n"
+"dcl_literal l61, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r2.___w, r2.w, r3.y, l61\n"
+"iadd r1.__z_, r2.z, r1.z\n"
+"cmov_logical r2.__z_, r4.y, r1.z, r2.z\n"
+"dcl_literal l62, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3._y__, r1.z, l62\n"
+"iadd r1.__z_, r1.z, r2.w\n"
+"cmov_logical r1.__z_, r3.y, r1.z, r2.z\n"
+"ilt r2.__z_, r1.z, r2.y\n"
+"ieq r2.___w, r2.y, r1.z\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l63\n"
+"and r1.x___, r2.w, r1.x\n"
+"ior r1.x___, r2.z, r1.x\n"
+"iadd r2.__z_, r2.y, r1.z_neg(xyzw)\n"
+"dcl_literal l64, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.x___, r1.x, r2.z, l64\n"
+"ilt r2.__z_, r3.x, r1.x\n"
+"iadd r2.___w, r1.x, r3.x_neg(xyzw)\n"
+"and r2.__z_, r2.z, r2.w\n"
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r2.___w, r1.x, l65\n"
+"dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r2.w, l66, r2.z\n"
+"dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l67\n"
+"if_logicalnz r1.x\n"
+" \n"
+" dcl_literal l68, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2.___w, r2.z, l68\n"
+" \n"
+" dcl_literal l69, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3._y__, r2.z, l69\n"
+" cmov_logical r3._y__, r2.w, r3.y, r2.z\n"
+" \n"
+" dcl_literal l70, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.__z_, r2.z, l70\n"
+" \n"
+" dcl_literal l71, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.___w, l71, r3.y_neg(xyzw)\n"
+" ushr r3.___w, r0.z, r3.w\n"
+" ishl r3._y__, r0.z, r3.y\n"
+" ishl r3.__z_, r0.w, r3.z\n"
+" ior r3.__z_, r3.w, r3.z\n"
+" cmov_logical r3.__z_, r2.w, r0.z, r3.z\n"
+" \n"
+" dcl_literal l72, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.___w, r2.w, l72, r3.y\n"
+" cmov_logical r3._y__, r2.z, r3.z, r0.w\n"
+" cmov_logical r2.___w, r2.z, r2.w, r0.z\n"
+" \n"
+" dcl_literal l73, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.__z_, r1.z, l73\n"
+" \n"
+" dcl_literal l74, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r1.z, l74\n"
+" cmov_logical r3.___w, r3.z, r3.w, r1.z\n"
+" \n"
+" dcl_literal l75, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r1.z, l75\n"
+" \n"
+" dcl_literal l76, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.___w, l76, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r0.x, r3.w\n"
+" ishl r4.__z_, r0.y, r4.x\n"
+" ior r4._y__, r4.y, r4.z\n"
+" cmov_logical r4._y__, r3.z, r0.x, r4.y\n"
+" cmov_logical r4._y__, r1.z, r4.y, r0.y\n"
+" ushr r3.___w, r2.w, r3.w\n"
+" ishl r4.x___, r3.y, r4.x\n"
+" ior r3.___w, r3.w, r4.x\n"
+" cmov_logical r3.__z_, r3.z, r2.w, r3.w\n"
+" cmov_logical r1.__z_, r1.z, r3.z, r3.y\n"
+" udiv r3.__z_, r4.y, r1.z\n"
+" umul r3.___w, r3.z, r1.z\n"
+" ilt r4.x___, r4.y, r3.w\n"
+" iadd r4.__z_, r3.w, r4.y_neg(xyzw)\n"
+" iadd r4.__z_, r4.z, r1.z\n"
+" \n"
+" dcl_literal l77, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.__z_, r4.z, l77\n"
+" iadd r3.___w, r4.y, r3.w_neg(xyzw)\n"
+" cmov_logical r3.___w, r4.x, r4.z, r3.w\n"
+" udiv r1.__z_, r3.w, r1.z\n"
+" iadd r3.___w, r3.z, r1.z_neg(xyzw)\n"
+" iadd r1.__z_, r3.z, r1.z\n"
+" cmov_logical r1.__z_, r4.x, r3.w, r1.z\n"
+" \n"
+" dcl_literal l78, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r2.w, l78\n"
+" \n"
+" dcl_literal l79, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r2.w, l79\n"
+" \n"
+" dcl_literal l80, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r1.z, l80\n"
+" \n"
+" dcl_literal l81, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r1.z, l81\n"
+" umul r4.__z_, r3.w, r4.y\n"
+" umul r3.___w, r3.w, r4.x\n"
+" umul r4.___w, r3.z, r4.y\n"
+" umul r3.__z_, r3.z, r4.x\n"
+" \n"
+" dcl_literal l82, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r3.z, l82\n"
+" \n"
+" dcl_literal l83, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r4.w, l83\n"
+" iadd r5.x___, r5.x, r5.y\n"
+" iadd r3.___w, r3.w, r5.x\n"
+" \n"
+" dcl_literal l84, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r3.w, l84\n"
+" iadd r4.__z_, r4.z, r5.x\n"
+" \n"
+" dcl_literal l85, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r4.w, l85\n"
+" iadd r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l86, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.___w, r3.w, l86\n"
+" \n"
+" dcl_literal l87, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r3.z, l87\n"
+" ior r3.__z_, r3.w, r3.z\n"
+" \n"
+" dcl_literal l88, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.y, l88\n"
+" \n"
+" dcl_literal l89, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.y, l89\n"
+" umul r4.___w, r4.w, r4.x\n"
+" umul r4._y__, r3.w, r4.y\n"
+" umul r3.___w, r3.w, r4.x\n"
+" \n"
+" dcl_literal l90, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r3.w, l90\n"
+" \n"
+" dcl_literal l91, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4._y__, r4.y, l91\n"
+" iadd r4.x___, r4.x, r4.y\n"
+" iadd r4.x___, r4.w, r4.x\n"
+" \n"
+" dcl_literal l92, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4.x___, r4.x, l92\n"
+" \n"
+" dcl_literal l93, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.w, l93\n"
+" ior r3.___w, r4.x, r3.w\n"
+" iadd r4.x___, r3.w, r4.z\n"
+" ult r3.___w, r4.x, r3.w\n"
+" ult r4._y__, r0.y, r4.x\n"
+" ieq r4.__z_, r0.y, r4.x\n"
+" ult r4.___w, r0.x, r3.z\n"
+" and r4.___w, r4.z, r4.w\n"
+" ior r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l94, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4._y__, r4.y, l94, l95\n"
+" ieq r4.___w, r0.x, r3.z\n"
+" and r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4._y__, r4.z, l96, r4.y\n"
+" \n"
+" dcl_literal l97, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4._y__, r4.y, l97\n"
+" ior r3.___w, r3.w, r4.y\n"
+" \n"
+" dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4._y__, r1.z, l98\n"
+" cmov_logical r1.__z_, r3.w, r4.y, r1.z\n"
+" iadd r2.___w, r3.z, r2.w_neg(xyzw)\n"
+" ult r4._y__, r3.z, r2.w\n"
+" iadd r4._y__, r4.x, r4.y\n"
+" iadd r3._y__, r4.y, r3.y_neg(xyzw)\n"
+" cmov_logical r3._y__, r3.w, r3.y, r4.x\n"
+" cmov_logical r2.___w, r3.w, r2.w, r3.z\n"
+" iadd r3.___w, r0.x, r2.w_neg(xyzw)\n"
+" ult r2.___w, r0.x, r3.w\n"
+" iadd r2.___w, r0.y, r2.w\n"
+" iadd r3.__z_, r2.w, r3.y_neg(xyzw)\n"
+"else\n"
+" mov r3.__zw, r0.yyyx\n"
+" \n"
+" dcl_literal l99, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r1.__z_, l99\n"
+"endif\n"
+"ult r2.___w, r3.z, r0.w\n"
+"ieq r3._y__, r3.z, r0.w\n"
+"ult r4.x___, r3.w, r0.z\n"
+"and r4.x___, r3.y, r4.x\n"
+"ior r2.___w, r2.w, r4.x\n"
+"dcl_literal l100, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l101, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2.___w, r2.w, l100, l101\n"
+"ieq r4.x___, r3.w, r0.z\n"
+"and r3._y__, r3.y, r4.x\n"
+"dcl_literal l102, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r3.y, l102, r2.w\n"
+"dcl_literal l103, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3._y__, r3.z, l103\n"
+"dcl_literal l104, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r4.x___, r3.z, l104\n"
+"dcl_literal l105, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r4._y__, r3.w, l105\n"
+"ior r3._y__, r3.y, r4.y\n"
+"dcl_literal l106, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3._y__, r3.y, l106\n"
+"dcl_literal l107, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r4._y__, r3.w, l107\n"
+"dcl_literal l108, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.__z_, r4.x, l108\n"
+"dcl_literal l109, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.__z_, r4.z, l109\n"
+"dcl_literal l110, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l110\n"
+"dcl_literal l111, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.__z_, l111, r4.z_neg(xyzw)\n"
+"dcl_literal l112, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l112\n"
+"dcl_literal l113, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.__z_, r4.z, l113\n"
+"dcl_literal l114, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.__z_, r4.z, l114\n"
+"dcl_literal l115, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4.x___, r4.x, r4.z, l115\n"
+"dcl_literal l116, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.__z_, r3.y, l116\n"
+"dcl_literal l117, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.__z_, r4.z, l117\n"
+"dcl_literal l118, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l118\n"
+"dcl_literal l119, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.__z_, l119, r4.z_neg(xyzw)\n"
+"dcl_literal l120, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l120\n"
+"dcl_literal l121, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.__z_, r4.z, l121\n"
+"dcl_literal l122, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r4.__z_, r4.z, l122\n"
+"dcl_literal l123, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r4.z, l123\n"
+"dcl_literal l124, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r4.__z_, r4.y, l124\n"
+"dcl_literal l125, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r4.__z_, r4.z, l125\n"
+"dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l126\n"
+"dcl_literal l127, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r4.__z_, l127, r4.z_neg(xyzw)\n"
+"dcl_literal l128, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r4.__z_, r4.z, l128\n"
+"dcl_literal l129, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r4.__z_, r4.z, l129\n"
+"dcl_literal l130, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r4.__z_, r4.z, l130\n"
+"dcl_literal l131, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r4._y__, r4.y, r4.z, l131\n"
+"dcl_literal l132, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r4.__z_, r4.x, l132\n"
+"iadd r3._y__, r4.x, r3.y\n"
+"cmov_logical r4.x___, r4.z, r3.y, r4.x\n"
+"dcl_literal l133, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r4.__z_, r3.y, l133\n"
+"iadd r3._y__, r3.y, r4.y\n"
+"cmov_logical r3._y__, r4.z, r3.y, r4.x\n"
+"ilt r4.x___, r3.y, r2.y\n"
+"ieq r4._y__, r2.y, r3.y\n"
+"dcl_literal l134, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.___w, r2.w, l134\n"
+"and r2.___w, r4.y, r2.w\n"
+"ior r2.___w, r4.x, r2.w\n"
+"and r1.x___, r1.x, r2.w\n"
+"iadd r2.___w, r2.y, r3.y_neg(xyzw)\n"
+"dcl_literal l135, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.x___, r1.x, r2.w, l135\n"
+"ilt r2.___w, r3.x, r1.x\n"
+"iadd r4.x___, r1.x, r3.x_neg(xyzw)\n"
+"and r2.___w, r2.w, r4.x\n"
+"dcl_literal l136, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.x___, r1.x, l136\n"
+"dcl_literal l137, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r4.x, l137, r2.w\n"
+"dcl_literal l138, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l138\n"
+"if_logicalnz r1.x\n"
+" \n"
+" dcl_literal l139, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r4.x___, r2.w, l139\n"
+" \n"
+" dcl_literal l140, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4._y__, r2.w, l140\n"
+" cmov_logical r4._y__, r4.x, r4.y, r2.w\n"
+" \n"
+" dcl_literal l141, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.__z_, r2.w, l141\n"
+" \n"
+" dcl_literal l142, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.___w, l142, r4.y_neg(xyzw)\n"
+" ushr r4.___w, r0.z, r4.w\n"
+" ishl r4._y__, r0.z, r4.y\n"
+" ishl r4.__z_, r0.w, r4.z\n"
+" ior r4.__z_, r4.w, r4.z\n"
+" cmov_logical r4.__z_, r4.x, r0.z, r4.z\n"
+" \n"
+" dcl_literal l143, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.x___, r4.x, l143, r4.y\n"
+" cmov_logical r4.xy__, r2.w, r4.xzxx, r0.zwzz\n"
+" \n"
+" dcl_literal l144, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r4.__z_, r3.y, l144\n"
+" \n"
+" dcl_literal l145, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.___w, r3.y, l145\n"
+" cmov_logical r4.___w, r4.z, r4.w, r3.y\n"
+" \n"
+" dcl_literal l146, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.x___, r3.y, l146\n"
+" \n"
+" dcl_literal l147, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.___w, l147, r4.w_neg(xyzw)\n"
+" ushr r5._y__, r3.w, r4.w\n"
+" ishl r5.__z_, r3.z, r5.x\n"
+" ior r5._y__, r5.y, r5.z\n"
+" cmov_logical r5._y__, r4.z, r3.w, r5.y\n"
+" cmov_logical r5._y__, r3.y, r5.y, r3.z\n"
+" ushr r4.___w, r4.x, r4.w\n"
+" ishl r5.x___, r4.y, r5.x\n"
+" ior r4.___w, r4.w, r5.x\n"
+" cmov_logical r4.__z_, r4.z, r4.x, r4.w\n"
+" cmov_logical r3._y__, r3.y, r4.z, r4.y\n"
+" udiv r4.__z_, r5.y, r3.y\n"
+" umul r4.___w, r4.z, r3.y\n"
+" ilt r5.x___, r5.y, r4.w\n"
+" iadd r5.__z_, r4.w, r5.y_neg(xyzw)\n"
+" iadd r5.__z_, r5.z, r3.y\n"
+" \n"
+" dcl_literal l148, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5.__z_, r5.z, l148\n"
+" iadd r4.___w, r5.y, r4.w_neg(xyzw)\n"
+" cmov_logical r4.___w, r5.x, r5.z, r4.w\n"
+" udiv r3._y__, r4.w, r3.y\n"
+" iadd r4.___w, r4.z, r3.y_neg(xyzw)\n"
+" iadd r3._y__, r4.z, r3.y\n"
+" cmov_logical r3._y__, r5.x, r4.w, r3.y\n"
+" \n"
+" dcl_literal l149, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r4.x, l149\n"
+" \n"
+" dcl_literal l150, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r4.x, l150\n"
+" \n"
+" dcl_literal l151, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r3.y, l151\n"
+" \n"
+" dcl_literal l152, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r3.y, l152\n"
+" umul r5.__z_, r4.w, r5.y\n"
+" umul r4.___w, r4.w, r5.x\n"
+" umul r5.___w, r4.z, r5.y\n"
+" umul r4.__z_, r4.z, r5.x\n"
+" \n"
+" dcl_literal l153, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x___, r4.z, l153\n"
+" \n"
+" dcl_literal l154, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6._y__, r5.w, l154\n"
+" iadd r6.x___, r6.x, r6.y\n"
+" iadd r4.___w, r4.w, r6.x\n"
+" \n"
+" dcl_literal l155, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.x___, r4.w, l155\n"
+" iadd r5.__z_, r5.z, r6.x\n"
+" \n"
+" dcl_literal l156, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r5.w, l156\n"
+" iadd r5.__z_, r5.z, r5.w\n"
+" \n"
+" dcl_literal l157, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r4.___w, r4.w, l157\n"
+" \n"
+" dcl_literal l158, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r4.z, l158\n"
+" ior r4.__z_, r4.w, r4.z\n"
+" \n"
+" dcl_literal l159, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.___w, r4.y, l159\n"
+" \n"
+" dcl_literal l160, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r4.y, l160\n"
+" umul r5.___w, r5.w, r5.x\n"
+" umul r5._y__, r4.w, r5.y\n"
+" umul r4.___w, r4.w, r5.x\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r4.w, l161\n"
+" \n"
+" dcl_literal l162, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r5.y, l162\n"
+" iadd r5.x___, r5.x, r5.y\n"
+" iadd r5.x___, r5.w, r5.x\n"
+" \n"
+" dcl_literal l163, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5.x___, r5.x, l163\n"
+" \n"
+" dcl_literal l164, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.___w, r4.w, l164\n"
+" ior r4.___w, r5.x, r4.w\n"
+" iadd r5.x___, r4.w, r5.z\n"
+" ult r4.___w, r5.x, r4.w\n"
+" ult r5._y__, r3.z, r5.x\n"
+" ieq r5.__z_, r3.z, r5.x\n"
+" ult r5.___w, r3.w, r4.z\n"
+" and r5.___w, r5.z, r5.w\n"
+" ior r5._y__, r5.y, r5.w\n"
+" \n"
+" dcl_literal l165, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l166, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r5._y__, r5.y, l165, l166\n"
+" ieq r5.___w, r3.w, r4.z\n"
+" and r5.__z_, r5.z, r5.w\n"
+" \n"
+" dcl_literal l167, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r5._y__, r5.z, l167, r5.y\n"
+" \n"
+" dcl_literal l168, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5._y__, r5.y, l168\n"
+" ior r4.___w, r4.w, r5.y\n"
+" \n"
+" dcl_literal l169, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5._y__, r3.y, l169\n"
+" cmov_logical r3._y__, r4.w, r5.y, r3.y\n"
+" iadd r4.x___, r4.z, r4.x_neg(xyzw)\n"
+" ult r5._y__, r4.z, r4.x\n"
+" iadd r5._y__, r5.x, r5.y\n"
+" iadd r4._y__, r5.y, r4.y_neg(xyzw)\n"
+" cmov_logical r4._y__, r4.w, r4.y, r5.x\n"
+" cmov_logical r4.x___, r4.w, r4.x, r4.z\n"
+" iadd r4.x___, r3.w, r4.x_neg(xyzw)\n"
+" ult r4.__z_, r3.w, r4.x\n"
+" iadd r4.__z_, r3.z, r4.z\n"
+" iadd r3.__z_, r4.z, r4.y_neg(xyzw)\n"
+" mov r3.___w, r4.x\n"
+"else\n"
+" \n"
+" dcl_literal l170, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r3._y__, l170\n"
+"endif\n"
+"ult r4.x___, r3.z, r0.w\n"
+"ieq r4._y__, r3.z, r0.w\n"
+"ult r4.__z_, r3.w, r0.z\n"
+"and r4.__z_, r4.y, r4.z\n"
+"ior r4.x___, r4.x, r4.z\n"
+"dcl_literal l171, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l172, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r4.x___, r4.x, l171, l172\n"
+"ieq r4.__z_, r3.w, r0.z\n"
+"and r4._y__, r4.y, r4.z\n"
+"dcl_literal l173, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.x___, r4.y, l173, r4.x\n"
+"dcl_literal l174, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r4._y__, r3.z, l174\n"
+"dcl_literal l175, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r4.__z_, r3.z, l175\n"
+"dcl_literal l176, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r4.___w, r3.w, l176\n"
+"ior r4._y__, r4.y, r4.w\n"
+"dcl_literal l177, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r4._y__, r4.y, l177\n"
+"dcl_literal l178, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r4.___w, r3.w, l178\n"
+"dcl_literal l179, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r4.z, l179\n"
+"dcl_literal l180, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l180\n"
+"dcl_literal l181, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l181\n"
+"dcl_literal l182, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l182, r5.x_neg(xyzw)\n"
+"dcl_literal l183, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l183\n"
+"dcl_literal l184, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l184\n"
+"dcl_literal l185, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.x___, r5.x, l185\n"
+"dcl_literal l186, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4.__z_, r4.z, r5.x, l186\n"
+"dcl_literal l187, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r4.y, l187\n"
+"dcl_literal l188, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l188\n"
+"dcl_literal l189, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l189\n"
+"dcl_literal l190, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l190, r5.x_neg(xyzw)\n"
+"dcl_literal l191, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l191\n"
+"dcl_literal l192, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l192\n"
+"dcl_literal l193, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.x___, r5.x, l193\n"
+"dcl_literal l194, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4._y__, r4.y, r5.x, l194\n"
+"dcl_literal l195, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.x___, r4.w, l195\n"
+"dcl_literal l196, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.x___, r5.x, l196\n"
+"dcl_literal l197, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l197\n"
+"dcl_literal l198, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.x___, l198, r5.x_neg(xyzw)\n"
+"dcl_literal l199, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.x___, r5.x, l199\n"
+"dcl_literal l200, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.x___, r5.x, l200\n"
+"dcl_literal l201, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r5.x___, r5.x, l201\n"
+"dcl_literal l202, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r4.___w, r4.w, r5.x, l202\n"
+"dcl_literal l203, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r5.x___, r4.z, l203\n"
+"iadd r4._y__, r4.z, r4.y\n"
+"cmov_logical r4.__z_, r5.x, r4.y, r4.z\n"
+"dcl_literal l204, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r5.x___, r4.y, l204\n"
+"iadd r4._y__, r4.y, r4.w\n"
+"cmov_logical r4._y__, r5.x, r4.y, r4.z\n"
+"ilt r4.__z_, r4.y, r2.y\n"
+"ieq r4.___w, r2.y, r4.y\n"
+"dcl_literal l205, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r4.x___, r4.x, l205\n"
+"and r4.x___, r4.w, r4.x\n"
+"ior r4.x___, r4.z, r4.x\n"
+"and r1.x___, r1.x, r4.x\n"
+"iadd r4.x___, r2.y, r4.y_neg(xyzw)\n"
+"dcl_literal l206, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.x___, r1.x, r4.x, l206\n"
+"ilt r4.x___, r3.x, r1.x\n"
+"iadd r4.__z_, r1.x, r3.x_neg(xyzw)\n"
+"and r4.x___, r4.x, r4.z\n"
+"dcl_literal l207, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r4.__z_, r1.x, l207\n"
+"dcl_literal l208, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.x___, r4.z, l208, r4.x\n"
+"dcl_literal l209, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l209\n"
+"if_logicalnz r1.x\n"
+" \n"
+" dcl_literal l210, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r4.__z_, r4.x, l210\n"
+" \n"
+" dcl_literal l211, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.___w, r4.x, l211\n"
+" cmov_logical r4.___w, r4.z, r4.w, r4.x\n"
+" \n"
+" dcl_literal l212, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.x___, r4.x, l212\n"
+" \n"
+" dcl_literal l213, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5._y__, l213, r4.w_neg(xyzw)\n"
+" ushr r5._y__, r0.z, r5.y\n"
+" ishl r4.___w, r0.z, r4.w\n"
+" ishl r5.x___, r0.w, r5.x\n"
+" ior r5.x___, r5.y, r5.x\n"
+" cmov_logical r5.x___, r4.z, r0.z, r5.x\n"
+" \n"
+" dcl_literal l214, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.__z_, r4.z, l214, r4.w\n"
+" cmov_logical r4.___w, r4.x, r5.x, r0.w\n"
+" cmov_logical r4.__z_, r4.x, r4.z, r0.z\n"
+" \n"
+" dcl_literal l215, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r5.x___, r4.y, l215\n"
+" \n"
+" dcl_literal l216, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r5._y__, r4.y, l216\n"
+" cmov_logical r5._y__, r5.x, r5.y, r4.y\n"
+" \n"
+" dcl_literal l217, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.__z_, r4.y, l217\n"
+" \n"
+" dcl_literal l218, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5._y__, l218, r5.y_neg(xyzw)\n"
+" ushr r5.___w, r3.w, r5.y\n"
+" ishl r6.x___, r3.z, r5.z\n"
+" ior r5.___w, r5.w, r6.x\n"
+" cmov_logical r5.___w, r5.x, r3.w, r5.w\n"
+" cmov_logical r5.___w, r4.y, r5.w, r3.z\n"
+" ushr r5._y__, r4.z, r5.y\n"
+" ishl r5.__z_, r4.w, r5.z\n"
+" ior r5._y__, r5.y, r5.z\n"
+" cmov_logical r5.x___, r5.x, r4.z, r5.y\n"
+" cmov_logical r4._y__, r4.y, r5.x, r4.w\n"
+" udiv r5.x___, r5.w, r4.y\n"
+" umul r5._y__, r5.x, r4.y\n"
+" ilt r5.__z_, r5.w, r5.y\n"
+" iadd r6.x___, r5.y, r5.w_neg(xyzw)\n"
+" iadd r6.x___, r6.x, r4.y\n"
+" \n"
+" dcl_literal l219, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r6.x___, r6.x, l219\n"
+" iadd r5._y__, r5.w, r5.y_neg(xyzw)\n"
+" cmov_logical r5._y__, r5.z, r6.x, r5.y\n"
+" udiv r4._y__, r5.y, r4.y\n"
+" iadd r5._y__, r5.x, r4.y_neg(xyzw)\n"
+" iadd r4._y__, r5.x, r4.y\n"
+" cmov_logical r4._y__, r5.z, r5.y, r4.y\n"
+" \n"
+" dcl_literal l220, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r4.z, l220\n"
+" \n"
+" dcl_literal l221, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5._y__, r4.z, l221\n"
+" \n"
+" dcl_literal l222, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.__z_, r4.y, l222\n"
+" \n"
+" dcl_literal l223, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r4.y, l223\n"
+" umul r6.x___, r5.y, r5.w\n"
+" umul r5._y__, r5.y, r5.z\n"
+" umul r6._y__, r5.x, r5.w\n"
+" umul r5.x___, r5.x, r5.z\n"
+" \n"
+" dcl_literal l224, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.__z_, r5.x, l224\n"
+" \n"
+" dcl_literal l225, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6.___w, r6.y, l225\n"
+" iadd r6.__z_, r6.z, r6.w\n"
+" iadd r5._y__, r5.y, r6.z\n"
+" \n"
+" dcl_literal l226, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6.__z_, r5.y, l226\n"
+" iadd r6.x___, r6.x, r6.z\n"
+" \n"
+" dcl_literal l227, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r6.y, l227\n"
+" iadd r6.x___, r6.x, r6.y\n"
+" \n"
+" dcl_literal l228, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5._y__, r5.y, l228\n"
+" \n"
+" dcl_literal l229, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r5.x, l229\n"
+" ior r5.x___, r5.y, r5.x\n"
+" \n"
+" dcl_literal l230, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r4.w, l230\n"
+" \n"
+" dcl_literal l231, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r6._y__, r4.w, l231\n"
+" umul r6._y__, r6.y, r5.z\n"
+" umul r5._y_w, r5.y, r5.zzzw\n"
+" \n"
+" dcl_literal l232, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r5.y, l232\n"
+" \n"
+" dcl_literal l233, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.___w, r5.w, l233\n"
+" iadd r5.__z_, r5.z, r5.w\n"
+" iadd r5.__z_, r6.y, r5.z\n"
+" \n"
+" dcl_literal l234, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r5.__z_, r5.z, l234\n"
+" \n"
+" dcl_literal l235, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5._y__, r5.y, l235\n"
+" ior r5._y__, r5.z, r5.y\n"
+" iadd r5.__z_, r5.y, r6.x\n"
+" ult r5._y__, r5.z, r5.y\n"
+" ult r5.___w, r3.z, r5.z\n"
+" ieq r6.x___, r3.z, r5.z\n"
+" ult r6._y__, r3.w, r5.x\n"
+" and r6._y__, r6.x, r6.y\n"
+" ior r5.___w, r5.w, r6.y\n"
+" \n"
+" dcl_literal l236, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l237, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r5.___w, r5.w, l236, l237\n"
+" ieq r6._y__, r3.w, r5.x\n"
+" and r6.x___, r6.x, r6.y\n"
+" \n"
+" dcl_literal l238, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r5.___w, r6.x, l238, r5.w\n"
+" \n"
+" dcl_literal l239, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r5.___w, r5.w, l239\n"
+" ior r5._y__, r5.y, r5.w\n"
+" \n"
+" dcl_literal l240, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5.___w, r4.y, l240\n"
+" cmov_logical r4._y__, r5.y, r5.w, r4.y\n"
+" iadd r4.__z_, r5.x, r4.z_neg(xyzw)\n"
+" ult r5.___w, r5.x, r4.z\n"
+" iadd r5.___w, r5.z, r5.w\n"
+" iadd r4.___w, r5.w, r4.w_neg(xyzw)\n"
+" cmov_logical r4.__zw, r5.y, r4.zzzw, r5.xxxz\n"
+" iadd r4.__z_, r3.w, r4.z_neg(xyzw)\n"
+" ult r5.x___, r3.w, r4.z\n"
+" iadd r5.x___, r3.z, r5.x\n"
+" iadd r3.__z_, r5.x, r4.w_neg(xyzw)\n"
+" mov r3.___w, r4.z\n"
+"else\n"
+" \n"
+" dcl_literal l241, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r4._y__, l241\n"
+"endif\n"
+"ult r4.__z_, r3.z, r0.w\n"
+"ieq r4.___w, r3.z, r0.w\n"
+"ult r5.x___, r3.w, r0.z\n"
+"and r5.x___, r4.w, r5.x\n"
+"ior r4.__z_, r4.z, r5.x\n"
+"dcl_literal l242, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l243, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r4.__z_, r4.z, l242, l243\n"
+"ieq r5.x___, r3.w, r0.z\n"
+"and r4.___w, r4.w, r5.x\n"
+"dcl_literal l244, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r4.__z_, r4.w, l244, r4.z\n"
+"dcl_literal l245, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r4.___w, r3.z, l245\n"
+"dcl_literal l246, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r5.x___, r3.z, l246\n"
+"dcl_literal l247, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r5._y__, r3.w, l247\n"
+"ior r4.___w, r4.w, r5.y\n"
+"dcl_literal l248, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r4.___w, r4.w, l248\n"
+"dcl_literal l249, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r5._y__, r3.w, l249\n"
+"dcl_literal l250, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r5.x, l250\n"
+"dcl_literal l251, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l251\n"
+"dcl_literal l252, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l252\n"
+"dcl_literal l253, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l253, r5.z_neg(xyzw)\n"
+"dcl_literal l254, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l254\n"
+"dcl_literal l255, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l255\n"
+"dcl_literal l256, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.__z_, r5.z, l256\n"
+"dcl_literal l257, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r5.x___, r5.x, r5.z, l257\n"
+"dcl_literal l258, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r4.w, l258\n"
+"dcl_literal l259, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l259\n"
+"dcl_literal l260, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l260\n"
+"dcl_literal l261, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l261, r5.z_neg(xyzw)\n"
+"dcl_literal l262, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l262\n"
+"dcl_literal l263, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l263\n"
+"dcl_literal l264, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r5.__z_, r5.z, l264\n"
+"dcl_literal l265, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r4.___w, r4.w, r5.z, l265\n"
+"dcl_literal l266, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r5.__z_, r5.y, l266\n"
+"dcl_literal l267, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r5.__z_, r5.z, l267\n"
+"dcl_literal l268, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l268\n"
+"dcl_literal l269, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r5.__z_, l269, r5.z_neg(xyzw)\n"
+"dcl_literal l270, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r5.__z_, r5.z, l270\n"
+"dcl_literal l271, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r5.__z_, r5.z, l271\n"
+"dcl_literal l272, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r5.__z_, r5.z, l272\n"
+"dcl_literal l273, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r5._y__, r5.y, r5.z, l273\n"
+"dcl_literal l274, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r5.__z_, r5.x, l274\n"
+"iadd r4.___w, r5.x, r4.w\n"
+"cmov_logical r5.x___, r5.z, r4.w, r5.x\n"
+"dcl_literal l275, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r5.__z_, r4.w, l275\n"
+"iadd r4.___w, r4.w, r5.y\n"
+"cmov_logical r4.___w, r5.z, r4.w, r5.x\n"
+"ilt r5.x___, r4.w, r2.y\n"
+"ieq r5._y__, r2.y, r4.w\n"
+"dcl_literal l276, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r4.__z_, r4.z, l276\n"
+"and r4.__z_, r5.y, r4.z\n"
+"ior r4.__z_, r5.x, r4.z\n"
+"and r1.x___, r1.x, r4.z\n"
+"iadd r2._y__, r2.y, r4.w_neg(xyzw)\n"
+"dcl_literal l277, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.x___, r1.x, r2.y, l277\n"
+"ilt r2._y__, r3.x, r1.x\n"
+"iadd r3.x___, r1.x, r3.x_neg(xyzw)\n"
+"and r2._y__, r2.y, r3.x\n"
+"dcl_literal l278, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r3.x___, r1.x, l278\n"
+"dcl_literal l279, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2._y__, r3.x, l279, r2.y\n"
+"dcl_literal l280, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l280\n"
+"if_logicalnz r1.x\n"
+" \n"
+" dcl_literal l281, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r1.x___, r2.y, l281\n"
+" \n"
+" dcl_literal l282, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.x___, r2.y, l282\n"
+" cmov_logical r3.x___, r1.x, r3.x, r2.y\n"
+" \n"
+" dcl_literal l283, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.__z_, r2.y, l283\n"
+" \n"
+" dcl_literal l284, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r5.x___, l284, r3.x_neg(xyzw)\n"
+" ushr r5.x___, r0.z, r5.x\n"
+" ishl r3.x___, r0.z, r3.x\n"
+" ishl r4.__z_, r0.w, r4.z\n"
+" ior r4.__z_, r5.x, r4.z\n"
+" cmov_logical r4.__z_, r1.x, r0.z, r4.z\n"
+" \n"
+" dcl_literal l285, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.x___, r1.x, l285, r3.x\n"
+" cmov_logical r0.___w, r2.y, r4.z, r0.w\n"
+" cmov_logical r1.x___, r2.y, r1.x, r0.z\n"
+" \n"
+" dcl_literal l286, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3.x___, r4.w, l286\n"
+" \n"
+" dcl_literal l287, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r4.__z_, r4.w, l287\n"
+" cmov_logical r4.__z_, r3.x, r4.z, r4.w\n"
+" \n"
+" dcl_literal l288, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r5.x___, r4.w, l288\n"
+" \n"
+" dcl_literal l289, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.__z_, l289, r4.z_neg(xyzw)\n"
+" ushr r5._y__, r3.w, r4.z\n"
+" ishl r5.__z_, r3.z, r5.x\n"
+" ior r5._y__, r5.y, r5.z\n"
+" cmov_logical r5._y__, r3.x, r3.w, r5.y\n"
+" cmov_logical r5._y__, r4.w, r5.y, r3.z\n"
+" ushr r4.__z_, r1.x, r4.z\n"
+" ishl r5.x___, r0.w, r5.x\n"
+" ior r4.__z_, r4.z, r5.x\n"
+" cmov_logical r3.x___, r3.x, r1.x, r4.z\n"
+" cmov_logical r3.x___, r4.w, r3.x, r0.w\n"
+" udiv r4.__z_, r5.y, r3.x\n"
+" umul r4.___w, r4.z, r3.x\n"
+" ilt r5.x___, r5.y, r4.w\n"
+" iadd r5.__z_, r4.w, r5.y_neg(xyzw)\n"
+" iadd r5.__z_, r5.z, r3.x\n"
+" \n"
+" dcl_literal l290, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r5.__z_, r5.z, l290\n"
+" iadd r4.___w, r5.y, r4.w_neg(xyzw)\n"
+" cmov_logical r4.___w, r5.x, r5.z, r4.w\n"
+" udiv r3.x___, r4.w, r3.x\n"
+" iadd r4.___w, r4.z, r3.x_neg(xyzw)\n"
+" iadd r3.x___, r4.z, r3.x\n"
+" cmov_logical r3.x___, r5.x, r4.w, r3.x\n"
+" \n"
+" dcl_literal l291, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r1.x, l291\n"
+" \n"
+" dcl_literal l292, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.x___, r1.x, l292\n"
+" \n"
+" dcl_literal l293, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.___w, r3.x, l293\n"
+" \n"
+" dcl_literal l294, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.x___, r3.x, l294\n"
+" umul r5._y__, r1.x, r5.x\n"
+" umul r1.x___, r1.x, r4.w\n"
+" umul r5.__z_, r4.z, r5.x\n"
+" umul r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l295, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r4.z, l295\n"
+" \n"
+" dcl_literal l296, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r6.x___, r5.z, l296\n"
+" iadd r5.___w, r5.w, r6.x\n"
+" iadd r1.x___, r1.x, r5.w\n"
+" \n"
+" dcl_literal l297, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.___w, r1.x, l297\n"
+" iadd r5._y__, r5.y, r5.w\n"
+" \n"
+" dcl_literal l298, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r5.__z_, r5.z, l298\n"
+" iadd r5._y__, r5.y, r5.z\n"
+" \n"
+" dcl_literal l299, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.x___, r1.x, l299\n"
+" \n"
+" dcl_literal l300, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r4.z, l300\n"
+" ior r1.x___, r1.x, r4.z\n"
+" \n"
+" dcl_literal l301, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r0.w, l301\n"
+" \n"
+" dcl_literal l302, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r0.___w, r0.w, l302\n"
+" umul r0.___w, r0.w, r4.w\n"
+" umul r5.x___, r4.z, r5.x\n"
+" umul r4.__z_, r4.z, r4.w\n"
+" \n"
+" dcl_literal l303, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r4.z, l303\n"
+" \n"
+" dcl_literal l304, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r5.x, l304\n"
+" iadd r4.___w, r4.w, r5.x\n"
+" iadd r0.___w, r0.w, r4.w\n"
+" \n"
+" dcl_literal l305, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r0.___w, r0.w, l305\n"
+" \n"
+" dcl_literal l306, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r4.z, l306\n"
+" ior r0.___w, r0.w, r4.z\n"
+" iadd r4.__z_, r0.w, r5.y\n"
+" ult r0.___w, r4.z, r0.w\n"
+" ult r4.___w, r3.z, r4.z\n"
+" ieq r3.__z_, r3.z, r4.z\n"
+" ult r4.__z_, r3.w, r1.x\n"
+" and r4.__z_, r3.z, r4.z\n"
+" ior r4.__z_, r4.w, r4.z\n"
+" \n"
+" dcl_literal l307, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l308, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4.__z_, r4.z, l307, l308\n"
+" ieq r1.x___, r3.w, r1.x\n"
+" and r1.x___, r3.z, r1.x\n"
+" \n"
+" dcl_literal l309, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.x___, r1.x, l309, r4.z\n"
+" \n"
+" dcl_literal l310, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, r1.x, l310\n"
+" ior r0.___w, r0.w, r1.x\n"
+" \n"
+" dcl_literal l311, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r1.x___, r3.x, l311\n"
+" cmov_logical r0.___w, r0.w, r1.x, r3.x\n"
+"else\n"
+" \n"
+" dcl_literal l312, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" mov r0.___w, l312\n"
+"endif\n"
+"dcl_literal l313, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.x___, r2.z, l313\n"
+"dcl_literal l314, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.x___, r2.z, l314\n"
+"cmov_logical r3.x___, r1.x, r3.x, r2.z\n"
+"dcl_literal l315, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.__z_, l315, r3.x_neg(xyzw)\n"
+"ushr r3.__z_, r1.z, r3.z\n"
+"ishl r3.x___, r1.z, r3.x\n"
+"cmov_logical r3.__z_, r1.x, r1.z, r3.z\n"
+"dcl_literal l316, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.x, l316, r3.x\n"
+"dcl_literal l317, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r3.x___, r2.z, r3.z, l317\n"
+"cmov_logical r1.x___, r2.z, r1.x, r1.z\n"
+"dcl_literal l318, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r2.w, l318\n"
+"dcl_literal l319, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r2.__z_, r2.w, l319\n"
+"cmov_logical r2.__z_, r1.z, r2.z, r2.w\n"
+"dcl_literal l320, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.__z_, l320, r2.z_neg(xyzw)\n"
+"ushr r3.__z_, r3.y, r3.z\n"
+"ishl r2.__z_, r3.y, r2.z\n"
+"cmov_logical r3.__z_, r1.z, r3.y, r3.z\n"
+"dcl_literal l321, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.z, l321, r2.z\n"
+"dcl_literal l322, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r2.w, r3.z, l322\n"
+"cmov_logical r1.__z_, r2.w, r1.z, r3.y\n"
+"iadd r1.__z_, r1.x, r1.z\n"
+"ult r1.x___, r1.z, r1.x\n"
+"dcl_literal l323, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.x___, r1.x, l323\n"
+"iadd r1.x___, r3.x, r1.x\n"
+"iadd r1.x___, r2.z, r1.x\n"
+"dcl_literal l324, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r2.__z_, r4.x, l324\n"
+"dcl_literal l325, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r2.___w, r4.x, l325\n"
+"cmov_logical r2.___w, r2.z, r2.w, r4.x\n"
+"dcl_literal l326, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.x___, l326, r2.w_neg(xyzw)\n"
+"ushr r3.x___, r4.y, r3.x\n"
+"ishl r2.___w, r4.y, r2.w\n"
+"cmov_logical r3.x___, r2.z, r4.y, r3.x\n"
+"dcl_literal l327, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r2.z, l327, r2.w\n"
+"dcl_literal l328, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r4.x, r3.x, l328\n"
+"cmov_logical r2.__z_, r4.x, r2.z, r4.y\n"
+"iadd r2.__z_, r1.z, r2.z\n"
+"ult r1.__z_, r2.z, r1.z\n"
+"dcl_literal l329, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.__z_, r1.z, l329\n"
+"iadd r1.x___, r1.x, r1.z\n"
+"iadd r1.x___, r2.w, r1.x\n"
+"dcl_literal l330, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r2.y, l330\n"
+"dcl_literal l331, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r2.___w, r2.y, l331\n"
+"cmov_logical r2.___w, r1.z, r2.w, r2.y\n"
+"dcl_literal l332, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r3.x___, l332, r2.w_neg(xyzw)\n"
+"ushr r3.x___, r0.w, r3.x\n"
+"ishl r2.___w, r0.w, r2.w\n"
+"cmov_logical r3.x___, r1.z, r0.w, r3.x\n"
+"dcl_literal l333, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.__z_, r1.z, l333, r2.w\n"
+"dcl_literal l334, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.___w, r2.y, r3.x, l334\n"
+"cmov_logical r0.___w, r2.y, r1.z, r0.w\n"
+"iadd r0.___w, r2.z, r0.w\n"
+"ult r1.__z_, r0.w, r2.z\n"
+"dcl_literal l335, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.__z_, r1.z, l335\n"
+"iadd r1.x___, r1.x, r1.z\n"
+"iadd r1.x___, r2.w, r1.x\n"
+"dcl_literal l336, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r1._y__, r1.y, l336\n"
+"dcl_literal l337, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.y, l337, r1.x\n"
+"dcl_literal l338, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.___w, r1.y, l338, r0.w\n"
+"dcl_literal l339, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ieq r0.__z_, r0.z, l339\n"
+"and r0.__z_, r2.x, r0.z\n"
+"cmov_logical r0._y__, r0.z, r0.y, r1.x\n"
+"cmov_logical r0.x___, r0.z, r0.x, r0.w\n"
+"dcl_literal l340, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r1.w, l340, r0.xyxx\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__udiv_i8", /* Macro 479: unsigned divide of two values masked to 8 bits.
+ * out0.x = (in0.x & 0xFF) / (in1.x & 0xFF), masked to 8 bits again.
+ * A zero divisor is swapped for 1 before the udiv and the final result is
+ * forced to 0 in that case.  The trailing ",2,1" matches the _in(2)/_out(1)
+ * counts declared in the mdef line. */
+"mdef(479)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* pack the divisor next to the dividend: r0.x = a, r0.y = b */
+"dcl_literal l9, 0x000000FF, 0x000000FF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l9\n" /* keep only the low 8 bits of both operands */
+"dcl_literal l10, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l10\n" /* divisor, or 1 when the divisor is 0 */
+"udiv r0.___w, r1.x, r0.w\n"
+"dcl_literal l11, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.___w, r0.w, l11\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r1.y, r0.w, l12\n" /* quotient, or 0 when the divisor was 0 */
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n" /* whole r0 is returned; only .x holds the result */
+"mend\n"
+,2,1
+},
+{ "__uhadd_u16", /* Macro 480: halving add, out0.x = (in0.x + in1.x) >> 1.
+ * The sum is formed directly in a 32-bit register with no masking.
+ * NOTE(review): presumably relies on both inputs already being zero-extended
+ * 16-bit values so the add cannot wrap -- confirm against the caller.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(480)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.x = a, r0.y = b */
+"iadd r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l3, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r0.x___, r0.w, l3\n" /* logical shift right by one = divide the sum by two */
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__uhadd_u32", /* Macro 481: 32-bit halving add that avoids forming the full sum.
+ * out0.x = (a >> 1) + (b >> 1) + (a & b & 1), with a = in0.x and b = in1.x.
+ * The last term restores the carry that is lost when both low bits are set.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(481)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.x = a, r0.y = b */
+"and r0.___w, r0.y, r0.x\n"
+"\n"
+"dcl_literal l6, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.___w, r0.w, l6\n" /* r0.w = a & b & 1 */
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r1.xy__, r0.xyxx, l7\n" /* halve both operands in one vector op */
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r0.x___, r1.x, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__uhadd_u8", /* Macro 482: halving add, out0.x = (in0.x + in1.x) >> 1.
+ * The sum is formed directly in a 32-bit register with no masking.
+ * NOTE(review): presumably relies on both inputs already being zero-extended
+ * 8-bit values so the add cannot wrap -- confirm against the caller.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(482)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n" /* r0.x = a, r0.y = b */
+"iadd r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l1, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r0.x___, r1.x, l1\n" /* logical shift right by one = divide the sum by two */
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__ultof_f32", /* Macro 483 (one input, one output): appears to convert an unsigned
+ * 64-bit integer held in in0.xy into a 32-bit float bit pattern in out0.x.
+ * NOTE(review): x is treated as the low word and y as the high word (y is
+ * shifted left and x right when the two are spliced together) -- confirm
+ * against the caller's register layout.
+ * Outline, as read from the instruction sequence:
+ *   1. count leading zeros in three chunks (y >> 9, the 23 bits below that,
+ *      and the low 18 bits of x) by OR-ing each chunk into the mantissa of
+ *      1.0f, subtracting 1.0f and reading back the exponent;
+ *   2. combine the three counts and shift the 64-bit value left by the total;
+ *   3. build the exponent as 0xBE minus the count, take the mantissa from the
+ *      shifted high word, and round using the bits that were shifted out
+ *      (exact half rounds to even, above half rounds up);
+ *   4. return 0 when both input words are 0.
+ * The trailing ",1,1" matches the _in(1)/_out(1) counts in the mdef line. */
+"mdef(483)_out(1)_in(1)\n"
+"mov r0, in0\n"
+"dcl_literal l1, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r0.__z_, r0.y, l1\n" /* middle chunk = ((y << 14) | (x >> 18)) & 0x7FFFFF */
+"dcl_literal l2, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r0.___w, r0.x, l2\n"
+"ior r0.__z_, r0.z, r0.w\n"
+"dcl_literal l3, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r0.__z_, r0.z, l3\n"
+"dcl_literal l4, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r0.___w, r0.z, l4\n" /* exponent trick: place chunk in the mantissa of 1.0f ... */
+"dcl_literal l5, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r0.___w, r0.w, l5\n" /* ... float-add -1.0f so the exponent encodes the top set bit */
+"dcl_literal l6, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l6\n"
+"dcl_literal l7, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r0.___w, l7, r0.w_neg(xyzw)\n"
+"dcl_literal l8, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r0.___w, r0.w, l8\n"
+"dcl_literal l9, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r0.___w, r0.w, l9\n"
+"dcl_literal l10, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r0.___w, r0.w, l10\n"
+"dcl_literal l11, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r0.__z_, r0.z, r0.w, l11\n" /* middle-chunk count, or 23 when the chunk is 0 */
+"dcl_literal l12, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r0.___w, r0.y, l12\n" /* top chunk = y >> 9; same counting sequence follows */
+"dcl_literal l13, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1.x___, r0.w, l13\n"
+"dcl_literal l14, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1.x___, r1.x, l14\n"
+"dcl_literal l15, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l15\n"
+"dcl_literal l16, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1.x___, l16, r1.x_neg(xyzw)\n"
+"dcl_literal l17, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1.x___, r1.x, l17\n"
+"dcl_literal l18, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1.x___, r1.x, l18\n"
+"dcl_literal l19, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r1.x___, r1.x, l19\n"
+"dcl_literal l20, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r0.___w, r0.w, r1.x, l20\n" /* top-chunk count, or 23 when the chunk is 0 */
+"iadd r0.__z_, r0.w, r0.z\n" /* r0.z = top count + middle count */
+"dcl_literal l21, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r1.x___, r0.x, l21\n" /* low chunk = x & 0x3FFFF (18 bits) */
+"dcl_literal l22, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r1._y__, r1.x, l22\n"
+"dcl_literal l23, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r1._y__, r1.y, l23\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1._y__, r1.y, l24\n"
+"dcl_literal l25, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r1._y__, l25, r1.y_neg(xyzw)\n"
+"dcl_literal l26, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r1._y__, r1.y, l26\n"
+"dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r1._y__, r1.y, l27\n"
+"dcl_literal l28, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r1._y__, r1.y, l28\n" /* bias is -6 here instead of -1 because this chunk is 18 bits wide */
+"dcl_literal l29, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r1.x___, r1.x, r1.y, l29\n" /* low-chunk count, or 18 when the chunk is 0 */
+"iadd r1.x___, r0.z, r1.x\n" /* r1.x = sum of all three counts */
+"dcl_literal l30, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r1._y__, r0.w, l30\n" /* top chunk empty (count 23)? then use top + middle */
+"cmov_logical r0.___w, r1.y, r0.z, r0.w\n"
+"dcl_literal l31, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r0.__z_, r0.z, l31\n" /* top and middle both empty (46)? then use the full sum */
+"cmov_logical r0.__z_, r0.z, r1.x, r0.w\n" /* r0.z = leading-zero count of the 64-bit value */
+"dcl_literal l32, 0x00000040, 0x00000040, 0x00000040, 0x00000040\n"
+"imin r0.___w, r0.z, l32\n" /* clamp the shift amount to the range 0..64 */
+"dcl_literal l33, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"imax r0.___w, r0.w, l33\n"
+"dcl_literal l34, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"imin r1.x___, r0.w, l34\n"
+"ishl r1.x___, r0.y, r1.x\n" /* 64-bit left shift of y:x assembled from 32-bit shifts */
+"dcl_literal l35, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r1._y__, r0.w, l35\n"
+"dcl_literal l36, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"ige r1.__z_, r0.w, l36\n" /* r1.z = shift amount is 32 or more */
+"cmov_logical r1._y__, r1.z, r1.y, r0.w\n"
+"dcl_literal l37, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r1.___w, l37, r1.y_neg(xyzw)\n"
+"ishl r1._y__, r0.x, r1.y\n"
+"ushr r1.___w, r0.x, r1.w\n"
+"ior r1.x___, r1.x, r1.w\n"
+"cmov_logical r1.x___, r1.z, r1.y, r1.x\n"
+"dcl_literal l38, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.z, l38, r1.y\n"
+"cmov_logical r0.xy__, r0.w, r1.yxyy, r0.xyxx\n" /* keep the original words when the shift is 0 */
+"dcl_literal l39, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF\n"
+"and r0.___w, r0.y, l39\n" /* clear the top bit of the shifted high word */
+"dcl_literal l40, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0._y__, r0.y, l40\n"
+"dcl_literal l41, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishl r1.x___, r0.w, l41\n" /* r1.x collects the bits below the 23-bit mantissa for rounding */
+"dcl_literal l42, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r0.___w, r0.w, l42\n" /* r0.w = 23-bit mantissa candidate */
+"dcl_literal l43, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r1._y__, r0.x, l43\n"
+"ior r1.x___, r1.x, r1.y\n"
+"dcl_literal l44, 0x00000018, 0x00000018, 0x00000018, 0x00000018\n"
+"ishl r1._y__, r0.x, l44\n"
+"dcl_literal l45, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r0.x___, r0.x, l45\n"
+"dcl_literal l46, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ult r1._y__, l46, r1.y\n" /* sticky bit: any of the lowest 8 bits of x set */
+"dcl_literal l47, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1._y__, r1.y, l47\n"
+"ior r1.x___, r1.x, r1.y\n"
+"dcl_literal l48, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ieq r1._y__, r1.x, l48\n" /* r1.y = discarded bits are exactly one half */
+"dcl_literal l49, 0x80000000, 0x80000000, 0x80000000, 0x80000000\n"
+"ult r1.x___, l49, r1.x\n" /* r1.x = discarded bits are more than one half */
+"dcl_literal l50, 0x000000BE, 0x000000BE, 0x000000BE, 0x000000BE\n"
+"iadd r1.__z_, r0.z_neg(xyzw), l50\n" /* exponent field = 0xBE - leading-zero count */
+"dcl_literal l51, 0x0000003F, 0x0000003F, 0x0000003F, 0x0000003F\n"
+"iadd r0.__z_, l51, r0.z_neg(xyzw)\n" /* r0.z = 63 - leading-zero count */
+"dcl_literal l52, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ishl r1.__z_, r1.z, l52\n"
+"ior r0.___w, r0.w, r1.z\n" /* merge exponent and mantissa */
+"dcl_literal l53, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r1.__z_, r0.w, l53\n"
+"iadd r1.__z_, r0.w, r1.z\n"
+"cmov_logical r0.___w, r1.y, r1.z, r0.w\n" /* exact half: add the low bit (round to even) */
+"dcl_literal l54, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r1._y__, r0.w, l54\n"
+"cmov_logical r0.___w, r1.x, r1.y, r0.w\n" /* above half: round up by one */
+"and r0.x___, r0.y, r0.x\n" /* both shifted words are 0 */
+"dcl_literal l55, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82, 0xFFFFFF82\n"
+"ilt r0._y__, r0.z, l55\n" /* or (63 - count) is below -126 */
+"ior r0.x___, r0.x, r0.y\n"
+"dcl_literal l56, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.x___, r0.x, l56, r0.w\n" /* result is 0 in either of those cases */
+"mov out0, r0\n"
+"mend\n"
+,1,1
+},
+{ "__umad24_u32", /* Macro 484: 24-bit multiply-add.
+ * out0.x = (in0.x & 0x00FFFFFF) * (in1.x & 0x00FFFFFF) + in2.x.
+ * Only the x and y lanes of literal l2 carry the mask; its z/w lanes are 0
+ * and are not read because the and writes xy only.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(484)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"\n"
+"dcl_literal l2, 0x00FFFFFF, 0x00FFFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l2\n" /* truncate both factors to 24 bits */
+"umul r1.x___, r1.x, r1.y\n"
+"iadd r0.x___, r1.x, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_hi_u16", /* Macro 485: out0.x = (((in0.x * in1.x) >> 16) + in2.x) & 0xFFFF.
+ * The product is formed in a 32-bit register and its upper half is taken.
+ * NOTE(review): presumably relies on the inputs being zero-extended 16-bit
+ * values, since they are not masked before the multiply -- confirm.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(485)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"umul r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l8, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.x, l8\n" /* high half of the product */
+"iadd r1.x___, r1.x, r0.z\n"
+"\n"
+"dcl_literal l9, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.x___, r1.x, l9\n" /* wrap the sum back to 16 bits */
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_hi_u32", /* Macro 486: out0.x = high 32 bits of (in0.x * in1.x), plus in2.x.
+ * The high word of the 64-bit product is assembled from four 16x16-bit
+ * partial products, with a = in0.x, b = in1.x:
+ *   hi = a_hi*b_hi
+ *      + ((a_hi*b_lo + (a_lo*b_lo >> 16) + (a_lo*b_hi & 0xFFFF)) >> 16)
+ *      + (a_lo*b_hi >> 16)
+ * The final add of in2.x is a plain iadd with no overflow handling.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(486)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"\n"
+"dcl_literal l20, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l20\n" /* r1.x = a_lo, r1.y = b_lo */
+"\n"
+"dcl_literal l21, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.__zw, r0.xxxy, l21\n" /* r1.z = a_hi, r1.w = b_hi */
+"umul r2.x___, r1.z, r1.w\n" /* r2.x = a_hi*b_hi */
+"umul r1.x_zw, r1.xxzx, r1.yyyw\n" /* r1.x = a_lo*b_lo, r1.z = a_hi*b_lo, r1.w = a_lo*b_hi */
+"\n"
+"dcl_literal l22, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.x, l22\n"
+"\n"
+"dcl_literal l23, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r1._y__, r1.w, l23\n"
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r1.x___, r1.z, r1.x\n" /* middle column sum */
+"\n"
+"dcl_literal l24, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.x, l24\n" /* carry from the middle column into the high word */
+"iadd r1.x___, r2.x, r1.x\n"
+"\n"
+"dcl_literal l25, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1._y__, r1.w, l25\n"
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r0.x___, r1.x, r0.z\n" /* add the third operand to the high word */
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_hi_u8", /* Macro 487: out0.x = (((in0.x * in1.x) >> 8) + in2.x) & 0xFF.
+ * The product is formed in a 32-bit register and bits 8 and up are taken.
+ * NOTE(review): presumably relies on the inputs being zero-extended 8-bit
+ * values, since they are not masked before the multiply -- confirm.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(487)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"umul r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l3, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r1.x___, r1.x, l3\n" /* high part of the product */
+"iadd r1.x___, r1.x, r0.z\n"
+"\n"
+"dcl_literal l4, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.x___, r1.x, l4\n" /* wrap the sum back to 8 bits */
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_sat_u16", /* Macro 488: saturating multiply-add for 16-bit unsigned values.
+ * out0.x = min(in0.x * in1.x + in2.x, 0xFFFF).
+ * Fix: the addend is now r0.z (the third operand).  The previous text added
+ * r0.y, i.e. the second factor again, and never read the third operand that
+ * had just been copied into r0.z.
+ * NOTE(review): presumably relies on the inputs being zero-extended 16-bit
+ * values so that product plus addend fits in 32 bits -- confirm.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(488)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"umul r1.x___, r0.x, r0.y\n"
+"iadd r1.x___, r1.x, r0.z\n" /* a*b + c */
+"\n"
+"dcl_literal l5, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"umin r0.___w, r1.x, l5\n" /* clamp to the 16-bit maximum */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_sat_u32", /* Macro 489: saturating multiply-add for 32-bit unsigned values.
+ * out0.x = in0.x * in1.x + in2.x, replaced by 0xFFFFFFFF when the add wraps.
+ * Fix: the addend is now r0.z (the third operand).  The previous text added
+ * r0.y, i.e. the second factor again, and never read the third operand that
+ * had just been copied into r0.z.
+ * NOTE(review): only the carry out of the final add is detected (sum < product).
+ * The umul result is used as-is, so if umul keeps just the low 32 bits of the
+ * product, an overflowing multiply is not saturated -- confirm against the
+ * IL specification and consider checking the high half of the product.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(489)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"umul r1.x___, r0.x, r0.y\n"
+"iadd r1._y__, r1.x, r0.z\n" /* a*b + c */
+"ult r1.x___, r1.y, r1.x\n" /* sum smaller than product means the add wrapped */
+"\n"
+"dcl_literal l10, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r0.___w, r1.x, l10, r1.y\n" /* saturate on wrap, otherwise keep the sum */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umad_sat_u8", /* Macro 490: saturating multiply-add for 8-bit unsigned values.
+ * out0.x = min(in0.x * in1.x + in2.x, 0xFF).
+ * Fix: the addend is now r0.z (the third operand).  The previous text added
+ * r0.y, i.e. the second factor again, and never read the third operand that
+ * had just been copied into r0.z.
+ * NOTE(review): presumably relies on the inputs being zero-extended 8-bit
+ * values so that product plus addend fits in 32 bits -- confirm.
+ * The trailing ",3,1" matches the _in(3)/_out(1) counts in the mdef line. */
+"mdef(490)_out(1)_in(3)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r2, in2\n"
+"\n"
+"mov r0._y__, r1.x\n" /* gather operands: r0.x = a, r0.y = b, r0.z = c */
+"mov r0.__z_, r2.x\n"
+"umul r1.x___, r0.x, r0.y\n"
+"iadd r1.x___, r1.x, r0.z\n" /* a*b + c */
+"\n"
+"dcl_literal l2, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"umin r0.___w, r1.x, l2\n" /* clamp to the 8-bit maximum */
+"mov r0.x___, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,3,1
+},
+{ "__umax_u16", /* Macro 491: out0.x = unsigned maximum of in0.x and in1.x.
+ * No masking to 16 bits is done; the full 32-bit registers are compared.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(491)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umax r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umax_u32", /* Macro 492: out0.x = unsigned maximum of in0.x and in1.x.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(492)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umax r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umax_u8", /* Macro 493: out0.x = unsigned maximum of in0.x and in1.x.
+ * No masking to 8 bits is done; the full 32-bit registers are compared.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(493)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umax r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umin_u16", /* Macro 494: out0.x = unsigned minimum of in0.x and in1.x.
+ * No masking to 16 bits is done; the full 32-bit registers are compared.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(494)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umin r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umin_u32", /* Macro 495: out0.x = unsigned minimum of in0.x and in1.x.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(495)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umin r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umin_u8", /* Macro 496: out0.x = unsigned minimum of in0.x and in1.x.
+ * No masking to 8 bits is done; the full 32-bit registers are compared.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(496)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n" /* r0.x = a, r0.y = b */
+"umin r0.x___, r0.y, r0.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umod_i16", /* Macro 497: unsigned remainder of two values masked to 16 bits.
+ * With a = in0.x & 0xFFFF and b = in1.x & 0xFFFF:
+ *   out0.x = (a - (a / b) * b) & 0xFFFF.
+ * A zero divisor is swapped for 1 before the udiv and the quotient is forced
+ * to 0, so the result is a itself in that case.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(497)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0._y__, r1.x\n" /* r0.x = dividend, r0.y = divisor */
+"dcl_literal l21, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l21\n" /* keep only the low 16 bits of both operands */
+"dcl_literal l22, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l22\n" /* divisor, or 1 when the divisor is 0 */
+"udiv r1.__z_, r1.x, r0.w\n"
+"dcl_literal l23, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, r1.z, l23\n" /* quotient, or 0 when the divisor was 0 */
+"umul r0.___w, r0.w, r1.y\n"
+"iadd r0.___w, r1.x, r0.w_neg(xyzw)\n" /* remainder = dividend - quotient * divisor */
+"dcl_literal l24, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.__z_, r0.w, l24\n"
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umod_i32", /* Macro 498: 32-bit unsigned remainder.
+ * out0.x = in0.x - (in0.x / in1.x) * in1.x.
+ * Unlike the 16-bit variant there is no explicit zero-divisor guard here.
+ * NOTE(review): the result for in1.x == 0 therefore depends on what udiv
+ * returns for a zero divisor -- confirm against the IL specification.
+ * The trailing ",2,1" matches the _in(2)/_out(1) counts in the mdef line. */
+"mdef(498)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"udiv r2.x, r0.x, r1.x\n"
+"umul r2.x, r2.x, r1.x\n"
+"iadd r0.x, r0.x, r2.x_neg(xyzw)\n" /* dividend minus quotient * divisor */
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umod_i64",
+"mdef(499)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"mov r0.__zw, r1.yyxy\n"
+"ieq r1.xy__, r0.yxyy, r0.wzww\n"
+"ult r1.__zw, r0.yyyx, r0.wwwz\n"
+"and r1.___w, r1.x, r1.w\n"
+"ior r1.___w, r1.z, r1.w\n"
+"dcl_literal l1, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.___w, r1.w, l1, l2\n"
+"and r1._y__, r1.y, r1.x\n"
+"dcl_literal l3, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, l3, r1.w\n"
+"dcl_literal l4, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ieq r2.xy__, r0.wzww, l4\n"
+"and r1.___w, r2.y, r2.x\n"
+"dcl_literal l5, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.__z_, r1.w, l5, r0.z\n"
+"dcl_literal l6, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r2.xy__, r0.wyww, l6\n"
+"dcl_literal l7, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r2.__z_, r0.x, l7\n"
+"dcl_literal l8, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.xy__, r0.wyww, l8\n"
+"dcl_literal l9, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r2.___w, r0.z, l9\n"
+"ior r2.___w, r3.x, r2.w\n"
+"dcl_literal l10, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r2.___w, r2.w, l10\n"
+"dcl_literal l11, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.x___, r0.z, l11\n"
+"dcl_literal l12, 0x00000000, 0x00000000, 0x3F800000, 0x3F800000\n"
+"ior r3.__zw, r2.xxxy, l12\n"
+"dcl_literal l13, 0x00000000, 0x00000000, 0xBF800000, 0xBF800000\n"
+"add r3.__zw, r3.zzzw, l13\n"
+"dcl_literal l14, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"and r3.__zw, r3.zzzw, l14\n"
+"dcl_literal l15, 0x00000000, 0x00000000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__zw, l15, r3.zzzw_neg(xyzw)\n"
+"dcl_literal l16, 0x00000000, 0x00000000, 0x7F800000, 0x7F800000\n"
+"and r3.__zw, r3.zzzw, l16\n"
+"dcl_literal l17, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__zw, r3.zzzw, l17\n"
+"dcl_literal l18, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.__zw, r3.zzzw, l18\n"
+"dcl_literal l19, 0x00000017, 0x00000017, 0x00000000, 0x00000000\n"
+"cmov_logical r2.xy__, r2.xyxx, r3.zwzz, l19\n"
+"dcl_literal l20, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.__z_, r2.w, l20\n"
+"dcl_literal l21, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.__z_, r3.z, l21\n"
+"dcl_literal l22, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l22\n"
+"dcl_literal l23, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__z_, l23, r3.z_neg(xyzw)\n"
+"dcl_literal l24, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l24\n"
+"dcl_literal l25, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__z_, r3.z, l25\n"
+"dcl_literal l26, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.__z_, r3.z, l26\n"
+"dcl_literal l27, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r2.___w, r2.w, r3.z, l27\n"
+"dcl_literal l28, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.__z_, r3.x, l28\n"
+"dcl_literal l29, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.__z_, r3.z, l29\n"
+"dcl_literal l30, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l30\n"
+"dcl_literal l31, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.__z_, l31, r3.z_neg(xyzw)\n"
+"dcl_literal l32, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.__z_, r3.z, l32\n"
+"dcl_literal l33, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.__z_, r3.z, l33\n"
+"dcl_literal l34, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.__z_, r3.z, l34\n"
+"dcl_literal l35, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.x___, r3.x, r3.z, l35\n"
+"dcl_literal l36, 0x00000000, 0x00000000, 0x00000017, 0x00000017\n"
+"ieq r3.__zw, r2.xxxy, l36\n"
+"iadd r2.___w, r2.x, r2.w\n"
+"cmov_logical r2.x___, r3.z, r2.w, r2.x\n"
+"dcl_literal l37, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.__z_, r2.w, l37\n"
+"iadd r2.___w, r2.w, r3.x\n"
+"cmov_logical r2.x___, r3.z, r2.w, r2.x\n"
+"dcl_literal l38, 0x00000030, 0x00000030, 0x00000030, 0x00000030\n"
+"ige r2.___w, r2.x, l38\n"
+"dcl_literal l39, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+"iadd r3.x___, r2.x, l39\n"
+"dcl_literal l40, 0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F\n"
+"cmov_logical r2.___w, r2.w, r3.x, l40\n"
+"ult r3.x___, r0.x, r0.z\n"
+"and r3.x___, r1.x, r3.x\n"
+"ior r1.__z_, r1.z, r3.x\n"
+"dcl_literal l41, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l42, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r1.__z_, r1.z, l41, l42\n"
+"ieq r3.x___, r0.x, r0.z\n"
+"and r1.x___, r1.x, r3.x\n"
+"dcl_literal l43, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1.x___, r1.x, l43, r1.z\n"
+"ior r1.__z_, r2.z, r3.y\n"
+"dcl_literal l44, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r1.__z_, r1.z, l44\n"
+"dcl_literal l45, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r2.__z_, r0.x, l45\n"
+"dcl_literal l46, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.x___, r1.z, l46\n"
+"dcl_literal l47, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.x___, r3.x, l47\n"
+"dcl_literal l48, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r3.x, l48\n"
+"dcl_literal l49, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.x___, l49, r3.x_neg(xyzw)\n"
+"dcl_literal l50, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r3.x, l50\n"
+"dcl_literal l51, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.x___, r3.x, l51\n"
+"dcl_literal l52, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.x___, r3.x, l52\n"
+"dcl_literal l53, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r1.__z_, r1.z, r3.x, l53\n"
+"dcl_literal l54, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.x___, r2.z, l54\n"
+"dcl_literal l55, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.x___, r3.x, l55\n"
+"dcl_literal l56, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r3.x, l56\n"
+"dcl_literal l57, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.x___, l57, r3.x_neg(xyzw)\n"
+"dcl_literal l58, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.x___, r3.x, l58\n"
+"dcl_literal l59, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.x___, r3.x, l59\n"
+"dcl_literal l60, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.x___, r3.x, l60\n"
+"dcl_literal l61, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r2.__z_, r2.z, r3.x, l61\n"
+"iadd r1.__z_, r2.y, r1.z\n"
+"cmov_logical r2._y__, r3.w, r1.z, r2.y\n"
+"dcl_literal l62, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.x___, r1.z, l62\n"
+"iadd r1.__z_, r1.z, r2.z\n"
+"cmov_logical r1.__z_, r3.x, r1.z, r2.y\n"
+"ilt r2._y__, r1.z, r2.x\n"
+"ieq r2.__z_, r2.x, r1.z\n"
+"dcl_literal l63, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r1.x___, r1.x, l63\n"
+"and r1.x___, r2.z, r1.x\n"
+"ior r1.x___, r2.y, r1.x\n"
+"iadd r2._y__, r2.x, r1.z_neg(xyzw)\n"
+"dcl_literal l64, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r1.x___, r1.x, r2.y, l64\n"
+"dcl_literal l65, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2._y__, r1.x, l65\n"
+"if_logicalnz r2.y\n"
+" ilt r2.__z_, r2.w, r1.x\n"
+" iadd r3.x___, r1.x, r2.w_neg(xyzw)\n"
+" and r2.__z_, r2.z, r3.x\n"
+" \n"
+" dcl_literal l66, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r1.x___, r1.x, l66\n"
+" \n"
+" dcl_literal l67, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r1.x___, r1.x, l67, r2.z\n"
+" \n"
+" dcl_literal l68, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2.__z_, r1.x, l68\n"
+" \n"
+" dcl_literal l69, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.x___, r1.x, l69\n"
+" cmov_logical r3.x___, r2.z, r3.x, r1.x\n"
+" \n"
+" dcl_literal l70, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3._y__, r1.x, l70\n"
+" \n"
+" dcl_literal l71, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.__z_, l71, r3.x_neg(xyzw)\n"
+" ushr r3.__z_, r0.z, r3.z\n"
+" ishl r3.x___, r0.z, r3.x\n"
+" ishl r3._y__, r0.w, r3.y\n"
+" ior r3._y__, r3.z, r3.y\n"
+" cmov_logical r3._y__, r2.z, r0.z, r3.y\n"
+" \n"
+" dcl_literal l72, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.__z_, r2.z, l72, r3.x\n"
+" cmov_logical r3.x___, r1.x, r3.y, r0.w\n"
+" cmov_logical r1.x___, r1.x, r2.z, r0.z\n"
+" \n"
+" dcl_literal l73, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2.__z_, r1.z, l73\n"
+" \n"
+" dcl_literal l74, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3._y__, r1.z, l74\n"
+" cmov_logical r3._y__, r2.z, r3.y, r1.z\n"
+" \n"
+" dcl_literal l75, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.__z_, r1.z, l75\n"
+" \n"
+" dcl_literal l76, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3._y__, l76, r3.y_neg(xyzw)\n"
+" ushr r3.___w, r0.x, r3.y\n"
+" ishl r4.x___, r0.y, r3.z\n"
+" ior r3.___w, r3.w, r4.x\n"
+" cmov_logical r3.___w, r2.z, r0.x, r3.w\n"
+" cmov_logical r3.___w, r1.z, r3.w, r0.y\n"
+" ushr r3._y__, r1.x, r3.y\n"
+" ishl r3.__z_, r3.x, r3.z\n"
+" ior r3._y__, r3.y, r3.z\n"
+" cmov_logical r2.__z_, r2.z, r1.x, r3.y\n"
+" cmov_logical r1.__z_, r1.z, r2.z, r3.x\n"
+" udiv r2.__z_, r3.w, r1.z\n"
+" umul r3._y__, r2.z, r1.z\n"
+" ilt r3.__z_, r3.w, r3.y\n"
+" iadd r4.x___, r3.y, r3.w_neg(xyzw)\n"
+" iadd r4.x___, r4.x, r1.z\n"
+" \n"
+" dcl_literal l77, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.x___, r4.x, l77\n"
+" iadd r3._y__, r3.w, r3.y_neg(xyzw)\n"
+" cmov_logical r3._y__, r3.z, r4.x, r3.y\n"
+" udiv r1.__z_, r3.y, r1.z\n"
+" iadd r3._y__, r2.z, r1.z_neg(xyzw)\n"
+" iadd r1.__z_, r2.z, r1.z\n"
+" cmov_logical r1.__z_, r3.z, r3.y, r1.z\n"
+" \n"
+" dcl_literal l78, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r1.x, l78\n"
+" \n"
+" dcl_literal l79, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y__, r1.x, l79\n"
+" \n"
+" dcl_literal l80, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.__z_, r1.z, l80\n"
+" \n"
+" dcl_literal l81, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r1.__z_, r1.z, l81\n"
+" umul r3.___w, r3.y, r1.z\n"
+" umul r3._y__, r3.y, r3.z\n"
+" umul r4.x___, r2.z, r1.z\n"
+" umul r2.__z_, r2.z, r3.z\n"
+" \n"
+" dcl_literal l82, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r2.z, l82\n"
+" \n"
+" dcl_literal l83, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.__z_, r4.x, l83\n"
+" iadd r4._y__, r4.y, r4.z\n"
+" iadd r3._y__, r3.y, r4.y\n"
+" \n"
+" dcl_literal l84, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4._y__, r3.y, l84\n"
+" iadd r3.___w, r3.w, r4.y\n"
+" \n"
+" dcl_literal l85, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r4.x, l85\n"
+" iadd r3.___w, r3.w, r4.x\n"
+" \n"
+" dcl_literal l86, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3._y__, r3.y, l86\n"
+" \n"
+" dcl_literal l87, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r2.z, l87\n"
+" ior r2.__z_, r3.y, r2.z\n"
+" \n"
+" dcl_literal l88, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.x, l88\n"
+" \n"
+" dcl_literal l89, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r3.x, l89\n"
+" umul r4.x___, r4.x, r3.z\n"
+" umul r1.__z_, r3.y, r1.z\n"
+" umul r3._y__, r3.y, r3.z\n"
+" \n"
+" dcl_literal l90, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r3.y, l90\n"
+" \n"
+" dcl_literal l91, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r1.__z_, r1.z, l91\n"
+" iadd r1.__z_, r3.z, r1.z\n"
+" iadd r1.__z_, r4.x, r1.z\n"
+" \n"
+" dcl_literal l92, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r1.__z_, r1.z, l92\n"
+" \n"
+" dcl_literal l93, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.y, l93\n"
+" ior r1.__z_, r1.z, r3.y\n"
+" iadd r3._y__, r1.z, r3.w\n"
+" ult r1.__z_, r3.y, r1.z\n"
+" ult r3.__z_, r0.y, r3.y\n"
+" ieq r3.___w, r0.y, r3.y\n"
+" ult r4.x___, r0.x, r2.z\n"
+" and r4.x___, r3.w, r4.x\n"
+" ior r3.__z_, r3.z, r4.x\n"
+" \n"
+" dcl_literal l94, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l95, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r3.__z_, r3.z, l94, l95\n"
+" ieq r4.x___, r0.x, r2.z\n"
+" and r3.___w, r3.w, r4.x\n"
+" \n"
+" dcl_literal l96, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3.__z_, r3.w, l96, r3.z\n"
+" \n"
+" dcl_literal l97, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r3.__z_, r3.z, l97\n"
+" ior r1.__z_, r1.z, r3.z\n"
+" iadd r1.x___, r2.z, r1.x_neg(xyzw)\n"
+" ult r3.__z_, r2.z, r1.x\n"
+" iadd r3.__z_, r3.y, r3.z\n"
+" iadd r3.x___, r3.z, r3.x_neg(xyzw)\n"
+" cmov_logical r3.x___, r1.z, r3.x, r3.y\n"
+" cmov_logical r1.x___, r1.z, r1.x, r2.z\n"
+" iadd r1.__z_, r0.x, r1.x_neg(xyzw)\n"
+" ult r2.__z_, r0.x, r1.z\n"
+" iadd r2.__z_, r0.y, r2.z\n"
+" iadd r1.x___, r2.z, r3.x_neg(xyzw)\n"
+"else\n"
+" mov r1.x_z_, r0.yyxy\n"
+"endif\n"
+"ult r2.__z_, r1.x, r0.w\n"
+"ieq r3.x___, r1.x, r0.w\n"
+"ult r3._y__, r1.z, r0.z\n"
+"and r3._y__, r3.x, r3.y\n"
+"ior r2.__z_, r2.z, r3.y\n"
+"dcl_literal l98, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l99, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2.__z_, r2.z, l98, l99\n"
+"ieq r3._y__, r1.z, r0.z\n"
+"and r3.x___, r3.x, r3.y\n"
+"dcl_literal l100, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2.__z_, r3.x, l100, r2.z\n"
+"dcl_literal l101, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.x___, r1.x, l101\n"
+"dcl_literal l102, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3._y__, r1.x, l102\n"
+"dcl_literal l103, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.__z_, r1.z, l103\n"
+"ior r3.x___, r3.x, r3.z\n"
+"dcl_literal l104, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.x___, r3.x, l104\n"
+"dcl_literal l105, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r1.z, l105\n"
+"dcl_literal l106, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.y, l106\n"
+"dcl_literal l107, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l107\n"
+"dcl_literal l108, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l108\n"
+"dcl_literal l109, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l109, r3.w_neg(xyzw)\n"
+"dcl_literal l110, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l110\n"
+"dcl_literal l111, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l111\n"
+"dcl_literal l112, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l112\n"
+"dcl_literal l113, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r3.w, l113\n"
+"dcl_literal l114, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.x, l114\n"
+"dcl_literal l115, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l115\n"
+"dcl_literal l116, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l116\n"
+"dcl_literal l117, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l117, r3.w_neg(xyzw)\n"
+"dcl_literal l118, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l118\n"
+"dcl_literal l119, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l119\n"
+"dcl_literal l120, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l120\n"
+"dcl_literal l121, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.x___, r3.x, r3.w, l121\n"
+"dcl_literal l122, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.z, l122\n"
+"dcl_literal l123, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l123\n"
+"dcl_literal l124, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l124\n"
+"dcl_literal l125, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l125, r3.w_neg(xyzw)\n"
+"dcl_literal l126, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l126\n"
+"dcl_literal l127, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l127\n"
+"dcl_literal l128, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.___w, r3.w, l128\n"
+"dcl_literal l129, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r3.w, l129\n"
+"dcl_literal l130, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r3.___w, r3.y, l130\n"
+"iadd r3.x___, r3.y, r3.x\n"
+"cmov_logical r3._y__, r3.w, r3.x, r3.y\n"
+"dcl_literal l131, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.___w, r3.x, l131\n"
+"iadd r3.x___, r3.x, r3.z\n"
+"cmov_logical r3.x___, r3.w, r3.x, r3.y\n"
+"ilt r3._y__, r3.x, r2.x\n"
+"ieq r3.__z_, r2.x, r3.x\n"
+"dcl_literal l132, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.z, l132\n"
+"and r2.__z_, r3.z, r2.z\n"
+"ior r2.__z_, r3.y, r2.z\n"
+"and r2._y__, r2.y, r2.z\n"
+"iadd r2.__z_, r2.x, r3.x_neg(xyzw)\n"
+"dcl_literal l133, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2._y__, r2.y, r2.z, l133\n"
+"dcl_literal l134, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.y, l134\n"
+"if_logicalnz r2.z\n"
+" ilt r3._y__, r2.w, r2.y\n"
+" iadd r3.__z_, r2.y, r2.w_neg(xyzw)\n"
+" and r3._y__, r3.y, r3.z\n"
+" \n"
+" dcl_literal l135, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, r2.y, l135\n"
+" \n"
+" dcl_literal l136, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2._y__, r2.y, l136, r3.y\n"
+" \n"
+" dcl_literal l137, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3._y__, r2.y, l137\n"
+" \n"
+" dcl_literal l138, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.__z_, r2.y, l138\n"
+" cmov_logical r3.__z_, r3.y, r3.z, r2.y\n"
+" \n"
+" dcl_literal l139, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.___w, r2.y, l139\n"
+" \n"
+" dcl_literal l140, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.x___, l140, r3.z_neg(xyzw)\n"
+" ushr r4.x___, r0.z, r4.x\n"
+" ishl r3.__z_, r0.z, r3.z\n"
+" ishl r3.___w, r0.w, r3.w\n"
+" ior r3.___w, r4.x, r3.w\n"
+" cmov_logical r3.___w, r3.y, r0.z, r3.w\n"
+" \n"
+" dcl_literal l141, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.y, l141, r3.z\n"
+" cmov_logical r3.__z_, r2.y, r3.w, r0.w\n"
+" cmov_logical r2._y__, r2.y, r3.y, r0.z\n"
+" \n"
+" dcl_literal l142, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3._y__, r3.x, l142\n"
+" \n"
+" dcl_literal l143, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r3.x, l143\n"
+" cmov_logical r3.___w, r3.y, r3.w, r3.x\n"
+" \n"
+" dcl_literal l144, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r3.x, l144\n"
+" \n"
+" dcl_literal l145, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.___w, l145, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r1.z, r3.w\n"
+" ishl r4.__z_, r1.x, r4.x\n"
+" ior r4._y__, r4.y, r4.z\n"
+" cmov_logical r4._y__, r3.y, r1.z, r4.y\n"
+" cmov_logical r4._y__, r3.x, r4.y, r1.x\n"
+" ushr r3.___w, r2.y, r3.w\n"
+" ishl r4.x___, r3.z, r4.x\n"
+" ior r3.___w, r3.w, r4.x\n"
+" cmov_logical r3._y__, r3.y, r2.y, r3.w\n"
+" cmov_logical r3.x___, r3.x, r3.y, r3.z\n"
+" udiv r3._y__, r4.y, r3.x\n"
+" umul r3.___w, r3.y, r3.x\n"
+" ilt r4.x___, r4.y, r3.w\n"
+" iadd r4.__z_, r3.w, r4.y_neg(xyzw)\n"
+" iadd r4.__z_, r4.z, r3.x\n"
+" \n"
+" dcl_literal l146, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.__z_, r4.z, l146\n"
+" iadd r3.___w, r4.y, r3.w_neg(xyzw)\n"
+" cmov_logical r3.___w, r4.x, r4.z, r3.w\n"
+" udiv r3.x___, r3.w, r3.x\n"
+" iadd r3.___w, r3.y, r3.x_neg(xyzw)\n"
+" iadd r3.x___, r3.y, r3.x\n"
+" cmov_logical r3.x___, r4.x, r3.w, r3.x\n"
+" \n"
+" dcl_literal l147, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r2.y, l147\n"
+" \n"
+" dcl_literal l148, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r2.y, l148\n"
+" \n"
+" dcl_literal l149, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.x, l149\n"
+" \n"
+" dcl_literal l150, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.x___, r3.x, l150\n"
+" umul r4._y__, r3.w, r3.x\n"
+" umul r3.___w, r3.w, r4.x\n"
+" umul r4.__z_, r3.y, r3.x\n"
+" umul r3._y__, r3.y, r4.x\n"
+" \n"
+" dcl_literal l151, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.y, l151\n"
+" \n"
+" dcl_literal l152, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r4.z, l152\n"
+" iadd r4.___w, r4.w, r5.x\n"
+" iadd r3.___w, r3.w, r4.w\n"
+" \n"
+" dcl_literal l153, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.w, l153\n"
+" iadd r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l154, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r4.z, l154\n"
+" iadd r4._y__, r4.y, r4.z\n"
+" \n"
+" dcl_literal l155, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.___w, r3.w, l155\n"
+" \n"
+" dcl_literal l156, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.y, l156\n"
+" ior r3._y__, r3.w, r3.y\n"
+" \n"
+" dcl_literal l157, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.z, l157\n"
+" \n"
+" dcl_literal l158, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r3.z, l158\n"
+" umul r4.__z_, r4.z, r4.x\n"
+" umul r3.x___, r3.w, r3.x\n"
+" umul r3.___w, r3.w, r4.x\n"
+" \n"
+" dcl_literal l159, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r3.w, l159\n"
+" \n"
+" dcl_literal l160, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.x___, r3.x, l160\n"
+" iadd r3.x___, r4.x, r3.x\n"
+" iadd r3.x___, r4.z, r3.x\n"
+" \n"
+" dcl_literal l161, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.x___, r3.x, l161\n"
+" \n"
+" dcl_literal l162, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.w, l162\n"
+" ior r3.x___, r3.x, r3.w\n"
+" iadd r3.___w, r3.x, r4.y\n"
+" ult r3.x___, r3.w, r3.x\n"
+" ult r4.x___, r1.x, r3.w\n"
+" ieq r4._y__, r1.x, r3.w\n"
+" ult r4.__z_, r1.z, r3.y\n"
+" and r4.__z_, r4.y, r4.z\n"
+" ior r4.x___, r4.x, r4.z\n"
+" \n"
+" dcl_literal l163, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l164, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4.x___, r4.x, l163, l164\n"
+" ieq r4.__z_, r1.z, r3.y\n"
+" and r4._y__, r4.y, r4.z\n"
+" \n"
+" dcl_literal l165, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.x___, r4.y, l165, r4.x\n"
+" \n"
+" dcl_literal l166, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.x___, r4.x, l166\n"
+" ior r3.x___, r3.x, r4.x\n"
+" iadd r2._y__, r3.y, r2.y_neg(xyzw)\n"
+" ult r4.x___, r3.y, r2.y\n"
+" iadd r4.x___, r3.w, r4.x\n"
+" iadd r3.__z_, r4.x, r3.z_neg(xyzw)\n"
+" cmov_logical r3.__z_, r3.x, r3.z, r3.w\n"
+" cmov_logical r2._y__, r3.x, r2.y, r3.y\n"
+" iadd r2._y__, r1.z, r2.y_neg(xyzw)\n"
+" ult r3.x___, r1.z, r2.y\n"
+" iadd r3.x___, r1.x, r3.x\n"
+" iadd r1.x___, r3.x, r3.z_neg(xyzw)\n"
+" mov r1.__z_, r2.y\n"
+"endif\n"
+"ult r2._y__, r1.x, r0.w\n"
+"ieq r3.x___, r1.x, r0.w\n"
+"ult r3._y__, r1.z, r0.z\n"
+"and r3._y__, r3.x, r3.y\n"
+"ior r2._y__, r2.y, r3.y\n"
+"dcl_literal l167, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l168, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2._y__, r2.y, l167, l168\n"
+"ieq r3._y__, r1.z, r0.z\n"
+"and r3.x___, r3.x, r3.y\n"
+"dcl_literal l169, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2._y__, r3.x, l169, r2.y\n"
+"dcl_literal l170, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.x___, r1.x, l170\n"
+"dcl_literal l171, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3._y__, r1.x, l171\n"
+"dcl_literal l172, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.__z_, r1.z, l172\n"
+"ior r3.x___, r3.x, r3.z\n"
+"dcl_literal l173, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.x___, r3.x, l173\n"
+"dcl_literal l174, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r1.z, l174\n"
+"dcl_literal l175, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.y, l175\n"
+"dcl_literal l176, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l176\n"
+"dcl_literal l177, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l177\n"
+"dcl_literal l178, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l178, r3.w_neg(xyzw)\n"
+"dcl_literal l179, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l179\n"
+"dcl_literal l180, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l180\n"
+"dcl_literal l181, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l181\n"
+"dcl_literal l182, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r3.w, l182\n"
+"dcl_literal l183, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.x, l183\n"
+"dcl_literal l184, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l184\n"
+"dcl_literal l185, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l185\n"
+"dcl_literal l186, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l186, r3.w_neg(xyzw)\n"
+"dcl_literal l187, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l187\n"
+"dcl_literal l188, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l188\n"
+"dcl_literal l189, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l189\n"
+"dcl_literal l190, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.x___, r3.x, r3.w, l190\n"
+"dcl_literal l191, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.z, l191\n"
+"dcl_literal l192, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l192\n"
+"dcl_literal l193, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l193\n"
+"dcl_literal l194, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l194, r3.w_neg(xyzw)\n"
+"dcl_literal l195, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l195\n"
+"dcl_literal l196, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l196\n"
+"dcl_literal l197, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.___w, r3.w, l197\n"
+"dcl_literal l198, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r3.w, l198\n"
+"dcl_literal l199, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r3.___w, r3.y, l199\n"
+"iadd r3.x___, r3.y, r3.x\n"
+"cmov_logical r3._y__, r3.w, r3.x, r3.y\n"
+"dcl_literal l200, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.___w, r3.x, l200\n"
+"iadd r3.x___, r3.x, r3.z\n"
+"cmov_logical r3.x___, r3.w, r3.x, r3.y\n"
+"ilt r3._y__, r3.x, r2.x\n"
+"ieq r3.__z_, r2.x, r3.x\n"
+"dcl_literal l201, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2._y__, r2.y, l201\n"
+"and r2._y__, r3.z, r2.y\n"
+"ior r2._y__, r3.y, r2.y\n"
+"and r2._y__, r2.z, r2.y\n"
+"iadd r2.__z_, r2.x, r3.x_neg(xyzw)\n"
+"dcl_literal l202, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2._y__, r2.y, r2.z, l202\n"
+"dcl_literal l203, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2.__z_, r2.y, l203\n"
+"if_logicalnz r2.z\n"
+" ilt r3._y__, r2.w, r2.y\n"
+" iadd r3.__z_, r2.y, r2.w_neg(xyzw)\n"
+" and r3._y__, r3.y, r3.z\n"
+" \n"
+" dcl_literal l204, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2._y__, r2.y, l204\n"
+" \n"
+" dcl_literal l205, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2._y__, r2.y, l205, r3.y\n"
+" \n"
+" dcl_literal l206, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3._y__, r2.y, l206\n"
+" \n"
+" dcl_literal l207, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.__z_, r2.y, l207\n"
+" cmov_logical r3.__z_, r3.y, r3.z, r2.y\n"
+" \n"
+" dcl_literal l208, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r3.___w, r2.y, l208\n"
+" \n"
+" dcl_literal l209, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r4.x___, l209, r3.z_neg(xyzw)\n"
+" ushr r4.x___, r0.z, r4.x\n"
+" ishl r3.__z_, r0.z, r3.z\n"
+" ishl r3.___w, r0.w, r3.w\n"
+" ior r3.___w, r4.x, r3.w\n"
+" cmov_logical r3.___w, r3.y, r0.z, r3.w\n"
+" \n"
+" dcl_literal l210, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r3._y__, r3.y, l210, r3.z\n"
+" cmov_logical r3.__z_, r2.y, r3.w, r0.w\n"
+" cmov_logical r2._y__, r2.y, r3.y, r0.z\n"
+" \n"
+" dcl_literal l211, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r3._y__, r3.x, l211\n"
+" \n"
+" dcl_literal l212, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r3.___w, r3.x, l212\n"
+" cmov_logical r3.___w, r3.y, r3.w, r3.x\n"
+" \n"
+" dcl_literal l213, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r4.x___, r3.x, l213\n"
+" \n"
+" dcl_literal l214, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3.___w, l214, r3.w_neg(xyzw)\n"
+" ushr r4._y__, r1.z, r3.w\n"
+" ishl r4.__z_, r1.x, r4.x\n"
+" ior r4._y__, r4.y, r4.z\n"
+" cmov_logical r4._y__, r3.y, r1.z, r4.y\n"
+" cmov_logical r4._y__, r3.x, r4.y, r1.x\n"
+" ushr r3.___w, r2.y, r3.w\n"
+" ishl r4.x___, r3.z, r4.x\n"
+" ior r3.___w, r3.w, r4.x\n"
+" cmov_logical r3._y__, r3.y, r2.y, r3.w\n"
+" cmov_logical r3.x___, r3.x, r3.y, r3.z\n"
+" udiv r3._y__, r4.y, r3.x\n"
+" umul r3.___w, r3.y, r3.x\n"
+" ilt r4.x___, r4.y, r3.w\n"
+" iadd r4.__z_, r3.w, r4.y_neg(xyzw)\n"
+" iadd r4.__z_, r4.z, r3.x\n"
+" \n"
+" dcl_literal l215, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r4.__z_, r4.z, l215\n"
+" iadd r3.___w, r4.y, r3.w_neg(xyzw)\n"
+" cmov_logical r3.___w, r4.x, r4.z, r3.w\n"
+" udiv r3.x___, r3.w, r3.x\n"
+" iadd r3.___w, r3.y, r3.x_neg(xyzw)\n"
+" iadd r3.x___, r3.y, r3.x\n"
+" cmov_logical r3.x___, r4.x, r3.w, r3.x\n"
+" \n"
+" dcl_literal l216, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r2.y, l216\n"
+" \n"
+" dcl_literal l217, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.___w, r2.y, l217\n"
+" \n"
+" dcl_literal l218, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r4.x___, r3.x, l218\n"
+" \n"
+" dcl_literal l219, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.x___, r3.x, l219\n"
+" umul r4._y__, r3.w, r3.x\n"
+" umul r3.___w, r3.w, r4.x\n"
+" umul r4.__z_, r3.y, r3.x\n"
+" umul r3._y__, r3.y, r4.x\n"
+" \n"
+" dcl_literal l220, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.y, l220\n"
+" \n"
+" dcl_literal l221, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r5.x___, r4.z, l221\n"
+" iadd r4.___w, r4.w, r5.x\n"
+" iadd r3.___w, r3.w, r4.w\n"
+" \n"
+" dcl_literal l222, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.___w, r3.w, l222\n"
+" iadd r4._y__, r4.y, r4.w\n"
+" \n"
+" dcl_literal l223, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r4.z, l223\n"
+" iadd r4._y__, r4.y, r4.z\n"
+" \n"
+" dcl_literal l224, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.___w, r3.w, l224\n"
+" \n"
+" dcl_literal l225, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3._y__, r3.y, l225\n"
+" ior r3._y__, r3.w, r3.y\n"
+" \n"
+" dcl_literal l226, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.z, l226\n"
+" \n"
+" dcl_literal l227, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.__z_, r3.z, l227\n"
+" umul r4.__z_, r4.z, r4.x\n"
+" umul r3.x___, r3.w, r3.x\n"
+" umul r3.___w, r3.w, r4.x\n"
+" \n"
+" dcl_literal l228, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r4.x___, r3.w, l228\n"
+" \n"
+" dcl_literal l229, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.x___, r3.x, l229\n"
+" iadd r3.x___, r4.x, r3.x\n"
+" iadd r3.x___, r4.z, r3.x\n"
+" \n"
+" dcl_literal l230, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r3.x___, r3.x, l230\n"
+" \n"
+" dcl_literal l231, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.w, l231\n"
+" ior r3.x___, r3.x, r3.w\n"
+" iadd r3.___w, r3.x, r4.y\n"
+" ult r3.x___, r3.w, r3.x\n"
+" ult r4.x___, r1.x, r3.w\n"
+" ieq r4._y__, r1.x, r3.w\n"
+" ult r4.__z_, r1.z, r3.y\n"
+" and r4.__z_, r4.y, r4.z\n"
+" ior r4.x___, r4.x, r4.z\n"
+" \n"
+" dcl_literal l232, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l233, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r4.x___, r4.x, l232, l233\n"
+" ieq r4.__z_, r1.z, r3.y\n"
+" and r4._y__, r4.y, r4.z\n"
+" \n"
+" dcl_literal l234, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r4.x___, r4.y, l234, r4.x\n"
+" \n"
+" dcl_literal l235, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r4.x___, r4.x, l235\n"
+" ior r3.x___, r3.x, r4.x\n"
+" iadd r2._y__, r3.y, r2.y_neg(xyzw)\n"
+" ult r4.x___, r3.y, r2.y\n"
+" iadd r4.x___, r3.w, r4.x\n"
+" iadd r3.__z_, r4.x, r3.z_neg(xyzw)\n"
+" cmov_logical r3.__z_, r3.x, r3.z, r3.w\n"
+" cmov_logical r2._y__, r3.x, r2.y, r3.y\n"
+" iadd r2._y__, r1.z, r2.y_neg(xyzw)\n"
+" ult r3.x___, r1.z, r2.y\n"
+" iadd r3.x___, r1.x, r3.x\n"
+" iadd r1.x___, r3.x, r3.z_neg(xyzw)\n"
+" mov r1.__z_, r2.y\n"
+"endif\n"
+"ult r2._y__, r1.x, r0.w\n"
+"ieq r3.x___, r1.x, r0.w\n"
+"ult r3._y__, r1.z, r0.z\n"
+"and r3._y__, r3.x, r3.y\n"
+"ior r2._y__, r2.y, r3.y\n"
+"dcl_literal l236, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"dcl_literal l237, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r2._y__, r2.y, l236, l237\n"
+"ieq r3._y__, r1.z, r0.z\n"
+"and r3.x___, r3.x, r3.y\n"
+"dcl_literal l238, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r2._y__, r3.x, l238, r2.y\n"
+"dcl_literal l239, 0x0000000E, 0x0000000E, 0x0000000E, 0x0000000E\n"
+"ishl r3.x___, r1.x, l239\n"
+"dcl_literal l240, 0x00000009, 0x00000009, 0x00000009, 0x00000009\n"
+"ushr r3._y__, r1.x, l240\n"
+"dcl_literal l241, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"ushr r3.__z_, r1.z, l241\n"
+"ior r3.x___, r3.x, r3.z\n"
+"dcl_literal l242, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF\n"
+"and r3.x___, r3.x, l242\n"
+"dcl_literal l243, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF\n"
+"and r3.__z_, r1.z, l243\n"
+"dcl_literal l244, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.y, l244\n"
+"dcl_literal l245, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l245\n"
+"dcl_literal l246, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l246\n"
+"dcl_literal l247, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l247, r3.w_neg(xyzw)\n"
+"dcl_literal l248, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l248\n"
+"dcl_literal l249, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l249\n"
+"dcl_literal l250, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l250\n"
+"dcl_literal l251, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3._y__, r3.y, r3.w, l251\n"
+"dcl_literal l252, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.x, l252\n"
+"dcl_literal l253, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l253\n"
+"dcl_literal l254, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l254\n"
+"dcl_literal l255, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l255, r3.w_neg(xyzw)\n"
+"dcl_literal l256, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l256\n"
+"dcl_literal l257, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l257\n"
+"dcl_literal l258, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"iadd r3.___w, r3.w, l258\n"
+"dcl_literal l259, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"cmov_logical r3.x___, r3.x, r3.w, l259\n"
+"dcl_literal l260, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"ior r3.___w, r3.z, l260\n"
+"dcl_literal l261, 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000\n"
+"add r3.___w, r3.w, l261\n"
+"dcl_literal l262, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l262\n"
+"dcl_literal l263, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000\n"
+"iadd r3.___w, l263, r3.w_neg(xyzw)\n"
+"dcl_literal l264, 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000\n"
+"and r3.___w, r3.w, l264\n"
+"dcl_literal l265, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ushr r3.___w, r3.w, l265\n"
+"dcl_literal l266, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA, 0xFFFFFFFA\n"
+"iadd r3.___w, r3.w, l266\n"
+"dcl_literal l267, 0x00000012, 0x00000012, 0x00000012, 0x00000012\n"
+"cmov_logical r3.__z_, r3.z, r3.w, l267\n"
+"dcl_literal l268, 0x00000017, 0x00000017, 0x00000017, 0x00000017\n"
+"ieq r3.___w, r3.y, l268\n"
+"iadd r3.x___, r3.y, r3.x\n"
+"cmov_logical r3._y__, r3.w, r3.x, r3.y\n"
+"dcl_literal l269, 0x0000002E, 0x0000002E, 0x0000002E, 0x0000002E\n"
+"ieq r3.___w, r3.x, l269\n"
+"iadd r3.x___, r3.x, r3.z\n"
+"cmov_logical r3.x___, r3.w, r3.x, r3.y\n"
+"ilt r3._y__, r3.x, r2.x\n"
+"ieq r3.__z_, r2.x, r3.x\n"
+"dcl_literal l270, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2._y__, r2.y, l270\n"
+"and r2._y__, r3.z, r2.y\n"
+"ior r2._y__, r3.y, r2.y\n"
+"and r2._y__, r2.z, r2.y\n"
+"iadd r2.x___, r2.x, r3.x_neg(xyzw)\n"
+"dcl_literal l271, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+"cmov_logical r2.x___, r2.y, r2.x, l271\n"
+"dcl_literal l272, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ige r2._y__, r2.x, l272\n"
+"if_logicalnz r2.y\n"
+" ilt r2._y__, r2.w, r2.x\n"
+" iadd r2.__z_, r2.x, r2.w_neg(xyzw)\n"
+" and r2._y__, r2.y, r2.z\n"
+" \n"
+" dcl_literal l273, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.x___, r2.x, l273\n"
+" \n"
+" dcl_literal l274, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.x___, r2.x, l274, r2.y\n"
+" \n"
+" dcl_literal l275, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2._y__, r2.x, l275\n"
+" \n"
+" dcl_literal l276, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r2.__z_, r2.x, l276\n"
+" cmov_logical r2.__z_, r2.y, r2.z, r2.x\n"
+" \n"
+" dcl_literal l277, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r2.___w, r2.x, l277\n"
+" \n"
+" dcl_literal l278, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r3._y__, l278, r2.z_neg(xyzw)\n"
+" ushr r3._y__, r0.z, r3.y\n"
+" ishl r2.__z_, r0.z, r2.z\n"
+" ishl r2.___w, r0.w, r2.w\n"
+" ior r2.___w, r3.y, r2.w\n"
+" cmov_logical r2.___w, r2.y, r0.z, r2.w\n"
+" \n"
+" dcl_literal l279, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2._y__, r2.y, l279, r2.z\n"
+" cmov_logical r0.__zw, r2.x, r2.yyyw, r0.zzzw\n"
+" \n"
+" dcl_literal l280, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" ige r2.x___, r3.x, l280\n"
+" \n"
+" dcl_literal l281, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0, 0xFFFFFFE0\n"
+" iadd r2._y__, r3.x, l281\n"
+" cmov_logical r2._y__, r2.x, r2.y, r3.x\n"
+" \n"
+" dcl_literal l282, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" imin r2.__z_, r3.x, l282\n"
+" \n"
+" dcl_literal l283, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+" iadd r2._y__, l283, r2.y_neg(xyzw)\n"
+" ushr r2.___w, r1.z, r2.y\n"
+" ishl r3._y__, r1.x, r2.z\n"
+" ior r2.___w, r2.w, r3.y\n"
+" cmov_logical r2.___w, r2.x, r1.z, r2.w\n"
+" cmov_logical r2.___w, r3.x, r2.w, r1.x\n"
+" ushr r2._y__, r0.z, r2.y\n"
+" ishl r2.__z_, r0.w, r2.z\n"
+" ior r2._y__, r2.y, r2.z\n"
+" cmov_logical r2.x___, r2.x, r0.z, r2.y\n"
+" cmov_logical r2.x___, r3.x, r2.x, r0.w\n"
+" udiv r2._y__, r2.w, r2.x\n"
+" umul r2.__z_, r2.y, r2.x\n"
+" ilt r3.x___, r2.w, r2.z\n"
+" iadd r3._y__, r2.z, r2.w_neg(xyzw)\n"
+" iadd r3._y__, r3.y, r2.x\n"
+" \n"
+" dcl_literal l284, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" iadd r3._y__, r3.y, l284\n"
+" iadd r2.__z_, r2.w, r2.z_neg(xyzw)\n"
+" cmov_logical r2.__z_, r3.x, r3.y, r2.z\n"
+" udiv r2.x___, r2.z, r2.x\n"
+" iadd r2.__z_, r2.y, r2.x_neg(xyzw)\n"
+" iadd r2.x___, r2.y, r2.x\n"
+" cmov_logical r2.x___, r3.x, r2.z, r2.x\n"
+" \n"
+" dcl_literal l285, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2._y__, r0.z, l285\n"
+" \n"
+" dcl_literal l286, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.__z_, r0.z, l286\n"
+" \n"
+" dcl_literal l287, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.___w, r2.x, l287\n"
+" \n"
+" dcl_literal l288, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.x___, r2.x, l288\n"
+" umul r3.x___, r2.z, r2.x\n"
+" umul r2.__z_, r2.z, r2.w\n"
+" umul r3._y__, r2.y, r2.x\n"
+" umul r2._y__, r2.y, r2.w\n"
+" \n"
+" dcl_literal l289, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r2.y, l289\n"
+" \n"
+" dcl_literal l290, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r3.___w, r3.y, l290\n"
+" iadd r3.__z_, r3.z, r3.w\n"
+" iadd r2.__z_, r2.z, r3.z\n"
+" \n"
+" dcl_literal l291, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3.__z_, r2.z, l291\n"
+" iadd r3.x___, r3.x, r3.z\n"
+" \n"
+" dcl_literal l292, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y__, r3.y, l292\n"
+" iadd r3.x___, r3.x, r3.y\n"
+" \n"
+" dcl_literal l293, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.__z_, r2.z, l293\n"
+" \n"
+" dcl_literal l294, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2._y__, r2.y, l294\n"
+" ior r2._y__, r2.z, r2.y\n"
+" \n"
+" dcl_literal l295, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r0.w, l295\n"
+" \n"
+" dcl_literal l296, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r3._y__, r0.w, l296\n"
+" umul r3._y__, r3.y, r2.w\n"
+" umul r2.x_z_, r2.z, r2.xxwx\n"
+" \n"
+" dcl_literal l297, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ushr r2.___w, r2.z, l297\n"
+" \n"
+" dcl_literal l298, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.x___, r2.x, l298\n"
+" iadd r2.x___, r2.w, r2.x\n"
+" iadd r2.x___, r3.y, r2.x\n"
+" \n"
+" dcl_literal l299, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+" ishl r2.x___, r2.x, l299\n"
+" \n"
+" dcl_literal l300, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+" and r2.__z_, r2.z, l300\n"
+" ior r2.x___, r2.x, r2.z\n"
+" iadd r2.__z_, r2.x, r3.x\n"
+" ult r2.x___, r2.z, r2.x\n"
+" ult r2.___w, r1.x, r2.z\n"
+" ieq r3.x___, r1.x, r2.z\n"
+" ult r3._y__, r1.z, r2.y\n"
+" and r3._y__, r3.x, r3.y\n"
+" ior r2.___w, r2.w, r3.y\n"
+" \n"
+" dcl_literal l301, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+" \n"
+" dcl_literal l302, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+" cmov_logical r2.___w, r2.w, l301, l302\n"
+" ieq r3._y__, r1.z, r2.y\n"
+" and r3.x___, r3.x, r3.y\n"
+" \n"
+" dcl_literal l303, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" cmov_logical r2.___w, r3.x, l303, r2.w\n"
+" \n"
+" dcl_literal l304, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+" ilt r2.___w, r2.w, l304\n"
+" ior r2.x___, r2.x, r2.w\n"
+" iadd r0.__z_, r2.y, r0.z_neg(xyzw)\n"
+" ult r2.___w, r2.y, r0.z\n"
+" iadd r2.___w, r2.z, r2.w\n"
+" iadd r0.___w, r2.w, r0.w_neg(xyzw)\n"
+" cmov_logical r0.__zw, r2.x, r0.zzzw, r2.yyyz\n"
+" iadd r0.__z_, r1.z, r0.z_neg(xyzw)\n"
+" ult r2.x___, r1.z, r0.z\n"
+" iadd r2.x___, r1.x, r2.x\n"
+" iadd r1.x___, r2.x, r0.w_neg(xyzw)\n"
+" mov r1.__z_, r0.z\n"
+"endif\n"
+"dcl_literal l305, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"ilt r0.__z_, r1.y, l305\n"
+"cmov_logical r0.xy__, r0.z, r0.xyxx, r1.zxzz\n"
+"dcl_literal l306, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r1.y, r0.xyxx, l306\n"
+"dcl_literal l307, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.xy__, r1.w, l307, r0.xyxx\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+/*
+ * __umod_i8: unsigned 8-bit remainder of in0.x by in1.x, returned in out0.x.
+ * Both operands are masked to their low 8 bits before use. A zero divisor is
+ * swapped for 1 ahead of the udiv and the quotient is then forced to 0, so
+ * the macro returns the masked dividend rather than dividing by zero
+ * (assuming cmov_logical picks its second source when the first is non-zero).
+ * NOTE(review): the trailing "2,1" presumably mirrors _in(2)/_out(1); confirm
+ * against the declaration of the table's element type.
+ */
+{ "__umod_i8",
+"mdef(500)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+/* Pack dividend (r0.x) and divisor (r0.y) into one register. */
+"mov r0._y__, r1.x\n"
+"dcl_literal l9, 0x000000FF, 0x000000FF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l9\n"
+/* r0.w = divisor, or 1 when the divisor is zero. */
+"dcl_literal l10, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"cmov_logical r0.___w, r1.y, r1.y, l10\n"
+"udiv r1.__z_, r1.x, r0.w\n"
+/* r1.y = quotient, or 0 when the divisor is zero. */
+"dcl_literal l11, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r1._y__, r1.y, r1.z, l11\n"
+/* remainder = dividend - divisor * quotient, truncated to 8 bits. */
+"umul r0.___w, r0.w, r1.y\n"
+"iadd r0.___w, r1.x, r0.w_neg(xyzw)\n"
+"dcl_literal l12, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.__z_, r0.w, l12\n"
+"mov r0.x___, r0.z\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umul24_u32",
+"mdef(501)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n"
+"\n"
+"dcl_literal l2, 0x00FFFFFF, 0x00FFFFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l2\n"
+"umul r1.x___, r1.x, r1.y\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umul_hi_u16",
+"mdef(502)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"\n"
+"mov r0._y_, r1.x\n"
+"umul r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l3, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r0.x___, r0.w, l3\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umul_hi_u32",
+"mdef(503)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y_, r1.x\n"
+"\n"
+"dcl_literal l14, 0x0000FFFF, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.xyxx, l14\n"
+"\n"
+"dcl_literal l15, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.__zw, r0.xxxy, l15\n"
+"umul r0.___w, r1.z, r1.w\n"
+"umul r1.x_zw, r1.xxzx, r1.yyyw\n"
+"\n"
+"dcl_literal l16, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.x, l16\n"
+"\n"
+"dcl_literal l17, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r1._y__, r1.w, l17\n"
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r1.x___, r1.z, r1.x\n"
+"\n"
+"dcl_literal l18, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.x, l18\n"
+"iadd r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l19, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"ushr r1.x___, r1.w, l19\n"
+"iadd r0.x___, r0.w, r1.x\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__umul_hi_u8",
+"mdef(504)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y_, r1.x\n"
+"umul r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l1, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"ushr r0.x___, r0.w, l1\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urhadd_u16",
+"mdef(505)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"iadd r0.___w, r0.x, r0.y\n"
+"\n"
+"dcl_literal l6, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r0.___w, r0.w, l6\n"
+"\n"
+"dcl_literal l7, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r0.x___, r0.w, l7\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urhadd_u32",
+"mdef(506)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"ior r0.___w, r0.y, r0.x\n"
+"\n"
+"dcl_literal l10, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"and r0.___w, r0.w, l10\n"
+"\n"
+"dcl_literal l11, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r1.xy__, r0.xyxx, l11\n"
+"iadd r1.x___, r1.x, r1.y\n"
+"iadd r0.x___, r1.x, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urhadd_u8",
+"mdef(507)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"iadd r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l2, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"iadd r0.___w, r1.x, l2\n"
+"\n"
+"dcl_literal l3, 0x00000001, 0x00000001, 0x00000001, 0x00000001\n"
+"ushr r0.x___, r0.w, l3\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urotate_u16",
+"mdef(508)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l13, 0x0000000F, 0x0000FFFF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.yxyy, l13\n"
+"ishl r0.___w, r1.y, r1.x\n"
+"\n"
+"dcl_literal l14, 0x00000010, 0x00000010, 0x00000010, 0x00000010\n"
+"iadd r1.x___, l14, r1.x_neg(xyzw)\n"
+"ushr r1.x___, r1.y, r1.x\n"
+"ior r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l15, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"and r0.x___, r0.w, l15\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urotate_u32",
+"mdef(509)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l16, 0x0000001F, 0x0000001F, 0x0000001F, 0x0000001F\n"
+"and r0.___w, r0.y, l16\n"
+"ishl r1.x___, r0.x, r0.w\n"
+"\n"
+"dcl_literal l17, 0x00000020, 0x00000020, 0x00000020, 0x00000020\n"
+"iadd r0.___w, l17, r0.w_neg(xyzw)\n"
+"ushr r0.___w, r0.x, r0.w\n"
+"ior r0.x___, r1.x, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__urotate_u8",
+"mdef(510)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__,r1.x\n"
+"\n"
+"dcl_literal l5, 0x00000007, 0x000000FF, 0x00000000, 0x00000000\n"
+"and r1.xy__, r0.yxyy, l5\n"
+"ishl r0.___w, r1.y, r1.x\n"
+"\n"
+"dcl_literal l6, 0x00000008, 0x00000008, 0x00000008, 0x00000008\n"
+"iadd r1.x___, l6, r1.x_neg(xyzw)\n"
+"ushr r1.x___, r1.y, r1.x\n"
+"ior r0.___w, r0.w, r1.x\n"
+"\n"
+"dcl_literal l7, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"and r0.x___, r0.w, l7\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__usub_sat_u16",
+"mdef(511)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n"
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l5, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF\n"
+"umin r0.x___, r0.w, l5\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__usub_sat_u32",
+"mdef(512)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n"
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n"
+"ult r1.x___, r0.x, r0.y\n"
+"\n"
+"dcl_literal l10, 0x00000000, 0x00000000, 0x00000000, 0x00000000\n"
+"cmov_logical r0.__z_, r1.x, l10, r0.w\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "__usub_sat_u8",
+"mdef(513)_out(1)_in(2)\n"
+"mov r0, in0\n"
+"mov r1, in1\n"
+"\n"
+"mov r0._y__, r1.x\n"
+"iadd r0.___w, r0.x, r0.y_neg(xyzw)\n"
+"\n"
+"dcl_literal l2, 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF\n"
+"umin r0.x___, r0.w, l2\n"
+"mov out0, r0\n"
+"mend\n"
+,2,1
+},
+{ "", NULL }};
+#endif // _macrodb_gen_HPP_
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
new file mode 100644
index 00000000000..f78157a85bd
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+#ifndef LLVM_GPU_H
+#define LLVM_GPU_H
+
+#include <llvm-c/Core.h>
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+/* Capacity limits for the fixed-size arrays in struct radeon_llvm_context.
+ * Compound values are parenthesized so that the macros expand correctly when
+ * they appear inside a larger expression (division, modulo, sizeof math...). */
+#define RADEON_LLVM_MAX_INPUTS (16 * 4)
+#define RADEON_LLVM_MAX_OUTPUTS (16 * 4)
+#define RADEON_LLVM_MAX_BRANCH_DEPTH 16
+#define RADEON_LLVM_MAX_LOOP_DEPTH 16
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Bookkeeping for one open IF/ELSE/ENDIF construct.  An entry is filled in
+ * by if_emit() and read by else_emit() and endif_emit(). */
+struct radeon_llvm_branch {
+ /* Block the builder is positioned at once ENDIF has been emitted. */
+ LLVMBasicBlockRef endif_block;
+ /* Block receiving the instructions that follow IF. */
+ LLVMBasicBlockRef if_block;
+ /* Block receiving the instructions that follow ELSE. */
+ LLVMBasicBlockRef else_block;
+ /* Cleared by if_emit(), set to 1 by else_emit(). */
+ unsigned has_else;
+};
+
+/** Bookkeeping for one open BGNLOOP/ENDLOOP construct.  An entry is filled in
+ * by bgnloop_emit() and read by brk_emit(), cont_emit() and endloop_emit(). */
+struct radeon_llvm_loop {
+ /* Loop body; CONT and ENDLOOP branch back to this block. */
+ LLVMBasicBlockRef loop_block;
+ /* Block following the loop; BRK branches to this block. */
+ LLVMBasicBlockRef endloop_block;
+};
+
+/** State shared by the TGSI -> LLVM translation helpers of the radeon
+ * drivers.  radeon_llvm_context_init() sets up the LLVM objects and the
+ * default front end configuration. */
+struct radeon_llvm_context {
+
+ /* Must stay the first member: radeon_llvm_context() casts a
+  * struct lp_build_tgsi_context pointer straight to this struct.
+  * NOTE(review): this presumably also relies on bld_base being the first
+  * member of lp_build_tgsi_soa_context - confirm. */
+ struct lp_build_tgsi_soa_context soa;
+
+ /*=== Front end configuration ===*/
+
+ /* Special Intrinsics */
+
+ /** Write to an output register: float store_output(float, i32) */
+ const char * store_output_intr;
+
+ /** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
+ * The swizzle is an unsigned integer that encodes a TGSI_SWIZZLE_* value
+ * in 2-bits.
+ * Swizzle{0-1} = X Channel
+ * Swizzle{2-3} = Y Channel
+ * Swizzle{4-5} = Z Channel
+ * Swizzle{6-7} = W Channel
+ */
+ const char * swizzle_intr;
+
+ /* Instructions that are not described by any of the TGSI opcodes. */
+
+ /** This function is responsible for initializing the inputs array and will be
+ * called once for each input declared in the TGSI shader.
+ */
+ void (*load_input)(struct radeon_llvm_context *,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl);
+
+
+ /** User data to use with the callbacks */
+ void * userdata;
+
+ /** This array contains the input values for the shader. Typically these
+ * values will be in the form of a target intrinsic that will inform the
+ * backend how to load the actual inputs to the shader.
+ */
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
+ /* Per-channel output slots; radeon_llvm_context_init() points soa.outputs
+  * at this array and emit_declaration() fills it with allocas. */
+ LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][NUM_CHANNELS];
+ /* Highest declared output register index plus one, maintained by
+  * emit_declaration(). */
+ unsigned output_reg_count;
+
+ /* NOTE(review): not referenced by the translation helpers visible here;
+  * presumably consumed by the driver that owns the context - confirm. */
+ unsigned reserved_reg_count;
+ /*=== Private Members ===*/
+
+ /* Stacks of the currently open IF and BGNLOOP constructs. */
+ struct radeon_llvm_branch branch[RADEON_LLVM_MAX_BRANCH_DEPTH];
+ struct radeon_llvm_loop loop[RADEON_LLVM_MAX_LOOP_DEPTH];
+
+ /* Number of valid entries in branch[] and loop[] respectively. */
+ unsigned branch_depth;
+ unsigned loop_depth;
+
+
+ /* The "main" function that all generated code is emitted into. */
+ LLVMValueRef main_fn;
+
+ /* Only the context, module and builder fields are set up by
+  * radeon_llvm_context_init(). */
+ struct gallivm_state gallivm;
+};
+
+/**
+ * Compile the LLVM module M for the given GPU family.
+ *
+ * On success *bytes points to a malloc()ed copy of the emitted code, which
+ * the caller must free(), and *byte_count holds its size in bytes.
+ *
+ * @return 0 on success, 1 on failure.
+ */
+unsigned radeon_llvm_compile(
+ LLVMModuleRef M,
+ unsigned char ** bytes,
+ unsigned * byte_count,
+ const char * gpu_family,
+ unsigned dump);
+
+/**
+ * Create the LLVM context, module, builder and the "main" function inside
+ * ctx and install the default TGSI opcode actions.
+ */
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
+
+/* NOTE(review): presumably releases what radeon_llvm_context_init() created;
+ * the definition is not part of this header - confirm. */
+void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
+
+/** Reinterpret a TGSI build context pointer as the radeon_llvm_context that
+ * contains it.  This is an unchecked pointer cast. */
+inline static struct radeon_llvm_context * radeon_llvm_context(
+    struct lp_build_tgsi_context * bld_base)
+{
+    struct radeon_llvm_context * ctx = (struct radeon_llvm_context *)bld_base;
+
+    return ctx;
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* LLVM_GPU_H */
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
new file mode 100644
index 00000000000..f5e357a392f
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+#include "radeon_llvm.h"
+
+#include <llvm/Module.h>
+#include <llvm/PassManager.h>
+#include <llvm/ADT/Triple.h>
+#include <llvm/Support/FormattedStream.h>
+#include <llvm/Support/Host.h>
+#include <llvm/Support/TargetRegistry.h>
+#include <llvm/Support/TargetSelect.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Target/TargetMachine.h>
+
+#include <llvm/Transforms/Scalar.h>
+
+#include <llvm-c/Target.h>
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace llvm;
+
+/* When the backend is built in-tree (EXTERNAL_LLVM undefined) the target
+ * object lives here; radeon_llvm_compile() registers the target machine and
+ * the MCAsmInfo factory against it. */
+#ifndef EXTERNAL_LLVM
+#include "AMDISATargetMachine.h"
+Target TheAMDISATarget;
+#endif
+
+
+/**
+ * Compile an LLVM module to machine code.
+ *
+ * @param M          Module to compile.
+ * @param bytes      Receives a malloc()ed buffer holding the emitted code; it
+ *                   is the caller's responsibility to free() it.  Set to NULL
+ *                   when the function fails.
+ * @param byte_count Receives the number of valid bytes in *bytes; 0 when the
+ *                   function fails.
+ * @param gpu_family GPU family name handed to the target machine.
+ * @param dump       In-tree backend only: when non-zero, dumpCode() is called
+ *                   on the target machine before code generation.
+ * @return 0 on success, 1 on failure.
+ */
+extern "C" unsigned
+radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
+                    unsigned * byte_count, const char * gpu_family,
+                    unsigned dump) {
+
+    /* Declared before the target lookup because the EXTERNAL_LLVM path
+     * below calls setArch() on it. */
+    Triple AMDISATriple(sys::getHostTriple());
+    unsigned status = 1;
+
+    *bytes = NULL;
+    *byte_count = 0;
+
+#ifdef EXTERNAL_LLVM
+    const Target * AMDISATarget = NULL;
+
+    /* XXX: Can we just initialize the AMDISA target here? */
+    InitializeAllTargets();
+    Triple::ArchType Arch = Triple::getArchTypeForLLVMName("amdisa");
+    if (Arch == Triple::UnknownArch) {
+        fprintf(stderr, "Unknown Arch\n");
+    }
+    std::string AMDISAArchName = "amdisa";
+    AMDISATriple.setArch(Arch);
+    for (TargetRegistry::iterator it = TargetRegistry::begin(),
+         ie = TargetRegistry::end(); it != ie; ++it) {
+        if (it->getName() == AMDISAArchName) {
+            AMDISATarget = &*it;
+            break;
+        }
+    }
+
+    if (!AMDISATarget) {
+        fprintf(stderr, "Can't find target\n");
+        return 1;
+    }
+#else
+    RegisterTargetMachine<AMDISATargetMachine> Y(TheAMDISATarget);
+    RegisterMCAsmInfoFn A(TheAMDISATarget, createMCAsmInfo);
+#endif
+
+    Module * mod = unwrap(M);
+
+#ifdef EXTERNAL_LLVM
+    std::string FS = gpu_family;
+    std::auto_ptr<TargetMachine> tm(AMDISATarget->createTargetMachine(
+                                    AMDISATriple.getTriple(), FS));
+    TargetMachine &TM = *tm.get();
+#else
+    AMDISATargetMachine * tm = new AMDISATargetMachine(TheAMDISATarget,
+                    AMDISATriple.getTriple(), gpu_family, "", Reloc::Default,
+                    CodeModel::Default);
+    /* Named TM so the reference does not shadow the AMDISATargetMachine class. */
+    TargetMachine &TM = *tm;
+    /* XXX: Use TargetMachine.Options in 3.0 */
+    if (dump) {
+        tm->dumpCode();
+    }
+#endif
+    const TargetData * AMDISAData = TM.getTargetData();
+    PassManager PM;
+    PM.add(new TargetData(*AMDISAData));
+    PM.add(createPromoteMemoryToRegisterPass());
+    TM.setAsmVerbosityDefault(true);
+
+    std::string CodeString;
+    raw_string_ostream oStream(CodeString);
+    formatted_raw_ostream out(oStream);
+
+    /* Optional extra parameter true / false to disable verify */
+    if (TM.addPassesToEmitFile(PM, out, TargetMachine::CGFT_AssemblyFile,
+                               CodeGenOpt::Default, true)) {
+        fprintf(stderr, "AddingPasses failed.\n");
+    } else {
+        PM.run(*mod);
+
+        out.flush();
+        std::string &data = oStream.str();
+        const size_t length = data.length();
+
+        /* Request at least one byte: malloc(0) may legally return NULL,
+         * which must not be mistaken for an allocation failure. */
+        unsigned char * buffer = (unsigned char*)malloc(length ? length : 1);
+        if (!buffer) {
+            fprintf(stderr, "Out of memory\n");
+        } else {
+            memcpy(buffer, data.c_str(), length);
+            *bytes = buffer;
+            *byte_count = length;
+            status = 0;
+        }
+    }
+
+    /* Single exit point so the in-tree target machine is released on the
+     * failure paths as well. */
+#ifndef EXTERNAL_LLVM
+    delete tm;
+#endif
+
+    return status;
+}
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
new file mode 100644
index 00000000000..304f48c990d
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+#include "radeon_llvm.h"
+
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_gather.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_init.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_math.h"
+#include "util/u_debug.h"
+
+/** Return the innermost open loop, or NULL when no loop is open. */
+static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
+{
+    if (ctx->loop_depth == 0) {
+        return NULL;
+    }
+    return &ctx->loop[ctx->loop_depth - 1];
+}
+
+/** Return the innermost open IF construct, or NULL when none is open. */
+static struct radeon_llvm_branch * get_current_branch(
+    struct radeon_llvm_context * ctx)
+{
+    if (ctx->branch_depth == 0) {
+        return NULL;
+    }
+    return &ctx->branch[ctx->branch_depth - 1];
+}
+
+/**
+ * Fetch the operands of a two-source instruction in swapped order:
+ * args[0] receives source 1 and args[1] receives source 0.  The destination
+ * type is taken from the first fetched value.
+ */
+static void radeon_llvm_fetch_args_2_reverse_soa(
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    LLVMValueRef second_src;
+    LLVMValueRef first_src;
+
+    assert(emit_data->info->num_src == 2);
+
+    /* Source 1 is fetched before source 0, exactly as the operands end up
+     * in the argument list. */
+    second_src = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                     1, emit_data->chan);
+    first_src = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                    0, emit_data->chan);
+
+    emit_data->args[0] = second_src;
+    emit_data->args[1] = first_src;
+    emit_data->arg_count = 2;
+    emit_data->dst_type = LLVMTypeOf(second_src);
+}
+
+/**
+ * Swizzle a value: pack the four per-channel selectors into a byte array
+ * and hand them to lp_build_swizzle_aos().
+ */
+static LLVMValueRef emit_swizzle(
+    struct lp_build_tgsi_context * bld_base,
+    LLVMValueRef value,
+    unsigned swizzle_x,
+    unsigned swizzle_y,
+    unsigned swizzle_z,
+    unsigned swizzle_w)
+{
+    unsigned char selectors[4] = {
+        swizzle_x, swizzle_y, swizzle_z, swizzle_w
+    };
+
+    return lp_build_swizzle_aos(&bld_base->base, value, selectors);
+}
+
+/**
+ * Handle a TGSI declaration.
+ *
+ * - ADDRESS:   allocate one integer alloca per channel of every register.
+ * - TEMPORARY: defer to lp_emit_declaration_soa().
+ * - INPUT:     invoke the driver's load_input callback for every register.
+ * - OUTPUT:    allocate one float alloca per channel of every register and
+ *              grow output_reg_count to cover the declared range.
+ * Declarations for any other register file are ignored.
+ */
+static void emit_declaration(
+    struct lp_build_tgsi_context * bld_base,
+    const struct tgsi_full_declaration *decl)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    const unsigned first = decl->Range.First;
+    const unsigned last = decl->Range.Last;
+    unsigned reg;
+    unsigned c;
+
+    switch (decl->Declaration.File) {
+    case TGSI_FILE_ADDRESS:
+        for (reg = first; reg <= last; reg++) {
+            for (c = 0; c < NUM_CHANNELS; c++) {
+                ctx->soa.addr[reg][c] = lp_build_alloca(
+                    &ctx->gallivm,
+                    ctx->soa.uint_bld.elem_type, "");
+            }
+        }
+        break;
+
+    case TGSI_FILE_TEMPORARY:
+        lp_emit_declaration_soa(bld_base, decl);
+        break;
+
+    case TGSI_FILE_INPUT:
+        for (reg = first; reg <= last; reg++) {
+            ctx->load_input(ctx, reg, decl);
+        }
+        break;
+
+    case TGSI_FILE_OUTPUT:
+        for (reg = first; reg <= last; reg++) {
+            assert(reg < RADEON_LLVM_MAX_OUTPUTS);
+            for (c = 0; c < NUM_CHANNELS; c++) {
+                ctx->soa.outputs[reg][c] = lp_build_alloca(
+                    &ctx->gallivm,
+                    ctx->soa.bld_base.base.elem_type, "");
+            }
+        }
+        ctx->output_reg_count = MAX2(ctx->output_reg_count, last + 1);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/**
+ * Store the result of an instruction to its first destination register.
+ *
+ * When dst[0] is an LLVM vector, the enabled channels are extracted into
+ * scalars and the store is re-dispatched through bld_base->emit_store.
+ * Otherwise every enabled channel is optionally clamped according to the
+ * instruction's saturate mode and stored to the matching OUTPUT or TEMPORARY
+ * slot.  Destinations in any other register file are silently ignored.
+ *
+ * @param bld_base  TGSI build context.
+ * @param inst      Instruction whose Dst[0] is written.
+ * @param info      Opcode info, only forwarded on the vector path.
+ * @param dst       Per-channel results, or a single vector in dst[0].
+ */
+static void
+emit_store(
+    struct lp_build_tgsi_context * bld_base,
+    const struct tgsi_full_instruction * inst,
+    const struct tgsi_opcode_info * info,
+    LLVMValueRef dst[4])
+{
+    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+    /* Use the build context in place instead of copying the whole struct. */
+    struct lp_build_context *base = &bld->bld_base.base;
+    struct gallivm_state *gallivm = base->gallivm;
+    LLVMBuilderRef builder = gallivm->builder;
+    const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+    LLVMValueRef temp_ptr;
+    unsigned chan, chan_index;
+    boolean is_vec_store = FALSE;
+
+    if (dst[0]) {
+        LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
+        is_vec_store = (k == LLVMVectorTypeKind);
+    }
+
+    if (is_vec_store) {
+        /* Explicit initializer: an empty "{}" is not valid before C23. */
+        LLVMValueRef values[4] = { NULL, NULL, NULL, NULL };
+        FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
+            LLVMValueRef index = lp_build_const_int32(gallivm, chan);
+            values[chan] = LLVMBuildExtractElement(builder,
+                                                   dst[0], index, "");
+        }
+        bld_base->emit_store(bld_base, inst, info, values);
+        return;
+    }
+
+    FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+        LLVMValueRef value = dst[chan_index];
+
+        if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
+            struct lp_build_emit_data clamp_emit_data;
+
+            /* clamp(value, lower, 1.0); the lower bound depends on the
+             * saturate mode. */
+            memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
+            clamp_emit_data.arg_count = 3;
+            clamp_emit_data.args[0] = value;
+            clamp_emit_data.args[2] = base->one;
+
+            switch(inst->Instruction.Saturate) {
+            case TGSI_SAT_ZERO_ONE:
+                clamp_emit_data.args[1] = base->zero;
+                break;
+            case TGSI_SAT_MINUS_PLUS_ONE:
+                clamp_emit_data.args[1] = LLVMConstReal(
+                    base->elem_type, -1.0f);
+                break;
+            default:
+                assert(0);
+            }
+            value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
+                                       &clamp_emit_data);
+        }
+
+        switch(reg->Register.File) {
+        case TGSI_FILE_OUTPUT:
+            temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+            break;
+
+        case TGSI_FILE_TEMPORARY:
+            temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
+            break;
+
+        default:
+            /* Unsupported destination file: nothing is stored. */
+            return;
+        }
+        LLVMBuildStore(builder, value, temp_ptr);
+    }
+}
+
+/**
+ * Emit TGSI BGNLOOP: open a new loop.
+ *
+ * Appends an "ENDLOOP" block to main_fn, inserts a "LOOP" block in front of
+ * it, branches from the current position into the loop body and pushes both
+ * blocks onto ctx->loop[] for BRK, CONT and ENDLOOP to use.
+ */
+static void bgnloop_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    struct gallivm_state * gallivm = bld_base->base.gallivm;
+    struct radeon_llvm_loop * loop;
+    LLVMBasicBlockRef loop_block;
+    LLVMBasicBlockRef endloop_block;
+
+    /* ctx->loop[] is a fixed-size stack; never write past its end. */
+    assert(ctx->loop_depth < RADEON_LLVM_MAX_LOOP_DEPTH);
+
+    endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
+                                                  ctx->main_fn, "ENDLOOP");
+    loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
+                                               endloop_block, "LOOP");
+    LLVMBuildBr(gallivm->builder, loop_block);
+    LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
+
+    loop = &ctx->loop[ctx->loop_depth++];
+    loop->loop_block = loop_block;
+    loop->endloop_block = endloop_block;
+}
+
+/** Emit TGSI BRK: branch to the block that follows the innermost loop. */
+static void brk_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+    LLVMBasicBlockRef exit_block = get_current_loop(ctx)->endloop_block;
+
+    LLVMBuildBr(builder, exit_block);
+}
+
+/** Emit TGSI CONT: branch back to the head of the innermost loop. */
+static void cont_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+    LLVMBasicBlockRef head_block = get_current_loop(ctx)->loop_block;
+
+    LLVMBuildBr(builder, head_block);
+}
+
+/**
+ * Emit TGSI ELSE: finish the IF side of the innermost branch and continue
+ * emitting code into its ELSE block.
+ */
+static void else_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    struct gallivm_state * gallivm = bld_base->base.gallivm;
+    struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
+    LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
+
+    /* We need to add a terminator to the current block if the previous
+     * instruction was an ENDIF. Example:
+     * IF
+     *   [code]
+     *   IF
+     *     [code]
+     *   ELSE
+     *     [code]
+     *   ENDIF <--
+     * ELSE <--
+     *   [code]
+     * ENDIF
+     *
+     * Skip this when the block already ends in a terminator (for instance a
+     * BRK or CONT emitted after the nested ENDIF): a basic block may hold
+     * only one terminator.  endif_emit() performs the same check.
+     */
+    if (current_block != current_branch->if_block
+        && !LLVMGetBasicBlockTerminator(current_block)) {
+        LLVMBuildBr(gallivm->builder, current_branch->endif_block);
+    }
+
+    /* Terminate the IF block itself.  Position the builder on it first so
+     * the branch cannot land in some other block, as endif_emit() does. */
+    if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
+        LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
+        LLVMBuildBr(gallivm->builder, current_branch->endif_block);
+    }
+
+    current_branch->has_else = 1;
+    LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
+}
+
+/**
+ * Emit TGSI ENDIF: make sure the current block, the ELSE block and the IF
+ * block of the innermost branch all end in a terminator, then continue
+ * emitting code into the ENDIF block and pop the branch.
+ */
+static void endif_emit(
+ const struct lp_build_opcode_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
+ LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
+
+ /* If we have consecutive ENDIF instructions, then the first ENDIF
+ * will not have a terminator, so we need to add one. */
+ if (current_block != current_branch->if_block
+ && current_block != current_branch->else_block
+ && !LLVMGetBasicBlockTerminator(current_block)) {
+
+ LLVMBuildBr(gallivm->builder, current_branch->endif_block);
+ }
+ /* The ELSE block exists even when the shader had no ELSE (if_emit()
+ * always creates it), so it must be closed here if nothing closed it. */
+ if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
+ LLVMBuildBr(gallivm->builder, current_branch->endif_block);
+ }
+
+ /* Same for the IF block when it has not been terminated yet. */
+ if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
+ LLVMBuildBr(gallivm->builder, current_branch->endif_block);
+ }
+
+ /* Subsequent instructions go into the ENDIF block; pop the branch. */
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
+ ctx->branch_depth--;
+}
+
+/**
+ * Emit TGSI ENDLOOP: close the loop body with a back edge unless the current
+ * block is already terminated, then continue in the block after the loop and
+ * pop the loop.
+ */
+static void endloop_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+    struct radeon_llvm_loop * loop = get_current_loop(ctx);
+    LLVMBasicBlockRef insert_block = LLVMGetInsertBlock(builder);
+
+    if (LLVMGetBasicBlockTerminator(insert_block) == NULL) {
+        LLVMBuildBr(builder, loop->loop_block);
+    }
+
+    LLVMPositionBuilderAtEnd(builder, loop->endloop_block);
+    ctx->loop_depth--;
+}
+
+/**
+ * Emit TGSI IF: open a new branch.
+ *
+ * Appends an "ENDIF" block to main_fn, inserts "IF" and "ELSE" blocks in
+ * front of it, emits the conditional branch and pushes the three blocks onto
+ * ctx->branch[] for ELSE and ENDIF to use.
+ *
+ * NOTE(review): the IF side is taken only when args[0] compares equal to
+ * 1.0; TGSI presumably treats any non-zero condition as true - confirm that
+ * every producer of the condition yields exactly 0.0 or 1.0.
+ */
+static void if_emit(
+    const struct lp_build_opcode_action * action,
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+    struct gallivm_state * gallivm = bld_base->base.gallivm;
+    struct radeon_llvm_branch * branch;
+    LLVMValueRef cond;
+    LLVMBasicBlockRef if_block, else_block, endif_block;
+
+    /* ctx->branch[] is a fixed-size stack; never write past its end. */
+    assert(ctx->branch_depth < RADEON_LLVM_MAX_BRANCH_DEPTH);
+
+    cond = LLVMBuildFCmp(gallivm->builder, LLVMRealOEQ, emit_data->args[0],
+                         bld_base->base.one, "");
+
+    endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
+                                                ctx->main_fn, "ENDIF");
+    if_block = LLVMInsertBasicBlockInContext(gallivm->context,
+                                             endif_block, "IF");
+    else_block = LLVMInsertBasicBlockInContext(gallivm->context,
+                                               endif_block, "ELSE");
+    LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
+    LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
+
+    branch = &ctx->branch[ctx->branch_depth++];
+    branch->endif_block = endif_block;
+    branch->if_block = if_block;
+    branch->else_block = else_block;
+    branch->has_else = 0;
+}
+
+/**
+ * Fetch the arguments of a texture instruction: the four channels of source
+ * operand 0 are fetched one by one and gathered into a single 4-element
+ * vector, which becomes the only argument.
+ */
+static void tex_fetch_args(
+    struct lp_build_tgsi_context * bld_base,
+    struct lp_build_emit_data * emit_data)
+{
+    /* XXX: Fetching the whole register at once with CHAN_ALL made
+     * lp_build_swizzle_aos() fail with wrong argument types.  That should
+     * be fixable, but for now the vector is assembled by hand from
+     * per-channel fetches. */
+    LLVMValueRef channels[4];
+    unsigned i = 0;
+
+    while (i < 4) {
+        channels[i] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, i);
+        i++;
+    }
+
+    emit_data->arg_count = 1;
+    emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+                                                channels, 4);
+    emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
+{
+ struct lp_type type;
+ LLVMTypeRef main_fn_type;
+ LLVMBasicBlockRef main_fn_body;
+
+ /* Initialize the gallivm object:
+ * We are only using the module, context, and builder fields of this struct.
+ * This should be enough for us to be able to pass our gallivm struct to the
+ * helper functions in the gallivm module.
+ */
+ memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
+ ctx->gallivm.context = LLVMContextCreate();
+ ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
+ ctx->gallivm.context);
+ ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
+
+ /* Setup the module */
+ main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
+ NULL, 0, 0);
+ ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
+ main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
+ ctx->main_fn, "main_body");
+ LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
+
+ ctx->store_output_intr = "llvm.AMDISA.store.output.";
+ ctx->swizzle_intr = "llvm.AMDISA.swizzle";
+ struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
+
+ /* XXX: We need to revisit this.I think the correct way to do this is
+ * to use length = 4 here and use the elem_bld for everything. */
+ type.floating = TRUE;
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = 1;
+
+ lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
+ lp_build_context_init(&ctx->soa.uint_bld, &ctx->gallivm, lp_uint_type(type));
+
+ bld_base->soa = 1;
+ bld_base->emit_store = emit_store;
+ bld_base->emit_swizzle = emit_swizzle;
+ bld_base->emit_declaration = emit_declaration;
+ bld_base->emit_immediate = lp_emit_immediate_soa;
+
+ /* Allocate outputs */
+ ctx->soa.outputs = ctx->outputs;
+
+ /* XXX: Is there a better way to initialize all this ? */
+
+ lp_set_default_actions(bld_base);
+
+ bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs.";
+ bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_ARL].intr_name = "llvm.AMDISA.arl";
+ bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
+ bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
+ bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
+ bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
+ bld_base->op_actions[TGSI_OPCODE_CMP].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDISA.cndlt";
+ bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDISA.cos";
+ bld_base->op_actions[TGSI_OPCODE_DDX].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDISA.ddx";
+ bld_base->op_actions[TGSI_OPCODE_DDY].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDISA.ddy";
+ bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDISA.div";
+ bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
+ bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
+ bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
+ bld_base->op_actions[TGSI_OPCODE_EX2].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
+ bld_base->op_actions[TGSI_OPCODE_FLR].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.AMDISA.floor";
+ bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
+ bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
+ bld_base->op_actions[TGSI_OPCODE_KIL].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDISA.kill";
+ bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDISA.kilp";
+ bld_base->op_actions[TGSI_OPCODE_LG2].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.AMDIL.log.";
+ bld_base->op_actions[TGSI_OPCODE_LRP].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDISA.lrp";
+ bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
+ bld_base->op_actions[TGSI_OPCODE_MAD].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MAD].intr_name = "llvm.AMDIL.mad.";
+ bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max.";
+ bld_base->op_actions[TGSI_OPCODE_MUL].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_MUL].intr_name = "llvm.AMDISA.mul";
+ bld_base->op_actions[TGSI_OPCODE_POW].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.AMDISA.pow";
+ bld_base->op_actions[TGSI_OPCODE_RCP].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_RCP].intr_name = "llvm.AMDISA.rcp";
+ bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDISA.ssg";
+ bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDISA.sge.";
+ bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDISA.seq";
+ bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa;
+ bld_base->op_actions[TGSI_OPCODE_SLE].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SLE].intr_name = "llvm.AMDISA.sge";
+ bld_base->op_actions[TGSI_OPCODE_SLT].fetch_args = radeon_llvm_fetch_args_2_reverse_soa;
+ bld_base->op_actions[TGSI_OPCODE_SLT].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SLT].intr_name = "llvm.AMDISA.sgt";
+ bld_base->op_actions[TGSI_OPCODE_SNE].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SNE].intr_name = "llvm.AMDISA.sne";
+ bld_base->op_actions[TGSI_OPCODE_SGT].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SGT].intr_name = "llvm.AMDISA.sgt";
+ bld_base->op_actions[TGSI_OPCODE_SIN].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.AMDISA.sin";
+ bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDISA.tex";
+ bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDISA.txb";
+ bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDISA.txd";
+ bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDISA.txl";
+ bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDISA.tex";
+ bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic;
+ bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDISA.trunc";
+
+ bld_base->rsq_action.emit = lp_build_tgsi_intrinsic;
+ bld_base->rsq_action.intr_name = "llvm.AMDISA.rsq";
+}
+
+/**
+ * Tear down the LLVM objects referenced by a radeon_llvm_context.
+ *
+ * Disposes the LLVM module and then the LLVM context stored in the
+ * gallivm state reachable through ctx->soa.bld_base.base. These are the
+ * only two objects released here; ctx itself is not freed.
+ *
+ * \param ctx  context whose LLVM module and LLVM context are destroyed
+ */
+void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
+{
+ LLVMModuleRef module = ctx->soa.bld_base.base.gallivm->module;
+ LLVMContextRef context = ctx->soa.bld_base.base.gallivm->context;
+
+ /* The module is disposed before the context; presumably the module was
+  * created inside that context -- confirm against the setup code. */
+ LLVMDisposeModule(module);
+ LLVMContextDispose(context);
+}