summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am2
-rw-r--r--configure.ac1
-rw-r--r--orc-float/Makefile.am16
-rw-r--r--orc-float/README76
-rw-r--r--orc-float/orcfloat-sse.c334
-rw-r--r--orc-float/orcfloat.c188
-rw-r--r--orc-float/orcfloat.h15
-rw-r--r--orc-test/orctest.c60
-rw-r--r--orc/orcexecutor.c6
-rw-r--r--orc/orcprogram-sse.c7
-rw-r--r--orc/orcrules-sse.c22
-rw-r--r--orc/x86.h1
12 files changed, 724 insertions, 4 deletions
diff --git a/Makefile.am b/Makefile.am
index e43dde4..65eeed8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
AUTOMAKE_OPTIONS = foreign
-SUBDIRS = orc orc-pixel orc-test testsuite examples doc tools
+SUBDIRS = orc orc-float orc-pixel orc-test testsuite examples doc tools
EXTRA_DIST = COPYING autogen.sh gtk-doc.make
diff --git a/configure.ac b/configure.ac
index cbb97f2..7ddb7bd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -118,6 +118,7 @@ AC_CONFIG_FILES([
Makefile
doc/Makefile
orc/Makefile
+orc-float/Makefile
orc-pixel/Makefile
orc-test/Makefile
testsuite/Makefile
diff --git a/orc-float/Makefile.am b/orc-float/Makefile.am
new file mode 100644
index 0000000..bd1cc67
--- /dev/null
+++ b/orc-float/Makefile.am
@@ -0,0 +1,16 @@
+
+pkgincludedir = $(includedir)/orc-@ORC_MAJORMINOR@/orc-float
+
+lib_LTLIBRARIES = liborc-float-@ORC_MAJORMINOR@.la
+
+liborc_float_@ORC_MAJORMINOR@_la_LIBADD = $(ORC_LIBS)
+liborc_float_@ORC_MAJORMINOR@_la_LDFLAGS = \
+ -no-undefined -export-symbols-regex 'orc_'
+liborc_float_@ORC_MAJORMINOR@_la_CFLAGS = $(ORC_CFLAGS)
+
+liborc_float_@ORC_MAJORMINOR@_la_SOURCES = \
+ orcfloat.c orcfloat-sse.c
+
+pkginclude_HEADERS = \
+ orcfloat.h
+
diff --git a/orc-float/README b/orc-float/README
new file mode 100644
index 0000000..359257c
--- /dev/null
+++ b/orc-float/README
@@ -0,0 +1,76 @@
+
+orc-float
+=========
+
+See the notes about orc-pixel.
+
+Specifying floating point parameters requires you to use the integer
+that corresponds to the bit pattern of the floating point number you
+want.
+
+The SSE backend generates nearly correct output; however, it is not
+bit-exact with the C code for some operations. Of course, it is
+notoriously difficult to get bit-exact floating point code in C.
+
+
+
+diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
+index d4220db..a754193 100644
+--- a/testsuite/Makefile.am
++++ b/testsuite/Makefile.am
+@@ -4,5 +4,6 @@ TESTS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile t
+ orcbin_PROGRAMS = test1 test2 test3 test4 test5 test_local_opcode_execution test_compile test_accsadubl test-schro
+
+ AM_CFLAGS = $(ORC_CFLAGS)
+-LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la
++LIBS = $(ORC_LIBS) $(top_builddir)/orc-test/liborc-test-0.3.la \
++ $(top_builddir)/orc-float/liborc-float-0.3.la
+
+diff --git a/testsuite/test_compile.c b/testsuite/test_compile.c
+index f95aeb5..a3f58e2 100644
+--- a/testsuite/test_compile.c
++++ b/testsuite/test_compile.c
+@@ -6,6 +6,7 @@
+
+ #include <orc/orc.h>
+ #include <orc-test/orctest.h>
++#include <orc-float/orcfloat.h>
+
+
+ int error = FALSE;
+@@ -22,8 +23,9 @@ main (int argc, char *argv[])
+
+ orc_init();
+ orc_test_init();
++ orc_float_init();
+
+- opcode_set = orc_opcode_set_get ("sys");
++ opcode_set = orc_opcode_set_get ("float");
+
+ for(i=0;i<opcode_set->n_opcodes;i++){
+ printf("/* %s %d,%d,%d %p */\n",
+diff --git a/testsuite/test_local_opcode_execution.c b/testsuite/test_local_opcode_execution.c
+index fc62a49..17230d6 100644
+--- a/testsuite/test_local_opcode_execution.c
++++ b/testsuite/test_local_opcode_execution.c
+@@ -5,6 +5,7 @@
+
+ #include <orc/orc.h>
+ #include <orc-test/orctest.h>
++#include <orc-float/orcfloat.h>
+
+
+ int error = FALSE;
+@@ -20,10 +21,11 @@ main (int argc, char *argv[])
+ int i;
+ OrcOpcodeSet *opcode_set;
+
++ orc_float_init();
+ orc_test_init();
+ orc_init();
+
+- opcode_set = orc_opcode_set_get ("sys");
++ opcode_set = orc_opcode_set_get ("float");
+
+ for(i=0;i<opcode_set->n_opcodes;i++){
+ printf("/* %s src %d,%d,%d */\n",
diff --git a/orc-float/orcfloat-sse.c b/orc-float/orcfloat-sse.c
new file mode 100644
index 0000000..0a433ea
--- /dev/null
+++ b/orc-float/orcfloat-sse.c
@@ -0,0 +1,334 @@
+
+#include <orc-float/orcfloat.h>
+#include <orc/orc.h>
+#include <orc/orcdebug.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <orc/x86.h>
+
+#define X86_MODRM(mod, rm, reg) ((((mod)&3)<<6)|(((rm)&7)<<0)|(((reg)&7)<<3))
+/* Build-time x86 mode selection: 64-bit values when HAVE_AMD64 is defined, 32-bit values otherwise. */
+#ifdef HAVE_AMD64
+int x86_64 = 1; /* tested by x86_emit_rex() before it emits any REX prefix */
+int x86_ptr_size = 8;
+int x86_exec_ptr = X86_EDI;
+#else
+int x86_64 = 0;
+int x86_ptr_size = 4;
+int x86_exec_ptr = X86_EBP;
+#endif /* NOTE(review): external linkage, and <orc/x86.h> is included above -- confirm these do not duplicate definitions in liborc */
+/* Name a register id: "xmm0".."xmm15" for X86_XMM0..X86_XMM0+15, "UNALLOCATED" for 0, "direct" for 1, else "ERROR". */
+const char *
+x86_get_regname_sse(int i)
+{
+  static const char *xmm_names[] = {
+    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+  };
+  if (i >= X86_XMM0 && i < X86_XMM0 + 16) {
+    return xmm_names[i - X86_XMM0];
+  }
+  if (i == 0) {
+    return "UNALLOCATED";
+  }
+  if (i == 1) {
+    return "direct";
+  }
+  return "ERROR";
+}
+/* Return the low four bits of register id i; x86_emit_rex() compares the result against 8. */
+int
+x86_get_regnum(int i)
+{
+  return i & 0x0f;
+}
+/* In 64-bit mode, emit a REX prefix if needed: 0x08 for size>=8, 0x4/0x2/0x1 when reg1/reg2/reg3 is 1 or numbers >= 8. NOTE(review): the `== 1` special case is not explained here -- confirm intent. */
+void
+x86_emit_rex (OrcCompiler *compiler, int size, int reg1, int reg2, int reg3)
+{
+  int prefix = 0x40;
+
+  if (!x86_64) return;
+
+  if (size >= 8) prefix |= 0x08;
+  if (reg1 == 1 || (x86_get_regnum(reg1)>=8)) prefix |= 0x4;
+  if (reg2 == 1 || (x86_get_regnum(reg2)>=8)) prefix |= 0x2;
+  if (reg3 == 1 || (x86_get_regnum(reg3)>=8)) prefix |= 0x1;
+
+  if (prefix != 0x40) *compiler->codeptr++ = prefix;
+}
+/* Emit a register-direct ModRM byte (mod=3): per X86_MODRM, reg1 lands in the r/m field and reg2 in the reg field. */
+void
+x86_emit_modrm_reg (OrcCompiler *compiler, int reg1, int reg2)
+{
+ *compiler->codeptr++ = X86_MODRM(3, reg1, reg2);
+}
+/* Emit "F2 [REX] 0F <code> ModRM" for a register-register SSE instruction; dest is encoded in ModRM.reg, src in ModRM.rm. */
+void
+sse_emit_f20f (OrcCompiler *p, const char *insn_name, int code,
+    int src, int dest)
+{
+  ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
+      x86_get_regname_sse(src),
+      x86_get_regname_sse(dest));
+  *p->codeptr++ = 0xf2;
+  x86_emit_rex (p, 0, dest, 0, src); /* 0x4 (REX.R) must follow ModRM.reg = dest, 0x1 (REX.B) ModRM.rm = src */
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = code;
+  x86_emit_modrm_reg (p, src, dest);
+}
+/* Emit "F3 [REX] 0F <code> ModRM" for a register-register SSE instruction; dest is encoded in ModRM.reg, src in ModRM.rm. */
+void
+sse_emit_f30f (OrcCompiler *p, const char *insn_name, int code,
+    int src, int dest)
+{
+  ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
+      x86_get_regname_sse(src),
+      x86_get_regname_sse(dest));
+  *p->codeptr++ = 0xf3;
+  x86_emit_rex (p, 0, dest, 0, src); /* 0x4 (REX.R) must follow ModRM.reg = dest, 0x1 (REX.B) ModRM.rm = src */
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = code;
+  x86_emit_modrm_reg (p, src, dest);
+}
+/* Emit "66 [REX] 0F <code> ModRM" for a register-register SSE instruction; dest is encoded in ModRM.reg, src in ModRM.rm. */
+void
+sse_emit_660f (OrcCompiler *p, const char *insn_name, int code,
+    int src, int dest)
+{
+  ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
+      x86_get_regname_sse(src),
+      x86_get_regname_sse(dest));
+  *p->codeptr++ = 0x66;
+  x86_emit_rex (p, 0, dest, 0, src); /* 0x4 (REX.R) must follow ModRM.reg = dest, 0x1 (REX.B) ModRM.rm = src */
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = code;
+  x86_emit_modrm_reg (p, src, dest);
+}
+/* Emit "[REX] 0F <code> ModRM" (no mandatory prefix) for a register-register SSE instruction; dest is in ModRM.reg, src in ModRM.rm. */
+void
+sse_emit_0f (OrcCompiler *p, const char *insn_name, int code,
+    int src, int dest)
+{
+  ORC_ASM_CODE(p," %s %%%s, %%%s\n", insn_name,
+      x86_get_regname_sse(src),
+      x86_get_regname_sse(dest));
+  x86_emit_rex (p, 0, dest, 0, src); /* 0x4 (REX.R) must follow ModRM.reg = dest, 0x1 (REX.B) ModRM.rm = src */
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = code;
+  x86_emit_modrm_reg (p, src, dest);
+}
+
+/* UNARY(opcode, insn_name, code): define sse_rule_<opcode>, emitting 0F <code> from src_args[0]'s register to dest_args[0]'s register. */
+#define UNARY(opcode,insn_name,code) \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ sse_emit_0f (p, insn_name, code, \
+ p->vars[insn->src_args[0]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+/* BINARY(opcode, insn_name, code): define sse_rule_<opcode>, emitting 0F <code> with src_args[1] as source; src_args[0] is never read -- presumably it already shares dest's register, TODO confirm. */
+#define BINARY(opcode,insn_name,code) \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ sse_emit_0f (p, insn_name, code, \
+ p->vars[insn->src_args[1]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+
+/* Packed single-precision rules; the hex value is the opcode byte that follows 0F. */
+BINARY(addf, "addps", 0x58)
+BINARY(subf, "subps", 0x5c)
+BINARY(mulf, "mulps", 0x59)
+BINARY(divf, "divps", 0x5e)
+BINARY(maxf, "maxps", 0x5f)
+BINARY(minf, "minps", 0x5d)
+UNARY(invf, "rcpps", 0x53)
+UNARY(sqrtf, "sqrtps", 0x51)
+UNARY(invsqrtf, "rsqrtps", 0x52)
+/* cmpeqf: emit cmpps (0F C2) followed by immediate predicate 0 (equal). */
+static void
+sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_0f (p, "cmpeqps", 0xc2, src, dest);
+  *p->codeptr++ = 0x00; /* comparison predicate immediate */
+}
+/* cmpltf: emit cmpps (0F C2) followed by immediate predicate 1 (less-than). */
+static void
+sse_rule_cmpltf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_0f (p, "cmpltps", 0xc2, src, dest);
+  *p->codeptr++ = 0x01; /* comparison predicate immediate */
+}
+/* cmplef: emit cmpps (0F C2) followed by immediate predicate 2 (less-or-equal). */
+static void
+sse_rule_cmplef (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_0f (p, "cmpleps", 0xc2, src, dest);
+  *p->codeptr++ = 0x02; /* comparison predicate immediate */
+}
+/* convfl: packed float -> 32-bit integer via cvtps2dq (66 0F 5B). */
+static void
+sse_rule_convfl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_660f (p, "cvtps2dq", 0x5b, src, dest);
+}
+/* convlf: packed 32-bit integer -> float via cvtdq2ps (0F 5B). */
+static void
+sse_rule_convlf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_0f (p, "cvtdq2ps", 0x5b, src, dest);
+}
+/* UNARY_66(opcode, insn_name, code): like UNARY but emits the 66-prefixed form via sse_emit_660f. */
+#define UNARY_66(opcode,insn_name,code) \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ sse_emit_660f (p, insn_name, code, \
+ p->vars[insn->src_args[0]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+/* BINARY_66(opcode, insn_name, code): like BINARY but emits the 66-prefixed form via sse_emit_660f; only src_args[1] is read as the source. */
+#define BINARY_66(opcode,insn_name,code) \
+static void \
+sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
+{ \
+ sse_emit_660f (p, insn_name, code, \
+ p->vars[insn->src_args[1]].alloc, \
+ p->vars[insn->dest_args[0]].alloc); \
+}
+/* Packed double-precision rules; the hex value is the opcode byte that follows 66 0F. */
+BINARY_66(addg, "addpd", 0x58)
+BINARY_66(subg, "subpd", 0x5c)
+BINARY_66(mulg, "mulpd", 0x59)
+BINARY_66(divg, "divpd", 0x5e)
+BINARY_66(maxg, "maxpd", 0x5f)
+BINARY_66(ming, "minpd", 0x5d)
+#if 0
+/* Disabled: SSE2 has no rcppd or rsqrtpd instruction (sqrtpd, 66 0F 51, does exist) */
+UNARY_66(invg, "rcppd", 0x53)
+UNARY_66(sqrtg, "sqrtpd", 0x51)
+UNARY_66(invsqrtg, "rsqrtpd", 0x52)
+#endif
+/* cmpeqg: emit cmppd (66 0F C2) followed by immediate predicate 0 (equal). */
+static void
+sse_rule_cmpeqg (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_660f (p, "cmpeqpd", 0xc2, src, dest);
+  *p->codeptr++ = 0x00; /* comparison predicate immediate */
+}
+/* cmpltg: emit cmppd (66 0F C2) followed by immediate predicate 1 (less-than). */
+static void
+sse_rule_cmpltg (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_660f (p, "cmpltpd", 0xc2, src, dest);
+  *p->codeptr++ = 0x01; /* comparison predicate immediate */
+}
+/* cmpleg: emit cmppd (66 0F C2) followed by immediate predicate 2 (less-or-equal). */
+static void
+sse_rule_cmpleg (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[1]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_660f (p, "cmplepd", 0xc2, src, dest);
+  *p->codeptr++ = 0x02; /* comparison predicate immediate */
+}
+/* convgl: packed double -> 32-bit integer via cvtpd2dq (F2 0F E6). */
+static void
+sse_rule_convgl (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_f20f (p, "cvtpd2dq", 0xe6, src, dest);
+}
+/* convlg: packed 32-bit integer -> double via cvtdq2pd (F3 0F E6). */
+static void
+sse_rule_convlg (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_f30f (p, "cvtdq2pd", 0xe6, src, dest);
+}
+/* convgf: packed double -> float via cvtpd2ps (66 0F 5A). */
+static void
+sse_rule_convgf (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_660f (p, "cvtpd2ps", 0x5a, src, dest);
+}
+/* convfg: packed float -> double via cvtps2pd (0F 5A). */
+static void
+sse_rule_convfg (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  int src = p->vars[insn->src_args[0]].alloc;
+  int dest = p->vars[insn->dest_args[0]].alloc;
+  sse_emit_0f (p, "cvtps2pd", 0x5a, src, dest);
+}
+
+/* Attach the SSE rules above to the "float" opcode set for the "sse" target; takes no arguments, matching its declaration and call in orcfloat.c. */
+void
+orc_float_sse_register_rules (void)
+{
+  OrcRuleSet *rule_set;
+
+  rule_set = orc_rule_set_new (orc_opcode_set_get("float"),
+      orc_target_get_by_name ("sse"));
+
+  orc_rule_register (rule_set, "addf", sse_rule_addf, NULL);
+  orc_rule_register (rule_set, "subf", sse_rule_subf, NULL);
+  orc_rule_register (rule_set, "mulf", sse_rule_mulf, NULL);
+  orc_rule_register (rule_set, "divf", sse_rule_divf, NULL);
+  orc_rule_register (rule_set, "minf", sse_rule_minf, NULL);
+  orc_rule_register (rule_set, "maxf", sse_rule_maxf, NULL);
+  orc_rule_register (rule_set, "invf", sse_rule_invf, NULL);
+  orc_rule_register (rule_set, "sqrtf", sse_rule_sqrtf, NULL);
+  orc_rule_register (rule_set, "invsqrtf", sse_rule_invsqrtf, NULL);
+  orc_rule_register (rule_set, "cmpeqf", sse_rule_cmpeqf, NULL);
+  orc_rule_register (rule_set, "cmpltf", sse_rule_cmpltf, NULL);
+  orc_rule_register (rule_set, "cmplef", sse_rule_cmplef, NULL);
+  orc_rule_register (rule_set, "convfl", sse_rule_convfl, NULL);
+  orc_rule_register (rule_set, "convlf", sse_rule_convlf, NULL);
+
+  orc_rule_register (rule_set, "addg", sse_rule_addg, NULL);
+  orc_rule_register (rule_set, "subg", sse_rule_subg, NULL);
+  orc_rule_register (rule_set, "mulg", sse_rule_mulg, NULL);
+  orc_rule_register (rule_set, "divg", sse_rule_divg, NULL);
+  orc_rule_register (rule_set, "ming", sse_rule_ming, NULL);
+  orc_rule_register (rule_set, "maxg", sse_rule_maxg, NULL);
+#if 0
+/* Disabled: SSE2 has no rcppd or rsqrtpd instruction (sqrtpd does exist) */
+  orc_rule_register (rule_set, "invg", sse_rule_invg, NULL);
+  orc_rule_register (rule_set, "sqrtg", sse_rule_sqrtg, NULL);
+  orc_rule_register (rule_set, "invsqrtg", sse_rule_invsqrtg, NULL);
+#endif
+  orc_rule_register (rule_set, "cmpeqg", sse_rule_cmpeqg, NULL);
+  orc_rule_register (rule_set, "cmpltg", sse_rule_cmpltg, NULL);
+  orc_rule_register (rule_set, "cmpleg", sse_rule_cmpleg, NULL);
+  orc_rule_register (rule_set, "convgl", sse_rule_convgl, NULL);
+  orc_rule_register (rule_set, "convlg", sse_rule_convlg, NULL);
+
+  orc_rule_register (rule_set, "convgf", sse_rule_convgf, NULL);
+  orc_rule_register (rule_set, "convfg", sse_rule_convfg, NULL);
+}
+
+
diff --git a/orc-float/orcfloat.c b/orc-float/orcfloat.c
new file mode 100644
index 0000000..3b66c6b
--- /dev/null
+++ b/orc-float/orcfloat.c
@@ -0,0 +1,188 @@
+
+#include <orc-float/orcfloat.h>
+#include <orc/orc.h>
+#include <orc/orcdebug.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+static OrcStaticOpcode opcodes[];
+
+void orc_float_sse_register_rules (void);
+/* Initialise orc, register the static "float" opcode table, then register the SSE rules for it. */
+void
+orc_float_init (void)
+{
+ orc_init ();
+
+ orc_opcode_register_static (opcodes, "float");
+ orc_float_sse_register_rules (); /* NOTE(review): unconditional -- confirm an "sse" target exists on every platform this is built for */
+}
+/* Access the storage at addr as a float. NOTE(review): pointer-cast type punning -- confirm this is acceptable under the build's aliasing rules. */
+#define ORC_FLOAT_READ(addr) (*(float *)(addr))
+#define ORC_FLOAT_WRITE(addr,value) do{ (*(float *)(addr)) = (value); }while(0)
+/* UNARY_F(name, code): define emulation function `name`; `code` is a float expression in `a` (src_values[0]), stored as a float in dest_values[0]. */
+#define UNARY_F(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ float a = ORC_FLOAT_READ(&ex->src_values[0]); \
+ ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \
+}
+/* BINARY_F(name, code): define emulation function `name`; `code` is a float expression in `a` and `b` (src_values[0], [1]), stored as a float in dest_values[0]. */
+#define BINARY_F(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ float a = ORC_FLOAT_READ(&ex->src_values[0]); \
+ float b = ORC_FLOAT_READ(&ex->src_values[1]); \
+ ORC_FLOAT_WRITE(&ex->dest_values[0], code ); \
+}
+/* BINARY_FL(name, code): like BINARY_F, but `code` is assigned directly to dest_values[0] with no float reinterpretation (used for comparison masks). */
+#define BINARY_FL(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ float a = ORC_FLOAT_READ(&ex->src_values[0]); \
+ float b = ORC_FLOAT_READ(&ex->src_values[1]); \
+ ex->dest_values[0] = code ; \
+}
+/* Single-precision C emulation functions; comparisons yield ~0 when true and 0 when false. */
+BINARY_F(addf, a + b)
+BINARY_F(subf, a - b)
+BINARY_F(mulf, a * b)
+BINARY_F(divf, a / b)
+UNARY_F(invf, (1.0f/a) )
+UNARY_F(orc_sqrtf, sqrtf(a) )
+BINARY_F(maxf, (a>b) ? a : b)
+BINARY_F(minf, (a<b) ? a : b)
+UNARY_F(invsqrtf, 1.0f/sqrtf(a))
+
+BINARY_FL(cmpeqf, (a == b) ? (~0) : 0)
+BINARY_FL(cmpltf, (a < b) ? (~0) : 0)
+BINARY_FL(cmplef, (a <= b) ? (~0) : 0)
+/* convfl emulation: store the float in src_values[0] into dest_values[0] using C's implicit conversion. NOTE(review): the SSE rule uses cvtps2dq -- confirm rounding/out-of-range behaviour is meant to differ. */
+static void
+convfl (OrcOpcodeExecutor *ex, void *user)
+{
+ ex->dest_values[0] = ORC_FLOAT_READ(&ex->src_values[0]);
+}
+/* convlf emulation: convert the value of src_values[0] to float and store its bit pattern in dest_values[0]. */
+static void
+convlf (OrcOpcodeExecutor *ex, void *user)
+{
+ ORC_FLOAT_WRITE(&ex->dest_values[0], ex->src_values[0]);
+}
+/* Access the storage at addr as a double. NOTE(review): reads/writes sizeof(double) bytes -- confirm src_values/dest_values elements are at least that wide. */
+#define ORC_DOUBLE_READ(addr) (*(double *)(addr))
+#define ORC_DOUBLE_WRITE(addr,value) do{ (*(double *)(addr)) = (value); }while(0)
+/* UNARY_G(name, code): define emulation function `name`; `code` is a double expression in `a` (src_values[0]), stored as a double in dest_values[0]. */
+#define UNARY_G(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ double a = ORC_DOUBLE_READ(&ex->src_values[0]); \
+ ORC_DOUBLE_WRITE(&ex->dest_values[0], code ); \
+}
+/* BINARY_G(name, code): define emulation function `name`; `code` is a double expression in `a` and `b` (src_values[0], [1]), stored as a double in dest_values[0]. */
+#define BINARY_G(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ double a = ORC_DOUBLE_READ(&ex->src_values[0]); \
+ double b = ORC_DOUBLE_READ(&ex->src_values[1]); \
+ ORC_DOUBLE_WRITE(&ex->dest_values[0], code ); \
+}
+/* BINARY_GQ(name, code): like BINARY_G, but `code` is assigned directly to dest_values[0]. NOTE(review): callers pass the int expression (~0) -- confirm it fills all 64 bits of an 8-byte destination. */
+#define BINARY_GQ(name,code) \
+static void \
+name (OrcOpcodeExecutor *ex, void *user) \
+{ \
+ double a = ORC_DOUBLE_READ(&ex->src_values[0]); \
+ double b = ORC_DOUBLE_READ(&ex->src_values[1]); \
+ ex->dest_values[0] = code ; \
+}
+/* Double-precision C emulation functions; comparisons yield ~0 when true and 0 when false. */
+BINARY_G(addg, a + b)
+BINARY_G(subg, a - b)
+BINARY_G(mulg, a * b)
+BINARY_G(divg, a / b)
+UNARY_G(invg, (1.0f/a) )
+UNARY_G(sqrtg, sqrt(a) )
+BINARY_G(maxg, (a>b) ? a : b)
+BINARY_G(ming, (a<b) ? a : b)
+UNARY_G(invsqrtg, 1.0f/sqrt(a))
+
+BINARY_GQ(cmpeqg, (a == b) ? (~0) : 0)
+BINARY_GQ(cmpltg, (a < b) ? (~0) : 0)
+BINARY_GQ(cmpleg, (a <= b) ? (~0) : 0)
+/* convgl emulation: store the double in src_values[0] into dest_values[0] using C's implicit conversion. NOTE(review): the SSE rule uses cvtpd2dq -- confirm rounding/out-of-range behaviour is meant to differ. */
+static void
+convgl (OrcOpcodeExecutor *ex, void *user)
+{
+ ex->dest_values[0] = ORC_DOUBLE_READ(&ex->src_values[0]);
+}
+/* convlg emulation: convert the value of src_values[0] to double and store its bit pattern in dest_values[0]. */
+static void
+convlg (OrcOpcodeExecutor *ex, void *user)
+{
+ ORC_DOUBLE_WRITE(&ex->dest_values[0], ex->src_values[0]);
+}
+/* convgf emulation: read src_values[0] as a double, narrow it to float, and store the float bit pattern in dest_values[0]. */
+static void
+convgf (OrcOpcodeExecutor *ex, void *user)
+{
+ ORC_FLOAT_WRITE(&ex->dest_values[0], ORC_DOUBLE_READ(&ex->src_values[0]));
+}
+/* convfg emulation: read src_values[0] as a float, widen it to double, and store the double bit pattern in dest_values[0]. */
+static void
+convfg (OrcOpcodeExecutor *ex, void *user)
+{
+ ORC_DOUBLE_WRITE(&ex->dest_values[0], ORC_FLOAT_READ(&ex->src_values[0]));
+}
+
+
+/* "float" opcode table: { name, emulation function, NULL, 0, { dest sizes in bytes }, { source sizes in bytes } }, ended by an empty-name entry. */
+static OrcStaticOpcode opcodes[] = {
+ { "addf", addf, NULL, 0, { 4 }, { 4, 4 } },
+ { "subf", subf, NULL, 0, { 4 }, { 4, 4 } },
+ { "mulf", mulf, NULL, 0, { 4 }, { 4, 4 } },
+ { "divf", divf, NULL, 0, { 4 }, { 4, 4 } },
+ { "invf", invf, NULL, 0, { 4 }, { 4 } },
+ { "sqrtf", orc_sqrtf, NULL, 0, { 4 }, { 4 } },
+ { "maxf", maxf, NULL, 0, { 4 }, { 4, 4 } },
+ { "minf", minf, NULL, 0, { 4 }, { 4, 4 } },
+ { "invsqrtf", invsqrtf, NULL, 0, { 4 }, { 4 } },
+
+ { "cmpeqf", cmpeqf, NULL, 0, { 4 }, { 4, 4 } },
+ { "cmpltf", cmpltf, NULL, 0, { 4 }, { 4, 4 } },
+ { "cmplef", cmplef, NULL, 0, { 4 }, { 4, 4 } },
+
+ { "convfl", convfl, NULL, 0, { 4 }, { 4 } },
+ { "convlf", convlf, NULL, 0, { 4 }, { 4 } },
+
+ { "addg", addg, NULL, 0, { 8 }, { 8, 8 } },
+ { "subg", subg, NULL, 0, { 8 }, { 8, 8 } },
+ { "mulg", mulg, NULL, 0, { 8 }, { 8, 8 } },
+ { "divg", divg, NULL, 0, { 8 }, { 8, 8 } },
+ { "invg", invg, NULL, 0, { 8 }, { 8 } },
+ { "sqrtg", sqrtg, NULL, 0, { 8 }, { 8 } },
+ { "maxg", maxg, NULL, 0, { 8 }, { 8, 8 } },
+ { "ming", ming, NULL, 0, { 8 }, { 8, 8 } },
+ { "invsqrtg", invsqrtg, NULL, 0, { 8 }, { 8 } },
+
+ { "cmpeqg", cmpeqg, NULL, 0, { 8 }, { 8, 8 } },
+ { "cmpltg", cmpltg, NULL, 0, { 8 }, { 8, 8 } },
+ { "cmpleg", cmpleg, NULL, 0, { 8 }, { 8, 8 } },
+
+ { "convgl", convgl, NULL, 0, { 4 }, { 8 } },
+ { "convlg", convlg, NULL, 0, { 8 }, { 4 } },
+
+ { "convgf", convgf, NULL, 0, { 4 }, { 8 } },
+ { "convfg", convfg, NULL, 0, { 8 }, { 4 } },
+
+ { "" }
+};
+
diff --git a/orc-float/orcfloat.h b/orc-float/orcfloat.h
new file mode 100644
index 0000000..8db126f
--- /dev/null
+++ b/orc-float/orcfloat.h
@@ -0,0 +1,15 @@
+
+#ifndef _ORC_FLOAT_FLOAT_H_
+#define _ORC_FLOAT_FLOAT_H_
+
+#include <orc/orc.h>
+#include <orc/orcutils.h>
+
+ORC_BEGIN_DECLS
+
+void orc_float_init (void);
+
+ORC_END_DECLS
+
+#endif
+
diff --git a/orc-test/orctest.c b/orc-test/orctest.c
index fb1028e..88e4564 100644
--- a/orc-test/orctest.c
+++ b/orc-test/orctest.c
@@ -82,11 +82,20 @@ orc_test_gcc_compile (OrcProgram *p)
void
orc_test_random_bits (void *data, int n_bytes)
{ /* Fill the n_bytes bytes at data, one byte per rand() call. */
+#if 1
uint8_t *d = data;
int i;
for(i=0;i<n_bytes;i++){
d[i] = rand();
}
+#endif
+#if 0 /* disabled alternative: fill n_bytes/4 floats with ((rand() & 0xffff)-32768)*0.01 */
+ float *d = data;
+ int i;
+ for(i=0;i<n_bytes/4;i++){
+ d[i] = ((rand() & 0xffff)-32768)*0.01;
+ }
+#endif
}
int
@@ -95,25 +104,32 @@ print_array_val_signed (void *array, int size, int i)
switch (size) {
case 1:
{
- uint8_t *a = array;
+ int8_t *a = array;
printf(" %4d", a[i]);
return a[i];
}
break;
case 2:
{
- uint16_t *a = array;
+ int16_t *a = array;
printf(" %5d", a[i]);
return a[i];
}
break;
case 4:
{
- uint32_t *a = array;
+ int32_t *a = array;
printf(" %10d", a[i]);
return a[i];
}
break;
+ case 8:
+ {
+ int64_t *a = array;
+ printf(" %20lld", a[i]);
+ return a[i];
+ }
+ break;
default:
return -1;
}
@@ -144,6 +160,13 @@ print_array_val_unsigned (void *array, int size, int i)
return a[i];
}
break;
+ case 8:
+ {
+ uint64_t *a = array;
+ printf(" %20llu", a[i]);
+ return a[i];
+ }
+ break;
default:
return -1;
}
@@ -174,7 +197,38 @@ print_array_val_hex (void *array, int size, int i)
return a[i];
}
break;
+ case 8:
+ {
+ uint64_t *a = array;
+ printf(" %16llx", a[i]);
+ return a[i];
+ }
+ break;
+ default:
+ return -1;
+ }
+}
+/* Print element i of array with " %g", as float for size 4 or double for size 8, and return it; any other size prints " ERROR" and returns -1. */
+float
+print_array_val_float (void *array, int size, int i)
+{
+ switch (size) {
+ case 4:
+ {
+ float *a = array;
+ printf(" %g", a[i]);
+ return a[i];
+ }
+ break;
+ case 8:
+ {
+ double *a = array;
+ printf(" %g", a[i]);
+ return a[i]; /* narrowed to float by the function's return type */
+ }
+ break;
default:
+ printf(" ERROR");
return -1;
}
}
diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c
index 8c643c3..74c928e 100644
--- a/orc/orcexecutor.c
+++ b/orc/orcexecutor.c
@@ -141,6 +141,9 @@ orc_executor_emulate (OrcExecutor *ex)
case 4:
opcode_ex.src_values[k] = *(int32_t *)ptr;
break;
+ case 8:
+ opcode_ex.src_values[k] = *(int64_t *)ptr;
+ break;
default:
ORC_ERROR("unhandled size %d", program->vars[insn->src_args[k]].size);
}
@@ -172,6 +175,9 @@ orc_executor_emulate (OrcExecutor *ex)
case 4:
*(int32_t *)ptr = opcode_ex.dest_values[k];
break;
+ case 8:
+ *(int64_t *)ptr = opcode_ex.dest_values[k];
+ break;
default:
ORC_ERROR("unhandled size %d", program->vars[insn->dest_args[k]].size);
}
diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c
index 01ae501..744f500 100644
--- a/orc/orcprogram-sse.c
+++ b/orc/orcprogram-sse.c
@@ -103,6 +103,9 @@ orc_compiler_sse_init (OrcCompiler *compiler)
case 4:
compiler->loop_shift = 2;
break;
+ case 8:
+ compiler->loop_shift = 1;
+ break;
default:
ORC_ERROR("unhandled max var size %d",
orc_program_get_max_var_size (compiler->program));
@@ -228,6 +231,8 @@ sse_load_constants (OrcCompiler *compiler)
sse_emit_loadpw (compiler, compiler->vars[i].alloc, i);
} else if (compiler->vars[i].size == 4) {
sse_emit_loadpl (compiler, compiler->vars[i].alloc, i);
+ } else if (compiler->vars[i].size == 8) {
+ sse_emit_loadpq (compiler, compiler->vars[i].alloc, i);
} else {
ORC_PROGRAM_ERROR(compiler, "unimplemented");
}
@@ -370,6 +375,8 @@ get_shift (int size)
return 1;
case 4:
return 2;
+ case 8:
+ return 3;
default:
ORC_ERROR("bad size %d", size);
}
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index e1b4d75..0f15980 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -191,6 +191,28 @@ sse_emit_loadpl (OrcCompiler *p, int reg, int param)
*p->codeptr++ = 0x00;
}
+/* Load the 8 bytes at OrcExecutor.params[param] into SSE register reg and replicate them into both 64-bit lanes. */
+void
+sse_emit_loadpq (OrcCompiler *p, int reg, int param)
+{
+  int offset = (int)ORC_STRUCT_OFFSET(OrcExecutor, params[param]);
+  ORC_ASM_CODE(p," movq %d(%%%s), %%%s\n", offset,
+      x86_get_regname_ptr(x86_exec_ptr), x86_get_regname_sse(reg));
+  *p->codeptr++ = 0xf3;
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = 0x7e;
+  x86_emit_modrm_memoffset (p, reg, offset, x86_exec_ptr);
+
+  /* imm8 0x44 selects dwords 0,1,0,1: the low qword is copied to both halves ($0 would repeat only dword 0) */
+  ORC_ASM_CODE(p," pshufd $0x44, %%%s, %%%s\n", x86_get_regname_sse(reg),
+      x86_get_regname_sse(reg));
+  *p->codeptr++ = 0x66;
+  *p->codeptr++ = 0x0f;
+  *p->codeptr++ = 0x70;
+  x86_emit_modrm_reg (p, reg, reg);
+  *p->codeptr++ = 0x44;
+}
+
static void
sse_rule_copyx (OrcCompiler *p, void *user, OrcInstruction *insn)
{
diff --git a/orc/x86.h b/orc/x86.h
index 2090911..5b3df04 100644
--- a/orc/x86.h
+++ b/orc/x86.h
@@ -68,6 +68,7 @@ void sse_emit_loadil (OrcCompiler *p, int reg, int value);
void sse_emit_loadpb (OrcCompiler *p, int reg, int value);
void sse_emit_loadpw (OrcCompiler *p, int reg, int value);
void sse_emit_loadpl (OrcCompiler *p, int reg, int value);
+void sse_emit_loadpq (OrcCompiler *p, int reg, int value);
void sse_emit_660f (OrcCompiler *p, const char *insn_name, int code,
int src, int dest);