summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTapani Pälli <tapani.palli@intel.com>2014-12-31 11:14:02 +0200
committerTapani Pälli <tapani.palli@intel.com>2015-01-13 15:01:45 +0200
commit2e3909057e269b3466ceef8d2573abf82078e5c6 (patch)
treea7e5c88e39c86210487a5bedf197d1bce6344b1d
parent0602a7efefebe4da07bc3911aad3b9229b946d70 (diff)
glsl: dfloor_to_arith WIPfp64_floor
status: works for positive values, problems with negative ones. Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-rw-r--r--src/glsl/ir_optimization.h1
-rw-r--r--src/glsl/lower_instructions.cpp127
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp3
3 files changed, 130 insertions, 1 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 180ae6f0aaf..8f0f024a5bc 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -44,6 +44,7 @@
#define DOPS_TO_DFRAC 0x1000
#define DFREXP_DLDEXP_TO_ARITH 0x2000
#define DSQRT_TO_FSQRT 0x4000
+#define DFLOOR_TO_ARITH 0x8000
/**
* \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 7868be51cf6..febc87b424c 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -45,6 +45,7 @@
* - SAT_TO_CLAMP
* - DOPS_TO_DFRAC
* - DSQRT_TO_FSQRT
+ * - DFLOOR_TO_ARITH
*
* SUB_TO_ADD_NEG:
* ---------------
@@ -125,6 +126,10 @@
* --------------
* Splits double square root into exponent division and single precision
* square root.
+ *
+ * DFLOOR_TO_ARITH
+ * ---------------
+ * Lowers double floor to integer arithmetic on the exponent and mantissa bits of the unpacked value (work in progress).
*/
#include "main/core.h" /* for M_LOG2E */
@@ -170,6 +175,7 @@ private:
void double_lrp(ir_expression *);
void dceil_to_dfrac(ir_expression *);
void dfloor_to_dfrac(ir_expression *);
+ void dfloor_to_arith(ir_expression *);
void dround_even_to_dfrac(ir_expression *);
void dtrunc_to_dfrac(ir_expression *);
void dsign_to_csel(ir_expression *);
@@ -1095,6 +1101,120 @@ lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
ir->operands[1] = new(ir) ir_dereference_variable(t2);
}
+/**
+ * Lower a double-precision ir_unop_floor into integer arithmetic (WIP).
+ *
+ * For each vector element a scalar double temporary named "result" is
+ * filled in by a sequence of conditional assignments. The generated
+ * instructions are inserted before base_ir, and ir itself is rewritten in
+ * place into an ir_quadop_vector whose operands are those temporaries.
+ * Sets this->progress.
+ *
+ * ir is the floor expression to rewrite; ir->operands[0] is its input.
+ *
+ * NOTE(review): the commit message says this works for positive values and
+ * has problems with negative ones. Points visible in this body that look
+ * suspect and should be confirmed before relying on it:
+ *  - only the unpack step swizzles out the current element; every
+ *    comparison is emitted against the whole operand;
+ *  - the negative branch subtracts 1.0 unconditionally, so an already
+ *    integral negative input (e.g. -2.0) would yield -3.0, and it emits a
+ *    fresh ir_unop_floor expression;
+ *  - the "exponent < 0" and "exponent == 0" assignments are not guarded by
+ *    the sign of the value, so they can overwrite the negative result;
+ *  - only the high 32-bit word of the double is examined (20 mantissa
+ *    bits), and 20u - exponent is unsigned, so it wraps for exponents
+ *    above 20;
+ *  - double_zero, double_one, ir->operands[0] and result_dref are each
+ *    referenced in more than one place without clone() -- presumably IR
+ *    nodes must not be shared between trees; verify.
+ */
+void
+lower_instructions_visitor::dfloor_to_arith(ir_expression *ir)
+{
+ ir_instruction &i = *base_ir;
+ exec_list instructions;
+ ir_factory factory;
+ factory.instructions = &instructions;
+ factory.mem_ctx = ir;
+
+ const unsigned vec_elem = ir->type->vector_elements;
+ ir_rvalue *results[4] = {NULL};
+
+ ir_constant *double_zero = new(ir) ir_constant(0.0, vec_elem);
+ ir_constant *double_one = new(ir) ir_constant(1.0, vec_elem);
+
+ for (unsigned elem = 0; elem < vec_elem; elem++) {
+
+ ir_variable *result =
+ factory.make_temp(glsl_type::double_type, "result");
+
+ ir_dereference *result_dref = new(ir) ir_dereference_variable(result);
+
+ /* result = 0.0 when the operand compares equal to 0.0. The comparison
+  * uses the whole operand, not the element selected by elem. */
+ factory.emit(if_tree(equal(ir->operands[0]->clone(ir, NULL), double_zero),
+ assign(result, double_zero->clone(ir, NULL))));
+
+ /* if (value < 0.0): result = -floor(abs(x)) - 1.0.
+  * NOTE(review): the - 1.0 is applied even when abs(x) is already
+  * integral; confirm whether that is intended. */
+ factory.emit(if_tree(less(ir->operands[0]->clone(ir, NULL),
+ double_zero->clone(ir, NULL)),
+ assign(result,
+ sub(neg(expr(ir_unop_floor,
+ abs(ir->operands[0]->clone(ir, NULL)))), double_one))));
+
+ /* Positive-value path. The steps below are emitted unconditionally;
+  * only the final assignment is guarded by value > 0.0. First split
+  * element elem of the operand into two 32-bit words. */
+ ir_variable *unpacked =
+ factory.make_temp(glsl_type::uvec2_type, "unpacked");
+
+ factory.emit(assign(unpacked,
+ expr(ir_unop_unpack_double_2x32,
+ swizzle(ir->operands[0]->clone(ir, NULL), elem, 1))));
+
+ ir_rvalue *hi = swizzle_y(unpacked);
+
+ /* Temporaries for the exponent (unsigned and signed copies) and the
+  * mantissa bits taken from hi, the .y word of the unpacked value. */
+ ir_variable *exponent =
+ factory.make_temp(glsl_type::uint_type, "exponent");
+
+ ir_variable *iexp =
+ factory.make_temp(glsl_type::int_type, "iexp");
+
+ ir_variable *mantissa =
+ factory.make_temp(glsl_type::uint_type, "mantissa");
+
+ /* exponent = (hi >> 20) & 0x7ff, i.e. the 11 bits above the low 20 */
+ factory.emit(assign(exponent,
+ bit_and(rshift(hi, factory.constant(20u)),
+ factory.constant(0x7ffu))));
+
+ /* mantissa = hi & 0xfffff (the low 20 bits of hi; the .x word of the
+  * unpacked value is never read) */
+ factory.emit(assign(mantissa,
+ bit_and(hi->clone(ir, NULL), factory.constant(0xfffffu))));
+
+ /* Remove the double exponent bias (1023): iexp holds the signed result,
+  * exponent is overwritten with the unsigned result. */
+ factory.emit(assign(iexp, sub(expr(ir_unop_u2i, exponent), factory.constant(1023))));
+ factory.emit(assign(exponent, sub(exponent, factory.constant(1023u))));
+
+ /* if iexp < 0: result = 0.0. Intended for 0 < value < 1, but the emitted
+  * condition checks only iexp, not the sign of the value. */
+ factory.emit(if_tree(less(iexp, factory.constant(0)),
+ assign(result, double_zero->clone(ir, NULL))));
+
+ /* if iexp == 0: result = 1.0. Intended for 1 <= value < 2, but again
+  * only iexp is tested. */
+ factory.emit(if_tree(equal(iexp, factory.constant(0)),
+ assign(result, double_one->clone(ir, NULL))));
+
+ /* nmb = 20 - exponent: how far to shift the 20 mantissa bits right so
+  * that only the integer part remains. Unsigned arithmetic. */
+ ir_variable *nmb =
+ factory.make_temp(glsl_type::uint_type, "nmb");
+ factory.emit(assign(nmb, sub(factory.constant(20u), exponent)));
+
+ /* a = implicit leading one shifted into place, b = integer part of the
+  * mantissa bits */
+ ir_variable *a = factory.make_temp(glsl_type::uint_type, "a");
+ ir_variable *b = factory.make_temp(glsl_type::uint_type, "b");
+
+ /* a_e: iexp > 0 */
+ ir_expression *a_e = greater(iexp, factory.constant(0));
+ /* b_e: value > 0.0 (uses ir->operands[0] directly, without clone or
+  * per-element swizzle) */
+ ir_expression *b_e = greater(ir->operands[0], double_zero->clone(ir, NULL));
+
+ /* integer value = (1 << exponent) + (mantissa >> nmb) */
+ factory.emit(assign(a, lshift(factory.constant(1u), exponent)));
+ factory.emit(assign(b, rshift(mantissa, nmb)));
+
+ /* if value > 0.0 and iexp > 0: result = double(int(a + b)), converting
+  * through u2i and then i2d */
+ factory.emit(if_tree(expr(ir_binop_all_equal, b_e, factory.constant(true)),
+ if_tree(expr(ir_binop_all_equal, a_e, factory.constant(true)),
+ assign(result_dref, expr(ir_unop_i2d, (expr(ir_unop_u2i, add(a, b))))))));
+
+ results[elem] = result_dref;
+ }
+
+ i.insert_before(&instructions);
+
+ /* Rebuild the vector from the per-element results; entries of results[]
+  * at or beyond vec_elem are still NULL. */
+ ir->operation = ir_quadop_vector;
+ ir->operands[0] = results[0];
+ ir->operands[1] = results[1];
+ ir->operands[2] = results[2];
+ ir->operands[3] = results[3];
+
+ this->progress = true;
+}
+
+
void
lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
{
@@ -1264,6 +1384,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
div_to_mul_rcp(ir);
break;
+ case ir_unop_floor:
+ if (lowering(DFLOOR_TO_ARITH) && ir->operands[0]->type->is_double())
+ dfloor_to_arith(ir);
+ break;
+
case ir_unop_sqrt:
if (lowering(DSQRT_TO_FSQRT) && ir->operands[0]->type->is_double())
dsqrt_to_fsqrt(ir);
@@ -1336,10 +1461,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
dceil_to_dfrac(ir);
break;
+#if 0
case ir_unop_floor:
if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
dfloor_to_dfrac(ir);
break;
+#endif
case ir_unop_round_even:
if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 58d6e77c379..cce29e06132 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -146,7 +146,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
bitfield_insert |
LDEXP_TO_ARITH |
DFREXP_DLDEXP_TO_ARITH |
- DSQRT_TO_FSQRT);
+ DSQRT_TO_FSQRT |
+ DFLOOR_TO_ARITH);
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened.