diff options
author | Tapani Pälli <tapani.palli@intel.com> | 2014-12-31 11:14:02 +0200 |
---|---|---|
committer | Tapani Pälli <tapani.palli@intel.com> | 2015-01-13 15:01:45 +0200 |
commit | 2e3909057e269b3466ceef8d2573abf82078e5c6 (patch) | |
tree | a7e5c88e39c86210487a5bedf197d1bce6344b1d | |
parent | 0602a7efefebe4da07bc3911aad3b9229b946d70 (diff) |
glsl: dfloor_to_arith WIP (branch: fp64_floor)
Status: works for positive values, but still has problems with negative ones.
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/lower_instructions.cpp | 127 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 3 |
3 files changed, 130 insertions, 1 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 180ae6f0aaf..8f0f024a5bc 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -44,6 +44,7 @@ #define DOPS_TO_DFRAC 0x1000 #define DFREXP_DLDEXP_TO_ARITH 0x2000 #define DSQRT_TO_FSQRT 0x4000 +#define DFLOOR_TO_ARITH 0x8000 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 7868be51cf6..febc87b424c 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -45,6 +45,7 @@ * - SAT_TO_CLAMP * - DOPS_TO_DFRAC * - DSQRT_TO_FSQRT + * - DFLOOR_TO_ARITH * * SUB_TO_ADD_NEG: * --------------- @@ -125,6 +126,10 @@ * -------------- * Splits double square root into exponent division and single precision * square root. + * + * DFLOOR_TO_ARITH + * --------------- + * Provides floor with pure luck. */ #include "main/core.h" /* for M_LOG2E */ @@ -170,6 +175,7 @@ private: void double_lrp(ir_expression *); void dceil_to_dfrac(ir_expression *); void dfloor_to_dfrac(ir_expression *); + void dfloor_to_arith(ir_expression *); void dround_even_to_dfrac(ir_expression *); void dtrunc_to_dfrac(ir_expression *); void dsign_to_csel(ir_expression *); @@ -1095,6 +1101,120 @@ lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) ir->operands[1] = new(ir) ir_dereference_variable(t2); } + +void +lower_instructions_visitor::dfloor_to_arith(ir_expression *ir) +{ + ir_instruction &i = *base_ir; + exec_list instructions; + ir_factory factory; + factory.instructions = &instructions; + factory.mem_ctx = ir; + + const unsigned vec_elem = ir->type->vector_elements; + ir_rvalue *results[4] = {NULL}; + + ir_constant *double_zero = new(ir) ir_constant(0.0, vec_elem); + ir_constant *double_one = new(ir) ir_constant(1.0, vec_elem); + + for (unsigned elem = 0; elem < vec_elem; elem++) { + + ir_variable *result = + factory.make_temp(glsl_type::double_type, "result"); + + ir_dereference *result_dref = 
new(ir) ir_dereference_variable(result); + + /* if (value == 0.0) return 0.0; */ + factory.emit(if_tree(equal(ir->operands[0]->clone(ir, NULL), double_zero), + assign(result, double_zero->clone(ir, NULL)))); + + /* if (value < 0.0) return floor(x) = -floor(abs(x)) - 1.0; */ + factory.emit(if_tree(less(ir->operands[0]->clone(ir, NULL), + double_zero->clone(ir, NULL)), + assign(result, + sub(neg(expr(ir_unop_floor, + abs(ir->operands[0]->clone(ir, NULL)))), double_one)))); + + /* if (value > 0.0) ... */ + ir_variable *unpacked = + factory.make_temp(glsl_type::uvec2_type, "unpacked"); + + factory.emit(assign(unpacked, + expr(ir_unop_unpack_double_2x32, + swizzle(ir->operands[0]->clone(ir, NULL), elem, 1)))); + + ir_rvalue *hi = swizzle_y(unpacked); + + /* extract exponent and mantissa from hi */ + ir_variable *exponent = + factory.make_temp(glsl_type::uint_type, "exponent"); + + ir_variable *iexp = + factory.make_temp(glsl_type::int_type, "iexp"); + + ir_variable *mantissa = + factory.make_temp(glsl_type::uint_type, "mantissa"); + + /* exponent = (bits >> 20) & 0x7ff */ + factory.emit(assign(exponent, + bit_and(rshift(hi, factory.constant(20u)), + factory.constant(0x7ffu)))); + + /* mantissa = bits & 0xfffff; (20 last bits) */ + factory.emit(assign(mantissa, + bit_and(hi->clone(ir, NULL), factory.constant(0xfffffu)))); + + /* remove the double bias */ + factory.emit(assign(iexp, sub(expr(ir_unop_u2i, exponent), factory.constant(1023)))); + factory.emit(assign(exponent, sub(exponent, factory.constant(1023u)))); + + /* if value > 0.0 AND if exponent < 0, floor(x) = 0 */ + factory.emit(if_tree(less(iexp, factory.constant(0)), + assign(result, double_zero->clone(ir, NULL)))); + + /* if value > 0.0 AND exponent == 0, floor(x) = 1 */ + factory.emit(if_tree(equal(iexp, factory.constant(0)), + assign(result, double_one->clone(ir, NULL)))); + + /* calculate MANTISSA_BITS - exp */ + ir_variable *nmb = + factory.make_temp(glsl_type::uint_type, "nmb"); + factory.emit(assign(nmb, 
sub(factory.constant(20u), exponent))); + + /* some temporary helpers */ + ir_variable *a = factory.make_temp(glsl_type::uint_type, "a"); + ir_variable *b = factory.make_temp(glsl_type::uint_type, "b"); + + /* is exponent bigger than zero? */ + ir_expression *a_e = greater(iexp, factory.constant(0)); + /* is value bigger than zero? */ + ir_expression *b_e = greater(ir->operands[0], double_zero->clone(ir, NULL)); + + /* return uint32_t mf = (1 << exp) + (m >> nmb) ... or exp2f(exp) + (m >> nmb) */ + factory.emit(assign(a, lshift(factory.constant(1u), exponent))); + factory.emit(assign(b, rshift(mantissa, nmb))); + + /* if value > 0.0 AND exponent > 0.0, assign result */ + factory.emit(if_tree(expr(ir_binop_all_equal, b_e, factory.constant(true)), + if_tree(expr(ir_binop_all_equal, a_e, factory.constant(true)), + assign(result_dref, expr(ir_unop_i2d, (expr(ir_unop_u2i, add(a, b)))))))); + + results[elem] = result_dref; + } + + i.insert_before(&instructions); + + /* Put the dvec back together */ + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + this->progress = true; +} + + void lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) { @@ -1264,6 +1384,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) div_to_mul_rcp(ir); break; + case ir_unop_floor: + if (lowering(DFLOOR_TO_ARITH) && ir->operands[0]->type->is_double()) + dfloor_to_arith(ir); + break; + case ir_unop_sqrt: if (lowering(DSQRT_TO_FSQRT) && ir->operands[0]->type->is_double()) dsqrt_to_fsqrt(ir); @@ -1336,10 +1461,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) dceil_to_dfrac(ir); break; +#if 0 case ir_unop_floor: if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) dfloor_to_dfrac(ir); break; +#endif case ir_unop_round_even: if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp index 58d6e77c379..cce29e06132 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -146,7 +146,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) bitfield_insert | LDEXP_TO_ARITH | DFREXP_DLDEXP_TO_ARITH | - DSQRT_TO_FSQRT); + DSQRT_TO_FSQRT | + DFLOOR_TO_ARITH); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. |