summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2017-01-16 16:39:06 +0100
committerEmil Velikov <emil.l.velikov@gmail.com>2017-03-14 00:13:12 +0000
commit77b794d85b58ba834c69b9f4ad0a796196e0177d (patch)
tree1b810de3566ffc77e0e513bc7810371d8dd19be4 /src
parent83cf2d72f7fe18f17dc3bdf20bb6c58e0dc23b86 (diff)
glsl: split DIV_TO_MUL_RCP into single- and double-precision flags
Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Tested-by: Glenn Kennard <glenn.kennard@gmail.com> Tested-by: James Harvey <lothmordor@gmail.com> Cc: 17.0 <mesa-stable@lists.freedesktop.org> (cherry picked from commit b71c415c3d288da4b5f533ece42f50f4f20a8c33)
Diffstat (limited to 'src')
-rw-r--r--src/compiler/glsl/ir_optimization.h4
-rw-r--r--src/compiler/glsl/lower_instructions.cpp19
2 files changed, 14 insertions, 9 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 6f2bc321fa7..95af29544d2 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -30,7 +30,7 @@
/* Operations for lower_instructions() */
#define SUB_TO_ADD_NEG 0x01
-#define DIV_TO_MUL_RCP 0x02
+#define FDIV_TO_MUL_RCP 0x02
#define EXP_TO_EXP2 0x04
#define POW_TO_EXP2 0x08
#define LOG_TO_LOG2 0x10
@@ -49,6 +49,8 @@
#define FIND_LSB_TO_FLOAT_CAST 0x20000
#define FIND_MSB_TO_FLOAT_CAST 0x40000
#define IMUL_HIGH_TO_MUL 0x80000
+#define DDIV_TO_MUL_RCP 0x100000
+#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
/**
* \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 372ded180a5..2927660db95 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -54,8 +54,8 @@
* want to recognize add(op0, neg(op1)) or the other way around to
* produce a subtract anyway.
*
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
*
* Many GPUs don't have a divide instruction (945 and 965 included),
@@ -63,9 +63,11 @@
* reciprocal. By breaking the operation down, constant reciprocals
* can get constant folded.
*
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
*
* EXP_TO_EXP2 and LOG_TO_LOG2:
* ----------------------------
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
/* Don't generate new IR that would need to be lowered in an additional
* pass.
*/
- if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+ if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+ (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
div_to_mul_rcp(div_expr);
ir_expression *const floor_expr =
@@ -1588,8 +1591,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
case ir_binop_div:
if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
int_div_to_mul_rcp(ir);
- else if ((ir->operands[1]->type->is_float() ||
- ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+ else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+ (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
div_to_mul_rcp(ir);
break;